[PATCH v6] Match: Support more form for scalar unsigned SAT_ADD

2024-05-31 Thread pan2 . li
From: Pan Li 

Update in v6
* Fix more doc build error.

Update in v5
* Fix some doc build error.

Log in v4:
After we support one gassign form of the unsigned .SAT_ADD,  we
would like to support more forms including both the branch and
branchless.  There are 5 other forms of .SAT_ADD,  list as below:

Form 1:
  #define SAT_ADD_U_1(T) \
  T sat_add_u_1_##T(T x, T y) \
  { \
return (T)(x + y) >= x ? (x + y) : -1; \
  }

Form 2:
  #define SAT_ADD_U_2(T) \
  T sat_add_u_2_##T(T x, T y) \
  { \
T ret; \
T overflow = __builtin_add_overflow (x, y, &ret); \
return (T)(-overflow) | ret; \
  }

Form 3:
  #define SAT_ADD_U_3(T) \
  T sat_add_u_3_##T (T x, T y) \
  { \
T ret; \
return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
  }

Form 4:
  #define SAT_ADD_U_4(T) \
  T sat_add_u_4_##T (T x, T y) \
  { \
T ret; \
return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
  }

Form 5:
  #define SAT_ADD_U_5(T) \
  T sat_add_u_5_##T(T x, T y) \
  { \
return (T)(x + y) < x ? -1 : (x + y); \
  }

Take the forms 3 of above as example:

uint64_t
sat_add (uint64_t x, uint64_t y)
{
  uint64_t ret;
  return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
}

Before this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  long unsigned int _2;
  uint64_t _3;
  __complex__ long unsigned int _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  _2 = IMAGPART_EXPR <_6>;
  if (_2 != 0)
goto <bb 4>; [35.00%]
  else
goto <bb 3>; [65.00%]
;;succ:   4
;;3

;;   basic block 3, loop depth 0
;;pred:   2
  _1 = REALPART_EXPR <_6>;
;;succ:   4

;;   basic block 4, loop depth 0
;;pred:   3
;;2
  # _3 = PHI <_1(3), 18446744073709551615(2)>
  return _3;
;;succ:   EXIT
}

After this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _12;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  return _12;
;;succ:   EXIT
}

The flag '^' acts on cond_expr will generate matching code similar as below:

else if (gphi *_a1 = dyn_cast <gphi *> (_d1))
  {
basic_block _b1 = gimple_bb (_a1);
if (gimple_phi_num_args (_a1) == 2)
  {
basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? 
_pb_0_1 : ...
basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb 
(_pb_0_1)) ? _pb_1_1 : ...
gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
  && EDGE_COUNT (_other_db_1->succs) == 1
  && EDGE_PRED (_other_db_1, 0)->src == _db_1)
  {
tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, 
_cond_lhs_1, ...);
bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags  & 
EDGE_TRUE_VALUE;
tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1);
tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0);
switch (TREE_CODE (_p0))
  ...

The below test suites are still running, will update it later.
* The x86 bootstrap test.
* The x86 fully regression test.
* The riscv fully regression test.

gcc/ChangeLog:

* doc/match-and-simplify.texi: Add doc for the matching flag '^'.
* genmatch.cc (enum expr_flag): Add new enum for expr flag.
(dt_node::gen_kids_1): Add cond_expr and flag handling.
(dt_operand::gen_phi_on_cond): Add new func to gen phi matching
on cond_expr.
(parser::parse_expr): Add handling for the expr flag '^'.
* match.pd: Add more form for unsigned .SAT_ADD.
* tree-ssa-math-opts.cc (match_saturation_arith): Rename from.
(match_assign_saturation_arith): Rename to.
(match_phi_saturation_arith): Add new func impl to match the
.SAT_ADD when phi.
(math_opts_dom_walker::after_dom_children): Add phi matching
try for all gimple phi stmt.

Signed-off-by: Pan Li 
---
 gcc/doc/match-and-simplify.texi |  16 
 gcc/genmatch.cc | 126 +++-
 gcc/match.pd|  43 ++-
 gcc/tree-ssa-math-opts.cc   |  51 -
 4 files changed, 231 insertions(+), 5 deletions(-)

diff --git a/gcc/doc/match-and-simplify.texi b/gcc/doc/match-and-simplify.texi
index 01f19e2f62c..63d5af159f5 100644
--- a/gcc/doc/match-and-simplify.texi
+++ b/gcc/doc/match-and-simplify.texi
@@ -361,6 +361,22 @@ Usually the types of the generated result expressions are
 determined from the context, but sometimes like in the above case
 it is required that you specify them explicitly.
 
+Another modifier for generated expressions is @code{^} which
+tells the machinery 

[PATCH v5] Match: Support more form for scalar unsigned SAT_ADD

2024-05-30 Thread pan2 . li
From: Pan Li 

Update in v5
* Fix some doc build error.

Log in v4:
After we support one gassign form of the unsigned .SAT_ADD,  we
would like to support more forms including both the branch and
branchless.  There are 5 other forms of .SAT_ADD,  list as below:

Form 1:
  #define SAT_ADD_U_1(T) \
  T sat_add_u_1_##T(T x, T y) \
  { \
return (T)(x + y) >= x ? (x + y) : -1; \
  }

Form 2:
  #define SAT_ADD_U_2(T) \
  T sat_add_u_2_##T(T x, T y) \
  { \
T ret; \
T overflow = __builtin_add_overflow (x, y, &ret); \
return (T)(-overflow) | ret; \
  }

Form 3:
  #define SAT_ADD_U_3(T) \
  T sat_add_u_3_##T (T x, T y) \
  { \
T ret; \
return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
  }

Form 4:
  #define SAT_ADD_U_4(T) \
  T sat_add_u_4_##T (T x, T y) \
  { \
T ret; \
return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
  }

Form 5:
  #define SAT_ADD_U_5(T) \
  T sat_add_u_5_##T(T x, T y) \
  { \
return (T)(x + y) < x ? -1 : (x + y); \
  }

Take the forms 3 of above as example:

uint64_t
sat_add (uint64_t x, uint64_t y)
{
  uint64_t ret;
  return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
}

Before this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  long unsigned int _2;
  uint64_t _3;
  __complex__ long unsigned int _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  _2 = IMAGPART_EXPR <_6>;
  if (_2 != 0)
goto <bb 4>; [35.00%]
  else
goto <bb 3>; [65.00%]
;;succ:   4
;;3

;;   basic block 3, loop depth 0
;;pred:   2
  _1 = REALPART_EXPR <_6>;
;;succ:   4

;;   basic block 4, loop depth 0
;;pred:   3
;;2
  # _3 = PHI <_1(3), 18446744073709551615(2)>
  return _3;
;;succ:   EXIT
}

After this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _12;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  return _12;
;;succ:   EXIT
}

The flag '^' acts on cond_expr will generate matching code similar as below:

else if (gphi *_a1 = dyn_cast <gphi *> (_d1))
  {
basic_block _b1 = gimple_bb (_a1);
if (gimple_phi_num_args (_a1) == 2)
  {
basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? 
_pb_0_1 : ...
basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb 
(_pb_0_1)) ? _pb_1_1 : ...
gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
  && EDGE_COUNT (_other_db_1->succs) == 1
  && EDGE_PRED (_other_db_1, 0)->src == _db_1)
  {
tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, 
_cond_lhs_1, ...);
bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags  & 
EDGE_TRUE_VALUE;
tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1);
tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0);
switch (TREE_CODE (_p0))
  ...

The below test suites are still running, will update it later.
* The x86 bootstrap test.
* The x86 fully regression test.
* The riscv fully regression test.

gcc/ChangeLog:

* doc/match-and-simplify.texi: Add doc for the matching flag '^'.
* genmatch.cc (enum expr_flag): Add new enum for expr flag.
(dt_node::gen_kids_1): Add cond_expr and flag handling.
(dt_operand::gen_phi_on_cond): Add new func to gen phi matching
on cond_expr.
(parser::parse_expr): Add handling for the expr flag '^'.
* match.pd: Add more form for unsigned .SAT_ADD.
* tree-ssa-math-opts.cc (match_saturation_arith): Rename from.
(match_assign_saturation_arith): Rename to.
(match_phi_saturation_arith): Add new func impl to match the
.SAT_ADD when phi.
(math_opts_dom_walker::after_dom_children): Add phi matching
try for all gimple phi stmt.

Signed-off-by: Pan Li 
---
 gcc/doc/match-and-simplify.texi |  16 
 gcc/genmatch.cc | 126 +++-
 gcc/match.pd|  43 ++-
 gcc/tree-ssa-math-opts.cc   |  51 -
 4 files changed, 231 insertions(+), 5 deletions(-)

diff --git a/gcc/doc/match-and-simplify.texi b/gcc/doc/match-and-simplify.texi
index 01f19e2f62c..9c7316755d4 100644
--- a/gcc/doc/match-and-simplify.texi
+++ b/gcc/doc/match-and-simplify.texi
@@ -361,6 +361,22 @@ Usually the types of the generated result expressions are
 determined from the context, but sometimes like in the above case
 it is required that you specify them explicitly.
 
+Another modifier for generated expressions is @code{^} which
+tells the machinery to try more matches for some special 

[PATCH v4] Match: Support more form for scalar unsigned SAT_ADD

2024-05-30 Thread pan2 . li
From: Pan Li 

After we support one gassign form of the unsigned .SAT_ADD,  we
would like to support more forms including both the branch and
branchless.  There are 5 other forms of .SAT_ADD,  list as below:

Form 1:
  #define SAT_ADD_U_1(T) \
  T sat_add_u_1_##T(T x, T y) \
  { \
return (T)(x + y) >= x ? (x + y) : -1; \
  }

Form 2:
  #define SAT_ADD_U_2(T) \
  T sat_add_u_2_##T(T x, T y) \
  { \
T ret; \
T overflow = __builtin_add_overflow (x, y, &ret); \
return (T)(-overflow) | ret; \
  }

Form 3:
  #define SAT_ADD_U_3(T) \
  T sat_add_u_3_##T (T x, T y) \
  { \
T ret; \
return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
  }

Form 4:
  #define SAT_ADD_U_4(T) \
  T sat_add_u_4_##T (T x, T y) \
  { \
T ret; \
return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
  }

Form 5:
  #define SAT_ADD_U_5(T) \
  T sat_add_u_5_##T(T x, T y) \
  { \
return (T)(x + y) < x ? -1 : (x + y); \
  }

Take the forms 3 of above as example:

uint64_t
sat_add (uint64_t x, uint64_t y)
{
  uint64_t ret;
  return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
}

Before this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  long unsigned int _2;
  uint64_t _3;
  __complex__ long unsigned int _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  _2 = IMAGPART_EXPR <_6>;
  if (_2 != 0)
goto <bb 4>; [35.00%]
  else
goto <bb 3>; [65.00%]
;;succ:   4
;;3

;;   basic block 3, loop depth 0
;;pred:   2
  _1 = REALPART_EXPR <_6>;
;;succ:   4

;;   basic block 4, loop depth 0
;;pred:   3
;;2
  # _3 = PHI <_1(3), 18446744073709551615(2)>
  return _3;
;;succ:   EXIT
}

After this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _12;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  return _12;
;;succ:   EXIT
}

The flag '^' acts on cond_expr will generate matching code similar as below:

else if (gphi *_a1 = dyn_cast <gphi *> (_d1))
  {
basic_block _b1 = gimple_bb (_a1);
if (gimple_phi_num_args (_a1) == 2)
  {
basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? 
_pb_0_1 : ...
basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb 
(_pb_0_1)) ? _pb_1_1 : ...
gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
  && EDGE_COUNT (_other_db_1->succs) == 1
  && EDGE_PRED (_other_db_1, 0)->src == _db_1)
  {
tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, 
_cond_lhs_1, ...);
bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags  & 
EDGE_TRUE_VALUE;
tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1);
tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0);
switch (TREE_CODE (_p0))
  ...

The below test suites are still running, will update it later.
* The x86 bootstrap test.
* The x86 fully regression test.
* The riscv fully regression test.

gcc/ChangeLog:

* doc/match-and-simplify.texi: Add doc for the matching flag '^'.
* genmatch.cc (enum expr_flag): Add new enum for expr flag.
(dt_node::gen_kids_1): Add cond_expr and flag handling.
(dt_operand::gen_phi_on_cond): Add new func to gen phi matching
on cond_expr.
(parser::parse_expr): Add handling for the expr flag '^'.
* match.pd: Add more form for unsigned .SAT_ADD.
* tree-ssa-math-opts.cc (match_saturation_arith): Rename from.
(match_assign_saturation_arith): Rename to.
(match_phi_saturation_arith): Add new func impl to match the
.SAT_ADD when phi.
(math_opts_dom_walker::after_dom_children): Add phi matching
try for all gimple phi stmt.

Signed-off-by: Pan Li 
---
 gcc/doc/match-and-simplify.texi |  14 
 gcc/genmatch.cc | 126 +++-
 gcc/match.pd|  43 ++-
 gcc/tree-ssa-math-opts.cc   |  51 -
 4 files changed, 229 insertions(+), 5 deletions(-)

diff --git a/gcc/doc/match-and-simplify.texi b/gcc/doc/match-and-simplify.texi
index 01f19e2f62c..fc0cf6d7552 100644
--- a/gcc/doc/match-and-simplify.texi
+++ b/gcc/doc/match-and-simplify.texi
@@ -361,6 +361,20 @@ Usually the types of the generated result expressions are
 determined from the context, but sometimes like in the above case
 it is required that you specify them explicitly.
 
+Another modifier for generated expressions is @code{^} which
+tells the machinery to try more matches for some special cases.
+Normally the @code{cond} only allows the gimple 

[PATCH v1] Vect: Support IFN SAT_SUB for unsigned vector int

2024-05-29 Thread pan2 . li
From: Pan Li 

This patch would like to support the .SAT_SUB for the unsigned
vector int.  Given we have below example code:

void
vec_sat_sub_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  for (unsigned i = 0; i < n; i++)
out[i] = (x[i] - y[i]) & (-(uint64_t)(x[i] >= y[i]));
}

Before this patch:
void
vec_sat_sub_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _77 = .SELECT_VL (ivtmp_75, POLY_INT_CST [2, 2]);
  ivtmp_56 = _77 * 8;
  vect__4.7_59 = .MASK_LEN_LOAD (vectp_x.5_57, 64B, { -1, ... }, _77, 0);
  vect__6.10_63 = .MASK_LEN_LOAD (vectp_y.8_61, 64B, { -1, ... }, _77, 0);

  mask__7.11_64 = vect__4.7_59 >= vect__6.10_63;
  _66 = .COND_SUB (mask__7.11_64, vect__4.7_59, vect__6.10_63, { 0, ... });

  .MASK_LEN_STORE (vectp_out.15_71, 64B, { -1, ... }, _77, 0, _66);
  vectp_x.5_58 = vectp_x.5_57 + ivtmp_56;
  vectp_y.8_62 = vectp_y.8_61 + ivtmp_56;
  vectp_out.15_72 = vectp_out.15_71 + ivtmp_56;
  ivtmp_76 = ivtmp_75 - _77;
  ...
}

After this patch:
void
vec_sat_sub_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _76 = .SELECT_VL (ivtmp_74, POLY_INT_CST [2, 2]);
  ivtmp_60 = _76 * 8;
  vect__4.7_63 = .MASK_LEN_LOAD (vectp_x.5_61, 64B, { -1, ... }, _76, 0);
  vect__6.10_67 = .MASK_LEN_LOAD (vectp_y.8_65, 64B, { -1, ... }, _76, 0);

  vect_patt_37.11_68 = .SAT_SUB (vect__4.7_63, vect__6.10_67);

  .MASK_LEN_STORE (vectp_out.12_70, 64B, { -1, ... }, _76, 0, 
vect_patt_37.11_68);
  vectp_x.5_62 = vectp_x.5_61 + ivtmp_60;
  vectp_y.8_66 = vectp_y.8_65 + ivtmp_60;
  vectp_out.12_71 = vectp_out.12_70 + ivtmp_60;
  ivtmp_75 = ivtmp_74 - _76;
  ...
}

The below test suites are passed for this patch
* The x86 bootstrap test.
* The x86 fully regression test.
* The riscv fully regression tests.

gcc/ChangeLog:

* match.pd: Add new form for vector mode recog.
* tree-vect-patterns.cc (gimple_unsigned_integer_sat_sub): Add
new match func decl;
(vect_recog_build_binary_gimple_call): Extract helper func to
build gcall with given internal_fn.
(vect_recog_sat_sub_pattern): Add new func impl to recog .SAT_SUB.

Signed-off-by: Pan Li 
---
 gcc/match.pd  | 14 +++
 gcc/tree-vect-patterns.cc | 85 ---
 2 files changed, 84 insertions(+), 15 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 3e334533ff8..81f389855cd 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3100,6 +3100,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
   && types_match (type, @0, @1))))
 
+/* Unsigned saturation sub, case 3 (branchless with gt):
+   SAT_U_SUB = (X - Y) * (X > Y).  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (mult:c (minus @0 @1) (convert (gt @0 @1)))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1))))
+
+/* Unsigned saturation sub, case 4 (branchless with ge):
+   SAT_U_SUB = (X - Y) * (X >= Y).  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (mult:c (minus @0 @1) (convert (ge @0 @1)))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1))))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index a313dc64643..09a7c129493 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4488,6 +4488,32 @@ vect_recog_mult_pattern (vec_info *vinfo,
 }
 
 extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
+extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
+
+static gcall *
+vect_recog_build_binary_gimple_call (vec_info *vinfo, gimple *stmt,
+internal_fn fn, tree *type_out,
+tree op_0, tree op_1)
+{
+  tree itype = TREE_TYPE (op_0);
+  tree vtype = get_vectype_for_scalar_type (vinfo, itype);
+
+  if (vtype != NULL_TREE
+&& direct_internal_fn_supported_p (fn, vtype, OPTIMIZE_FOR_BOTH))
+{
+  gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
+
+  gimple_call_set_lhs (call, vect_recog_temp_ssa_var (itype, NULL));
+  gimple_call_set_nothrow (call, /* nothrow_p */ false);
+  gimple_set_location (call, gimple_location (stmt));
+
+  *type_out = vtype;
+
+  return call;
+}
+
+  return NULL;
+}
 
 /*
  * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
@@ -4510,27 +4536,55 @@ vect_recog_sat_add_pattern (vec_info *vinfo, 
stmt_vec_info stmt_vinfo,
   if (!is_gimple_assign (last_stmt))
 return NULL;
 
-  tree res_ops[2];
+  tree ops[2];
   tree lhs = gimple_assign_lhs (last_stmt);
 
-  if (gimple_unsigned_integer_sat_add (lhs, res_ops, NULL))
+  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL))
 {
-  tree itype = TREE_TYPE (res_ops[0]);
-  tree vtype = get_vectype_for_scalar_type (vinfo, itype);
-
-  if (vtype != 

[PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int

2024-05-28 Thread pan2 . li
From: Pan Li 

This patch would like to add the middle-end presentation for the
saturation sub.  Aka set the result of sub to the min (zero) when it
underflows.  It will take the pattern similar as below.

SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y));

For example for uint8_t, we have

* SAT_SUB (255, 0)   => 255
* SAT_SUB (1, 2) => 0
* SAT_SUB (254, 255) => 0
* SAT_SUB (0, 255)   => 0

Given below SAT_SUB for uint64

uint64_t sat_sub_u64 (uint64_t x, uint64_t y)
{
  return (x - y) & (- (uint64_t)((x >= y)));
}

Before this patch:
uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
{
  _Bool _1;
  long unsigned int _3;
  uint64_t _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _1 = x_4(D) >= y_5(D);
  _3 = x_4(D) - y_5(D);
  _6 = _1 ? _3 : 0;
  return _6;
;;succ:   EXIT
}

After this patch:
uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
{
  uint64_t _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
  return _6;
;;succ:   EXIT
}

The below tests are running for this patch:
*. The riscv fully regression tests.
*. The x86 bootstrap tests.
*. The x86 fully regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* internal-fn.def (SAT_SUB): Add new IFN define for SAT_SUB.
* match.pd: Add new match for SAT_SUB.
* optabs.def (OPTAB_NL): Remove fixed-point for ussub/ssub.
* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_sub): Add
new decl for generated in match.pd.
(build_saturation_binary_arith_call): Add new helper function
to build the gimple call to binary SAT alu.
(match_saturation_arith): Rename from.
(match_unsigned_saturation_add): Rename to.
(match_unsigned_saturation_sub): Add new func to match the
unsigned sat sub.
(math_opts_dom_walker::after_dom_children): Add SAT_SUB matching
try when COND_EXPR.

Signed-off-by: Pan Li 
---
 gcc/internal-fn.def   |  1 +
 gcc/match.pd  | 14 
 gcc/optabs.def|  4 +--
 gcc/tree-ssa-math-opts.cc | 67 +++
 4 files changed, 64 insertions(+), 22 deletions(-)

diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 25badbb86e5..24539716e5b 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -276,6 +276,7 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | 
ECF_NOTHROW, first,
  smulhrs, umulhrs, binary)
 
 DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
+DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary)
 
 DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
 DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index 024e3350465..3e334533ff8 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3086,6 +3086,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_add @0 @1)
  (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
 
+/* Unsigned saturation sub, case 1 (branch with gt):
+   SAT_U_SUB = X > Y ? X - Y : 0  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (cond (gt @0 @1) (minus @0 @1) integer_zerop)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1))))
+
+/* Unsigned saturation sub, case 2 (branch with ge):
+   SAT_U_SUB = X >= Y ? X - Y : 0.  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (cond (ge @0 @1) (minus @0 @1) integer_zerop)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1))))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 3f2cb46aff8..bc2611abdc2 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -118,8 +118,8 @@ OPTAB_NX(sub_optab, "sub$F$a3")
 OPTAB_NX(sub_optab, "sub$Q$a3")
 OPTAB_VL(subv_optab, "subv$I$a3", MINUS, "sub", '3', gen_intv_fp_libfunc)
 OPTAB_VX(subv_optab, "sub$F$a3")
-OPTAB_NL(sssub_optab, "sssub$Q$a3", SS_MINUS, "sssub", '3', 
gen_signed_fixed_libfunc)
-OPTAB_NL(ussub_optab, "ussub$Q$a3", US_MINUS, "ussub", '3', 
gen_unsigned_fixed_libfunc)
+OPTAB_NL(sssub_optab, "sssub$a3", SS_MINUS, "sssub", '3', 
gen_signed_fixed_libfunc)
+OPTAB_NL(ussub_optab, "ussub$a3", US_MINUS, "ussub", '3', 
gen_unsigned_fixed_libfunc)
 OPTAB_NL(smul_optab, "mul$Q$a3", MULT, "mul", '3', gen_int_fp_fixed_libfunc)
 OPTAB_NX(smul_optab, "mul$P$a3")
 OPTAB_NX(smul_optab, "mul$F$a3")
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 62da1c5ee08..4717302b728 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4087,33 +4087,56 @@ arith_overflow_check_p (gimple *stmt, gimple 
*cast_stmt, gimple *_stmt,
 }
 
 extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
+extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
+
+static void

[PATCH v1] Internal-fn: Add new IFN mask_len_strided_load/store

2024-05-27 Thread pan2 . li
From: Pan Li 

This patch would like to add new internal fun for the below 2 IFN.
* mask_len_strided_load
* mask_len_strided_store

The GIMPLE v = MASK_LEN_STRIDED_LOAD (ptr, stride, mask, len, bias) will
be expanded into v = mask_len_strided_load (ptr, stride, mask, len, bias).

The GIMPLE MASK_LEN_STRIDED_STORE (ptr, stride, v, mask, len, bias) will
be expanded into mask_len_strided_store (ptr, stride, v, mask, len, bias).

The below test suites are passed for this patch:
* The x86 bootstrap test.
* The x86 fully regression test.
* The riscv fully regression test.

gcc/ChangeLog:

* doc/md.texi: Add description for mask_len_strided_load/store.
* internal-fn.cc (strided_load_direct): New internal_fn define
for strided_load_direct.
(strided_store_direct): Ditto but for store.
(expand_strided_load_optab_fn): New expand func for
mask_len_strided_load.
(expand_strided_store_optab_fn): Ditto but for store.
(direct_strided_load_optab_supported_p): New define for load
direct optab supported.
(direct_strided_store_optab_supported_p): Ditto but for store.
(internal_fn_len_index): Add len index for both load and store.
(internal_fn_mask_index): Ditto but for mask index.
(internal_fn_stored_value_index): Add stored index.
* internal-fn.def (MASK_LEN_STRIDED_LOAD): New direct fn define
for strided_load.
(MASK_LEN_STRIDED_STORE): Ditto but for stride_store.
* optabs.def (OPTAB_D): New optab define for load and store.

Signed-off-by: Pan Li 
Co-Authored-By: Juzhe-Zhong 
---
 gcc/doc/md.texi | 27 
 gcc/internal-fn.cc  | 75 +
 gcc/internal-fn.def |  6 
 gcc/optabs.def  |  2 ++
 4 files changed, 110 insertions(+)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 5730bda80dc..3d242675c63 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5138,6 +5138,20 @@ Bit @var{i} of the mask is set if element @var{i} of the 
result should
 be loaded from memory and clear if element @var{i} of the result should be 
undefined.
 Mask elements @var{i} with @var{i} > (operand 6 + operand 7) are ignored.
 
+@cindex @code{mask_len_strided_load@var{m}} instruction pattern
+@item @samp{mask_len_strided_load@var{m}}
+Load several separate memory locations into a destination vector of mode 
@var{m}.
+Operand 0 is a destination vector of mode @var{m}.
+Operand 1 is a scalar base address and operand 2 is a scalar stride of Pmode.
+operand 3 is mask operand, operand 4 is length operand and operand 5 is bias 
operand.
+The instruction can be seen as a special case of 
@code{mask_len_gather_load@var{m}@var{n}}
+with an offset vector that is a @code{vec_series} with operand 1 as base and 
operand 2 as step.
+For each element index i load address is operand 1 + @var{i} * operand 2.
+Similar to mask_len_load, the instruction loads at most (operand 4 + operand 
5) elements from memory.
+Element @var{i} of the mask (operand 3) is set if element @var{i} of the 
result should
+be loaded from memory and clear if element @var{i} of the result should be 
zero.
+Mask elements @var{i} with @var{i} > (operand 4 + operand 5) are ignored.
+
 @cindex @code{scatter_store@var{m}@var{n}} instruction pattern
 @item @samp{scatter_store@var{m}@var{n}}
 Store a vector of mode @var{m} into several distinct memory locations.
@@ -5175,6 +5189,19 @@ at most (operand 6 + operand 7) elements of (operand 4) 
to memory.
 Bit @var{i} of the mask is set if element @var{i} of (operand 4) should be 
stored.
 Mask elements @var{i} with @var{i} > (operand 6 + operand 7) are ignored.
 
+@cindex @code{mask_len_strided_store@var{m}} instruction pattern
+@item @samp{mask_len_strided_store@var{m}}
+Store a vector of mode m into several distinct memory locations.
+Operand 0 is a scalar base address and operand 1 is scalar stride of Pmode.
+Operand 2 is the vector of values that should be stored, which is of mode 
@var{m}.
+operand 3 is mask operand, operand 4 is length operand and operand 5 is bias 
operand.
+The instruction can be seen as a special case of 
@code{mask_len_scatter_store@var{m}@var{n}}
+with an offset vector that is a @code{vec_series} with operand 0 as base and 
operand 1 as step.
+For each element index i store address is operand 0 + @var{i} * operand 1.
+Similar to mask_len_store, the instruction stores at most (operand 4 + operand 
5) elements of (operand 2) to memory.
+Element @var{i} of the mask (operand 3) is set if element @var{i} of (operand 2) should be 
stored.
+Mask elements @var{i} with @var{i} > (operand 4 + operand 5) are ignored.
+
 @cindex @code{vec_set@var{m}} instruction pattern
 @item @samp{vec_set@var{m}}
 Set given field in the vector value.  Operand 0 is the vector to modify,
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 9c09026793f..f6e5329cd84 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -159,6 +159,7 @@ init_internal_fns ()
 

[PATCH v3] Match: Support more form for scalar unsigned SAT_ADD

2024-05-27 Thread pan2 . li
From: Pan Li 

After we support one gassign form of the unsigned .SAT_ADD,  we
would like to support more forms including both the branch and
branchless.  There are 5 other forms of .SAT_ADD,  list as below:

Form 1:
  #define SAT_ADD_U_1(T) \
  T sat_add_u_1_##T(T x, T y) \
  { \
return (T)(x + y) >= x ? (x + y) : -1; \
  }

Form 2:
  #define SAT_ADD_U_2(T) \
  T sat_add_u_2_##T(T x, T y) \
  { \
T ret; \
T overflow = __builtin_add_overflow (x, y, &ret); \
return (T)(-overflow) | ret; \
  }

Form 3:
  #define SAT_ADD_U_3(T) \
  T sat_add_u_3_##T (T x, T y) \
  { \
T ret; \
return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
  }

Form 4:
  #define SAT_ADD_U_4(T) \
  T sat_add_u_4_##T (T x, T y) \
  { \
T ret; \
return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
  }

Form 5:
  #define SAT_ADD_U_5(T) \
  T sat_add_u_5_##T(T x, T y) \
  { \
return (T)(x + y) < x ? -1 : (x + y); \
  }

Take the forms 3 of above as example:

uint64_t
sat_add (uint64_t x, uint64_t y)
{
  uint64_t ret;
  return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
}

Before this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  long unsigned int _2;
  uint64_t _3;
  __complex__ long unsigned int _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  _2 = IMAGPART_EXPR <_6>;
  if (_2 != 0)
goto <bb 4>; [35.00%]
  else
goto <bb 3>; [65.00%]
;;succ:   4
;;3

;;   basic block 3, loop depth 0
;;pred:   2
  _1 = REALPART_EXPR <_6>;
;;succ:   4

;;   basic block 4, loop depth 0
;;pred:   3
;;2
  # _3 = PHI <_1(3), 18446744073709551615(2)>
  return _3;
;;succ:   EXIT
}

After this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _12;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  return _12;
;;succ:   EXIT
}

The below test suites are still running, will update it later.
* The x86 bootstrap test.
* The x86 fully regression test.
* The riscv fully regression test.

gcc/ChangeLog:

* genmatch.cc (dt_node::gen_kids): Add new arg of predicate id.
(allow_phi_predicate_p): New func impl to check the phi
predicate is allowed or not.
(dt_node::gen_kids_1): Add COND_EXPR gen for phi node if allowed.
(dt_operand::gen_phi_on_cond):
(write_predicate): Init the predicate id before gen_kids.
* match.pd: Add more forms of unsigned_integer_sat_add and
comments.
* tree-ssa-math-opts.cc (match_saturation_arith): Rename from.
(match_assign_saturation_arith): Rename to.
(match_phi_saturation_arith): New func impl to match phi.
(math_opts_dom_walker::after_dom_children): Add phi match for
each bb.

Signed-off-by: Pan Li 
---
 gcc/genmatch.cc   | 123 --
 gcc/match.pd  |  43 -
 gcc/tree-ssa-math-opts.cc |  51 +++-
 3 files changed, 210 insertions(+), 7 deletions(-)

diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index f1e0e7abe0c..816d2dafd23 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -1767,6 +1767,7 @@ public:
   unsigned level;
   dt_node *parent;
   vec kids;
+  const char *id;
 
   /* Statistics.  */
   unsigned num_leafs;
@@ -1786,7 +1787,7 @@ public:
   virtual void gen (FILE *, int, bool, int) {}
 
   void gen_kids (FILE *, int, bool, int);
-  void gen_kids_1 (FILE *, int, bool, int,
+  void gen_kids_1 (FILE *, const char *, int, bool, int,
   const vec &, const vec &,
   const vec &, const vec &,
   const vec &, const vec &);
@@ -1819,6 +1820,7 @@ public:
 
   char *get_name (char *);
   void gen_opname (char *, unsigned);
+  void gen_phi_on_cond (FILE *, int, bool, int);
 };
 
 /* Leaf node of the decision tree, used for DT_SIMPLIFY.  */
@@ -3173,7 +3175,7 @@ dt_node::gen_kids (FILE *f, int indent, bool gimple, int 
depth)
 for what we have collected sofar.  */
  fns.qsort (fns_cmp);
  generic_fns.qsort (fns_cmp);
- gen_kids_1 (f, indent, gimple, depth, gimple_exprs, generic_exprs,
+ gen_kids_1 (f, id, indent, gimple, depth, gimple_exprs, generic_exprs,
  fns, generic_fns, preds, others);
  /* And output the true operand itself.  */
  kids[i]->gen (f, indent, gimple, depth);
@@ -3191,14 +3193,21 @@ dt_node::gen_kids (FILE *f, int indent, bool gimple, 
int depth)
   /* Generate code for the remains.  */
   fns.qsort (fns_cmp);
   generic_fns.qsort (fns_cmp);
-  gen_kids_1 (f, indent, gimple, depth, gimple_exprs, generic_exprs,
+  gen_kids_1 (f, id, indent, gimple, depth, gimple_exprs, generic_exprs,
  fns, generic_fns, preds, others);
 }
 
+static bool
+allow_phi_predicate_p (const char *id)
+{
+  return id && strcmp (id, "unsigned_integer_sat_add") == 0;
+}
+
 /* 

[PATCH v1] Gen-Match: Fix gen_kids_1 right hand braces mis-alignment

2024-05-25 Thread pan2 . li
From: Pan Li 

Noticed that the right-hand braces emitted by gen_kids_1 are mis-aligned, as below:

  if ((_q50 == _q20 && ! TREE_SIDE_EFFECTS (...
{
  if ((_q51 == _q21 && ! TREE_SIDE_EFFECTS (...
{
  {
tree captures[2] ATTRIBUTE_UNUSED = {...
{
  res_ops[0] = captures[0];
  res_ops[1] = captures[1];
  if (UNLIKELY (debug_dump)) ...
  return true;
}
  }
}
}
}  // mis-aligned here.
 }

The tests below passed for this patch:
* The x86 bootstrap test.
* The x86 full regression test.

gcc/ChangeLog:

* genmatch.cc (dt_node::gen_kids_1): Fix indent mis-alignment.

Signed-off-by: Pan Li 
---
 gcc/genmatch.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index c982c95b70f..f1e0e7abe0c 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -3428,7 +3428,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, 
int depth,
  child_opname, kid_opname, j);
}
   preds[i]->gen_kids (f, indent + 4, gimple, depth);
-  fprintf (f, "}\n");
+  fprintf_indent (f, indent, "  }\n");
   indent -= 2;
   fprintf_indent (f, indent, "}\n");
 }
-- 
2.34.1



[PATCH v4] Match: Add overloaded types_match to avoid code dup [NFC]

2024-05-22 Thread pan2 . li
From: Pan Li 

There are several match patterns for SAT related cases, and they contain
duplicated code to check that the dest, op_0 and op_1 have the same tree
type, aka a ternary tree type match.  Thus, add an overloaded types_match
func to do this and avoid the match code duplication.

The below test suites are passed for this patch:
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 regression test.

gcc/ChangeLog:

* generic-match-head.cc (types_match): Add overloaded types_match
for 3 types.
* gimple-match-head.cc (types_match): Ditto.
* match.pd: Leverage overloaded types_match.

Signed-off-by: Pan Li 
---
 gcc/generic-match-head.cc | 14 ++
 gcc/gimple-match-head.cc  | 14 ++
 gcc/match.pd  | 30 ++
 3 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/gcc/generic-match-head.cc b/gcc/generic-match-head.cc
index 0d3f648fe8d..8d8ecfaeb1d 100644
--- a/gcc/generic-match-head.cc
+++ b/gcc/generic-match-head.cc
@@ -59,6 +59,20 @@ types_match (tree t1, tree t2)
   return TYPE_MAIN_VARIANT (t1) == TYPE_MAIN_VARIANT (t2);
 }
 
+/* Routine to determine if the types T1, T2 and T3 are effectively
+   the same for GENERIC.  If T1, T2 or T3 is not a type, the test
+   applies to their TREE_TYPE.  */
+
+static inline bool
+types_match (tree t1, tree t2, tree t3)
+{
+  t1 = TYPE_P (t1) ? t1 : TREE_TYPE (t1);
+  t2 = TYPE_P (t2) ? t2 : TREE_TYPE (t2);
+  t3 = TYPE_P (t3) ? t3 : TREE_TYPE (t3);
+
+  return types_match (t1, t2) && types_match (t2, t3);
+}
+
 /* Return if T has a single use.  For GENERIC, we assume this is
always true.  */
 
diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index 5f8a1a1ad8e..2b7f746ab13 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -79,6 +79,20 @@ types_match (tree t1, tree t2)
   return types_compatible_p (t1, t2);
 }
 
+/* Routine to determine if the types T1, T2 and T3 are effectively
+   the same for GIMPLE.  If T1, T2 or T3 is not a type, the test
+   applies to their TREE_TYPE.  */
+
+static inline bool
+types_match (tree t1, tree t2, tree t3)
+{
+  t1 = TYPE_P (t1) ? t1 : TREE_TYPE (t1);
+  t2 = TYPE_P (t2) ? t2 : TREE_TYPE (t2);
+  t3 = TYPE_P (t3) ? t3 : TREE_TYPE (t3);
+
+  return types_match (t1, t2) && types_match (t2, t3);
+}
+
 /* Return if T has a single use.  For GIMPLE, we also allow any
non-SSA_NAME (ie constants) and zero uses to cope with uses
that aren't linked up yet.  */
diff --git a/gcc/match.pd b/gcc/match.pd
index 35e3d82b131..7081d76d56a 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3048,38 +3048,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* Unsigned Saturation Add */
 (match (usadd_left_part_1 @0 @1)
  (plus:c @0 @1)
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
 
 (match (usadd_left_part_2 @0 @1)
  (realpart (IFN_ADD_OVERFLOW:c @0 @1))
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
 
 (match (usadd_right_part_1 @0 @1)
  (negate (convert (lt (plus:c @0 @1) @0)))
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
 
 (match (usadd_right_part_1 @0 @1)
  (negate (convert (gt @0 (plus:c @0 @1
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
 
 (match (usadd_right_part_2 @0 @1)
  (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
 
 /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
because the sub part of left_part_2 cannot work with right_part_1.
-- 
2.34.1



[PATCH v2] Match: Support branch form for unsigned SAT_ADD

2024-05-22 Thread pan2 . li
From: Pan Li 

This patch would like to support the branch form for unsigned
SAT_ADD.  For example as below:

uint64_t
sat_add (uint64_t x, uint64_t y)
{
  return (uint64_t) (x + y) >= x ? (x + y) : -1;
}

Different from the branchless version, we leverage simplify to
convert the branch version of SAT_ADD into the branchless one if and
only if the backend supports IFN_SAT_ADD.  Thus, the backend has the
ability to choose the branch or branchless implementation of .SAT_ADD.
For example, some targets can handle branchy code more optimally.

When the target implements IFN_SAT_ADD for unsigned, before this
patch:
uint64_t sat_add_u_1_uint64_t (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  uint64_t _2;
  __complex__ long unsigned int _6;
  long unsigned int _7;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .ADD_OVERFLOW (x_3(D), y_4(D));
  _1 = REALPART_EXPR <_6>;
  _7 = IMAGPART_EXPR <_6>;
  if (_7 == 0)
    goto <bb 4>; [65.00%]
  else
    goto <bb 3>; [35.00%]
;;succ:   4
;;3

;;   basic block 3, loop depth 0
;;pred:   2
;;succ:   4

;;   basic block 4, loop depth 0
;;pred:   3
;;2
  # _2 = PHI <18446744073709551615(3), _1(2)>
  return _2;
;;succ:   EXIT

}

After this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _9;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _9 = .SAT_ADD (x_3(D), y_4(D)); [tail call]
  return _9;
;;succ:   EXIT
}

The below test suites are passed for this patch:
* The x86 bootstrap test.
* The x86 fully regression test.
* The riscv fully regression test.

gcc/ChangeLog:

* match.pd: Add new simplify to convert branch SAT_ADD into
branchless, if and only if the backend implements the IFN.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 20 
 1 file changed, 20 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 35e3d82b131..fd8140e6641 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3096,6 +3096,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_add @0 @1)
  (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
 
+#if GIMPLE
+
+/* Simplify the branch version of SAT_ADD into branchless if and only if
+   the backend has supported the IFN_SAT_ADD.  Thus, the backend has the
+   ability to choose branch or branchless implementation of .SAT_ADD.  */
+
+(simplify
+ (cond (ge (plus:c@2 @0 @1) @0) @2 integer_minus_onep)
+  (if (ternary_integer_types_match_p (type, @0, @1) && TYPE_UNSIGNED (type)
+   && direct_internal_fn_supported_p (IFN_SAT_ADD, type, 
OPTIMIZE_FOR_BOTH))
+   (bit_ior @2 (negate (convert (lt @2 @0))
+
+(simplify
+ (cond (le @0 (plus:c@2 @0 @1)) @2 integer_minus_onep)
+  (if (ternary_integer_types_match_p (type, @0, @1) && TYPE_UNSIGNED (type)
+   && direct_internal_fn_supported_p (IFN_SAT_ADD, type, 
OPTIMIZE_FOR_BOTH))
+   (bit_ior @2 (negate (convert (lt @2 @0))
+
+#endif
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
-- 
2.34.1



[PATCH v2] Match: Support __builtin_add_overflow branch form for unsigned SAT_ADD

2024-05-21 Thread pan2 . li
From: Pan Li 

This patch would like to support the __builtin_add_overflow branch form for
unsigned SAT_ADD.  For example as below:

uint64_t
sat_add (uint64_t x, uint64_t y)
{
  uint64_t ret;
  return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
}

Different from the branchless version, we leverage simplify to
convert the branch version of SAT_ADD into the branchless one if and
only if the backend supports IFN_SAT_ADD.  Thus, the backend has the
ability to choose the branch or branchless implementation of .SAT_ADD.
For example, some targets can handle branchy code more optimally.

When the target implements IFN_SAT_ADD for unsigned, before this
patch:

uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  long unsigned int _2;
  uint64_t _3;
  __complex__ long unsigned int _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  _2 = IMAGPART_EXPR <_6>;
  if (_2 != 0)
    goto <bb 4>; [35.00%]
  else
    goto <bb 3>; [65.00%]
;;succ:   4
;;3

;;   basic block 3, loop depth 0
;;pred:   2
  _1 = REALPART_EXPR <_6>;
;;succ:   4

;;   basic block 4, loop depth 0
;;pred:   3
;;2
  # _3 = PHI <_1(3), 18446744073709551615(2)>
  return _3;
;;succ:   EXIT
}

After this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _12;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  return _12;
;;succ:   EXIT
}

The below test suites are passed for this patch:
* The x86 bootstrap test.
* The x86 fully regression test.
* The riscv fully regression test.

gcc/ChangeLog:

* match.pd: Add new simplify to convert branch SAT_ADD into
branchless, if and only if the backend implements the IFN.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index cff67c84498..2dc77a46e67 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3080,6 +3080,17 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_add @0 @1)
  (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
 
+#if GIMPLE
+
+(simplify
+ (cond (ne (imagpart (IFN_ADD_OVERFLOW@2 @0 @1)) integer_zerop)
+  integer_minus_onep (realpart @2))
+ (if (ternary_integer_types_match_p (type, @0, @1) && TYPE_UNSIGNED (type)
+  && direct_internal_fn_supported_p (IFN_SAT_ADD, type, OPTIMIZE_FOR_BOTH))
+  (bit_ior (plus@3 @0 @1) (negate (convert (lt @3 @0))
+
+#endif
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
-- 
2.34.1



[PATCH v1 1/2] Match: Support __builtin_add_overflow branch form for unsigned SAT_ADD

2024-05-21 Thread pan2 . li
From: Pan Li 

This patch would like to support the __builtin_add_overflow branch form for
unsigned SAT_ADD.  For example as below:

uint64_t
sat_add (uint64_t x, uint64_t y)
{
  uint64_t ret;
  return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
}

Different from the branchless version, we leverage simplify to
convert the branch version of SAT_ADD into the branchless one if and
only if the backend supports IFN_SAT_ADD.  Thus, the backend has the
ability to choose the branch or branchless implementation of .SAT_ADD.
For example, some targets can handle branchy code more optimally.

When the target implements IFN_SAT_ADD for unsigned, before this
patch:

uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  long unsigned int _2;
  uint64_t _3;
  __complex__ long unsigned int _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  _2 = IMAGPART_EXPR <_6>;
  if (_2 != 0)
    goto <bb 4>; [35.00%]
  else
    goto <bb 3>; [65.00%]
;;succ:   4
;;3

;;   basic block 3, loop depth 0
;;pred:   2
  _1 = REALPART_EXPR <_6>;
;;succ:   4

;;   basic block 4, loop depth 0
;;pred:   3
;;2
  # _3 = PHI <_1(3), 18446744073709551615(2)>
  return _3;
;;succ:   EXIT
}

After this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _12;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  return _12;
;;succ:   EXIT
}

The below test suites are passed for this patch:
* The x86 bootstrap test.
* The x86 fully regression test.
* The riscv fully regression test.

gcc/ChangeLog:

* match.pd: Add new simplify to convert branch SAT_ADD into
branchless, if and only if the backend implements the IFN.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 0f9c34fa897..8b9ded98323 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3094,6 +3094,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_add @0 @1)
  (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
 
+#if GIMPLE
+
+(simplify
+ (cond (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
+  integer_minus_onep (realpart @2))
+ (if (direct_internal_fn_supported_p (IFN_SAT_ADD, type, OPTIMIZE_FOR_BOTH))
+  (bit_ior (plus@3 @0 @1) (negate (convert (lt @3 @0))
+
+#endif
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
-- 
2.34.1



[PATCH v1 2/2] RISC-V: Add test cases for __builtin_add_overflow branch form unsigned SAT_ADD

2024-05-21 Thread pan2 . li
From: Pan Li 

Now that the middle end supports the __builtin_add_overflow branch form
of unsigned SAT_ADD, add more test cases to cover the functionality.

The below test suites are passed.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test macro for
branch __builtin_add_overflow form.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-13.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-14.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-15.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-16.c: New test.
* gcc.target/riscv/sat_u_add-13.c: New test.
* gcc.target/riscv/sat_u_add-14.c: New test.
* gcc.target/riscv/sat_u_add-15.c: New test.
* gcc.target/riscv/sat_u_add-16.c: New test.
* gcc.target/riscv/sat_u_add-run-13.c: New test.
* gcc.target/riscv/sat_u_add-run-14.c: New test.
* gcc.target/riscv/sat_u_add-run-15.c: New test.
* gcc.target/riscv/sat_u_add-run-16.c: New test.

Signed-off-by: Pan Li 
---
 .../rvv/autovec/binop/vec_sat_u_add-13.c  | 19 +
 .../rvv/autovec/binop/vec_sat_u_add-14.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-15.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-16.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-run-13.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-14.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-15.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-16.c  | 75 +++
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 25 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-13.c | 19 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-14.c | 21 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-15.c | 18 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-16.c | 17 +
 .../gcc.target/riscv/sat_u_add-run-13.c   | 25 +++
 .../gcc.target/riscv/sat_u_add-run-14.c   | 25 +++
 .../gcc.target/riscv/sat_u_add-run-15.c   | 25 +++
 .../gcc.target/riscv/sat_u_add-run-16.c   | 25 +++
 17 files changed, 579 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-13.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-14.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-15.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-13.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-14.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-15.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-run-13.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-run-14.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-run-15.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-run-16.c

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c
new file mode 100644
index 000..2628ac315b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../../../sat_arith.h"
+
+/*
+** vec_sat_u_add_uint8_t_fmt_4:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*m1,\s*ta,\s*ma
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_4(uint8_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c
new file mode 100644
index 000..4f6e113d9ad
--- /dev/null
+++ 

[PATCH v3] Match: Extract ternary_integer_types_match_p helper func [NFC]

2024-05-20 Thread pan2 . li
From: Pan Li 

There are several match patterns for SAT related cases, and they contain
duplicated code to check that the dest, op_0 and op_1 have the same tree
type, aka a ternary tree type match.  Thus, extract one helper function
to do this and avoid the match code duplication.

The below test suites are passed for this patch:
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 regression test.

gcc/ChangeLog:

* match.pd: Leverage helper func for SAT_ADD match.
* tree.cc (ternary_integer_types_match_p): New func impl to
check if ternary tree types are all integer.
* tree.h (ternary_integer_types_match_p): New func decl.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 28 +++-
 gcc/tree.cc  | 16 
 gcc/tree.h   |  5 +
 3 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 0f9c34fa897..cff67c84498 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -39,7 +39,8 @@ along with GCC; see the file COPYING3.  If not see
HONOR_NANS
uniform_vector_p
expand_vec_cmp_expr_p
-   bitmask_inv_cst_vector_p)
+   bitmask_inv_cst_vector_p
+   ternary_integer_types_match_p)
 
 /* Operator lists.  */
 (define_operator_list tcc_comparison
@@ -3046,38 +3047,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* Unsigned Saturation Add */
 (match (usadd_left_part_1 @0 @1)
  (plus:c @0 @1)
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (ternary_integer_types_match_p (type, @0, @1) && TYPE_UNSIGNED (type
 
 (match (usadd_left_part_2 @0 @1)
  (realpart (IFN_ADD_OVERFLOW:c @0 @1))
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (ternary_integer_types_match_p (type, @0, @1) && TYPE_UNSIGNED (type
 
 (match (usadd_right_part_1 @0 @1)
  (negate (convert (lt (plus:c @0 @1) @0)))
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (ternary_integer_types_match_p (type, @0, @1) && TYPE_UNSIGNED (type
 
 (match (usadd_right_part_1 @0 @1)
  (negate (convert (gt @0 (plus:c @0 @1
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (ternary_integer_types_match_p (type, @0, @1) && TYPE_UNSIGNED (type
 
 (match (usadd_right_part_2 @0 @1)
  (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (ternary_integer_types_match_p (type, @0, @1) && TYPE_UNSIGNED (type
 
 /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
because the sub part of left_part_2 cannot work with right_part_1.
diff --git a/gcc/tree.cc b/gcc/tree.cc
index 6564b002dc1..b59d42c3e47 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -10622,6 +10622,22 @@ uniform_integer_cst_p (tree t)
   return NULL_TREE;
 }
 
+/* Check if the types T1,  T2 and T3 are effectively the same integer type.
+   If T1,  T2 or T3 is not a type, the test applies to their TREE_TYPE.  */
+
+bool
+ternary_integer_types_match_p (tree t1, tree t2, tree t3)
+{
+  t1 = TYPE_P (t1) ? t1 : TREE_TYPE (t1);
+  t2 = TYPE_P (t2) ? t2 : TREE_TYPE (t2);
+  t3 = TYPE_P (t3) ? t3 : TREE_TYPE (t3);
+
+  if (!INTEGRAL_TYPE_P (t1) || !INTEGRAL_TYPE_P (t2) || !INTEGRAL_TYPE_P (t3))
+return false;
+
+  return types_compatible_p (t1, t2) && types_compatible_p (t2, t3);
+}
+
 /* Checks to see if T is a constant or a constant vector and if each element E
adheres to ~E + 1 == pow2 then return ~E otherwise NULL_TREE.  */
 
diff --git a/gcc/tree.h b/gcc/tree.h
index ee2aae332a4..4ac59ac55cb 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -5212,6 +5212,11 @@ extern bool integer_pow2p (const_tree);
 
 extern tree bitmask_inv_cst_vector_p (tree);
 
+/* Check if the types T1,  T2 and T3 are effectively the same integer type.
+   If T1,  T2 or T3 is not a type, the test applies to their TREE_TYPE.  */
+
+extern bool ternary_integer_types_match_p (tree, tree, tree);
+
 /* integer_nonzerop (tree x) is nonzero if X is an integer constant
with a nonzero value.  */
 
-- 
2.34.1



[PATCH v1 2/2] RISC-V: Add test cases for branch form unsigned SAT_ADD

2024-05-20 Thread pan2 . li
From: Pan Li 

Now that the middle end supports the branch form of unsigned SAT_ADD,
add more test cases to cover the functionality.

The below test suites are passed.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add branch form test macro.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-10.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-11.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-12.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-9.c: New test.
* gcc.target/riscv/sat_u_add-10.c: New test.
* gcc.target/riscv/sat_u_add-11.c: New test.
* gcc.target/riscv/sat_u_add-12.c: New test.
* gcc.target/riscv/sat_u_add-9.c: New test.
* gcc.target/riscv/sat_u_add-run-10.c: New test.
* gcc.target/riscv/sat_u_add-run-11.c: New test.
* gcc.target/riscv/sat_u_add-run-12.c: New test.
* gcc.target/riscv/sat_u_add-run-9.c: New test.

Signed-off-by: Pan Li 
---
 .../rvv/autovec/binop/vec_sat_u_add-10.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-11.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-12.c  | 20 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-9.c | 19 +
 .../rvv/autovec/binop/vec_sat_u_add-run-10.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-11.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-12.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-9.c   | 75 +++
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 23 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-10.c | 21 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-11.c | 18 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-12.c | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-9.c  | 19 +
 .../gcc.target/riscv/sat_u_add-run-10.c   | 25 +++
 .../gcc.target/riscv/sat_u_add-run-11.c   | 25 +++
 .../gcc.target/riscv/sat_u_add-run-12.c   | 25 +++
 .../gcc.target/riscv/sat_u_add-run-9.c| 25 +++
 17 files changed, 577 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-12.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-9.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-11.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-12.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-9.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-run-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-run-11.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-run-12.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-run-9.c

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c
new file mode 100644
index 000..db2233f04b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../../../sat_arith.h"
+
+/*
+** vec_sat_u_add_uint16_t_fmt_3:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*m1,\s*ta,\s*ma
+** ...
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_3(uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c
new file mode 100644
index 000..27cd38ea74f
--- /dev/null
+++ 

[PATCH v1 1/2] Match: Support branch form for unsigned SAT_ADD

2024-05-20 Thread pan2 . li
From: Pan Li 

This patch would like to support the branch form for unsigned
SAT_ADD.  For example as below:

uint64_t
sat_add (uint64_t x, uint64_t y)
{
  return (uint64_t) (x + y) >= x ? (x + y) : -1;
}

Different from the branchless version, we leverage simplify to
convert the branch version of SAT_ADD into the branchless one if and
only if the backend supports IFN_SAT_ADD.  Thus, the backend has the
ability to choose the branch or branchless implementation of .SAT_ADD.
For example, some targets can handle branchy code more optimally.

When the target implements IFN_SAT_ADD for unsigned, before this
patch:
uint64_t sat_add_u_1_uint64_t (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  uint64_t _2;
  __complex__ long unsigned int _6;
  long unsigned int _7;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .ADD_OVERFLOW (x_3(D), y_4(D));
  _1 = REALPART_EXPR <_6>;
  _7 = IMAGPART_EXPR <_6>;
  if (_7 == 0)
    goto <bb 4>; [65.00%]
  else
    goto <bb 3>; [35.00%]
;;succ:   4
;;3

;;   basic block 3, loop depth 0
;;pred:   2
;;succ:   4

;;   basic block 4, loop depth 0
;;pred:   3
;;2
  # _2 = PHI <18446744073709551615(3), _1(2)>
  return _2;
;;succ:   EXIT

}

After this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _9;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _9 = .SAT_ADD (x_3(D), y_4(D)); [tail call]
  return _9;
;;succ:   EXIT
}

The below test suites are passed for this patch:
* The x86 bootstrap test.
* The x86 fully regression test.
* The riscv fully regression test.

gcc/ChangeLog:

* match.pd: Add new simplify to convert branch SAT_ADD into
branchless, if and only if the backend implements the IFN.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 0f9c34fa897..0547b57b3a3 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3094,6 +3094,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_add @0 @1)
  (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
 
+#if GIMPLE
+
+/* Simplify the branch version of SAT_ADD into branchless if and only if
+   the backend has supported the IFN_SAT_ADD.  Thus, the backend has the
+   ability to choose branch or branchless implementation of .SAT_ADD.  */
+
+(simplify
+ (cond (ge (plus:c@2 @0 @1) @0) @2 integer_minus_onep)
+  (if (direct_internal_fn_supported_p (IFN_SAT_ADD, type, OPTIMIZE_FOR_BOTH))
+   (bit_ior @2 (negate (convert (lt @2 @0))
+
+(simplify
+ (cond (le @0 (plus:c@2 @0 @1)) @2 integer_minus_onep)
+  (if (direct_internal_fn_supported_p (IFN_SAT_ADD, type, OPTIMIZE_FOR_BOTH))
+   (bit_ior @2 (negate (convert (lt @2 @0))
+
+#endif
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
-- 
2.34.1



[PATCH v2] Match: Extract integer_types_ternary_match helper to avoid code dup [NFC]

2024-05-20 Thread pan2 . li
From: Pan Li 

There are several match patterns for SAT related cases, and they contain
duplicated code to check that the dest, op_0 and op_1 have the same tree
type, aka a ternary tree type match.  Thus, extract one helper function
to do this and avoid the match code duplication.

The below test suites are passed for this patch:
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 regression test.

gcc/ChangeLog:

* generic-match-head.cc (integer_types_ternary_match): New helper
function to check whether ternary tree types match or not.
* gimple-match-head.cc (integer_types_ternary_match): Ditto but
for GIMPLE.
* match.pd: Leverage above helper function to avoid code dup.

Signed-off-by: Pan Li 
---
 gcc/generic-match-head.cc | 17 +
 gcc/gimple-match-head.cc  | 17 +
 gcc/match.pd  | 25 +
 3 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/gcc/generic-match-head.cc b/gcc/generic-match-head.cc
index 0d3f648fe8d..cdd48c7a5cc 100644
--- a/gcc/generic-match-head.cc
+++ b/gcc/generic-match-head.cc
@@ -59,6 +59,23 @@ types_match (tree t1, tree t2)
   return TYPE_MAIN_VARIANT (t1) == TYPE_MAIN_VARIANT (t2);
 }
 
+/* Routine to determine if the types T1,  T2 and T3 are effectively
+   the same integer type for GENERIC.  If T1,  T2 or T3 is not a type,
+   the test applies to their TREE_TYPE.  */
+
+static inline bool
+integer_types_ternary_match (tree t1, tree t2, tree t3)
+{
+  t1 = TYPE_P (t1) ? t1 : TREE_TYPE (t1);
+  t2 = TYPE_P (t2) ? t2 : TREE_TYPE (t2);
+  t3 = TYPE_P (t3) ? t3 : TREE_TYPE (t3);
+
+  if (!INTEGRAL_TYPE_P (t1) || !INTEGRAL_TYPE_P (t2) || !INTEGRAL_TYPE_P (t3))
+return false;
+
+  return types_match (t1, t2) && types_match (t1, t3);
+}
+
 /* Return if T has a single use.  For GENERIC, we assume this is
always true.  */
 
diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index 5f8a1a1ad8e..91f2e56b8ef 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -79,6 +79,23 @@ types_match (tree t1, tree t2)
   return types_compatible_p (t1, t2);
 }
 
+/* Routine to determine if the types T1,  T2 and T3 are effectively
+   the same integer type for GIMPLE.  If T1,  T2 or T3 is not a type,
+   the test applies to their TREE_TYPE.  */
+
+static inline bool
+integer_types_ternary_match (tree t1, tree t2, tree t3)
+{
+  t1 = TYPE_P (t1) ? t1 : TREE_TYPE (t1);
+  t2 = TYPE_P (t2) ? t2 : TREE_TYPE (t2);
+  t3 = TYPE_P (t3) ? t3 : TREE_TYPE (t3);
+
+  if (!INTEGRAL_TYPE_P (t1) || !INTEGRAL_TYPE_P (t2) || !INTEGRAL_TYPE_P (t3))
+return false;
+
+  return types_match (t1, t2) && types_match (t1, t3);
+}
+
 /* Return if T has a single use.  For GIMPLE, we also allow any
non-SSA_NAME (ie constants) and zero uses to cope with uses
that aren't linked up yet.  */
diff --git a/gcc/match.pd b/gcc/match.pd
index 0f9c34fa897..401b52e7573 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3046,38 +3046,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* Unsigned Saturation Add */
 (match (usadd_left_part_1 @0 @1)
  (plus:c @0 @1)
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (integer_types_ternary_match (type, @0, @1) && TYPE_UNSIGNED (type
 
 (match (usadd_left_part_2 @0 @1)
  (realpart (IFN_ADD_OVERFLOW:c @0 @1))
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (integer_types_ternary_match (type, @0, @1) && TYPE_UNSIGNED (type
 
 (match (usadd_right_part_1 @0 @1)
  (negate (convert (lt (plus:c @0 @1) @0)))
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (integer_types_ternary_match (type, @0, @1) && TYPE_UNSIGNED (type
 
 (match (usadd_right_part_1 @0 @1)
  (negate (convert (gt @0 (plus:c @0 @1
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (integer_types_ternary_match (type, @0, @1) && TYPE_UNSIGNED (type
 
 (match (usadd_right_part_2 @0 @1)
  (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (integer_types_ternary_match (type, @0, @1) && TYPE_UNSIGNED (type
 
 /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
because the sub part of left_part_2 cannot work with right_part_1.
-- 
2.34.1



[PATCH v1 2/2] RISC-V: Add test cases for __builtin_add_overflow branchless unsigned SAT_ADD

2024-05-19 Thread pan2 . li
From: Pan Li 

Now that the middle end supports the branchless __builtin_add_overflow
form of unsigned SAT_ADD, add more test cases to cover the functionality.

The below test suites are passed.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add __builtin_add_overflow test
macro.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-5.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-6.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-7.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-8.c: New test.
* gcc.target/riscv/sat_u_add-5.c: New test.
* gcc.target/riscv/sat_u_add-6.c: New test.
* gcc.target/riscv/sat_u_add-7.c: New test.
* gcc.target/riscv/sat_u_add-8.c: New test.
* gcc.target/riscv/sat_u_add-run-5.c: New test.
* gcc.target/riscv/sat_u_add-run-6.c: New test.
* gcc.target/riscv/sat_u_add-run-7.c: New test.
* gcc.target/riscv/sat_u_add-run-8.c: New test.

Signed-off-by: Pan Li 
---
 .../riscv/rvv/autovec/binop/vec_sat_u_add-5.c | 19 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-6.c | 20 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-7.c | 20 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-8.c | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-run-5.c   | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-6.c   | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-7.c   | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-8.c   | 75 +++
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 27 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-5.c  | 19 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-6.c  | 21 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-7.c  | 18 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-8.c  | 17 +
 .../gcc.target/riscv/sat_u_add-run-5.c| 25 +++
 .../gcc.target/riscv/sat_u_add-run-6.c| 25 +++
 .../gcc.target/riscv/sat_u_add-run-7.c| 25 +++
 .../gcc.target/riscv/sat_u_add-run-8.c| 25 +++
 17 files changed, 581 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-5.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-6.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-7.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-run-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-run-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-run-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-run-8.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c
new file mode 100644
index 000..47d83b0927d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../../../sat_arith.h"
+
+/*
+** vec_sat_u_add_uint8_t_fmt_2:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*m1,\s*ta,\s*ma
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_2(uint8_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c
new file mode 100644
index 000..b5d612dba21
--- /dev/null
+++ 

[PATCH v1 1/2] Match: Support __builtin_add_overflow for branchless unsigned SAT_ADD

2024-05-19 Thread pan2 . li
From: Pan Li 

This patch adds support for the branchless form of unsigned SAT_ADD
that is written with __builtin_add_overflow.  For example:

uint64_t sat_add_u(uint64_t x, uint64_t y)
{
  uint64_t ret;
  uint64_t overflow = __builtin_add_overflow (x, y, &ret);

  return (T)(-overflow) | ret;
}

Before this patch:

uint64_t sat_add_u (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  long unsigned int _2;
  long unsigned int _3;
  __complex__ long unsigned int _6;
  uint64_t _8;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  _1 = REALPART_EXPR <_6>;
  _2 = IMAGPART_EXPR <_6>;
  _3 = -_2;
  _8 = _1 | _3;
  return _8;
;;succ:   EXIT

}

After this patch:

uint64_t sat_add_u (uint64_t x, uint64_t y)
{
  uint64_t _8;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _8 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  return _8;
;;succ:   EXIT

}

The following test suites pass with this patch:
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add SAT_ADD right part 2 for __builtin_add_overflow.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index b291e34bbe4..5328e846aff 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3064,6 +3064,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
  (if (TYPE_UNSIGNED (type) && integer_types_ternary_match (type, @0, @1
 
+(match (usadd_right_part_2 @0 @1)
+ (negate (imagpart (IFN_ADD_OVERFLOW:c @0 @1)))
+ (if (TYPE_UNSIGNED (type) && integer_types_ternary_match (type, @0, @1
+
 /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
because the sub part of left_part_2 cannot work with right_part_1.
For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
-- 
2.34.1



[PATCH v1] Match: Extract integer_types_ternary_match helper to avoid code dup [NFC]

2024-05-18 Thread pan2 . li
From: Pan Li 

There are several match patterns for the SAT related cases, and each of
them duplicates the code that checks that the dest, op_0 and op_1 have
the same tree type, i.e. a ternary tree type match.  Thus, extract one
helper function to do this check and avoid the match code duplication.

The below test suites are passed for this patch:
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 regression test.

gcc/ChangeLog:

* generic-match-head.cc (integer_types_ternary_match): New helper
function to check whether the ternary tree types match or not.
* gimple-match-head.cc (integer_types_ternary_match): Ditto but
for GIMPLE match.
* match.pd: Leverage above helper function to avoid code dup.

Signed-off-by: Pan Li 
---
 gcc/generic-match-head.cc | 17 +
 gcc/gimple-match-head.cc  | 17 +
 gcc/match.pd  | 25 +
 3 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/gcc/generic-match-head.cc b/gcc/generic-match-head.cc
index 0d3f648fe8d..cdd48c7a5cc 100644
--- a/gcc/generic-match-head.cc
+++ b/gcc/generic-match-head.cc
@@ -59,6 +59,23 @@ types_match (tree t1, tree t2)
   return TYPE_MAIN_VARIANT (t1) == TYPE_MAIN_VARIANT (t2);
 }
 
+/* Routine to determine if the types T1,  T2 and T3 are effectively
+   the same integer type for GENERIC.  If T1,  T2 or T3 is not a type,
+   the test applies to their TREE_TYPE.  */
+
+static inline bool
+integer_types_ternary_match (tree t1, tree t2, tree t3)
+{
+  t1 = TYPE_P (t1) ? t1 : TREE_TYPE (t1);
+  t2 = TYPE_P (t2) ? t2 : TREE_TYPE (t2);
+  t3 = TYPE_P (t3) ? t3 : TREE_TYPE (t3);
+
+  if (!INTEGRAL_TYPE_P (t1) || !INTEGRAL_TYPE_P (t2) || !INTEGRAL_TYPE_P (t3))
+return false;
+
+  return types_match (t1, t2) && types_match (t1, t3);
+}
+
 /* Return if T has a single use.  For GENERIC, we assume this is
always true.  */
 
diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index 5f8a1a1ad8e..91f2e56b8ef 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -79,6 +79,23 @@ types_match (tree t1, tree t2)
   return types_compatible_p (t1, t2);
 }
 
+/* Routine to determine if the types T1,  T2 and T3 are effectively
+   the same integer type for GIMPLE.  If T1,  T2 or T3 is not a type,
+   the test applies to their TREE_TYPE.  */
+
+static inline bool
+integer_types_ternary_match (tree t1, tree t2, tree t3)
+{
+  t1 = TYPE_P (t1) ? t1 : TREE_TYPE (t1);
+  t2 = TYPE_P (t2) ? t2 : TREE_TYPE (t2);
+  t3 = TYPE_P (t3) ? t3 : TREE_TYPE (t3);
+
+  if (!INTEGRAL_TYPE_P (t1) || !INTEGRAL_TYPE_P (t2) || !INTEGRAL_TYPE_P (t3))
+return false;
+
+  return types_match (t1, t2) && types_match (t1, t3);
+}
+
 /* Return if T has a single use.  For GIMPLE, we also allow any
non-SSA_NAME (ie constants) and zero uses to cope with uses
that aren't linked up yet.  */
diff --git a/gcc/match.pd b/gcc/match.pd
index 0f9c34fa897..b291e34bbe4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3046,38 +3046,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* Unsigned Saturation Add */
 (match (usadd_left_part_1 @0 @1)
  (plus:c @0 @1)
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (TYPE_UNSIGNED (type) && integer_types_ternary_match (type, @0, @1
 
 (match (usadd_left_part_2 @0 @1)
  (realpart (IFN_ADD_OVERFLOW:c @0 @1))
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (TYPE_UNSIGNED (type) && integer_types_ternary_match (type, @0, @1
 
 (match (usadd_right_part_1 @0 @1)
  (negate (convert (lt (plus:c @0 @1) @0)))
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (TYPE_UNSIGNED (type) && integer_types_ternary_match (type, @0, @1
 
 (match (usadd_right_part_1 @0 @1)
  (negate (convert (gt @0 (plus:c @0 @1
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (TYPE_UNSIGNED (type) && integer_types_ternary_match (type, @0, @1
 
 (match (usadd_right_part_2 @0 @1)
  (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@0))
-  && types_match (type, TREE_TYPE (@1)
+ (if (TYPE_UNSIGNED (type) && integer_types_ternary_match (type, @0, @1
 
 /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
because the sub part of left_part_2 cannot work with right_part_1.
-- 
2.34.1



[PATCH v6] RISC-V: Implement IFN SAT_ADD for both the scalar and vector

2024-05-17 Thread pan2 . li
From: Pan Li 

Update in v6:

* Rebase upstream for conflict.

Log for v5:

This patch implements SAT_ADD in the riscv backend as a sample,
for both the scalar and the vector modes.  Take the vector case
below as an example:

void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
}

Before this patch:
vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v0,0(a1)
  vle64.v v1,0(a2)
  slli a4,a5,3
  sub a3,a3,a5
  add a1,a1,a4
  add a2,a2,a4
  vadd.vv v1,v0,v1
  vmsgtu.vv   v0,v0,v1
  vmerge.vim  v1,v1,-1,v0
  vse64.v v1,0(a0)
  ...

After this patch:
vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v1,0(a1)
  vle64.v v2,0(a2)
  slli a4,a5,3
  sub a3,a3,a5
  add a1,a1,a4
  add a2,a2,a4
  vsaddu.vv   v1,v1,v2  <=  Vector Single-Width Saturating Add
  vse64.v v1,0(a0)
  ...

The below test suites are passed for this patch.
* The riscv fully regression tests.
* The aarch64 fully regression tests.
* The x86 bootstrap tests.
* The x86 fully regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* config/riscv/autovec.md (usadd<mode>3): New pattern expand for
the unsigned SAT_ADD in vector mode.
* config/riscv/riscv-protos.h (riscv_expand_usadd): New func decl
to expand usadd<mode>3 pattern.
(expand_vec_usadd): Ditto but for vector.
* config/riscv/riscv-v.cc (emit_vec_saddu): New func impl to emit
the vsadd insn.
(expand_vec_usadd): New func impl to expand usadd<mode>3 for vector.
* config/riscv/riscv.cc (riscv_expand_usadd): New func impl to
expand usadd<mode>3 for scalar.
* config/riscv/riscv.md (usadd<mode>3): New pattern expand for
the unsigned SAT_ADD in scalar mode.
* config/riscv/vector.md: Allow VLS mode for vsaddu.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-3.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-4.c: New test.
* gcc.target/riscv/sat_arith.h: New test.
* gcc.target/riscv/sat_u_add-1.c: New test.
* gcc.target/riscv/sat_u_add-2.c: New test.
* gcc.target/riscv/sat_u_add-3.c: New test.
* gcc.target/riscv/sat_u_add-4.c: New test.
* gcc.target/riscv/sat_u_add-run-1.c: New test.
* gcc.target/riscv/sat_u_add-run-2.c: New test.
* gcc.target/riscv/sat_u_add-run-3.c: New test.
* gcc.target/riscv/sat_u_add-run-4.c: New test.
* gcc.target/riscv/scalar_sat_binary.h: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/autovec.md   | 17 +
 gcc/config/riscv/riscv-protos.h   |  2 +
 gcc/config/riscv/riscv-v.cc   | 16 
 gcc/config/riscv/riscv.cc | 47 
 gcc/config/riscv/riscv.md | 11 +++
 gcc/config/riscv/vector.md| 12 +--
 .../riscv/rvv/autovec/binop/vec_sat_binary.h  | 33 
 .../riscv/rvv/autovec/binop/vec_sat_u_add-1.c | 19 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-2.c | 20 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-3.c | 20 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-4.c | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-run-1.c   | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-2.c   | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-3.c   | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-4.c   | 75 +++
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 31 
 gcc/testsuite/gcc.target/riscv/sat_u_add-1.c  | 19 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-2.c  | 21 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-3.c  | 18 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-4.c  | 17 +
 .../gcc.target/riscv/sat_u_add-run-1.c| 25 +++
 .../gcc.target/riscv/sat_u_add-run-2.c| 25 +++
 .../gcc.target/riscv/sat_u_add-run-3.c| 25 +++
 .../gcc.target/riscv/sat_u_add-run-4.c| 25 +++
 .../gcc.target/riscv/scalar_sat_binary.h  | 27 +++
 25 files changed, 744 insertions(+), 6 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c
 create mode 100644 

[PATCH v2 2/3] RISC-V: Implement vectorizable early exit with vcond_mask_len

2024-05-15 Thread pan2 . li
From: Pan Li 

Now that the vectorizer supports loop lens for vectorizable early exit,
we would like to implement the feature for the RISC-V target.  Given the
example below:

unsigned vect_a[1923];
unsigned vect_b[1923];

void test (unsigned limit, int n)
{
  for (int i = 0; i < n; i++)
{
  vect_b[i] = limit + i;

  if (vect_a[i] > limit)
{
  ret = vect_b[i];
  return ret;
}

  vect_a[i] = limit;
}
}

Before this patch:
  ...
.L8:
  sw    a3,0(a5)
  addiw a0,a0,1
  addi  a4,a4,4
  addi  a5,a5,4
  beq   a1,a0,.L2
.L4:
  sw    a0,0(a4)
  lw    a2,0(a5)
  bleu  a2,a3,.L8
  ret

After this patch:
  ...
.L5:
  vsetvli   a5,a3,e8,mf4,ta,ma
  vmv1r.v   v4,v2
  vsetvli   t4,zero,e32,m1,ta,ma
  vmv.v.x   v1,a5
  vadd.vv   v2,v2,v1
  vsetvli   zero,a5,e32,m1,ta,ma
  vadd.vv   v5,v4,v3
  slli  a6,a5,2
  vle32.v   v1,0(t1)
  vmsltu.vv v1,v3,v1
  vcpop.m   t4,v1
  beq   t4,zero,.L4
  vmv.x.s   a4,v4
.L3:
  ...

The below tests are passed for this patch:
1. The riscv fully regression tests.

gcc/ChangeLog:

* config/riscv/autovec-opt.md
  (*vcond_mask_len_popcount_):
New pattern of vcond_mask_len_popcount for vector bool mode.
* config/riscv/autovec.md (vcond_mask_len_): New pattern
of vcond_mask_len for vector bool mode.
(cbranch4): New pattern for vector bool mode.
* config/riscv/vector-iterators.md: Add new unspec
  UNSPEC_SELECT_MASK.
* config/riscv/vector.md (@pred_popcount): Add
VLS mode to popcount pattern.
(@pred_popcount): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/early-break-1.c: New test.
* gcc.target/riscv/rvv/autovec/early-break-2.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/autovec-opt.md   | 33 ++
 gcc/config/riscv/autovec.md   | 61 +++
 gcc/config/riscv/vector-iterators.md  |  1 +
 gcc/config/riscv/vector.md| 18 +++---
 .../riscv/rvv/autovec/early-break-1.c | 34 +++
 .../riscv/rvv/autovec/early-break-2.c | 37 +++
 6 files changed, 175 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 645dc53d868..04f85d8e455 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1436,3 +1436,36 @@ (define_insn_and_split "*n"
 DONE;
   }
   [(set_attr "type" "vmalu")])
+
+;; Optimization pattern for early break auto-vectorization
+;; vcond_mask_len (mask, ones, zeros, len, bias) + vlmax popcount
+;; -> non vlmax popcount (mask, len)
+(define_insn_and_split "*vcond_mask_len_popcount_"
+  [(set (match_operand:P 0 "register_operand")
+(popcount:P
+ (unspec:VB_VLS [
+  (unspec:VB_VLS [
+   (match_operand:VB_VLS 1 "register_operand")
+   (match_operand:VB_VLS 2 "const_1_operand")
+   (match_operand:VB_VLS 3 "const_0_operand")
+   (match_operand 4 "autovec_length_operand")
+   (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK)
+  (match_operand 6 "autovec_length_operand")
+  (const_int 1)
+  (reg:SI VL_REGNUM)
+  (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
+  "TARGET_VECTOR
+   && can_create_pseudo_p ()
+   && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS 
(mode)).exists ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+riscv_vector::emit_nonvlmax_insn (
+   code_for_pred_popcount (mode, Pmode),
+   riscv_vector::CPOP_OP,
+   operands, operands[4]);
+DONE;
+  }
+  [(set_attr "type" "vector")]
+)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index aa1ae0fe075..1ee3c8052fb 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2612,3 +2612,64 @@ (define_expand "rawmemchr"
 DONE;
   }
 )
+
+;; =
+;; == Early break auto-vectorization patterns
+;; =
+
+;; vcond_mask_len (mask, 1s, 0s, len, bias)
+;; => mask[i] = mask[i] && i < len ? 1 : 0
+(define_insn_and_split "vcond_mask_len_"
+  [(set (match_operand:VB 0 "register_operand")
+(unspec: VB [
+ (match_operand:VB 1 "register_operand")
+ (match_operand:VB 2 "const_1_operand")
+ (match_operand:VB 3 "const_0_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK))]
+  "TARGET_VECTOR
+   && can_create_pseudo_p ()
+   && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS 
(mode)).exists ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+machine_mode mode = riscv_vector::get_vector_mode (Pmode,
+   GET_MODE_NUNITS (mode)).require ();
+rtx reg = gen_reg_rtx (mode);
+

[PATCH v2 3/3] RISC-V: Enable vectorizable early exit testsuite

2024-05-15 Thread pan2 . li
From: Pan Li 

Now that RISC-V supports vectorizable early exit,  we would like to
enable the gcc vect tests for vectorizable early exit.

The vect-early-break_124-pr114403.c test fails to vectorize for now,
because the __builtin_memcpy of 8 bytes fails to be folded into an
int64 assignment during ccp1.  That needs to be improved first, so
mark this test as xfail for RISC-V for now.

The below tests are passed for this patch:
1. The riscv fully regression tests.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/slp-mask-store-1.c: Add pragma novector as it will
have 2 times LOOP VECTORIZED in RISC-V.
* gcc.dg/vect/vect-early-break_124-pr114403.c: Xfail for the
riscv backend.
* lib/target-supports.exp: Add RISC-V backend.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c  | 2 ++
 gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c | 2 +-
 gcc/testsuite/lib/target-supports.exp | 2 ++
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c 
b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
index fdd9032da98..2f80bf89e5e 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
@@ -28,6 +28,8 @@ main ()
 
   if (__builtin_memcmp (x, res, sizeof (x)) != 0)
 abort ();
+
+#pragma GCC novector
   for (int i = 0; i < 32; ++i)
 if (flag[i] != 0 && flag[i] != 1)
   abort ();
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
index 51abf245ccb..101ae1e0eaa 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
@@ -2,7 +2,7 @@
 /* { dg-require-effective-target vect_early_break_hw } */
 /* { dg-require-effective-target vect_long_long } */
 
-/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail riscv*-*-* } } 
} */
 
 #include "tree-vect.h"
 
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 6f5d477b128..ec9baa4f32a 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4099,6 +4099,7 @@ proc check_effective_target_vect_early_break { } {
|| [check_effective_target_arm_v8_neon_ok]
|| [check_effective_target_sse4]
|| [istarget amdgcn-*-*]
+   || [check_effective_target_riscv_v]
}}]
 }
 
@@ -4114,6 +4115,7 @@ proc check_effective_target_vect_early_break_hw { } {
|| [check_effective_target_arm_v8_neon_hw]
|| [check_sse4_hw_available]
|| [istarget amdgcn-*-*]
+   || [check_effective_target_riscv_v_ok]
}}]
 }
 
-- 
2.34.1



[PATCH v2 1/3] Vect: Support loop len in vectorizable early exit

2024-05-15 Thread pan2 . li
From: Pan Li 

This patch adds early break auto-vectorization support for targets that
use length for partial vectorization.  Consider the following example:

unsigned vect_a[802];
unsigned vect_b[802];

void test (unsigned x, int n)
{
  for (int i = 0; i < n; i++)
  {
vect_b[i] = x + i;

if (vect_a[i] > x)
  break;

vect_a[i] = x;
  }
}

We use VCOND_MASK_LEN to generate the equivalent of
(mask && i < len + bias).  The IR for RVV then looks like below:

  ...
  _87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [32, 32]);
  _55 = (int) _87;
  ...
  mask_patt_6.13_69 = vect_cst__62 < vect__3.12_67;
  vec_len_mask_72 = .VCOND_MASK_LEN (mask_patt_6.13_69, { -1, ... }, \
{0, ... }, _87, 0);
  if (vec_len_mask_72 != { 0, ... })
goto ; [5.50%]
  else
goto ; [94.50%]

The below tests are passed for this patch:
1. The riscv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

gcc/ChangeLog:

* tree-vect-stmts.cc (vectorizable_early_exit): Add loop len
handling for one or multiple stmt.

gcc/ChangeLog:

* tree-vect-loop.cc (vect_gen_loop_len_mask): New func to gen
the loop len mask.
* tree-vect-stmts.cc (vectorizable_early_exit): Invoke the
vect_gen_loop_len_mask for 1 or more stmt(s).
* tree-vectorizer.h (vect_gen_loop_len_mask): New func decl
for vect_gen_loop_len_mask.

Signed-off-by: Pan Li 
---
 gcc/tree-vect-loop.cc  | 27 +++
 gcc/tree-vect-stmts.cc | 17 +++--
 gcc/tree-vectorizer.h  |  4 
 3 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 361aec06488..83c0544b6aa 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11416,6 +11416,33 @@ vect_get_loop_len (loop_vec_info loop_vinfo, 
gimple_stmt_iterator *gsi,
   return loop_len;
 }
 
+/* Generate the tree for the loop len mask and return it.  Given the lens,
+   nvectors, vectype, index and factor to gen the len mask as below.
+
+   tree len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias)
+*/
+tree
+vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
+   gimple_stmt_iterator *cond_gsi, vec_loop_lens *lens,
+   unsigned int nvectors, tree vectype, tree stmt,
+   unsigned int index, unsigned int factor)
+{
+  tree all_one_mask = build_all_ones_cst (vectype);
+  tree all_zero_mask = build_zero_cst (vectype);
+  tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, 
index,
+   factor);
+  tree bias = build_int_cst (intQI_type_node,
+LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo));
+  tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, "vec_len_mask");
+  gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5, stmt,
+   all_one_mask, all_zero_mask, len,
+   bias);
+  gimple_call_set_lhs (call, len_mask);
+  gsi_insert_before (cond_gsi, call, GSI_SAME_STMT);
+
+  return len_mask;
+}
+
 /* Scale profiling counters by estimation for LOOP which is vectorized
by factor VF.
If FLAT is true, the loop we started with had unrealistically flat
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index b8a71605f1b..672959501bb 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -12895,7 +12895,9 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info 
stmt_info,
 ncopies = vect_get_num_copies (loop_vinfo, vectype);
 
   vec_loop_masks *masks = _VINFO_MASKS (loop_vinfo);
+  vec_loop_lens *lens = _VINFO_LENS (loop_vinfo);
   bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+  bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
 
   /* Now build the new conditional.  Pattern gimple_conds get dropped during
  codegen so we must replace the original insn.  */
@@ -12959,12 +12961,11 @@ vectorizable_early_exit (vec_info *vinfo, 
stmt_vec_info stmt_info,
{
  if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
  OPTIMIZE_FOR_SPEED))
-   return false;
+   vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1);
  else
vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
}
 
-
   return true;
 }
 
@@ -13017,6 +13018,15 @@ vectorizable_early_exit (vec_info *vinfo, 
stmt_vec_info stmt_info,
  stmts[i], _gsi);
workset.quick_push (stmt_mask);
  }
+  else if (len_loop_p)
+   for (unsigned i = 0; i < stmts.length (); i++)
+ {
+   tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, _gsi,
+   lens, ncopies, vectype,
+   

[PATCH v5 2/3] Vect: Support new IFN SAT_ADD for unsigned vector int

2024-05-14 Thread pan2 . li
From: Pan Li 

For vectorization, we leverage the existing vect pattern recog to find
the pattern similar to the scalar one and let the vectorizer perform
the rest for the standard name usadd<mode>3 in vector mode.
The riscv vector backend has the insn "Vector Single-Width Saturating
Add and Subtract", which can be leveraged when expanding usadd<mode>3
in vector mode.  For example:

void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
}

Before this patch:
void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _80 = .SELECT_VL (ivtmp_78, POLY_INT_CST [2, 2]);
  ivtmp_58 = _80 * 8;
  vect__4.7_61 = .MASK_LEN_LOAD (vectp_x.5_59, 64B, { -1, ... }, _80, 0);
  vect__6.10_65 = .MASK_LEN_LOAD (vectp_y.8_63, 64B, { -1, ... }, _80, 0);
  vect__7.11_66 = vect__4.7_61 + vect__6.10_65;
  mask__8.12_67 = vect__4.7_61 > vect__7.11_66;
  vect__12.15_72 = .VCOND_MASK (mask__8.12_67, { 18446744073709551615,
... }, vect__7.11_66);
  .MASK_LEN_STORE (vectp_out.16_74, 64B, { -1, ... }, _80, 0, vect__12.15_72);
  vectp_x.5_60 = vectp_x.5_59 + ivtmp_58;
  vectp_y.8_64 = vectp_y.8_63 + ivtmp_58;
  vectp_out.16_75 = vectp_out.16_74 + ivtmp_58;
  ivtmp_79 = ivtmp_78 - _80;
  ...
}

After this patch:
void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _62 = .SELECT_VL (ivtmp_60, POLY_INT_CST [2, 2]);
  ivtmp_46 = _62 * 8;
  vect__4.7_49 = .MASK_LEN_LOAD (vectp_x.5_47, 64B, { -1, ... }, _62, 0);
  vect__6.10_53 = .MASK_LEN_LOAD (vectp_y.8_51, 64B, { -1, ... }, _62, 0);
  vect__12.11_54 = .SAT_ADD (vect__4.7_49, vect__6.10_53);
  .MASK_LEN_STORE (vectp_out.12_56, 64B, { -1, ... }, _62, 0, vect__12.11_54);
  ...
}

The below test suites are passed for this patch.
* The riscv fully regression tests.
* The x86 bootstrap tests.
* The x86 fully regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* tree-vect-patterns.cc (gimple_unsigned_integer_sat_add): New
func decl generated by match.pd match.
(vect_recog_sat_add_pattern): New func impl to recog the pattern
for unsigned SAT_ADD.

Signed-off-by: Pan Li 
---
 gcc/tree-vect-patterns.cc | 52 +++
 1 file changed, 52 insertions(+)

diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index dfb7d800526..a313dc64643 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4487,6 +4487,57 @@ vect_recog_mult_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
+
+/*
+ * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
+ *   _7 = _4 + _6;
+ *   _8 = _4 > _7;
+ *   _9 = (long unsigned int) _8;
+ *   _10 = -_9;
+ *   _12 = _7 | _10;
+ *
+ * And then simplified to
+ *   _12 = .SAT_ADD (_4, _6);
+ */
+
+static gimple *
+vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
+   tree *type_out)
+{
+  gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
+
+  if (!is_gimple_assign (last_stmt))
+return NULL;
+
+  tree res_ops[2];
+  tree lhs = gimple_assign_lhs (last_stmt);
+
+  if (gimple_unsigned_integer_sat_add (lhs, res_ops, NULL))
+{
+  tree itype = TREE_TYPE (res_ops[0]);
+  tree vtype = get_vectype_for_scalar_type (vinfo, itype);
+
+  if (vtype != NULL_TREE
+   && direct_internal_fn_supported_p (IFN_SAT_ADD, vtype,
+  OPTIMIZE_FOR_BOTH))
+   {
+ *type_out = vtype;
+ gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, res_ops[0],
+   res_ops[1]);
+
+ gimple_call_set_lhs (call, vect_recog_temp_ssa_var (itype, NULL));
+ gimple_call_set_nothrow (call, /* nothrow_p */ false);
+ gimple_set_location (call, gimple_location (last_stmt));
+
+ vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
+ return call;
+   }
+}
+
+  return NULL;
+}
+
 /* Detect a signed division by a constant that wouldn't be
otherwise vectorized:
 
@@ -6987,6 +7038,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
   { vect_recog_divmod_pattern, "divmod" },
   { vect_recog_mult_pattern, "mult" },
+  { vect_recog_sat_add_pattern, "sat_add" },
   { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
   { vect_recog_gcond_pattern, "gcond" },
   { vect_recog_bool_pattern, "bool" },
-- 
2.34.1



[PATCH v5 3/3] RISC-V: Implement IFN SAT_ADD for both the scalar and vector

2024-05-14 Thread pan2 . li
From: Pan Li 

This patch implements SAT_ADD in the riscv backend as an example
for both scalar and vector modes.  Take the vector case below
as an example:

void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
}

Before this patch:
vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v0,0(a1)
  vle64.v v1,0(a2)
  sllia4,a5,3
  sub a3,a3,a5
  add a1,a1,a4
  add a2,a2,a4
  vadd.vv v1,v0,v1
  vmsgtu.vv   v0,v0,v1
  vmerge.vim  v1,v1,-1,v0
  vse64.v v1,0(a0)
  ...

After this patch:
vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v1,0(a1)
  vle64.v v2,0(a2)
  sllia4,a5,3
  sub a3,a3,a5
  add a1,a1,a4
  add a2,a2,a4
  vsaddu.vv   v1,v1,v2  <=  Vector Single-Width Saturating Add
  vse64.v v1,0(a0)
  ...

The below test suites are passed for this patch.
* The riscv fully regression tests.
* The aarch64 fully regression tests.
* The x86 bootstrap tests.
* The x86 fully regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* config/riscv/autovec.md (usadd3): New pattern expand for
the unsigned SAT_ADD in vector mode.
* config/riscv/riscv-protos.h (riscv_expand_usadd): New func decl
to expand usadd3 pattern.
(expand_vec_usadd): Ditto but for vector.
* config/riscv/riscv-v.cc (emit_vec_saddu): New func impl to emit
the vsadd insn.
(expand_vec_usadd): New func impl to expand usadd3 for vector.
* config/riscv/riscv.cc (riscv_expand_usadd): New func impl to
expand usadd3 for scalar.
* config/riscv/riscv.md (usadd3): New pattern expand for
the unsigned SAT_ADD in scalar mode.
* config/riscv/vector.md: Allow VLS mode for vsaddu.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-3.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-4.c: New test.
* gcc.target/riscv/sat_arith.h: New test.
* gcc.target/riscv/sat_u_add-1.c: New test.
* gcc.target/riscv/sat_u_add-2.c: New test.
* gcc.target/riscv/sat_u_add-3.c: New test.
* gcc.target/riscv/sat_u_add-4.c: New test.
* gcc.target/riscv/sat_u_add-run-1.c: New test.
* gcc.target/riscv/sat_u_add-run-2.c: New test.
* gcc.target/riscv/sat_u_add-run-3.c: New test.
* gcc.target/riscv/sat_u_add-run-4.c: New test.
* gcc.target/riscv/scalar_sat_binary.h: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/autovec.md   | 17 +
 gcc/config/riscv/riscv-protos.h   |  2 +
 gcc/config/riscv/riscv-v.cc   | 16 
 gcc/config/riscv/riscv.cc | 47 
 gcc/config/riscv/riscv.md | 11 +++
 gcc/config/riscv/vector.md| 12 +--
 .../riscv/rvv/autovec/binop/vec_sat_binary.h  | 33 
 .../riscv/rvv/autovec/binop/vec_sat_u_add-1.c | 19 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-2.c | 20 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-3.c | 20 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-4.c | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-run-1.c   | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-2.c   | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-3.c   | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-4.c   | 75 +++
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 31 
 gcc/testsuite/gcc.target/riscv/sat_u_add-1.c  | 19 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-2.c  | 21 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-3.c  | 18 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-4.c  | 17 +
 .../gcc.target/riscv/sat_u_add-run-1.c| 25 +++
 .../gcc.target/riscv/sat_u_add-run-2.c| 25 +++
 .../gcc.target/riscv/sat_u_add-run-3.c| 25 +++
 .../gcc.target/riscv/sat_u_add-run-4.c| 25 +++
 .../gcc.target/riscv/scalar_sat_binary.h  | 27 +++
 25 files changed, 744 insertions(+), 6 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c
 create mode 

[PATCH v5 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

2024-05-14 Thread pan2 . li
From: Pan Li 

This patch adds the middle-end representation for saturating
addition, i.e. the result of the add is set to the maximum value of
the type on overflow.  It matches a pattern similar to the one below.

SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))

Take uint8_t as example, we will have:

* SAT_ADD (1, 254)   => 255.
* SAT_ADD (1, 255)   => 255.
* SAT_ADD (2, 255)   => 255.
* SAT_ADD (255, 255) => 255.

Given below example for the unsigned scalar integer uint64_t:

uint64_t sat_add_u64 (uint64_t x, uint64_t y)
{
  return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
}

Before this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  _Bool _2;
  long unsigned int _3;
  long unsigned int _4;
  uint64_t _7;
  long unsigned int _10;
  __complex__ long unsigned int _11;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
  _1 = REALPART_EXPR <_11>;
  _10 = IMAGPART_EXPR <_11>;
  _2 = _10 != 0;
  _3 = (long unsigned int) _2;
  _4 = -_3;
  _7 = _1 | _4;
  return _7;
;;succ:   EXIT

}

After this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  uint64_t _7;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
  return _7;
;;succ:   EXIT
}

The below tests are passed for this patch:
1. The riscv full regression tests.
2. The x86 bootstrap tests.
3. The x86 full regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD
to the return true switch case(s).
* internal-fn.def (SAT_ADD):  Add new signed optab SAT_ADD.
* match.pd: Add unsigned SAT_ADD match(es).
* optabs.def (OPTAB_NL): Remove fixed-point limitation for
us/ssadd.
* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New
extern func decl generated in match.pd match.
(match_saturation_arith): New func impl to match the saturation arith.
(math_opts_dom_walker::after_dom_children): Try match saturation
arith when IOR expr.

Signed-off-by: Pan Li 
---
 gcc/internal-fn.cc|  1 +
 gcc/internal-fn.def   |  2 ++
 gcc/match.pd  | 51 +++
 gcc/optabs.def|  4 +--
 gcc/tree-ssa-math-opts.cc | 32 
 5 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 0a7053c2286..73045ca8c8c 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn)
 case IFN_UBSAN_CHECK_MUL:
 case IFN_ADD_OVERFLOW:
 case IFN_MUL_OVERFLOW:
+case IFN_SAT_ADD:
 case IFN_VEC_WIDEN_PLUS:
 case IFN_VEC_WIDEN_PLUS_LO:
 case IFN_VEC_WIDEN_PLUS_HI:
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 848bb9dbff3..25badbb86e5 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | 
ECF_NOTHROW, first,
 DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
  smulhrs, umulhrs, binary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
+
 DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
 DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
 DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index 07e743ae464..0f9c34fa897 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3043,6 +3043,57 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|| POINTER_TYPE_P (itype))
   && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))
 
+/* Unsigned Saturation Add */
+(match (usadd_left_part_1 @0 @1)
+ (plus:c @0 @1)
+ (if (INTEGRAL_TYPE_P (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@1)
+
+(match (usadd_left_part_2 @0 @1)
+ (realpart (IFN_ADD_OVERFLOW:c @0 @1))
+ (if (INTEGRAL_TYPE_P (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@1)
+
+(match (usadd_right_part_1 @0 @1)
+ (negate (convert (lt (plus:c @0 @1) @0)))
+ (if (INTEGRAL_TYPE_P (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@1)
+
+(match (usadd_right_part_1 @0 @1)
+ (negate (convert (gt @0 (plus:c @0 @1
+ (if (INTEGRAL_TYPE_P (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@1)
+
+(match (usadd_right_part_2 @0 @1)
+ (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
+ (if (INTEGRAL_TYPE_P (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@0))
+  && types_match (type, 

[committed] RISC-V: Fix format issue for trailing operator [NFC]

2024-05-13 Thread pan2 . li
From: Pan Li 

This patch fixes the trailing operator format issues listed below.

=== ERROR type #1: trailing operator (4 error(s)) ===
gcc/config/riscv/riscv-vector-builtins.cc:4641:39:  if ((exts &
RVV_REQUIRE_ELEN_FP_16) &&
gcc/config/riscv/riscv-vector-builtins.cc:4651:39:  if ((exts &
RVV_REQUIRE_ELEN_FP_32) &&
gcc/config/riscv/riscv-vector-builtins.cc:4661:39:  if ((exts &
RVV_REQUIRE_ELEN_FP_64) &&
gcc/config/riscv/riscv-vector-builtins.cc:4670:36:  if ((exts &
RVV_REQUIRE_ELEN_64) &&

Passed ./contrib/check_GNU_style.sh for this patch, and double
checked that the original patch has no other format issues.

Committed as format change.

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins.cc
(validate_instance_type_required_extensions): Move the trailing
operator to the start of the next line.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-vector-builtins.cc | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index 3fdb4400d70..c08d87a2680 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -4638,8 +4638,8 @@ validate_instance_type_required_extensions (const 
rvv_type_info type,
 {
   uint64_t exts = type.required_extensions;
 
-  if ((exts & RVV_REQUIRE_ELEN_FP_16) &&
-!TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags))
+  if ((exts & RVV_REQUIRE_ELEN_FP_16)
+&& !TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags))
 {
   error_at (EXPR_LOCATION (exp),
"built-in function %qE requires the "
@@ -4648,8 +4648,8 @@ validate_instance_type_required_extensions (const 
rvv_type_info type,
   return false;
 }
 
-  if ((exts & RVV_REQUIRE_ELEN_FP_32) &&
-!TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags))
+  if ((exts & RVV_REQUIRE_ELEN_FP_32)
+&& !TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags))
 {
   error_at (EXPR_LOCATION (exp),
"built-in function %qE requires the "
@@ -4658,8 +4658,8 @@ validate_instance_type_required_extensions (const 
rvv_type_info type,
   return false;
 }
 
-  if ((exts & RVV_REQUIRE_ELEN_FP_64) &&
-!TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags))
+  if ((exts & RVV_REQUIRE_ELEN_FP_64)
+&& !TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags))
 {
   error_at (EXPR_LOCATION (exp),
"built-in function %qE requires the zve64d or v ISA extension",
@@ -4667,8 +4667,8 @@ validate_instance_type_required_extensions (const 
rvv_type_info type,
   return false;
 }
 
-  if ((exts & RVV_REQUIRE_ELEN_64) &&
-!TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags))
+  if ((exts & RVV_REQUIRE_ELEN_64)
+&& !TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags))
 {
   error_at (EXPR_LOCATION (exp),
"built-in function %qE requires the "
-- 
2.34.1



[PATCH v1 3/3] RISC-V: Enable vectorizable early exit test

2024-05-13 Thread pan2 . li
From: Pan Li 

This patch depends on below 2 patches.

https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651459.html
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651460.html

Now that vectorizable early exit is supported in RISC-V, we would
like to enable the gcc vect tests for vectorizable early exit.

The vect-early-break_124-pr114403.c test fails to vectorize for now,
because the __builtin_memcpy of 8 bytes fails to be folded into an
int64 assignment during ccp1.  That needs to be improved first, so
mark this test as xfail for RISC-V for now.

The below tests are passed for this patch:
1. The riscv fully regression tests.
2. The aarch64 fully regression tests.
3. The x86 bootstrap tests.
4. The x86 fully regression tests.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/slp-mask-store-1.c: Add pragma novector as it will
have 2 times LOOP VECTORIZED in RISC-V.
* gcc.dg/vect/vect-early-break_124-pr114403.c: Xfail for the
riscv backend.
* lib/target-supports.exp: Add RISC-V backend.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c  | 2 ++
 gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c | 2 +-
 gcc/testsuite/lib/target-supports.exp | 2 ++
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c 
b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
index fdd9032da98..2f80bf89e5e 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
@@ -28,6 +28,8 @@ main ()
 
   if (__builtin_memcmp (x, res, sizeof (x)) != 0)
 abort ();
+
+#pragma GCC novector
   for (int i = 0; i < 32; ++i)
 if (flag[i] != 0 && flag[i] != 1)
   abort ();
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
index 51abf245ccb..101ae1e0eaa 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
@@ -2,7 +2,7 @@
 /* { dg-require-effective-target vect_early_break_hw } */
 /* { dg-require-effective-target vect_long_long } */
 
-/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail riscv*-*-* } } 
} */
 
 #include "tree-vect.h"
 
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 6f5d477b128..adaa5912588 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4099,6 +4099,7 @@ proc check_effective_target_vect_early_break { } {
|| [check_effective_target_arm_v8_neon_ok]
|| [check_effective_target_sse4]
|| [istarget amdgcn-*-*]
+   || [check_effective_target_riscv_v]
}}]
 }
 
@@ -4114,6 +4115,7 @@ proc check_effective_target_vect_early_break_hw { } {
|| [check_effective_target_arm_v8_neon_hw]
|| [check_sse4_hw_available]
|| [istarget amdgcn-*-*]
+   || [check_effective_target_riscv_v]
}}]
 }
 
-- 
2.34.1



[PATCH v1 2/3] RISC-V: Implement vectorizable early exit with vcond_mask_len

2024-05-13 Thread pan2 . li
From: Pan Li 

This patch depends on below middle-end implementation.

https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651459.html

Now that loop lens are supported in vectorizable early exit, we would
like to implement the feature for the RISC-V target.  Given below example:

unsigned vect_a[1923];
unsigned vect_b[1923];

unsigned test (unsigned limit, int n)
{
  unsigned ret = 0;

  for (int i = 0; i < n; i++)
{
  vect_b[i] = limit + i;

  if (vect_a[i] > limit)
{
  ret = vect_b[i];
  return ret;
}

  vect_a[i] = limit;
}

  return ret;
}

Before this patch:
  ...
.L8:
  swa3,0(a5)
  addiw a0,a0,1
  addi  a4,a4,4
  addi  a5,a5,4
  beq   a1,a0,.L2
.L4:
  swa0,0(a4)
  lwa2,0(a5)
  bleu  a2,a3,.L8
  ret

After this patch:
  ...
.L5:
  vsetvli   a5,a3,e8,mf4,ta,ma
  vmv1r.v   v4,v2
  vsetvli   t4,zero,e32,m1,ta,ma
  vmv.v.x   v1,a5
  vadd.vv   v2,v2,v1
  vsetvli   zero,a5,e32,m1,ta,ma
  vadd.vv   v5,v4,v3
  slli  a6,a5,2
  vle32.v   v1,0(t1)
  vmsltu.vv v1,v3,v1
  vcpop.m   t4,v1
  beq   t4,zero,.L4
  vmv.x.s   a4,v4
.L3:
  ...

The below tests are passed for this patch:
1. The riscv fully regression tests.

gcc/ChangeLog:

* config/riscv/autovec-opt.md 
(*vcond_mask_len_popcount_):
New pattern of vcond_mask_len_popcount for vector bool mode.
* config/riscv/autovec.md (vcond_mask_len_): New pattern
of vcond_mask_len for vector bool mode.
(cbranch4): New pattern for vector bool mode.
* config/riscv/vector-iterators.md: Add new unspec UNSPEC_SELECT_MASK.
* config/riscv/vector.md (@pred_popcount): Add
VLS mode to popcount pattern.
(@pred_popcount): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/early-break-1.c: New test.
* gcc.target/riscv/rvv/autovec/early-break-2.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/autovec-opt.md   | 33 ++
 gcc/config/riscv/autovec.md   | 60 +++
 gcc/config/riscv/vector-iterators.md  |  1 +
 gcc/config/riscv/vector.md| 18 +++---
 .../riscv/rvv/autovec/early-break-1.c | 34 +++
 .../riscv/rvv/autovec/early-break-2.c | 37 
 6 files changed, 174 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 645dc53d868..04f85d8e455 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1436,3 +1436,36 @@ (define_insn_and_split "*n"
 DONE;
   }
   [(set_attr "type" "vmalu")])
+
+;; Optimization pattern for early break auto-vectorization
+;; vcond_mask_len (mask, ones, zeros, len, bias) + vlmax popcount
+;; -> non vlmax popcount (mask, len)
+(define_insn_and_split "*vcond_mask_len_popcount_"
+  [(set (match_operand:P 0 "register_operand")
+(popcount:P
+ (unspec:VB_VLS [
+  (unspec:VB_VLS [
+   (match_operand:VB_VLS 1 "register_operand")
+   (match_operand:VB_VLS 2 "const_1_operand")
+   (match_operand:VB_VLS 3 "const_0_operand")
+   (match_operand 4 "autovec_length_operand")
+   (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK)
+  (match_operand 6 "autovec_length_operand")
+  (const_int 1)
+  (reg:SI VL_REGNUM)
+  (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
+  "TARGET_VECTOR
+   && can_create_pseudo_p ()
+   && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS 
(mode)).exists ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+riscv_vector::emit_nonvlmax_insn (
+   code_for_pred_popcount (mode, Pmode),
+   riscv_vector::CPOP_OP,
+   operands, operands[4]);
+DONE;
+  }
+  [(set_attr "type" "vector")]
+)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index aa1ae0fe075..dfa58b8af69 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2612,3 +2612,63 @@ (define_expand "rawmemchr"
 DONE;
   }
 )
+
+;; =
+;; == Early break auto-vectorization patterns
+;; =
+
+;; vcond_mask_len
+(define_insn_and_split "vcond_mask_len_"
+  [(set (match_operand:VB 0 "register_operand")
+(unspec: VB [
+ (match_operand:VB 1 "register_operand")
+ (match_operand:VB 2 "const_1_operand")
+ (match_operand:VB 3 "const_0_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK))]
+  "TARGET_VECTOR
+   && can_create_pseudo_p ()
+   && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS 
(mode)).exists ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+machine_mode mode = riscv_vector::get_vector_mode (Pmode,
+   

[PATCH v1 1/3] Vect: Support loop len in vectorizable early exit

2024-05-13 Thread pan2 . li
From: Pan Li 

This patch adds early break auto-vectorization support for targets that
use length for partial vectorization.  Consider the following example:

unsigned vect_a[802];
unsigned vect_b[802];

void test (unsigned x, int n)
{
  for (int i = 0; i < n; i++)
  {
    vect_b[i] = x + i;

    if (vect_a[i] > x)
      break;

    vect_a[i] = x;
  }
}

We use VCOND_MASK_LEN to generate the equivalent of
(mask && i < len + bias).  The IR for RVV then looks like below:

  ...
  _87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [32, 32]);
  _55 = (int) _87;
  ...
  mask_patt_6.13_69 = vect_cst__62 < vect__3.12_67;
  vec_len_mask_72 = .VCOND_MASK_LEN (mask_patt_6.13_69, { -1, ... }, \
{0, ... }, _87, 0);
  if (vec_len_mask_72 != { 0, ... })
    goto ; [5.50%]
  else
    goto ; [94.50%]

The below tests are passed for this patch:
1. The riscv fully regression tests.
2. The aarch64 fully regression tests.
3. The x86 bootstrap tests.
4. The x86 fully regression tests.

gcc/ChangeLog:

* tree-vect-stmts.cc (vectorizable_early_exit): Add loop len
handling for one or multiple stmt.

Signed-off-by: Pan Li 
---
 gcc/tree-vect-stmts.cc | 47 --
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 21e8fe98e44..bfd9d66568f 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -12896,7 +12896,9 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info 
stmt_info,
 ncopies = vect_get_num_copies (loop_vinfo, vectype);
 
   vec_loop_masks *masks = _VINFO_MASKS (loop_vinfo);
+  vec_loop_lens *lens = _VINFO_LENS (loop_vinfo);
   bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+  bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
 
   /* Now build the new conditional.  Pattern gimple_conds get dropped during
  codegen so we must replace the original insn.  */
@@ -12960,12 +12962,11 @@ vectorizable_early_exit (vec_info *vinfo, 
stmt_vec_info stmt_info,
{
  if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
  OPTIMIZE_FOR_SPEED))
-   return false;
+   vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1);
  else
vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
}
 
-
   return true;
 }
 
@@ -13018,6 +13019,25 @@ vectorizable_early_exit (vec_info *vinfo, 
stmt_vec_info stmt_info,
  stmts[i], _gsi);
workset.quick_push (stmt_mask);
  }
+  else if (len_loop_p)
+   for (unsigned i = 0; i < stmts.length (); i++)
+ {
+   tree all_ones_mask = build_all_ones_cst (vectype);
+   tree all_zero_mask = build_zero_cst (vectype);
+   tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
+ vectype, i, 1);
+   signed char cst = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+   tree bias = build_int_cst (intQI_type_node, cst);
+   tree len_mask
+ = make_temp_ssa_name (TREE_TYPE (stmts[i]), NULL, "vec_len_mask");
+   gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5,
+ stmts[i], all_ones_mask,
+ all_zero_mask, len, bias);
+   gimple_call_set_lhs (call, len_mask);
+   gsi_insert_before (_gsi, call, GSI_SAME_STMT);
+
+   workset.quick_push (len_mask);
+ }
   else
workset.splice (stmts);
 
@@ -13042,6 +13062,29 @@ vectorizable_early_exit (vec_info *vinfo, 
stmt_vec_info stmt_info,
  new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
   new_temp, _gsi);
}
+  else if (len_loop_p)
+   {
+ /* len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias)
+
+which is equivalent to:
+
+len_mask = (compare_mask && i < len + bias) ? 1 : 0
+ */
+ tree all_ones_mask = build_all_ones_cst (vectype);
+ tree all_zero_mask = build_zero_cst (vectype);
+ tree len
+   = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies, vectype, 0, 1);
+ signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+ tree bias = build_int_cst (intQI_type_node, biasval);
+ tree len_mask
+   = make_temp_ssa_name (TREE_TYPE (new_temp), NULL, "vec_len_mask");
+ gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5,
+   new_temp, all_ones_mask,
+   all_zero_mask, len, bias);
+ gimple_call_set_lhs (call, len_mask);
+ gsi_insert_before (_gsi, call, GSI_SAME_STMT);
+ new_temp = len_mask;
+   }
 }
 
   gcc_assert 

[PATCH v1] RISC-V: Bugfix ICE for RVV intrinisc vfw on _Float16 scalar

2024-05-11 Thread pan2 . li
From: Pan Li 

For the vfw vx format RVV intrinsic, the scalar type _Float16 also
requires the zvfh extension.  Unfortunately, we only check the
vector tree type and miss checking the scalar _Float16 type.  For
example:

vfloat32mf2_t test_vfwsub_wf_f32mf2(vfloat32mf2_t vs2, _Float16 rs1, size_t vl)
{
  return __riscv_vfwsub_wf_f32mf2(vs2, rs1, vl);
}

It should report an error message saying that the zvfh extension is
required, instead of an ICE for an unrecognizable insn.

This patch adds the missing validation for _Float16 in the RVV
intrinsic API.  It reports an error like the one below when zvfh
is not enabled.

error: built-in function '__riscv_vfwsub_wf_f32mf2(vs2,  rs1,  vl)'
  requires the zvfhmin or zvfh ISA extension

PR target/114988

Passed the rv64gcv fully regression tests, included c/c++/fortran.

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins.cc
(validate_instance_type_required_extensions): New func impl to
validate the intrinisc func type ops.
(expand_builtin): Validate instance type before expand.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr114988-1.c: New test.
* gcc.target/riscv/rvv/base/pr114988-2.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-vector-builtins.cc | 51 +++
 .../gcc.target/riscv/rvv/base/pr114988-1.c|  9 
 .../gcc.target/riscv/rvv/base/pr114988-2.c|  9 
 3 files changed, 69 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-2.c

diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index 192a6c230d1..3fdb4400d70 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -4632,6 +4632,54 @@ gimple_fold_builtin (unsigned int code, 
gimple_stmt_iterator *gsi, gcall *stmt)
   return gimple_folder (rfn.instance, rfn.decl, gsi, stmt).fold ();
 }
 
+static bool
+validate_instance_type_required_extensions (const rvv_type_info type,
+   tree exp)
+{
+  uint64_t exts = type.required_extensions;
+
+  if ((exts & RVV_REQUIRE_ELEN_FP_16) &&
+!TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags))
+{
+  error_at (EXPR_LOCATION (exp),
+   "built-in function %qE requires the "
+   "zvfhmin or zvfh ISA extension",
+   exp);
+  return false;
+}
+
+  if ((exts & RVV_REQUIRE_ELEN_FP_32) &&
+!TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags))
+{
+  error_at (EXPR_LOCATION (exp),
+   "built-in function %qE requires the "
+   "zve32f, zve64f, zve64d or v ISA extension",
+   exp);
+  return false;
+}
+
+  if ((exts & RVV_REQUIRE_ELEN_FP_64) &&
+!TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags))
+{
+  error_at (EXPR_LOCATION (exp),
+   "built-in function %qE requires the zve64d or v ISA extension",
+   exp);
+  return false;
+}
+
+  if ((exts & RVV_REQUIRE_ELEN_64) &&
+!TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags))
+{
+  error_at (EXPR_LOCATION (exp),
+   "built-in function %qE requires the "
+   "zve64x, zve64f, zve64d or v ISA extension",
+   exp);
+  return false;
+}
+
+  return true;
+}
+
 /* Expand a call to the RVV function with subcode CODE.  EXP is the call
expression and TARGET is the preferred location for the result.
Return the value of the lhs.  */
@@ -4649,6 +4697,9 @@ expand_builtin (unsigned int code, tree exp, rtx target)
   return target;
 }
 
+  if (!validate_instance_type_required_extensions (rfn.instance.type, exp))
+return target;
+
   return function_expander (rfn.instance, rfn.decl, exp, target).expand ();
 }
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-1.c
new file mode 100644
index 000..b8474804c88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+vfloat32mf2_t test_vfwsub_wf_f32mf2(vfloat32mf2_t vs2, _Float16 rs1, size_t vl)
+{
+  return __riscv_vfwsub_wf_f32mf2(vs2, rs1, vl); /* { dg-error {built-in 
function '__riscv_vfwsub_wf_f32mf2\(vs2,  rs1,  vl\)' requires the zvfhmin or 
zvfh ISA extension} } */
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-2.c
new file mode 100644
index 000..49aa3141af3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+vfloat32mf2_t test_vfwadd_wf_f32mf2(vfloat32mf2_t vs2, _Float16 rs1, 

[PATCH v1] RISC-V: Make full-vec-move1.c test robust for optimization

2024-05-08 Thread pan2 . li
From: Pan Li 

While investigating support for early-break autovec, we noticed that
the test full-vec-move1.c is optimized to 'return 0;' in the body of
main.  This happens because the value of the V-typed variable is a
compile-time constant, so the second loop is treated as
assert (true).

Thus, the ccp4 pass eliminates these statements and just returns 0.

typedef int16_t V __attribute__((vector_size (128)));

int main ()
{
  V v;
  for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
(v)[i] = i;

  V res = v;
  for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
assert (res[i] == i); // will be optimized to assert (true)
}

This patch introduces an extern function that uses res[i], which
prevents the ccp4 pass from optimizing the loop away.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c:
Introduce extern func use to get rid of ccp4 optimization.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
index d73bad4af6f..fae2ae91572 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
@@ -2,11 +2,12 @@
 /* { dg-additional-options "-std=c99 -O3 -march=rv64gcv_zvl128b -mabi=lp64d 
-fno-vect-cost-model -mrvv-vector-bits=zvl" } */
 
 #include 
-#include 
 
 /* This would cause us to emit a vl1r.v for VNx4HImode even when
the hardware vector size vl > 64.  */
 
+extern int16_t test_element (int16_t);
+
 typedef int16_t V __attribute__((vector_size (128)));
 
 int main ()
@@ -14,9 +15,10 @@ int main ()
   V v;
   for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
 (v)[i] = i;
+
   V res = v;
   for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
-assert (res[i] == i);
+test_element (res[i]);
 }
 
 /* { dg-final { scan-assembler-not {vl[1248]r.v} } }  */
-- 
2.34.1



[PATCH v4 3/3] RISC-V: Implement IFN SAT_ADD for both the scalar and vector

2024-05-06 Thread pan2 . li
From: Pan Li 

This patch depends on below middle-end enabling patches for scalar and vector.

https://gcc.gnu.org/pipermail/gcc-patches/2024-May/650822.html
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/650823.html

The patch also implements SAT_ADD in the riscv backend as
a sample for both scalar and vector.  Take the below vector code
as an example:

void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
}

Before this patch:
vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v0,0(a1)
  vle64.v v1,0(a2)
  sllia4,a5,3
  sub a3,a3,a5
  add a1,a1,a4
  add a2,a2,a4
  vadd.vv v1,v0,v1
  vmsgtu.vv   v0,v0,v1
  vmerge.vim  v1,v1,-1,v0
  vse64.v v1,0(a0)
  ...

After this patch:
vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v1,0(a1)
  vle64.v v2,0(a2)
  sllia4,a5,3
  sub a3,a3,a5
  add a1,a1,a4
  add a2,a2,a4
  vsaddu.vv   v1,v1,v2  <=  Vector Single-Width Saturating Add
  vse64.v v1,0(a0)
  ...

The below test suites are passed for this patch.
* The riscv fully regression tests.
* The aarch64 fully regression tests.
* The x86 bootstrap tests.
* The x86 fully regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* config/riscv/autovec.md (usadd<mode>3): New pattern expand for
the unsigned SAT_ADD in vector mode.
* config/riscv/riscv-protos.h (riscv_expand_usadd): New func decl
to expand usadd<mode>3 pattern.
(expand_vec_usadd): Ditto but for vector.
* config/riscv/riscv-v.cc (emit_vec_saddu): New func impl to emit
the vsadd insn.
(expand_vec_usadd): New func impl to expand usadd<mode>3 for vector.
* config/riscv/riscv.cc (riscv_expand_usadd): New func impl to
expand usadd<mode>3 for scalar.
* config/riscv/riscv.md (usadd<mode>3): New pattern expand for
the unsigned SAT_ADD in scalar mode.
* config/riscv/vector.md: Allow VLS mode for vsaddu.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-3.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-4.c: New test.
* gcc.target/riscv/sat_arith.h: New test.
* gcc.target/riscv/sat_u_add-1.c: New test.
* gcc.target/riscv/sat_u_add-2.c: New test.
* gcc.target/riscv/sat_u_add-3.c: New test.
* gcc.target/riscv/sat_u_add-4.c: New test.
* gcc.target/riscv/sat_u_add-run-1.c: New test.
* gcc.target/riscv/sat_u_add-run-2.c: New test.
* gcc.target/riscv/sat_u_add-run-3.c: New test.
* gcc.target/riscv/sat_u_add-run-4.c: New test.
* gcc.target/riscv/scalar_sat_binary.h: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/autovec.md   | 17 +
 gcc/config/riscv/riscv-protos.h   |  2 +
 gcc/config/riscv/riscv-v.cc   | 16 
 gcc/config/riscv/riscv.cc | 47 
 gcc/config/riscv/riscv.md | 11 +++
 gcc/config/riscv/vector.md| 12 +--
 .../riscv/rvv/autovec/binop/vec_sat_binary.h  | 33 
 .../riscv/rvv/autovec/binop/vec_sat_u_add-1.c | 19 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-2.c | 20 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-3.c | 20 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-4.c | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-run-1.c   | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-2.c   | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-3.c   | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-4.c   | 75 +++
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 31 
 gcc/testsuite/gcc.target/riscv/sat_u_add-1.c  | 19 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-2.c  | 21 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-3.c  | 18 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-4.c  | 17 +
 .../gcc.target/riscv/sat_u_add-run-1.c| 25 +++
 .../gcc.target/riscv/sat_u_add-run-2.c| 25 +++
 .../gcc.target/riscv/sat_u_add-run-3.c| 25 +++
 .../gcc.target/riscv/sat_u_add-run-4.c| 25 +++
 .../gcc.target/riscv/scalar_sat_binary.h  | 27 +++
 25 files changed, 744 insertions(+), 6 deletions(-)
 create mode 100644 

[PATCH v4 2/3] VECT: Support new IFN SAT_ADD for unsigned vector int

2024-05-06 Thread pan2 . li
From: Pan Li 

This patch depends on below scalar enabling patch:

https://gcc.gnu.org/pipermail/gcc-patches/2024-May/650822.html

For vectorization, we leverage the existing vect pattern recog to find
the pattern similar to the scalar one and let the vectorizer perform
the rest for the standard name usadd<mode>3 in vector mode.
The riscv vector backend has the insn "Vector Single-Width Saturating
Add and Subtract", which can be leveraged when expanding usadd<mode>3
in vector mode.  For example:

void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
}

Before this patch:
void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _80 = .SELECT_VL (ivtmp_78, POLY_INT_CST [2, 2]);
  ivtmp_58 = _80 * 8;
  vect__4.7_61 = .MASK_LEN_LOAD (vectp_x.5_59, 64B, { -1, ... }, _80, 0);
  vect__6.10_65 = .MASK_LEN_LOAD (vectp_y.8_63, 64B, { -1, ... }, _80, 0);
  vect__7.11_66 = vect__4.7_61 + vect__6.10_65;
  mask__8.12_67 = vect__4.7_61 > vect__7.11_66;
  vect__12.15_72 = .VCOND_MASK (mask__8.12_67, { 18446744073709551615, ... }, 
vect__7.11_66);
  .MASK_LEN_STORE (vectp_out.16_74, 64B, { -1, ... }, _80, 0, vect__12.15_72);
  vectp_x.5_60 = vectp_x.5_59 + ivtmp_58;
  vectp_y.8_64 = vectp_y.8_63 + ivtmp_58;
  vectp_out.16_75 = vectp_out.16_74 + ivtmp_58;
  ivtmp_79 = ivtmp_78 - _80;
  ...
}

After this patch:
void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _62 = .SELECT_VL (ivtmp_60, POLY_INT_CST [2, 2]);
  ivtmp_46 = _62 * 8;
  vect__4.7_49 = .MASK_LEN_LOAD (vectp_x.5_47, 64B, { -1, ... }, _62, 0);
  vect__6.10_53 = .MASK_LEN_LOAD (vectp_y.8_51, 64B, { -1, ... }, _62, 0);
  vect__12.11_54 = .SAT_ADD (vect__4.7_49, vect__6.10_53);
  .MASK_LEN_STORE (vectp_out.12_56, 64B, { -1, ... }, _62, 0, vect__12.11_54);
  ...
}

The below test suites are passed for this patch.
* The riscv fully regression tests.
* The aarch64 fully regression tests.
* The x86 bootstrap tests.
* The x86 fully regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* tree-vect-patterns.cc (gimple_unsigned_integer_sat_add): New func
decl generated by match.pd match.
(vect_recog_sat_add_pattern): New func impl to recog the pattern
for unsigned SAT_ADD.

Signed-off-by: Pan Li 
---
 gcc/tree-vect-patterns.cc | 51 +++
 1 file changed, 51 insertions(+)

diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 87c2acff386..8ffcaf71d5c 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4487,6 +4487,56 @@ vect_recog_mult_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
+
+/*
+ * Try to detect the unsigned saturating add pattern (SAT_ADD), i.e. gimple:
+ *   _7 = _4 + _6;
+ *   _8 = _4 > _7;
+ *   _9 = (long unsigned int) _8;
+ *   _10 = -_9;
+ *   _12 = _7 | _10;
+ * and build the equivalent _12 = .SAT_ADD (_4, _6).  On success set *TYPE_OUT
+ * to the vector type and return the new call; return NULL if STMT_VINFO is
+ * not such an assignment or no supported vector IFN_SAT_ADD is available.
+ */
+
+static gimple *
+vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
+   tree *type_out)
+{
+  gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
+
+  if (!is_gimple_assign (last_stmt))
+return NULL;
+
+  tree res_ops[2]; /* The two addends, filled in on a successful match.  */
+  tree lhs = gimple_assign_lhs (last_stmt);
+
+  if (gimple_unsigned_integer_sat_add (lhs, res_ops, NULL))
+{
+  tree itype = TREE_TYPE (res_ops[0]);
+  tree vtype = get_vectype_for_scalar_type (vinfo, itype);
+
+  if (vtype != NULL_TREE && direct_internal_fn_supported_p (
+   IFN_SAT_ADD, vtype, OPTIMIZE_FOR_SPEED))
+   {
+ *type_out = vtype;
+ gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, res_ops[0],
+   res_ops[1]);
+
+ gimple_call_set_lhs (call, vect_recog_temp_ssa_var (itype, NULL));
+ gimple_call_set_nothrow (call, /* nothrow_p */ false);
+ gimple_set_location (call, gimple_location (last_stmt));
+
+ vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
+ return call;
+   }
+}
+
+  return NULL;
+}
+
 /* Detect a signed division by a constant that wouldn't be
otherwise vectorized:
 
@@ -6987,6 +7037,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
   { vect_recog_divmod_pattern, "divmod" },
   { vect_recog_mult_pattern, "mult" },
+  { vect_recog_sat_add_pattern, "sat_add" },
   { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
   { vect_recog_gcond_pattern, "gcond" },
   { vect_recog_bool_pattern, "bool" },
-- 
2.34.1



[PATCH v4 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

2024-05-06 Thread pan2 . li
From: Pan Li 

This patch adds the middle-end representation for the saturating
add, i.e. the result of the add is set to the max value on overflow.
It matches a pattern similar to the one below.

SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))

Take uint8_t as example, we will have:

* SAT_ADD (1, 254)   => 255.
* SAT_ADD (1, 255)   => 255.
* SAT_ADD (2, 255)   => 255.
* SAT_ADD (255, 255) => 255.

Given below example for the unsigned scalar integer uint64_t:

uint64_t sat_add_u64 (uint64_t x, uint64_t y)
{
  return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
}

Before this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  _Bool _2;
  long unsigned int _3;
  long unsigned int _4;
  uint64_t _7;
  long unsigned int _10;
  __complex__ long unsigned int _11;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
  _1 = REALPART_EXPR <_11>;
  _10 = IMAGPART_EXPR <_11>;
  _2 = _10 != 0;
  _3 = (long unsigned int) _2;
  _4 = -_3;
  _7 = _1 | _4;
  return _7;
;;succ:   EXIT

}

After this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  uint64_t _7;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
  return _7;
;;succ:   EXIT
}

We perform the transform during widen_mult because the sub-expr of
SAT_ADD will be optimized to .ADD_OVERFLOW.  We need to try the .SAT_ADD
pattern first and then .ADD_OVERFLOW, or we may never catch the
.SAT_ADD pattern.  Meanwhile, the isel pass runs after widen_mult, so we
cannot perform the .SAT_ADD pattern match there as the sub-expr will
already have been optimized to .ADD_OVERFLOW.

The below tests are passed for this patch:
1. The riscv fully regression tests.
2. The aarch64 fully regression tests.
3. The x86 bootstrap tests.
4. The x86 fully regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD
to the return true switch case(s).
* internal-fn.def (SAT_ADD):  Add new signed optab SAT_ADD.
* match.pd: Add unsigned SAT_ADD match.
* optabs.def (OPTAB_NL): Remove fixed-point limitation for us/ssadd.
* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New extern
func decl generated in match.pd match.
(match_saturation_arith): New func impl to match the saturation arith.
(math_opts_dom_walker::after_dom_children): Try match saturation
arith.

Signed-off-by: Pan Li 
---
 gcc/internal-fn.cc|  1 +
 gcc/internal-fn.def   |  2 ++
 gcc/match.pd  | 28 
 gcc/optabs.def|  4 ++--
 gcc/tree-ssa-math-opts.cc | 46 +++
 5 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 0a7053c2286..73045ca8c8c 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn)
 case IFN_UBSAN_CHECK_MUL:
 case IFN_ADD_OVERFLOW:
 case IFN_MUL_OVERFLOW:
+case IFN_SAT_ADD:
 case IFN_VEC_WIDEN_PLUS:
 case IFN_VEC_WIDEN_PLUS_LO:
 case IFN_VEC_WIDEN_PLUS_HI:
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 848bb9dbff3..25badbb86e5 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | 
ECF_NOTHROW, first,
 DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
  smulhrs, umulhrs, binary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
+
 DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
 DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
 DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index d401e7503e6..7058e4cbe29 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3043,6 +3043,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|| POINTER_TYPE_P (itype))
   && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))
 
+/* Unsigned Saturation Add */
+(match (usadd_left_part @0 @1)
+ (plus:c @0 @1)
+ (if (INTEGRAL_TYPE_P (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@1)
+
+(match (usadd_right_part @0 @1)
+ (negate (convert (lt (plus:c @0 @1) @0)))
+ (if (INTEGRAL_TYPE_P (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@1)
+
+(match (usadd_right_part @0 @1)
+ (negate (convert (gt @0 (plus:c @0 @1
+ (if (INTEGRAL_TYPE_P (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@1)
+
+/* Unsigned saturation add, case 1 (branchless):
+   SAT_U_ADD = (X + Y) | - ((X + Y) < X) or
+   

[PATCH v4] DSE: Fix ICE after allow vector type in get_stored_val

2024-05-02 Thread pan2 . li
From: Pan Li 

We previously allowed vector types in get_stored_val when the read size
is less than or equal to the store size.  Unfortunately, validate_subreg
treats a vector type whose size is less than the vector register size as
invalid, which leads to an ICE here.

This patch fixes it by filtering out the invalid type sizes and
making sure the subreg is valid for both read_mode and store_mode
before performing the real gen_lowpart.

The below test suites are passed for this patch:

* The x86 bootstrap test.
* The x86 regression test.
* The riscv rv64gcv regression test.
* The riscv rv64gc regression test.
* The aarch64 regression test.

gcc/ChangeLog:

* dse.cc (get_stored_val): Make sure read_mode/write_mode
is valid subreg before gen_lowpart.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/bug-6.c: New test.

Signed-off-by: Pan Li 
---
 gcc/dse.cc|  4 +++-
 .../gcc.target/riscv/rvv/base/bug-6.c | 22 +++
 2 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c

diff --git a/gcc/dse.cc b/gcc/dse.cc
index edc7a1dfecf..1596da91da0 100644
--- a/gcc/dse.cc
+++ b/gcc/dse.cc
@@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode 
read_mode,
 copy_rtx (store_info->const_rhs));
   else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
 && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
-&& targetm.modes_tieable_p (read_mode, store_mode))
+&& targetm.modes_tieable_p (read_mode, store_mode)
+&& validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs),
+   subreg_lowpart_offset (read_mode, store_mode)))
 read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));
   else
 read_reg = extract_low_bits (read_mode, store_mode,
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
new file mode 100644
index 000..5bb00b8f587
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
@@ -0,0 +1,22 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
+
+struct A { float x, y; };
+struct B { struct A u; };
+
+extern void bar (struct A *);
+
+float
+f3 (struct B *x, int y)
+{
+  struct A p = {1.0f, 2.0f};
+  struct A *q = &x[y].u; /* q points at member u of the y-th element.  */
+
+  __builtin_memcpy (&q->x, &p.x, sizeof (float)); /* Copy p into *q ...  */
+  __builtin_memcpy (&q->y, &p.y, sizeof (float)); /* ... one float at a time.  */
+
+  bar (&p);
+
+  return x[y].u.x + x[y].u.y;
+}
-- 
2.34.1



[PATCH v3] DSE: Fix ICE after allow vector type in get_stored_val

2024-04-30 Thread pan2 . li
From: Pan Li 

We previously allowed vector types in get_stored_val when the read size
is less than or equal to the store size.  Unfortunately, validate_subreg
treats a vector type whose size is less than the vector register size as
invalid, which leads to an ICE here.

This patch fixes it by filtering out the invalid type sizes and
making sure the subreg is valid for both read_mode and store_mode
before performing the real gen_lowpart.

The below test suites are passed for this patch:

* The x86 bootstrap test.
* The x86 regression test.
* The riscv rv64gcv regression test.
* The riscv rv64gc regression test.
* The aarch64 regression test.

gcc/ChangeLog:

* dse.cc (get_stored_val): Make sure read_mode size is greater
than or equal to the vector register size before gen_lowpart.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr111720-10.c: Adjust asm checker.
* gcc.target/riscv/rvv/base/bug-6.c: New test.

Signed-off-by: Pan Li 
---
 gcc/dse.cc|  4 +++-
 .../gcc.target/riscv/rvv/base/bug-6.c | 22 +++
 .../gcc.target/riscv/rvv/base/pr111720-10.c   |  2 +-
 3 files changed, 26 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c

diff --git a/gcc/dse.cc b/gcc/dse.cc
index edc7a1dfecf..258d2ccc299 100644
--- a/gcc/dse.cc
+++ b/gcc/dse.cc
@@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode 
read_mode,
 copy_rtx (store_info->const_rhs));
   else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
 && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
-&& targetm.modes_tieable_p (read_mode, store_mode))
+&& targetm.modes_tieable_p (read_mode, store_mode)
+/* It's invalid in validate_subreg if read_mode size is < reg natural.  */
+&& known_ge (GET_MODE_SIZE (read_mode), REGMODE_NATURAL_SIZE (read_mode)))
 read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));
   else
 read_reg = extract_low_bits (read_mode, store_mode,
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
new file mode 100644
index 000..5bb00b8f587
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
@@ -0,0 +1,22 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
+
+struct A { float x, y; };
+struct B { struct A u; };
+
+extern void bar (struct A *);
+
+float
+f3 (struct B *x, int y)
+{
+  struct A p = {1.0f, 2.0f};
+  struct A *q = &x[y].u; /* q points at member u of the y-th element.  */
+
+  __builtin_memcpy (&q->x, &p.x, sizeof (float)); /* Copy p into *q ...  */
+  __builtin_memcpy (&q->y, &p.y, sizeof (float)); /* ... one float at a time.  */
+
+  bar (&p);
+
+  return x[y].u.x + x[y].u.y;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-10.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-10.c
index 215eb99ce0f..ee6b2ccf7ad 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-10.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-10.c
@@ -15,4 +15,4 @@ vbool4_t test () {
 }
 
 /* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } 
} */
-/* { dg-final { scan-assembler-not {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} } 
} */
+/* { dg-final { scan-assembler-times {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} 
1 }  } */
-- 
2.34.1



[PATCH v3] Internal-fn: Introduce new internal function SAT_ADD

2024-04-29 Thread pan2 . li
From: Pan Li 

Update in v3:
* Rebase upstream for conflict.

Update in v2:
* Fix one failure for x86 bootstrap.

Original log:

This patch adds the middle-end representation for the saturating
add, i.e. the result of the add is set to the max value on overflow.
It matches a pattern similar to the one below.

SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))

Take uint8_t as example, we will have:

* SAT_ADD (1, 254)   => 255.
* SAT_ADD (1, 255)   => 255.
* SAT_ADD (2, 255)   => 255.
* SAT_ADD (255, 255) => 255.

The patch also implements SAT_ADD in the riscv backend as
a sample for both scalar and vector.  Take the below example:

uint64_t sat_add_u64 (uint64_t x, uint64_t y)
{
  return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
}

Before this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  _Bool _2;
  long unsigned int _3;
  long unsigned int _4;
  uint64_t _7;
  long unsigned int _10;
  __complex__ long unsigned int _11;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
  _1 = REALPART_EXPR <_11>;
  _10 = IMAGPART_EXPR <_11>;
  _2 = _10 != 0;
  _3 = (long unsigned int) _2;
  _4 = -_3;
  _7 = _1 | _4;
  return _7;
;;succ:   EXIT

}

After this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  uint64_t _7;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
  return _7;
;;succ:   EXIT
}

For vectorization, we leverage the existing vect pattern recog to find
the pattern similar to the scalar one and let the vectorizer perform
the rest for the standard name usadd<mode>3 in vector mode.
The riscv vector backend has the insn "Vector Single-Width Saturating
Add and Subtract", which can be leveraged when expanding usadd<mode>3
in vector mode.  For example:

void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
}

Before this patch:
void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _80 = .SELECT_VL (ivtmp_78, POLY_INT_CST [2, 2]);
  ivtmp_58 = _80 * 8;
  vect__4.7_61 = .MASK_LEN_LOAD (vectp_x.5_59, 64B, { -1, ... }, _80, 0);
  vect__6.10_65 = .MASK_LEN_LOAD (vectp_y.8_63, 64B, { -1, ... }, _80, 0);
  vect__7.11_66 = vect__4.7_61 + vect__6.10_65;
  mask__8.12_67 = vect__4.7_61 > vect__7.11_66;
  vect__12.15_72 = .VCOND_MASK (mask__8.12_67, { 18446744073709551615, ... }, 
vect__7.11_66);
  .MASK_LEN_STORE (vectp_out.16_74, 64B, { -1, ... }, _80, 0, vect__12.15_72);
  vectp_x.5_60 = vectp_x.5_59 + ivtmp_58;
  vectp_y.8_64 = vectp_y.8_63 + ivtmp_58;
  vectp_out.16_75 = vectp_out.16_74 + ivtmp_58;
  ivtmp_79 = ivtmp_78 - _80;
  ...
}

vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v0,0(a1)
  vle64.v v1,0(a2)
  sllia4,a5,3
  sub a3,a3,a5
  add a1,a1,a4
  add a2,a2,a4
  vadd.vv v1,v0,v1
  vmsgtu.vv   v0,v0,v1
  vmerge.vim  v1,v1,-1,v0
  vse64.v v1,0(a0)
  ...

After this patch:
void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _62 = .SELECT_VL (ivtmp_60, POLY_INT_CST [2, 2]);
  ivtmp_46 = _62 * 8;
  vect__4.7_49 = .MASK_LEN_LOAD (vectp_x.5_47, 64B, { -1, ... }, _62, 0);
  vect__6.10_53 = .MASK_LEN_LOAD (vectp_y.8_51, 64B, { -1, ... }, _62, 0);
  vect__12.11_54 = .SAT_ADD (vect__4.7_49, vect__6.10_53);
  .MASK_LEN_STORE (vectp_out.12_56, 64B, { -1, ... }, _62, 0, vect__12.11_54);
  ...
}

vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v1,0(a1)
  vle64.v v2,0(a2)
  sllia4,a5,3
  sub a3,a3,a5
  add a1,a1,a4
  add a2,a2,a4
  vsaddu.vv   v1,v1,v2
  vse64.v v1,0(a0)
  ...

To limit the patch size for review, only the unsigned version of
usadd<mode>3 is involved here.  The signed version will be covered
in follow-up patch(es).

The below test suites are passed for this patch.
* The riscv fully regression tests.
* The aarch64 fully regression tests.
* The x86 bootstrap tests.
* The x86 fully regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* config/riscv/autovec.md (usadd<mode>3): New pattern expand
for unsigned SAT_ADD vector.
* config/riscv/riscv-protos.h (riscv_expand_usadd): New func
decl to expand usadd<mode>3 pattern.
(expand_vec_usadd): Ditto but for vector.
* config/riscv/riscv-v.cc (emit_vec_saddu): New func impl to
emit the vsadd insn.
(expand_vec_usadd): New func impl to expand usadd<mode>3 for
vector.
* config/riscv/riscv.cc (riscv_expand_usadd): New func impl
to expand usadd<mode>3 for scalar.
* config/riscv/riscv.md (usadd<mode>3): New pattern expand
for unsigned SAT_ADD scalar.
* config/riscv/vector.md: Allow VLS mode for vsaddu.
* internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD.
* internal-fn.def (SAT_ADD): Add new signed optab 

[PATCH v2] RISC-V: Fix ICE for legitimize move on subreg const_poly_int [PR114885]

2024-04-29 Thread pan2 . li
From: Pan Li 

When we build with isl, there is an ICE for graphite in both
C/C++ and Fortran.  The legitimize move cannot take care of
the below rtl.

(set (subreg:DI (reg:TI 237) 8) (subreg:DI (const_poly_int:TI [4, 2]) 8))

Then we will have ice similar to below:

internal compiler error: in extract_insn, at recog.cc:2812.

This patch takes care of the above rtl.  Given that the value of a
const_poly_int can hardly exceed the max of int64, we can simply
consider the highest 8 bytes of the TImode value to be zero and set the
dest to (const_int 0).

The below test cases are fixed by this PATCH.

C:
FAIL: gcc.dg/graphite/pr111878.c (internal compiler error: in
extract_insn, at recog.cc:2812)
FAIL: gcc.dg/graphite/pr111878.c (test for excess errors)

Fortran:
FAIL: gfortran.dg/graphite/vect-pr40979.f90   -O  (internal compiler
error: in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr29832.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (internal
compiler error: in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr29581.f90   -O3 -g  (test for excess
errors)
FAIL: gfortran.dg/graphite/pr14741.f90   -O  (test for excess errors)
FAIL: gfortran.dg/graphite/pr29581.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for
excess errors)
FAIL: gfortran.dg/graphite/vect-pr40979.f90   -O  (test for excess
errors)
FAIL: gfortran.dg/graphite/id-27.f90   -O  (internal compiler error: in
extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr29832.f90   -O3 -g  (internal compiler
error: in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr29832.f90   -O3 -g  (test for excess
errors)
FAIL: gfortran.dg/graphite/id-27.f90   -O  (test for excess errors)
FAIL: gfortran.dg/graphite/pr29832.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for
excess errors)
FAIL: gfortran.dg/graphite/pr29581.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (internal
compiler error: in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr14741.f90   -O  (internal compiler error:
in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr29581.f90   -O3 -g  (internal compiler
error: in extract_insn, at recog.cc:2812)

The below test suites are passed for this patch:
* The rv64gcv fully regression test.
* The rv64gc fully regression test.

I tried to write an RTL test case for this, but it did not work well
based on the existing test cases.  Thus, the failures above serve as the
test cases.  Please note graphite requires gcc to be built with isl.

PR target/114885

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_legitimize_subreg_const_poly_move): New
func impl to take care of (subreg (const_poly_int:TI) 8).
(riscv_legitimize_move): Handle subreg of const_poly_int.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 44 +++
 1 file changed, 44 insertions(+)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0519e0679ed..0f62b295b96 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -2786,6 +2786,45 @@ riscv_v_adjust_scalable_frame (rtx target, poly_int64 
offset, bool epilogue)
   REG_NOTES (insn) = dwarf;
 }
 
+/* Legitimize a move whose SRC is a subreg of a const_poly_int.  Return true
+   if the move was emitted here and false if it is left to the caller:
+   1. (set (subreg:DI (reg:TI 237) 8)
+  (subreg:DI (const_poly_int:TI [4, 2]) 8))
+  =>
+  (set (subreg:DI (reg:TI 237) 8)
+  (const_int 0)) */
+
+static bool
+riscv_legitimize_subreg_const_poly_move (machine_mode mode, rtx dest, rtx src)
+{
+  gcc_assert (SUBREG_P (src) && CONST_POLY_INT_P (SUBREG_REG (src)));
+  gcc_assert (SUBREG_BYTE (src).is_constant ());
+
+  int byte_offset = SUBREG_BYTE (src).to_constant ();
+  rtx const_poly = SUBREG_REG (src);
+  machine_mode subreg_mode = GET_MODE (const_poly);
+
+  if (subreg_mode != TImode) /* Only TImode is needed for now.  */
+return false;
+
+  if (byte_offset == 8)
+{
+  /* Assumes a non-negative const_poly_int that fits in int64 (TODO confirm).  */
+  emit_move_insn (dest, CONST0_RTX (mode));
+  return true;
+}
+
+  /* The below transform is handled elsewhere,
+ so it is ignored here.
+ (set (subreg:DI (reg:TI 237) 0)
+ (subreg:DI (const_poly_int:TI [4, 2]) 0))
+ =>
+ (set (subreg:DI (reg:TI 237) 0)
+ (const_poly_int:DI [4, 2])) */
+
+  return false;
+}
+
 /* If (set DEST SRC) is not a valid move instruction, emit an equivalent
sequence that is valid.  */
 
@@ -2839,6 +2878,11 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx 
src)
}
   return true;
 }
+
+  if (SUBREG_P (src) && CONST_POLY_INT_P (SUBREG_REG (src))
+&& riscv_legitimize_subreg_const_poly_move (mode, dest, src))
+return true;
+
   /* Expand
(set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
  Expand this data movement instead of simply forbid it since
-- 

[PATCH v1] RISC-V: Fix ICE for legitimize move on subreg const_poly_move

2024-04-27 Thread pan2 . li
From: Pan Li 

When we build with isl, there will be an ICE in the graphite tests for
both C/C++ and Fortran.  The legitimize move cannot take care of the
RTL below.

(set (subreg:DI (reg:TI 237) 8) (subreg:DI (const_poly_int:TI [4, 2]) 8))

Then we will have an ICE similar to the one below:

internal compiler error: in extract_insn, at recog.cc:2812.

This patch takes care of the above RTL.  Given that the value of a
const_poly_int can hardly exceed the max of int64, we can simply
consider the highest 8 bytes of the TImode value to be zero and set the
dest to (const_int 0).

The below test cases are fixed by this PATCH.

C:
FAIL: gcc.dg/graphite/pr111878.c (internal compiler error: in
extract_insn, at recog.cc:2812)
FAIL: gcc.dg/graphite/pr111878.c (test for excess errors)

Fortran:
FAIL: gfortran.dg/graphite/vect-pr40979.f90   -O  (internal compiler
error: in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr29832.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (internal
compiler error: in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr29581.f90   -O3 -g  (test for excess
errors)
FAIL: gfortran.dg/graphite/pr14741.f90   -O  (test for excess errors)
FAIL: gfortran.dg/graphite/pr29581.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for
excess errors)
FAIL: gfortran.dg/graphite/vect-pr40979.f90   -O  (test for excess
errors)
FAIL: gfortran.dg/graphite/id-27.f90   -O  (internal compiler error: in
extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr29832.f90   -O3 -g  (internal compiler
error: in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr29832.f90   -O3 -g  (test for excess
errors)
FAIL: gfortran.dg/graphite/id-27.f90   -O  (test for excess errors)
FAIL: gfortran.dg/graphite/pr29832.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for
excess errors)
FAIL: gfortran.dg/graphite/pr29581.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (internal
compiler error: in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr14741.f90   -O  (internal compiler error:
in extract_insn, at recog.cc:2812)
FAIL: gfortran.dg/graphite/pr29581.f90   -O3 -g  (internal compiler
error: in extract_insn, at recog.cc:2812)

The below test suites are passed for this patch:
* The rv64gcv fully regression test.
* The rv64gc fully regression test.

I tried to write an RTL test for this, but it did not work well when
modelled on the existing test cases.  Thus, the failures above serve as
the test cases.  Please note that graphite requires GCC to be built with isl.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_legitimize_subreg_const_poly_move): New
func to take care of the subreg at byte 8 of a (const_poly_int:TI).
(riscv_legitimize_move): Handle a subreg src of const_poly_int.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 43 +++
 1 file changed, 43 insertions(+)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0519e0679ed..bad23ea487f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -2786,6 +2786,44 @@ riscv_v_adjust_scalable_frame (rtx target, poly_int64 
offset, bool epilogue)
   REG_NOTES (insn) = dwarf;
 }
 
+/* Take care below subreg const_poly_int move:
+
+   1. (set (subreg:DI (reg:TI 237) 8)
+  (subreg:DI (const_poly_int:TI [4, 2]) 8))
+  =>
+  (set (subreg:DI (reg:TI 237) 8)
+  (const_int 0)) */
+
+static bool
+riscv_legitimize_subreg_const_poly_move (machine_mode mode, rtx dest, rtx src)
+{
+  gcc_assert (SUBREG_P (src) && CONST_POLY_INT_P (SUBREG_REG (src)));
+  gcc_assert (SUBREG_BYTE (src).is_constant ());
+
+  int byte_offset = SUBREG_BYTE (src).to_constant ();
+  rtx const_poly = SUBREG_REG (src);
+  machine_mode subreg_mode = GET_MODE (const_poly);
+
+  if (subreg_mode != TImode) /* Only TImode is needed for now.  */
+return false;
+
+  if (byte_offset == 8)
+{ /* The const_poly_int cannot exceed int64, just set zero here.  */
+  emit_move_insn (dest, CONST0_RTX (mode));
+  return true;
+}
+
+  /* The below transform will be covered in somewhere else.
+ Thus, ignore this here.
+   1. (set (subreg:DI (reg:TI 237) 0)
+  (subreg:DI (const_poly_int:TI [4, 2]) 0))
+  =>
+  (set (subreg:DI (reg:TI 237) 0)
+  (const_poly_int:DI [4, 2])) */
+
+  return false;
+}
+
 /* If (set DEST SRC) is not a valid move instruction, emit an equivalent
sequence that is valid.  */
 
@@ -2839,6 +2877,11 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx 
src)
}
   return true;
 }
+
+  if (SUBREG_P (src) && CONST_POLY_INT_P (SUBREG_REG (src))
+&& riscv_legitimize_subreg_const_poly_move (mode, dest, src))
+return true;
+
   /* Expand
(set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
  Expand this data movement instead of simply forbid it since
-- 
2.34.1



[PATCH v1] RISC-V: Add test cases for insn does not satisfy its constraints [PR114714]

2024-04-25 Thread pan2 . li
From: Pan Li 

We have one ICE when RVV register overlap is enabled.  We reverted this
feature because we are in stage 4 and there is not much time to figure
out a better solution.  Thus, for now add the related test cases, which
will trigger the ICE when register overlap is enabled.

This will gate the RVV register overlap support in GCC-15.

PR target/114714

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/pr114714-1.C: New test.
* g++.target/riscv/rvv/base/pr114714-2.C: New test.

Signed-off-by: Pan Li 
Co-Authored-by: Kito Cheng 
---
 .../g++.target/riscv/rvv/base/pr114714-1.C| 85 +++
 .../g++.target/riscv/rvv/base/pr114714-2.C| 85 +++
 2 files changed, 170 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/pr114714-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/pr114714-2.C

diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr114714-1.C 
b/gcc/testsuite/g++.target/riscv/rvv/base/pr114714-1.C
new file mode 100644
index 000..d3230f7f23e
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr114714-1.C
@@ -0,0 +1,85 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -std=c++17" } */
+
+typedef int a;
+typedef short b;
+typedef unsigned c;
+template < typename > using e = unsigned;
+template < typename > void ab();
+#pragma riscv intrinsic "vector"
+template < typename f, int, int ac > struct g {
+  using i = f;
+  template < typename m > using j = g< m, 0, ac >;
+  using k = g< i, 1, ac - 1 >;
+  using ad = g< i, 1, ac + 1 >;
+};
+namespace ae {
+struct af {
+  using h = g< short, 6, 0 < 3 >;
+};
+struct ag {
+  using h = af::h;
+};
+} template < typename, int > using ah = ae::ag::h;
+template < class ai > using aj = typename ai::i;
+template < class i, class ai > using j = typename ai::j< i >;
+template < class ai > using ak = j< e< ai >, ai >;
+template < class ai > using k = typename ai::k;
+template < class ai > using ad = typename ai::ad;
+template < a ap > vuint16m1_t ar(g< b, ap, 0 >, b);
+template < a ap > vuint16m2_t ar(g< b, ap, 1 >, b);
+template < a ap > vuint32m2_t ar(g< c, ap, 1 >, c);
+template < a ap > vuint32m4_t ar(g< c, ap, 2 >, c);
+template < class ai > using as = decltype(ar(ai(), aj< ai >()));
+template < class ai > as< ai > at(ai);
+namespace ae {
+template < int ap > vuint32m4_t au(g< c, ap, 1 + 1 >, vuint32m2_t l) {
+  return __riscv_vlmul_ext_v_u32m2_u32m4(l);
+}
+} template < int ap > vuint32m2_t aw(g< c, ap, 1 >, vuint16m1_t l) {
+  return __riscv_vzext_vf2_u32m2(l, 0);
+}
+namespace ae {
+vuint32m4_t ax(vuint32m4_t, vuint32m4_t, a);
+}
+template < class ay, class an > as< ay > az(ay ba, an bc) {
+  an bb;
+  return ae::ax(ae::au(ba, bc), ae::au(ba, bb), 2);
+}
+template < class bd > as< bd > be(bd, as< ad< bd > >);
+namespace ae {
+template < class bh, class bi > void bj(bh bk, bi bl) {
+  ad< decltype(bk) > bn;
+  az(bn, bl);
+}
+} template < int ap, int ac, class bp, class bq >
+void br(g< c, ap, ac > bk, bp, bq bl) {
+  ae::bj(bk, bl);
+}
+template < class ai > using bs = decltype(at(ai()));
+struct bt;
+template < int ac = 1 > class bu {
+public:
+  template < typename i > void operator()(i) {
+ah< i, ac > d;
+bt()(i(), d);
+  }
+};
+struct bt {
+  template < typename bv, class bf > void operator()(bv, bf bw) {
+using bx = bv;
+ak< bf > by;
+k< bf > bz;
+using bq = bs< decltype(by) >;
+using bp = bs< decltype(bw) >;
+bp cb;
+ab< bx >();
+for (;;) {
+  bp cc;
+  bq bl = aw(by, be(bz, cc));
+  br(by, cb, bl);
+}
+  }
+};
+void d() { bu()(b()); }
diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr114714-2.C 
b/gcc/testsuite/g++.target/riscv/rvv/base/pr114714-2.C
new file mode 100644
index 000..55621e98fee
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr114714-2.C
@@ -0,0 +1,85 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -std=c++17" } */
+
+typedef int a;
+typedef short b;
+typedef unsigned c;
+template < typename > using e = unsigned;
+template < typename > void ab();
+#pragma riscv intrinsic "vector"
+template < typename f, int, int ac > struct g {
+  using i = f;
+  template < typename m > using j = g< m, 0, ac >;
+  using k = g< i, 1, ac - 1 >;
+  using ad = g< i, 1, ac + 1 >;
+};
+namespace ae {
+struct af {
+  using h = g< short, 6, 0 < 3 >;
+};
+struct ag {
+  using h = af::h;
+};
+} template < typename, int > using ah = ae::ag::h;
+template < class ai > using aj = typename ai::i;
+template < class i, class ai > using j = typename ai::j< i >;
+template < class ai > using ak = j< e< ai >, ai >;
+template < class ai > using k = typename ai::k;
+template < class ai > using ad = typename ai::ad;
+template < a ap > vuint16mf2_t ar(g< b, ap, 0 >, b);
+template < a ap > vuint16m1_t ar(g< b, ap, 1 >, b);
+template < a ap > 

[PATCH v1] RISC-V: Add xfail test case for highpart register overlap of vwcvt

2024-04-24 Thread pan2 . li
From: Pan Li 

We reverted the patch below for register group overlap; add the related
insn test and mark it as xfail.  We will remove the xfail after we
support the register overlap in GCC-15.

bdad036da32 RISC-V: Support highpart register overlap for vwcvt

The below test suites are passed for this patch
* The rv64gcv fully regression test with isl build.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-1.c: New test.
* gcc.target/riscv/rvv/base/pr112431-2.c: New test.
* gcc.target/riscv/rvv/base/pr112431-3.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/base/pr112431-1.c| 104 ++
 .../gcc.target/riscv/rvv/base/pr112431-2.c|  68 
 .../gcc.target/riscv/rvv/base/pr112431-3.c|  51 +
 3 files changed, 223 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-3.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-1.c
new file mode 100644
index 000..6f9c6f7bd8c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-1.c
@@ -0,0 +1,104 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+ size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9,
+ size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14,
+ size_t sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
++ sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v8 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v9 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v10 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v11 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v12 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v13 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v14 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v15 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vint16m2_t vw0 = __riscv_vwcvt_x_x_v_i16m2 (v0, vl);
+  vint16m2_t vw1 = __riscv_vwcvt_x_x_v_i16m2 (v1, vl);
+  vint16m2_t vw2 = __riscv_vwcvt_x_x_v_i16m2 (v2, vl);
+  vint16m2_t vw3 = __riscv_vwcvt_x_x_v_i16m2 (v3, vl);
+  vint16m2_t vw4 = __riscv_vwcvt_x_x_v_i16m2 (v4, vl);
+  vint16m2_t vw5 = __riscv_vwcvt_x_x_v_i16m2 (v5, vl);
+  vint16m2_t vw6 = __riscv_vwcvt_x_x_v_i16m2 (v6, vl);
+  vint16m2_t vw7 = __riscv_vwcvt_x_x_v_i16m2 (v7, vl);
+  vint16m2_t vw8 = __riscv_vwcvt_x_x_v_i16m2 (v8, vl);
+  vint16m2_t vw9 = __riscv_vwcvt_x_x_v_i16m2 (v9, vl);
+  vint16m2_t vw10 = __riscv_vwcvt_x_x_v_i16m2 (v10, vl);
+  vint16m2_t vw11 = __riscv_vwcvt_x_x_v_i16m2 (v11, vl);
+  vint16m2_t vw12 = __riscv_vwcvt_x_x_v_i16m2 (v12, vl);
+  vint16m2_t vw13 = __riscv_vwcvt_x_x_v_i16m2 (v13, vl);
+  vint16m2_t vw14 = __riscv_vwcvt_x_x_v_i16m2 (v14, vl);
+  vint16m2_t vw15 = __riscv_vwcvt_x_x_v_i16m2 (v15, vl);
+
+  asm volatile("nop" ::: "memory");
+  size_t sum0 = __riscv_vmv_x_s_i16m2_i16 (vw0);
+  size_t sum1 = __riscv_vmv_x_s_i16m2_i16 (vw1);
+  size_t sum2 = __riscv_vmv_x_s_i16m2_i16 (vw2);
+  size_t sum3 = __riscv_vmv_x_s_i16m2_i16 (vw3);
+  size_t sum4 = __riscv_vmv_x_s_i16m2_i16 (vw4);
+  size_t sum5 = __riscv_vmv_x_s_i16m2_i16 (vw5);
+  size_t sum6 = __riscv_vmv_x_s_i16m2_i16 (vw6);
+  size_t sum7 = __riscv_vmv_x_s_i16m2_i16 (vw7);
+  size_t sum8 = __riscv_vmv_x_s_i16m2_i16 (vw8);
+  size_t sum9 = __riscv_vmv_x_s_i16m2_i16 (vw9);
+  size_t sum10 = __riscv_vmv_x_s_i16m2_i16 (vw10);
+  

[PATCH v1] RISC-V: Add early clobber to the dest of vwsll

2024-04-24 Thread pan2 . li
From: Pan Li 

We dropped the existing early clobber on the dest operand of the vwsll
pattern when resolving conflicts during the revert of register overlap.
Thus, add it back to the pattern.  Unfortunately, we have no test to
cover this part and will improve this after GCC-15 opens.

The below tests are passed for this patch:
* The rv64gcv fully regression test with isl build.

gcc/ChangeLog:

* config/riscv/vector-crypto.md: Add early clobber to the
dest operand of vwsll.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/vector-crypto.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/vector-crypto.md 
b/gcc/config/riscv/vector-crypto.md
index 8a4888a7653..e474ddf5da7 100755
--- a/gcc/config/riscv/vector-crypto.md
+++ b/gcc/config/riscv/vector-crypto.md
@@ -303,7 +303,7 @@ (define_insn "@pred_vwsll"
(set_attr "mode" "")])
 
 (define_insn "@pred_vwsll_scalar"
-  [(set (match_operand:VWEXTI 0 "register_operand"  "=vr, vr")
+  [(set (match_operand:VWEXTI 0 "register_operand"  "=&vr, &vr")
  (if_then_else:VWEXTI
(unspec:
  [(match_operand: 1 "vector_mask_operand"   "vmWc1, vmWc1")
-- 
2.34.1



[PATCH v1] Revert "RISC-V: Support highpart register overlap for vwcvt"

2024-04-24 Thread pan2 . li
From: Pan Li 

This reverts commit bdad036da32f72b84a96070518e7d75c21706dc2.
---
 gcc/config/riscv/constraints.md   |  23 
 gcc/config/riscv/riscv.md |  24 
 gcc/config/riscv/vector-crypto.md |  21 ++--
 gcc/config/riscv/vector.md|  19 ++--
 .../gcc.target/riscv/rvv/base/pr112431-1.c| 104 --
 .../gcc.target/riscv/rvv/base/pr112431-2.c|  68 
 .../gcc.target/riscv/rvv/base/pr112431-3.c|  51 -
 .../gcc.target/riscv/rvv/base/pr112431-39.c   |   2 +-
 .../gcc.target/riscv/rvv/base/pr112431-40.c   |   2 +-
 .../gcc.target/riscv/rvv/base/pr112431-41.c   |   2 +-
 10 files changed, 22 insertions(+), 294 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-1.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-2.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-3.c

diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index e37c6936bfa..a590df545d7 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -159,29 +159,6 @@ (define_register_constraint "vd" "TARGET_VECTOR ? VD_REGS 
: NO_REGS"
 (define_register_constraint "vm" "TARGET_VECTOR ? VM_REGS : NO_REGS"
   "A vector mask register (if available).")
 
-;; These following constraints are used by RVV instructions with dest EEW > 
src EEW.
-;; RISC-V 'V' Spec 5.2. Vector Operands:
-;; The destination EEW is greater than the source EEW, the source EMUL is at 
least 1,
-;; and the overlap is in the highest-numbered part of the destination register 
group.
-;; (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, v2, or 
v4 is not).
-(define_register_constraint "W21" "TARGET_VECTOR ? V_REGS : NO_REGS"
-  "A vector register has register number % 2 == 1." "regno % 2 == 1")
-
-(define_register_constraint "W42" "TARGET_VECTOR ? V_REGS : NO_REGS"
-  "A vector register has register number % 4 == 2." "regno % 4 == 2")
-
-(define_register_constraint "W84" "TARGET_VECTOR ? V_REGS : NO_REGS"
-  "A vector register has register number % 8 == 4." "regno % 8 == 4")
-
-(define_register_constraint "W41" "TARGET_VECTOR ? V_REGS : NO_REGS"
-  "A vector register has register number % 4 == 1." "regno % 4 == 1")
-
-(define_register_constraint "W81" "TARGET_VECTOR ? V_REGS : NO_REGS"
-  "A vector register has register number % 8 == 1." "regno % 8 == 1")
-
-(define_register_constraint "W82" "TARGET_VECTOR ? V_REGS : NO_REGS"
-  "A vector register has register number % 8 == 2." "regno % 8 == 2")
-
 ;; This constraint is used to match instruction "csrr %0, vlenb" which is 
generated in "mov".
 ;; VLENB is a run-time constant which represent the vector register length in 
bytes.
 ;; BYTES_PER_RISCV_VECTOR represent runtime invariant of vector register 
length in bytes.
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 1693d4008c6..455715ab2f7 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -538,27 +538,6 @@ (define_attr "fp_vector_disabled" "no,yes"
   ]
   (const_string "no")))
 
-(define_attr "vconstraint" "no,W21,W42,W84,W41,W81,W82"
-  (const_string "no"))
-
-(define_attr "vconstraint_enabled" "no,yes"
-  (cond [(eq_attr "vconstraint" "no")
- (const_string "yes")
-
- (and (eq_attr "vconstraint" "W21")
- (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 2"))
-(const_string "no")
-
- (and (eq_attr "vconstraint" "W42,W41")
- (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 4"))
-(const_string "no")
-
- (and (eq_attr "vconstraint" "W84,W81,W82")
- (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 8"))
-(const_string "no")
-]
-   (const_string "yes")))
-
 ;; This attribute marks the alternatives not matching the constraints
 ;; described in spec as disabled.
 (define_attr "spec_restriction" "none,thv,rvv"
@@ -587,9 +566,6 @@ (define_attr "enabled" "no,yes"
 (eq_attr "fp_vector_disabled" "yes")
 (const_string "no")
 
-(eq_attr "vconstraint_enabled" "no")
-(const_string "no")
-
 (eq_attr "spec_restriction_disabled" "yes")
 (const_string "no")
   ]
diff --git a/gcc/config/riscv/vector-crypto.md 
b/gcc/config/riscv/vector-crypto.md
index 23dc549e5b8..8a4888a7653 100755
--- a/gcc/config/riscv/vector-crypto.md
+++ b/gcc/config/riscv/vector-crypto.md
@@ -303,26 +303,25 @@ (define_insn "@pred_vwsll"
(set_attr "mode" "")])
 
 (define_insn "@pred_vwsll_scalar"
-  [(set (match_operand:VWEXTI 0 "register_operand""=vd, vr, vd, vr, vd, 
vr, vd, vr, vd, vr, vd, vr, ?, ?")
+  [(set (match_operand:VWEXTI 0 "register_operand"  "=vr, vr")
  (if_then_else:VWEXTI
(unspec:
- [(match_operand: 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, 
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1")
-

[PATCH v1] RISC-V: Add xfail test case for highpart overlap of vext.vf

2024-04-23 Thread pan2 . li
From: Pan Li 

We reverted the patch below for register group overlap; add the related
insn test and mark it as xfail.  We will remove the xfail after we
support the register overlap in GCC-15.

62685890d88 RISC-V: Support highpart overlap for vext.vf

The below test suites are passed for this patch
* The rv64gcv fully regression test with isl build.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/unop_v_constraint-2.c: Adjust asm
check cond.
* gcc.target/riscv/rvv/base/pr112431-4.c: New test.
* gcc.target/riscv/rvv/base/pr112431-5.c: New test.
* gcc.target/riscv/rvv/base/pr112431-6.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/base/pr112431-4.c| 104 ++
 .../gcc.target/riscv/rvv/base/pr112431-5.c|  68 
 .../gcc.target/riscv/rvv/base/pr112431-6.c|  51 +
 .../riscv/rvv/base/unop_v_constraint-2.c  |   2 +-
 4 files changed, 224 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-6.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-4.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-4.c
new file mode 100644
index 000..cecf796e10c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-4.c
@@ -0,0 +1,104 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+ size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9,
+ size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14,
+ size_t sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
++ sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v8 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v9 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v10 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v11 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v12 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v13 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v14 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v15 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vint16m2_t vw0 = __riscv_vsext_vf2_i16m2 (v0, vl);
+  vint16m2_t vw1 = __riscv_vsext_vf2_i16m2 (v1, vl);
+  vint16m2_t vw2 = __riscv_vsext_vf2_i16m2 (v2, vl);
+  vint16m2_t vw3 = __riscv_vsext_vf2_i16m2 (v3, vl);
+  vint16m2_t vw4 = __riscv_vsext_vf2_i16m2 (v4, vl);
+  vint16m2_t vw5 = __riscv_vsext_vf2_i16m2 (v5, vl);
+  vint16m2_t vw6 = __riscv_vsext_vf2_i16m2 (v6, vl);
+  vint16m2_t vw7 = __riscv_vsext_vf2_i16m2 (v7, vl);
+  vint16m2_t vw8 = __riscv_vsext_vf2_i16m2 (v8, vl);
+  vint16m2_t vw9 = __riscv_vsext_vf2_i16m2 (v9, vl);
+  vint16m2_t vw10 = __riscv_vsext_vf2_i16m2 (v10, vl);
+  vint16m2_t vw11 = __riscv_vsext_vf2_i16m2 (v11, vl);
+  vint16m2_t vw12 = __riscv_vsext_vf2_i16m2 (v12, vl);
+  vint16m2_t vw13 = __riscv_vsext_vf2_i16m2 (v13, vl);
+  vint16m2_t vw14 = __riscv_vsext_vf2_i16m2 (v14, vl);
+  vint16m2_t vw15 = __riscv_vsext_vf2_i16m2 (v15, vl);
+
+  asm volatile("nop" ::: "memory");
+  size_t sum0 = __riscv_vmv_x_s_i16m2_i16 (vw0);
+  size_t sum1 = __riscv_vmv_x_s_i16m2_i16 (vw1);
+  size_t sum2 = __riscv_vmv_x_s_i16m2_i16 (vw2);
+  size_t sum3 = __riscv_vmv_x_s_i16m2_i16 (vw3);
+  size_t sum4 = __riscv_vmv_x_s_i16m2_i16 (vw4);
+  size_t sum5 = __riscv_vmv_x_s_i16m2_i16 (vw5);
+  size_t sum6 = __riscv_vmv_x_s_i16m2_i16 (vw6);
+  size_t sum7 = __riscv_vmv_x_s_i16m2_i16 (vw7);
+  size_t sum8 = __riscv_vmv_x_s_i16m2_i16 (vw8);

[PATCH v1] RISC-V: Adjust overlap attr after revert d3544cea63d and e65aaf8efe1

2024-04-22 Thread pan2 . li
From: Pan Li 

After we reverted the 2 commits below, the references to the attr need
some adjustment as group_overlap is no longer available.

* RISC-V: Robostify the W43, W86, W87 constraint enabled attribute
* RISC-V: Rename vconstraint into group_overlap

The below tests are passed for this patch.

* The rv64gcv fully regression tests.

gcc/ChangeLog:

* config/riscv/vector-crypto.md: Use the vconstraint attr instead of
group_overlap for vwsll.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/vector-crypto.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/vector-crypto.md 
b/gcc/config/riscv/vector-crypto.md
index 519c6a10d94..23dc549e5b8 100755
--- a/gcc/config/riscv/vector-crypto.md
+++ b/gcc/config/riscv/vector-crypto.md
@@ -322,7 +322,7 @@ (define_insn "@pred_vwsll_scalar"
   "vwsll.v%o4\t%0,%3,%4%p1"
   [(set_attr "type" "vwsll")
(set_attr "mode" "")
-   (set_attr "group_overlap" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
+   (set_attr "vconstraint" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,no,no")])
 
 ;; vbrev.v vbrev8.v vrev8.v
 (define_insn "@pred_v"
-- 
2.34.1



[PATCH v1] RISC-V: Add xfail test case for highpart overlap floating-point widen insn

2024-04-22 Thread pan2 . li
From: Pan Li 

We reverted the patch below for register group overlap; add the related
insn test and mark it as xfail.  We will remove the xfail after we
support the register overlap in GCC-15.

8614cbb2534 RISC-V: Support highpart overlap for floating-point widen 
instructions

The below test suites are passed.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-10.c: New test.
* gcc.target/riscv/rvv/base/pr112431-11.c: New test.
* gcc.target/riscv/rvv/base/pr112431-12.c: New test.
* gcc.target/riscv/rvv/base/pr112431-13.c: New test.
* gcc.target/riscv/rvv/base/pr112431-14.c: New test.
* gcc.target/riscv/rvv/base/pr112431-15.c: New test.
* gcc.target/riscv/rvv/base/pr112431-7.c: New test.
* gcc.target/riscv/rvv/base/pr112431-8.c: New test.
* gcc.target/riscv/rvv/base/pr112431-9.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/base/pr112431-10.c   | 104 ++
 .../gcc.target/riscv/rvv/base/pr112431-11.c   |  68 +++
 .../gcc.target/riscv/rvv/base/pr112431-12.c   |  51 +
 .../gcc.target/riscv/rvv/base/pr112431-13.c   | 188 ++
 .../gcc.target/riscv/rvv/base/pr112431-14.c   | 119 +++
 .../gcc.target/riscv/rvv/base/pr112431-15.c   |  86 
 .../gcc.target/riscv/rvv/base/pr112431-7.c| 104 ++
 .../gcc.target/riscv/rvv/base/pr112431-8.c|  68 +++
 .../gcc.target/riscv/rvv/base/pr112431-9.c|  51 +
 9 files changed, 839 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-11.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-12.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-13.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-14.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-15.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-9.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-10.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-10.c
new file mode 100644
index 000..5d3f2fbe46d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-10.c
@@ -0,0 +1,104 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+double __attribute__ ((noinline))
+sumation (double sum0, double sum1, double sum2, double sum3, double sum4,
+ double sum5, double sum6, double sum7, double sum8, double sum9,
+ double sum10, double sum11, double sum12, double sum13, double sum14,
+ double sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
++ sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+double
+foo (char const *buf, size_t len)
+{
+  double sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint32m1_t v0 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v1 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v2 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v3 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v4 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v5 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v6 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v7 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v8 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v9 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v10 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v11 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v12 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v13 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v14 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v15 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vfloat64m2_t vw0 = __riscv_vfwcvt_f_x_v_f64m2 (v0, vl);
+  vfloat64m2_t vw1 = __riscv_vfwcvt_f_x_v_f64m2 (v1, vl);
+  vfloat64m2_t vw2 = __riscv_vfwcvt_f_x_v_f64m2 (v2, vl);
+  vfloat64m2_t vw3 = __riscv_vfwcvt_f_x_v_f64m2 (v3, vl);
+  vfloat64m2_t vw4 = __riscv_vfwcvt_f_x_v_f64m2 (v4, vl);
+  vfloat64m2_t vw5 = __riscv_vfwcvt_f_x_v_f64m2 (v5, vl);
+  vfloat64m2_t vw6 = 

[PATCH v2] RISC-V: Add xfail test case for indexed load overlap with SRC EEW < DEST EEW

2024-04-22 Thread pan2 . li
From: Pan Li 

Update in v2:
* Add change log to pr112431-34.c.

Original log:

We reverted the patch below for register group overlap; add the related
insn test and mark it as xfail.  We will remove the xfail after we
support the register overlap in GCC-15.

4418d55bcd1 RISC-V: Support highpart overlap for indexed load with SRC EEW < 
DEST EEW

The below test suites are passed.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-34.c: Remove xfail for vluxei8 
check.
* gcc.target/riscv/rvv/base/pr112431-28.c: New test.
* gcc.target/riscv/rvv/base/pr112431-29.c: New test.
* gcc.target/riscv/rvv/base/pr112431-30.c: New test.
* gcc.target/riscv/rvv/base/pr112431-31.c: New test.
* gcc.target/riscv/rvv/base/pr112431-32.c: New test.
* gcc.target/riscv/rvv/base/pr112431-33.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/base/pr112431-28.c   | 104 ++
 .../gcc.target/riscv/rvv/base/pr112431-29.c   |  68 
 .../gcc.target/riscv/rvv/base/pr112431-30.c   |  51 +
 .../gcc.target/riscv/rvv/base/pr112431-31.c   |  68 
 .../gcc.target/riscv/rvv/base/pr112431-32.c   |  51 +
 .../gcc.target/riscv/rvv/base/pr112431-33.c   |  51 +
 .../gcc.target/riscv/rvv/base/pr112431-34.c   |   2 +-
 7 files changed, 394 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-29.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-30.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-31.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-33.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c
new file mode 100644
index 000..c16cbdfe9f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c
@@ -0,0 +1,104 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+ size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9,
+ size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14,
+ size_t sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
++ sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vuint8m1_t v0 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v1 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v2 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v3 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v4 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v5 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v6 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v7 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v8 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v9 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v10 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v11 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v12 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v13 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v14 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v15 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vint16m2_t vw0 = __riscv_vluxei8_v_i16m2 ((void *) it, v0, vl);
+  vint16m2_t vw1 = __riscv_vluxei8_v_i16m2 ((void *) it, v1, vl);
+  vint16m2_t vw2 = __riscv_vluxei8_v_i16m2 ((void *) it, v2, vl);
+  vint16m2_t vw3 = __riscv_vluxei8_v_i16m2 ((void *) it, v3, vl);
+  vint16m2_t vw4 = __riscv_vluxei8_v_i16m2 ((void *) it, v4, vl);
+  vint16m2_t vw5 = __riscv_vluxei8_v_i16m2 ((void *) it, v5, vl);
+  vint16m2_t vw6 = __riscv_vluxei8_v_i16m2 ((void *) it, v6, vl);
+  vint16m2_t vw7 = __riscv_vluxei8_v_i16m2 ((void *) it, v7, vl);
+  vint16m2_t vw8 = __riscv_vluxei8_v_i16m2 ((void *) it, v8, vl);
+  vint16m2_t vw9 = __riscv_vluxei8_v_i16m2 ((void *) it, v9, vl);
+  vint16m2_t vw10 = __riscv_vluxei8_v_i16m2 ((void *) it, v10, vl);
+   

[PATCH v1] RISC-V: Add xfail test case for indexed load overlap with SRC EEW < DEST EEW

2024-04-22 Thread pan2 . li
From: Pan Li 

We reverted the patch below for register group overlap, so add the
related insn tests and mark them as xfail.  We will remove the xfail
once register overlap is supported in GCC-15.

4418d55bcd1 RISC-V: Support highpart overlap for indexed load with SRC EEW < 
DEST EEW

The following test suites passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-34.c: Remove xfail for vluxei8 check.
* gcc.target/riscv/rvv/base/pr112431-28.c: New test.
* gcc.target/riscv/rvv/base/pr112431-29.c: New test.
* gcc.target/riscv/rvv/base/pr112431-30.c: New test.
* gcc.target/riscv/rvv/base/pr112431-31.c: New test.
* gcc.target/riscv/rvv/base/pr112431-32.c: New test.
* gcc.target/riscv/rvv/base/pr112431-33.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/base/pr112431-28.c   | 104 ++
 .../gcc.target/riscv/rvv/base/pr112431-29.c   |  68 
 .../gcc.target/riscv/rvv/base/pr112431-30.c   |  51 +
 .../gcc.target/riscv/rvv/base/pr112431-31.c   |  68 
 .../gcc.target/riscv/rvv/base/pr112431-32.c   |  51 +
 .../gcc.target/riscv/rvv/base/pr112431-33.c   |  51 +
 .../gcc.target/riscv/rvv/base/pr112431-34.c   |   2 +-
 7 files changed, 394 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-29.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-30.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-31.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-33.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c
new file mode 100644
index 00000000000..c16cbdfe9f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c
@@ -0,0 +1,104 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+ size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9,
+ size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14,
+ size_t sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
++ sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vuint8m1_t v0 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v1 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v2 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v3 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v4 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v5 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v6 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v7 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v8 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v9 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v10 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v11 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v12 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v13 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v14 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v15 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vint16m2_t vw0 = __riscv_vluxei8_v_i16m2 ((void *) it, v0, vl);
+  vint16m2_t vw1 = __riscv_vluxei8_v_i16m2 ((void *) it, v1, vl);
+  vint16m2_t vw2 = __riscv_vluxei8_v_i16m2 ((void *) it, v2, vl);
+  vint16m2_t vw3 = __riscv_vluxei8_v_i16m2 ((void *) it, v3, vl);
+  vint16m2_t vw4 = __riscv_vluxei8_v_i16m2 ((void *) it, v4, vl);
+  vint16m2_t vw5 = __riscv_vluxei8_v_i16m2 ((void *) it, v5, vl);
+  vint16m2_t vw6 = __riscv_vluxei8_v_i16m2 ((void *) it, v6, vl);
+  vint16m2_t vw7 = __riscv_vluxei8_v_i16m2 ((void *) it, v7, vl);
+  vint16m2_t vw8 = __riscv_vluxei8_v_i16m2 ((void *) it, v8, vl);
+  vint16m2_t vw9 = __riscv_vluxei8_v_i16m2 ((void *) it, v9, vl);
+  vint16m2_t vw10 = __riscv_vluxei8_v_i16m2 ((void *) it, v10, vl);
+  vint16m2_t vw11 = __riscv_vluxei8_v_i16m2 ((void *) it, v11, vl);
+  vint16m2_t vw12 = 

[PATCH v1] RISC-V: Add xfail test case for highest-number regno ternary overlap

2024-04-22 Thread pan2 . li
From: Pan Li 

We reverted the patch below for register group overlap, so add the
related insn tests and mark them as xfail.  We will remove the xfail
once register overlap is supported in GCC-15.

27fde325d64 RISC-V: Support highest-number regno overlap for widen ternary

The following test suites passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-37.c: New test.
* gcc.target/riscv/rvv/base/pr112431-38.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/base/pr112431-37.c   | 103 ++
 .../gcc.target/riscv/rvv/base/pr112431-38.c   |  82 ++
 2 files changed, 185 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-38.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c
new file mode 100644
index 00000000000..66e81ea905a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c
@@ -0,0 +1,103 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vint16m2_t result = __riscv_vwmacc_vx_i16m2 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo2 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vint16m4_t result = __riscv_vwmacc_vx_i16m4 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo3 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vint16m8_t result = __riscv_vwmacc_vx_i16m8 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+void
+foo4 (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vint16m2_t result = __riscv_vwmaccus_vx_i16m2 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo5 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vint16m4_t result = __riscv_vwmaccus_vx_i16m4 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo6 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vint16m8_t result = __riscv_vwmaccus_vx_i16m8 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+void
+foo7 (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vuint8m1_t high_ueew8 = __riscv_vreinterpret_v_i8m1_u8m1 (high_eew8);
+  vint16m2_t result = __riscv_vwmaccsu_vx_i16m2 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo8 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vuint8m2_t high_ueew8 = __riscv_vreinterpret_v_i8m2_u8m2 (high_eew8);
+  vint16m4_t result = __riscv_vwmaccsu_vx_i16m4 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo9 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vuint8m4_t high_ueew8 = __riscv_vreinterpret_v_i8m4_u8m4 (high_eew8);
+  vint16m8_t result = __riscv_vwmaccsu_vx_i16m8 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-not {vmv4r} { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-not {vmv8r} { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-38.c 

[PATCH v1] RISC-V: Add xfail test case for widening register overlap of vf4/vf8

2024-04-21 Thread pan2 . li
From: Pan Li 

We reverted the patch below for register group overlap, so add the
related insn tests and mark them as xfail.  We will remove the xfail
once register overlap is supported in GCC-15.

303195e2a6b RISC-V: Support widening register overlap for vf4/vf8

The following test suites passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-16.c: New test.
* gcc.target/riscv/rvv/base/pr112431-17.c: New test.
* gcc.target/riscv/rvv/base/pr112431-18.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/base/pr112431-16.c   | 68 +++
 .../gcc.target/riscv/rvv/base/pr112431-17.c   | 51 ++
 .../gcc.target/riscv/rvv/base/pr112431-18.c   | 51 ++
 3 files changed, 170 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
new file mode 100644
index 00000000000..42d11611d98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+ size_t sum5, size_t sum6, size_t sum7)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vint32m4_t vw0 = __riscv_vsext_vf4_i32m4 (v0, vl);
+  vint32m4_t vw1 = __riscv_vsext_vf4_i32m4 (v1, vl);
+  vint32m4_t vw2 = __riscv_vsext_vf4_i32m4 (v2, vl);
+  vint32m4_t vw3 = __riscv_vsext_vf4_i32m4 (v3, vl);
+  vint32m4_t vw4 = __riscv_vsext_vf4_i32m4 (v4, vl);
+  vint32m4_t vw5 = __riscv_vsext_vf4_i32m4 (v5, vl);
+  vint32m4_t vw6 = __riscv_vsext_vf4_i32m4 (v6, vl);
+  vint32m4_t vw7 = __riscv_vsext_vf4_i32m4 (v7, vl);
+
+  asm volatile("nop" ::: "memory");
+  size_t sum0 = __riscv_vmv_x_s_i32m4_i32 (vw0);
+  size_t sum1 = __riscv_vmv_x_s_i32m4_i32 (vw1);
+  size_t sum2 = __riscv_vmv_x_s_i32m4_i32 (vw2);
+  size_t sum3 = __riscv_vmv_x_s_i32m4_i32 (vw3);
+  size_t sum4 = __riscv_vmv_x_s_i32m4_i32 (vw4);
+  size_t sum5 = __riscv_vmv_x_s_i32m4_i32 (vw5);
+  size_t sum6 = __riscv_vmv_x_s_i32m4_i32 (vw6);
+  size_t sum7 = __riscv_vmv_x_s_i32m4_i32 (vw7);
+
+  sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
+}
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} { xfail riscv*-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
new file mode 100644
index 00000000000..9ecc62e234b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
+{
+  return sum0 + sum1 + sum2 + sum3;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint8m2_t v0 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+  it += vl;
+  vint8m2_t v1 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+  it += vl;
+  vint8m2_t v2 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+  it += vl;
+  vint8m2_t v3 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+  it += vl;
+
+  asm volatile("nop" ::: "memory");
+  vint32m8_t vw0 = __riscv_vsext_vf4_i32m8 (v0, vl);
+  

[PATCH v1] RISC-V: Add xfail test case for highpart register overlap of vx/vf widen

2024-04-20 Thread pan2 . li
From: Pan Li 

We reverted the patch below for register group overlap, so add the
related insn tests and mark them as xfail.  We will remove the xfail
once register overlap is supported in GCC-15.

a23415d7572 RISC-V: Support highpart register overlap for widen vx/vf 
instructions

The following test suites passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-22.c: New test.
* gcc.target/riscv/rvv/base/pr112431-23.c: New test.
* gcc.target/riscv/rvv/base/pr112431-24.c: New test.
* gcc.target/riscv/rvv/base/pr112431-25.c: New test.
* gcc.target/riscv/rvv/base/pr112431-26.c: New test.
* gcc.target/riscv/rvv/base/pr112431-27.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/base/pr112431-22.c   | 188 ++
 .../gcc.target/riscv/rvv/base/pr112431-23.c   | 119 +++
 .../gcc.target/riscv/rvv/base/pr112431-24.c   |  86 
 .../gcc.target/riscv/rvv/base/pr112431-25.c   | 104 ++
 .../gcc.target/riscv/rvv/base/pr112431-26.c   |  68 +++
 .../gcc.target/riscv/rvv/base/pr112431-27.c   |  51 +
 6 files changed, 616 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-22.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-23.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-24.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-25.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-26.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-27.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-22.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-22.c
new file mode 100644
index 00000000000..ac56703c75c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-22.c
@@ -0,0 +1,188 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+ size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9,
+ size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14,
+ size_t sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
++ sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v8 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v9 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v10 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v11 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v12 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v13 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v14 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v15 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vint16m2_t vw0 = __riscv_vwadd_vx_i16m2 (v0, 33, vl);
+  vint16m2_t vw1 = __riscv_vwadd_vx_i16m2 (v1, 33, vl);
+  vint16m2_t vw2 = __riscv_vwadd_vx_i16m2 (v2, 33, vl);
+  vint16m2_t vw3 = __riscv_vwadd_vx_i16m2 (v3, 33, vl);
+  vint16m2_t vw4 = __riscv_vwadd_vx_i16m2 (v4, 33, vl);
+  vint16m2_t vw5 = __riscv_vwadd_vx_i16m2 (v5, 33, vl);
+  vint16m2_t vw6 = __riscv_vwadd_vx_i16m2 (v6, 33, vl);
+  vint16m2_t vw7 = __riscv_vwadd_vx_i16m2 (v7, 33, vl);
+  vint16m2_t vw8 = __riscv_vwadd_vx_i16m2 (v8, 33, vl);
+  vint16m2_t vw9 = __riscv_vwadd_vx_i16m2 (v9, 33, vl);
+  vint16m2_t vw10 = __riscv_vwadd_vx_i16m2 (v10, 33, vl);
+  vint16m2_t vw11 = __riscv_vwadd_vx_i16m2 (v11, 33, vl);
+  vint16m2_t vw12 = __riscv_vwadd_vx_i16m2 (v12, 33, vl);
+  vint16m2_t vw13 = __riscv_vwadd_vx_i16m2 (v13, 33, vl);
+  vint16m2_t vw14 = __riscv_vwadd_vx_i16m2 (v14, 33, vl);
+  vint16m2_t vw15 = __riscv_vwadd_vx_i16m2 (v15, 33, vl);
+
+  asm volatile("nop" ::: "memory");
+

[PATCH v1] RISC-V: Add xfail test case for incorrect overlap on v0

2024-04-20 Thread pan2 . li
From: Pan Li 

We reverted the patch below for register group overlap, so add the
related insn test and mark it as xfail.  We will remove the xfail
once register overlap is supported in GCC-15.

018ba3ac952 RISC-V: Fix overlap group incorrect overlap on v0

The following test suites passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-34.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/base/pr112431-34.c   | 101 ++
 1 file changed, 101 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-34.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-34.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-34.c
new file mode 100644
index 00000000000..286185aa01e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-34.c
@@ -0,0 +1,101 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+ size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9,
+ size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14,
+ size_t sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
++ sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vuint8m1_t v0 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v1 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v2 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v3 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v4 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v5 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v6 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v7 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v8 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v9 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v10 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v11 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v12 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v13 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v14 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v15 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vint16m2_t vw0 = __riscv_vluxei8_v_i16m2 ((void *) it, v0, vl);
+  vint16m2_t vw1 = __riscv_vluxei8_v_i16m2 ((void *) it, v1, vl);
+  vint16m2_t vw2 = __riscv_vluxei8_v_i16m2 ((void *) it, v2, vl);
+  vint16m2_t vw3 = __riscv_vluxei8_v_i16m2 ((void *) it, v3, vl);
+  vint16m2_t vw4 = __riscv_vluxei8_v_i16m2 ((void *) it, v4, vl);
+  vint16m2_t vw5 = __riscv_vluxei8_v_i16m2 ((void *) it, v5, vl);
+  vint16m2_t vw6 = __riscv_vluxei8_v_i16m2 ((void *) it, v6, vl);
+  vint16m2_t vw7 = __riscv_vluxei8_v_i16m2 ((void *) it, v7, vl);
+  vint16m2_t vw8 = __riscv_vluxei8_v_i16m2 ((void *) it, v8, vl);
+  vint16m2_t vw9 = __riscv_vluxei8_v_i16m2 ((void *) it, v9, vl);
+  vint16m2_t vw10 = __riscv_vluxei8_v_i16m2 ((void *) it, v10, vl);
+  vint16m2_t vw11 = __riscv_vluxei8_v_i16m2 ((void *) it, v11, vl);
+  vint16m2_t vw12 = __riscv_vluxei8_v_i16m2 ((void *) it, v12, vl);
+  vint16m2_t vw13 = __riscv_vluxei8_v_i16m2 ((void *) it, v13, vl);
+  vint16m2_t vw14 = __riscv_vluxei8_v_i16m2 ((void *) it, v14, vl);
+  vbool8_t mask = *(vbool8_t*)it;
+  vint16m2_t vw15 = __riscv_vluxei8_v_i16m2_m (mask, (void *) it, v15, vl);
+
+  asm volatile("nop" ::: "memory");
+  size_t sum0 = __riscv_vmv_x_s_i16m2_i16 (vw0);
+  size_t sum1 = __riscv_vmv_x_s_i16m2_i16 (vw1);
+  size_t sum2 = __riscv_vmv_x_s_i16m2_i16 (vw2);
+  size_t sum3 = __riscv_vmv_x_s_i16m2_i16 (vw3);
+  size_t sum4 = __riscv_vmv_x_s_i16m2_i16 (vw4);
+  size_t sum5 = __riscv_vmv_x_s_i16m2_i16 (vw5);
+  size_t sum6 = __riscv_vmv_x_s_i16m2_i16 (vw6);
+  size_t sum7 = __riscv_vmv_x_s_i16m2_i16 (vw7);
+  size_t sum8 = __riscv_vmv_x_s_i16m2_i16 (vw8);
+  size_t sum9 = __riscv_vmv_x_s_i16m2_i16 (vw9);
+  size_t sum10 = __riscv_vmv_x_s_i16m2_i16 (vw10);
+  size_t sum11 = __riscv_vmv_x_s_i16m2_i16 (vw11);
+  size_t sum12 = __riscv_vmv_x_s_i16m2_i16 (vw12);
+  size_t sum13 = __riscv_vmv_x_s_i16m2_i16 (vw13);
+  size_t sum14 = 

[PATCH] RISC-V: Add xfail test case for wv insn highest overlap

2024-04-20 Thread pan2 . li
From: Pan Li 

We reverted the patch below for wv insn overlap, so add the related
wv insn tests and mark them as xfail.  We will remove the xfail
once register overlap is supported in GCC-15.

7e854b58084 RISC-V: Support highest overlap for wv instructions

The following test suites passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c: Xfail csr check.
* gcc.target/riscv/rvv/base/pr112431-39.c: New test.
* gcc.target/riscv/rvv/base/pr112431-40.c: New test.
* gcc.target/riscv/rvv/base/pr112431-41.c: New test.

Signed-off-by: Pan Li 
---
 .../costmodel/riscv/rvv/dynamic-lmul8-11.c|   2 +-
 .../gcc.target/riscv/rvv/base/pr112431-39.c   | 158 ++
 .../gcc.target/riscv/rvv/base/pr112431-40.c   |  94 +++
 .../gcc.target/riscv/rvv/base/pr112431-41.c   |  62 +++
 4 files changed, 315 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-40.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-41.c

diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c
index c9e28251225..5a39f04b140 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c
@@ -40,7 +40,7 @@ void foo2 (int64_t *__restrict a,
 }
 
 /* { dg-final { scan-assembler {e64,m8} } } */
-/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-not {csrr} { xfail riscv*-*-* } } } */
 /* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it 
has unexpected spills" "vect" } } */
 /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
 /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c
new file mode 100644
index 00000000000..770b5411666
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c
@@ -0,0 +1,158 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (void *in, void *out, int n)
+{
+  for (int i = 0; i < n; i++)
+{
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v0 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v0 = __riscv_vwsub_wv_i16m2_tu (v0, v0, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v0, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v1 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v1 = __riscv_vwsub_wv_i16m2_tu (v1, v1, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v1, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v2 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v2 = __riscv_vwsub_wv_i16m2_tu (v2, v2, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v2, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v3 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v3 = __riscv_vwsub_wv_i16m2_tu (v3, v3, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v3, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v4 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v4 = __riscv_vwsub_wv_i16m2_tu (v4, v4, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v4, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v5 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v5 = __riscv_vwsub_wv_i16m2_tu (v5, v5, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v5, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v6 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v6 = __riscv_vwsub_wv_i16m2_tu (v6, v6, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v6, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v7 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v7 = __riscv_vwsub_wv_i16m2_tu (v7, v7, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v7, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v8 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v8 = __riscv_vwsub_wv_i16m2_tu (v8, v8, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v8, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v9 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v9 = __riscv_vwsub_wv_i16m2_tu (v9, v9, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v9, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v10 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v10 = __riscv_vwsub_wv_i16m2_tu (v10, v10, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v10, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  

[PATCH v2] RISC-V: Add xfail test case for wv insn register overlap

2024-04-19 Thread pan2 . li
From: Pan Li 

We reverted the patch below for wv insn overlap, so add the related
wv insn test and mark it as xfail.  We will remove the xfail
once register overlap is supported in GCC-15.

b3b2799b872 RISC-V: Support one more overlap for wv instructions

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-42.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/base/pr112431-42.c   | 30 +++
 1 file changed, 30 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-42.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-42.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-42.c
new file mode 100644
index 00000000000..fa5dac58a20
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-42.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ffast-math" } */
+
+#include 
+
+int64_t
+reduc_plus_int (int *__restrict a, int n)
+{
+  int64_t r = 0;
+  for (int i = 0; i < n; ++i)
+r += a[i];
+  return r;
+}
+
+double
+reduc_plus_float (float *__restrict a, int n)
+{
+  double r = 0;
+  for (int i = 0; i < n; ++i)
+r += a[i];
+  return r;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-times {vwadd\.wv} 1 } } */
+/* { dg-final { scan-assembler-times {vfwadd\.wv} 1 } } */
-- 
2.34.1



[PATCH v1] RISC-V: Add xfail test case for wv insn register overlap

2024-04-19 Thread pan2 . li
From: Pan Li 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-42.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/base/pr112431-42.c   | 30 +++
 1 file changed, 30 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-42.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-42.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-42.c
new file mode 100644
index 000..fa5dac58a20
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-42.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ffast-math" } */
+
+#include 
+
+int64_t
+reduc_plus_int (int *__restrict a, int n)
+{
+  int64_t r = 0;
+  for (int i = 0; i < n; ++i)
+r += a[i];
+  return r;
+}
+
+double
+reduc_plus_float (float *__restrict a, int n)
+{
+  double r = 0;
+  for (int i = 0; i < n; ++i)
+r += a[i];
+  return r;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-times {vwadd\.wv} 1 } } */
+/* { dg-final { scan-assembler-times {vfwadd\.wv} 1 } } */
-- 
2.34.1



[PATCH v1] RISC-V: Revert RVV wv instructions overlap and xfail tests

2024-04-19 Thread pan2 . li
From: Pan Li 

The RVV register overlap depends on both the dest and src operands.
Thus the register filter in the constraint cannot cover the full semantics
of the vector register overlap.

Thus, revert the overlap patches listed below and xfail the related test
cases.  This patch reverts *b3b2799b872*, and the full
picture of the related series is listed below.

[P] b3b2799b872 RISC-V: Support one more overlap for wv instructions
[N] 7e854b58084 RISC-V: Support highest overlap for wv instructions
[N] 018ba3ac952 RISC-V: Fix overlap group incorrect overlap on v0
[N] 27fde325d64 RISC-V: Support highest-number regno overlap for widen ternary
[N] a23415d7572 RISC-V: Support highpart register overlap for widen vx/vf 
instructions
[N] 4418d55bcd1 RISC-V: Support highpart overlap for indexed load with SRC EEW 
< DEST EEW
[N] 303195e2a6b RISC-V: Support widening register overlap for vf4/vf8
[N] 8614cbb2534 RISC-V: Support highpart overlap for floating-point widen 
instructions
[N] e65aaf8efe1 RISC-V: Rename vconstraint into group_overlap
[N] 62685890d88 RISC-V: Support highpart overlap for vext.vf
[N] bdad036da32 RISC-V: Support highpart register overlap for vwcvt
[N] 1a0af6e5a99 RISC-V: Allow dest operand and accumulator operand overlap of 
widen reduction instruction[PR112327]

Indicator:
[D]: Done, aka this patch has been reverted already.
[P]: Patched, aka the revert patch is sent but not merged.
[N]: None, aka not started yet.

The below test suites are passed for this patch.
* The riscv rv64gcv fully regression test.
* The riscv rv64gc fully regression test.

gcc/ChangeLog:

* config/riscv/riscv.md (none,W21,W42,W84,W43,W86,W87,W0): Remove W0.
(none,W21,W42,W84,W43,W86,W87): Ditto.
* config/riscv/vector.md: Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-42.c: Xfail vmv1r asm check.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.md | 14 +---
 gcc/config/riscv/vector.md| 84 +--
 .../gcc.target/riscv/rvv/base/pr112431-42.c   |  2 +-
 3 files changed, 47 insertions(+), 53 deletions(-)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index c2b4323c53a..f0928398698 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -541,7 +541,7 @@ (define_attr "fp_vector_disabled" "no,yes"
 ;; Widening instructions have group-overlap constraints.  Those are only
 ;; valid for certain register-group sizes.  This attribute marks the
 ;; alternatives not matching the required register-group size as disabled.
-(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0"
+(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87"
   (const_string "none"))
 
 (define_attr "group_overlap_valid" "no,yes"
@@ -562,9 +562,9 @@ (define_attr "group_overlap_valid" "no,yes"
 
  ;; According to RVV ISA:
  ;; The destination EEW is greater than the source EEW, the source 
EMUL is at least 1,
- ;; and the overlap is in the highest-numbered part of the destination 
register group
- ;; (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, 
v2, or v4 is not).
- ;; So the source operand should have LMUL >= 1.
+;; and the overlap is in the highest-numbered part of the destination 
register group
+;; (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, 
v2, or v4 is not).
+;; So the source operand should have LMUL >= 1.
  (and (eq_attr "group_overlap" "W43")
  (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 4
   && riscv_get_v_regno_alignment (GET_MODE 
(operands[3])) >= 1"))
@@ -574,12 +574,6 @@ (define_attr "group_overlap_valid" "no,yes"
  (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 8
   && riscv_get_v_regno_alignment (GET_MODE 
(operands[3])) >= 1"))
 (const_string "no")
-
- ;; W21 supports highest-number overlap for source LMUL = 1.
- ;; For 'wv' variant, we can also allow wide source operand overlaps 
dest operand.
- (and (eq_attr "group_overlap" "W0")
- (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
> 1"))
-(const_string "no")
 ]
(const_string "yes")))
 
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 8b1c24c5d79..8298a72b771 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3842,48 +3842,48 @@ (define_insn 
"@pred_dual_widen__scal
(set_attr "group_overlap" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
 
 (define_insn "@pred_single_widen_sub"
-  [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, 
vd, vr, vd, vr, vd, vr, vd, vr, vd, vr,  ,  , ?, ?")
+  [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd, 
vr, vd, vr, vd, vr, vd, vr, vd, vr, ?, ?")

[committed] RISC-V: Fix Werror=sign-compare in riscv_validate_vector_type

2024-04-12 Thread pan2 . li
From: Pan Li 

This patch would like to fix the Werror=sign-compare similar to below:

gcc/config/riscv/riscv.cc: In function ‘void
riscv_validate_vector_type(const_tree, const char*)’:
gcc/config/riscv/riscv.cc:5614:23: error: comparison of integer
expressions of different signedness: ‘int’ and ‘unsigned int’
[-Werror=sign-compare]
 5614 |   if (TARGET_MIN_VLEN < required_min_vlen)

The TARGET_MIN_VLEN is *int* by default but the required_min_vlen
returned from riscv_vector_required_min_vlen is **unsigned**.  Thus,
adjust the related function and reference variable(s) to int type
to avoid such kind of Werror.

The below test suite is passed for this patch.
* The rv64gcv fully regression tests.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_vector_float_type_p): Take int
as the return value instead of unsigned.
(riscv_vector_element_bitsize): Ditto.
(riscv_vector_required_min_vlen): Ditto.
(riscv_validate_vector_type): Take int type for local variable(s).

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index e5f00806bb9..74445bc977c 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -5499,7 +5499,7 @@ riscv_vector_float_type_p (const_tree type)
   return strstr (name, "vfloat") != NULL;
 }
 
-static unsigned
+static int
 riscv_vector_element_bitsize (const_tree type)
 {
   machine_mode mode = TYPE_MODE (type);
@@ -5523,7 +5523,7 @@ riscv_vector_element_bitsize (const_tree type)
   gcc_unreachable ();
 }
 
-static unsigned
+static int
 riscv_vector_required_min_vlen (const_tree type)
 {
   machine_mode mode = TYPE_MODE (type);
@@ -5531,7 +5531,7 @@ riscv_vector_required_min_vlen (const_tree type)
   if (riscv_v_ext_mode_p (mode))
 return TARGET_MIN_VLEN;
 
-  unsigned element_bitsize = riscv_vector_element_bitsize (type);
+  int element_bitsize = riscv_vector_element_bitsize (type);
   const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
 
   if (strstr (name, "bool64") != NULL)
@@ -5569,7 +5569,7 @@ riscv_validate_vector_type (const_tree type, const char 
*hint)
   return;
 }
 
-  unsigned element_bitsize = riscv_vector_element_bitsize (type);
+  int element_bitsize = riscv_vector_element_bitsize (type);
   bool int_type_p = riscv_vector_int_type_p (type);
 
   if (int_type_p && element_bitsize == 64
@@ -5609,7 +5609,7 @@ riscv_validate_vector_type (const_tree type, const char 
*hint)
   return;
 }
 
-  unsigned required_min_vlen = riscv_vector_required_min_vlen (type);
+  int required_min_vlen = riscv_vector_required_min_vlen (type);
 
   if (TARGET_MIN_VLEN < required_min_vlen)
 {
-- 
2.34.1



[PATCH v1] RISC-V: Bugfix ICE non-vector in TARGET_FUNCTION_VALUE_REGNO_P

2024-04-12 Thread pan2 . li
From: Pan Li 

This patch would like to fix one ICE when vector is not enabled
in the hook TARGET_FUNCTION_VALUE_REGNO_P implementation.  The vector
regno is available if and only if TARGET_VECTOR is true.  The
previous implementation missed this condition, which resulted in an ICE
when building with rv64gc, i.e. without vector.

PR target/114639

The below test suite is passed for this patch.

* The rv64gcv fully regression tests.
* The rv64gc fully regression tests.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_function_value_regno_p): Add
TARGET_VECTOR predicate for V_RETURN regno.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr114639-1.c: New test.
* gcc.target/riscv/pr114639-2.c: New test.
* gcc.target/riscv/pr114639-3.c: New test.
* gcc.target/riscv/pr114639-4.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc   |  2 +-
 gcc/testsuite/gcc.target/riscv/pr114639-1.c | 11 +++
 gcc/testsuite/gcc.target/riscv/pr114639-2.c | 11 +++
 gcc/testsuite/gcc.target/riscv/pr114639-3.c | 11 +++
 gcc/testsuite/gcc.target/riscv/pr114639-4.c | 11 +++
 5 files changed, 45 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr114639-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr114639-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr114639-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr114639-4.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 91f017dd52a..e5f00806bb9 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11008,7 +11008,7 @@ riscv_function_value_regno_p (const unsigned regno)
   if (FP_RETURN_FIRST <= regno && regno <= FP_RETURN_LAST)
 return true;
 
-  if (regno == V_RETURN)
+  if (TARGET_VECTOR && regno == V_RETURN)
 return true;
 
   return false;
diff --git a/gcc/testsuite/gcc.target/riscv/pr114639-1.c 
b/gcc/testsuite/gcc.target/riscv/pr114639-1.c
new file mode 100644
index 000..f41723193a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr114639-1.c
@@ -0,0 +1,11 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -std=gnu89 -O3" } */
+
+g (a, b) {}
+
+f (xx)
+ void* xx;
+{
+  __builtin_apply ((void*)g, xx, 200);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/pr114639-2.c 
b/gcc/testsuite/gcc.target/riscv/pr114639-2.c
new file mode 100644
index 000..0c402c4b254
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr114639-2.c
@@ -0,0 +1,11 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64imac -mabi=lp64 -std=gnu89 -O3" } */
+
+g (a, b) {}
+
+f (xx)
+ void* xx;
+{
+  __builtin_apply ((void*)g, xx, 200);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/pr114639-3.c 
b/gcc/testsuite/gcc.target/riscv/pr114639-3.c
new file mode 100644
index 000..ffb0d6d162d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr114639-3.c
@@ -0,0 +1,11 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc -mabi=ilp32d -std=gnu89 -O3" } */
+
+g (a, b) {}
+
+f (xx)
+ void* xx;
+{
+  __builtin_apply ((void*)g, xx, 200);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/pr114639-4.c 
b/gcc/testsuite/gcc.target/riscv/pr114639-4.c
new file mode 100644
index 000..a6e229101ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr114639-4.c
@@ -0,0 +1,11 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv32imac -mabi=ilp32 -std=gnu89 -O3" } */
+
+g (a, b) {}
+
+f (xx)
+ void* xx;
+{
+  __builtin_apply ((void*)g, xx, 200);
+}
-- 
2.34.1



[PATCH v1] RISC-V: Remove -Wno-psabi for test build option [NFC]

2024-04-10 Thread pan2 . li
From: Pan Li 

Just noticed that some test cases still have the -Wno-psabi option,
which is deprecated now.  Remove it from all riscv test cases.

The below tests passed for this patch.
* The riscv rvv regression test.

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/pr109244.C: Remove deprecated
-Wno-psabi option.
* g++.target/riscv/rvv/base/pr109535.C: Ditto.
* gcc.target/riscv/rvv/autovec/fixed-vlmax-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/consecutive-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/consecutive-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/consecutive_run-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/consecutive_run-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1u.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2u.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3u.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4u.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-run.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-runu.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-run.c: Ditto.

Signed-off-by: Pan Li 
---
 gcc/testsuite/g++.target/riscv/rvv/base/pr109244.C  | 2 +-
 gcc/testsuite/g++.target/riscv/rvv/base/pr109535.C  | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/fixed-vlmax-1.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/compress-1.c | 2 +-
 

[PATCH v1] RISC-V: Bugfix ICE for the vector return arg in mode switch

2024-04-10 Thread pan2 . li
From: Pan Li 

This patch would like to fix an ICE in the mode_sw pass for the example code below.

during RTL pass: mode_sw
test.c: In function ‘vbool16_t j(vuint64m4_t)’:
test.c:15:1: internal compiler error: in create_pre_exit, at
mode-switching.cc:451
   15 | }
  | ^
0x3978f12 create_pre_exit
__RISCV_BUILD__/../gcc/mode-switching.cc:451
0x3979e9e optimize_mode_switching
__RISCV_BUILD__/../gcc/mode-switching.cc:849
0x397b9bc execute
__RISCV_BUILD__/../gcc/mode-switching.cc:1324

extern size_t get_vl ();

vbool16_t
test (vuint64m4_t a)
{
  unsigned long b;
  return __riscv_vmsne_vx_u64m4_b16 (a, b, get_vl ());
}

create_pre_exit expects to find a return value copy.  If there is
none, the assert requires a known reason for its absence, and no such
reason applies to the above sample code when the vector calling
convention is enabled by default.  This patch overrides
TARGET_FUNCTION_VALUE_REGNO_P for the vector register, so that we have
hard_regno_nregs for copy_num, aka there is a return value copy.

As a side-effect of allowing vector in TARGET_FUNCTION_VALUE_REGNO_P,
TARGET_GET_RAW_RESULT_MODE will see a vector mode, which is sizeless and
cannot be converted to fixed_size_mode.  Thus override the hook
TARGET_GET_RAW_RESULT_MODE and return VOIDmode when the mode of the
regno is not a fixed_size_mode.

The below tests are passed for this patch.
* The fully riscv regression tests.
* The reproducing test in bugzilla PR114639.

PR target/114639

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_function_value_regno_p): New func
impl for hook TARGET_FUNCTION_VALUE_REGNO_P.
(riscv_get_raw_result_mode): New func impl for hook
TARGET_GET_RAW_RESULT_MODE.
(TARGET_FUNCTION_VALUE_REGNO_P): Impl the hook.
(TARGET_GET_RAW_RESULT_MODE): Ditto.
* config/riscv/riscv.h (V_RETURN): New macro for vector return.
(GP_RETURN_FIRST): New macro for the first GPR in return.
(GP_RETURN_LAST): New macro for the last GPR in return.
(FP_RETURN_FIRST): Ditto but for FPR.
(FP_RETURN_LAST): Ditto.
(FUNCTION_VALUE_REGNO_P): Remove as deprecated and replace by
TARGET_FUNCTION_VALUE_REGNO_P.

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/pr114639-1.C: New test.
* gcc.target/riscv/rvv/base/pr114639-1.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 34 +++
 gcc/config/riscv/riscv.h  |  8 +++--
 .../g++.target/riscv/rvv/base/pr114639-1.C| 25 ++
 .../gcc.target/riscv/rvv/base/pr114639-1.c| 14 
 4 files changed, 79 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/pr114639-1.C
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr114639-1.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 00defa69fd8..91f017dd52a 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -10997,6 +10997,34 @@ riscv_vector_mode_supported_any_target_p (machine_mode)
   return true;
 }
 
+/* Implements hook TARGET_FUNCTION_VALUE_REGNO_P.  */
+
+static bool
+riscv_function_value_regno_p (const unsigned regno)
+{
+  if (GP_RETURN_FIRST <= regno && regno <= GP_RETURN_LAST)
+return true;
+
+  if (FP_RETURN_FIRST <= regno && regno <= FP_RETURN_LAST)
+return true;
+
+  if (regno == V_RETURN)
+return true;
+
+  return false;
+}
+
+/* Implements hook TARGET_GET_RAW_RESULT_MODE.  */
+
+static fixed_size_mode
+riscv_get_raw_result_mode (int regno)
+{
+  if (!is_a  (reg_raw_mode[regno]))
+return as_a  (VOIDmode);
+
+  return default_get_reg_raw_mode (regno);
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -11343,6 +11371,12 @@ riscv_vector_mode_supported_any_target_p (machine_mode)
 #undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P
 #define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P 
riscv_vector_mode_supported_any_target_p
 
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P riscv_function_value_regno_p
+
+#undef TARGET_GET_RAW_RESULT_MODE
+#define TARGET_GET_RAW_RESULT_MODE riscv_get_raw_result_mode
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-riscv.h"
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 269b8c1f076..7797e67317a 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -683,6 +683,12 @@ enum reg_class
 
 #define GP_RETURN GP_ARG_FIRST
 #define FP_RETURN (UNITS_PER_FP_ARG == 0 ? GP_RETURN : FP_ARG_FIRST)
+#define V_RETURN  V_REG_FIRST
+
+#define GP_RETURN_FIRST GP_ARG_FIRST
+#define GP_RETURN_LAST  GP_ARG_FIRST + 1
+#define FP_RETURN_FIRST FP_RETURN
+#define FP_RETURN_LAST  FP_RETURN + 1
 
 #define MAX_ARGS_IN_REGISTERS \
   (riscv_abi == ABI_ILP32E || riscv_abi == ABI_LP64E \
@@ -714,8 +720,6 @@ enum reg_class
 #define FUNCTION_VALUE(VALTYPE, FUNC) \
   riscv_function_value 

[PATCH v1] RISC-V: Refine the error msg for RVV intrinisc required ext

2024-04-08 Thread pan2 . li
From: Pan Li 

The RVV intrinsic API has various required extensions, which come from
either the march or the target attribute.  It reports an error message
similar to the one below:

built-in function '__riscv_vsetvl_e8m4\(vl\)' requires the V ISA extension

However, it is not accurate as we have many additional sub extensions
besides the v extension.  For example, zvbb, zvbk, zvbc ... etc.  This patch
would like to refine the error message with a friendly hint for the
required extension.  For example as below:

vuint64m1_t
__attribute__((target("arch=+v")))
test_1 (vuint64m1_t op_1, vuint64m1_t op_2, size_t vl)
{
  return __riscv_vclmul_vv_u64m1 (op_1, op_2, vl);
}

When compile with march=rv64gc and target arch=+v, we will have error
message as below:

error: built-in function '__riscv_vclmul_vv_u64m1(op_1,  op_2,  vl)'
  requires the 'zvbc' ISA extension

Then the end-user will easily get the point that the *zvbc* extension is
missing for the intrinsic API.

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-shapes.cc (build_one): Pass
required_ext arg when invoke add function.
(build_th_loadstore): Ditto.
(struct vcreate_def): Ditto.
(struct read_vl_def): Ditto.
(struct vlenb_def): Ditto.
* config/riscv/riscv-vector-builtins.cc 
(function_builder::add_function):
Introduce new arg required_ext to fill in the register func.
(function_builder::add_unique_function): Ditto.
(function_builder::add_overloaded_function): Ditto.
(expand_builtin): Leverage required_extensions_specified to
check if the required extension is provided.
* config/riscv/riscv-vector-builtins.h (reqired_ext_to_isa_name): New
func impl to convert the required_ext enum to the extension name.
(required_extensions_specified): New func impl to predicate if
the required extension is well feeded.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c: 
Adjust
the error message for v extension.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c: 
Ditto.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-1.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-10.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-2.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-3.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-4.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-5.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-6.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-7.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-8.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-9.c: New test.

Signed-off-by: Pan Li 
---
 .../riscv/riscv-vector-builtins-shapes.cc | 18 +++--
 gcc/config/riscv/riscv-vector-builtins.cc | 23 --
 gcc/config/riscv/riscv-vector-builtins.h  | 75 ++-
 .../riscv/rvv/base/intrinsic_required_ext-1.c | 10 +++
 .../rvv/base/intrinsic_required_ext-10.c  | 11 +++
 .../riscv/rvv/base/intrinsic_required_ext-2.c | 11 +++
 .../riscv/rvv/base/intrinsic_required_ext-3.c | 11 +++
 .../riscv/rvv/base/intrinsic_required_ext-4.c | 11 +++
 .../riscv/rvv/base/intrinsic_required_ext-5.c | 11 +++
 .../riscv/rvv/base/intrinsic_required_ext-6.c | 11 +++
 .../riscv/rvv/base/intrinsic_required_ext-7.c | 11 +++
 .../riscv/rvv/base/intrinsic_required_ext-8.c | 11 +++
 .../riscv/rvv/base/intrinsic_required_ext-9.c | 11 +++
 .../target_attribute_v_with_intrinsic-7.c |  2 +-
 .../target_attribute_v_with_intrinsic-8.c |  2 +-
 15 files changed, 210 insertions(+), 19 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/intrinsic_required_ext-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/intrinsic_required_ext-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/intrinsic_required_ext-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/intrinsic_required_ext-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/intrinsic_required_ext-4.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/intrinsic_required_ext-5.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/intrinsic_required_ext-6.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/intrinsic_required_ext-7.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/intrinsic_required_ext-8.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/intrinsic_required_ext-9.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc 
b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
index c5ffcc1f2c4..7f983e82370 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
@@ -72,9 +72,10 @@ build_one 

[PATCH v2] Internal-fn: Introduce new internal function SAT_ADD

2024-04-07 Thread pan2 . li
From: Pan Li 

Update in v2:
* Fix one failure for x86 bootstrap.

Original log:

This patch would like to add the middle-end representation for the
saturating add, i.e. set the result of the add to the max value on overflow.
It takes a pattern similar to the one below.

SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))

Take uint8_t as example, we will have:

* SAT_ADD (1, 254)   => 255.
* SAT_ADD (1, 255)   => 255.
* SAT_ADD (2, 255)   => 255.
* SAT_ADD (255, 255) => 255.

The patch also implement the SAT_ADD in the riscv backend as
the sample for both the scalar and vector.  Given below example:

uint64_t sat_add_u64 (uint64_t x, uint64_t y)
{
  return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
}

Before this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  _Bool _2;
  long unsigned int _3;
  long unsigned int _4;
  uint64_t _7;
  long unsigned int _10;
  __complex__ long unsigned int _11;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
  _1 = REALPART_EXPR <_11>;
  _10 = IMAGPART_EXPR <_11>;
  _2 = _10 != 0;
  _3 = (long unsigned int) _2;
  _4 = -_3;
  _7 = _1 | _4;
  return _7;
;;succ:   EXIT

}

After this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  uint64_t _7;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
  return _7;
;;succ:   EXIT
}

For vectorization, we leverage the existing vect pattern recog to find
the pattern similar to the scalar one and let the vectorizer perform
the rest for the standard name usadd<mode>3 in vector mode.
The riscv vector backend has the insn "Vector Single-Width Saturating
Add and Subtract", which can be leveraged when expanding usadd<mode>3
in vector mode.  For example:

void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
}

Before this patch:
void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _80 = .SELECT_VL (ivtmp_78, POLY_INT_CST [2, 2]);
  ivtmp_58 = _80 * 8;
  vect__4.7_61 = .MASK_LEN_LOAD (vectp_x.5_59, 64B, { -1, ... }, _80, 0);
  vect__6.10_65 = .MASK_LEN_LOAD (vectp_y.8_63, 64B, { -1, ... }, _80, 0);
  vect__7.11_66 = vect__4.7_61 + vect__6.10_65;
  mask__8.12_67 = vect__4.7_61 > vect__7.11_66;
  vect__12.15_72 = .VCOND_MASK (mask__8.12_67, { 18446744073709551615, ... }, 
vect__7.11_66);
  .MASK_LEN_STORE (vectp_out.16_74, 64B, { -1, ... }, _80, 0, vect__12.15_72);
  vectp_x.5_60 = vectp_x.5_59 + ivtmp_58;
  vectp_y.8_64 = vectp_y.8_63 + ivtmp_58;
  vectp_out.16_75 = vectp_out.16_74 + ivtmp_58;
  ivtmp_79 = ivtmp_78 - _80;
  ...
}

vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v0,0(a1)
  vle64.v v1,0(a2)
  slli    a4,a5,3
  sub a3,a3,a5
  add a1,a1,a4
  add a2,a2,a4
  vadd.vv v1,v0,v1
  vmsgtu.vv   v0,v0,v1
  vmerge.vim  v1,v1,-1,v0
  vse64.v v1,0(a0)
  ...

After this patch:
void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _62 = .SELECT_VL (ivtmp_60, POLY_INT_CST [2, 2]);
  ivtmp_46 = _62 * 8;
  vect__4.7_49 = .MASK_LEN_LOAD (vectp_x.5_47, 64B, { -1, ... }, _62, 0);
  vect__6.10_53 = .MASK_LEN_LOAD (vectp_y.8_51, 64B, { -1, ... }, _62, 0);
  vect__12.11_54 = .SAT_ADD (vect__4.7_49, vect__6.10_53);
  .MASK_LEN_STORE (vectp_out.12_56, 64B, { -1, ... }, _62, 0, vect__12.11_54);
  ...
}

vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v1,0(a1)
  vle64.v v2,0(a2)
  slli a4,a5,3
  sub a3,a3,a5
  add a1,a1,a4
  add a2,a2,a4
  vsaddu.vv   v1,v1,v2
  vse64.v v1,0(a0)
  ...

To limit the patch size for review, only the unsigned version of
usadd<mode>3 is involved here. The signed version will be covered
in follow-up patch(es).

The below test suites are passed for this patch.
* The riscv fully regression tests.
* The aarch64 fully regression tests.
* The x86 bootstrap tests.
* The x86 fully regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* config/riscv/autovec.md (usadd3): New pattern expand
for unsigned SAT_ADD vector.
* config/riscv/riscv-protos.h (riscv_expand_usadd): New func
decl to expand usadd3 pattern.
(expand_vec_usadd): Ditto but for vector.
* config/riscv/riscv-v.cc (emit_vec_saddu): New func impl to
emit the vsadd insn.
(expand_vec_usadd): New func impl to expand usadd3 for
vector.
* config/riscv/riscv.cc (riscv_expand_usadd): New func impl
to expand usadd3 for scalar.
* config/riscv/riscv.md (usadd3): New pattern expand
for unsigned SAT_ADD scalar.
* config/riscv/vector.md: Allow VLS mode for vsaddu.
* internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD.
* internal-fn.def (SAT_ADD): Add new signed optab SAT_ADD.
* match.pd: Add unsigned SAT_ADD 

[PATCH v1] Internal-fn: Introduce new internal function SAT_ADD

2024-04-06 Thread pan2 . li
From: Pan Li 

This patch would like to add the middle-end representation for the
saturation add, i.e. set the result of the add to the max value on
overflow.  It will take a pattern similar to the one below.

SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))

Take uint8_t as example, we will have:

* SAT_ADD (1, 254)   => 255.
* SAT_ADD (1, 255)   => 255.
* SAT_ADD (2, 255)   => 255.
* SAT_ADD (255, 255) => 255.

The patch also implement the SAT_ADD in the riscv backend as
the sample for both the scalar and vector.  Given below example:

uint64_t sat_add_u64 (uint64_t x, uint64_t y)
{
  return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
}

Before this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  _Bool _2;
  long unsigned int _3;
  long unsigned int _4;
  uint64_t _7;
  long unsigned int _10;
  __complex__ long unsigned int _11;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
  _1 = REALPART_EXPR <_11>;
  _10 = IMAGPART_EXPR <_11>;
  _2 = _10 != 0;
  _3 = (long unsigned int) _2;
  _4 = -_3;
  _7 = _1 | _4;
  return _7;
;;succ:   EXIT

}

After this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  uint64_t _7;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
  return _7;
;;succ:   EXIT
}

For vectorization, we leverage the existing vect pattern recog to find
the pattern similar to the scalar one and let the vectorizer perform
the rest for the standard name usadd<mode>3 in vector mode.
The riscv vector backend has the insn "Vector Single-Width Saturating
Add and Subtract", which can be leveraged when expanding usadd<mode>3
in vector mode.  For example:

void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
    out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
}

Before this patch:
void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _80 = .SELECT_VL (ivtmp_78, POLY_INT_CST [2, 2]);
  ivtmp_58 = _80 * 8;
  vect__4.7_61 = .MASK_LEN_LOAD (vectp_x.5_59, 64B, { -1, ... }, _80, 0);
  vect__6.10_65 = .MASK_LEN_LOAD (vectp_y.8_63, 64B, { -1, ... }, _80, 0);
  vect__7.11_66 = vect__4.7_61 + vect__6.10_65;
  mask__8.12_67 = vect__4.7_61 > vect__7.11_66;
  vect__12.15_72 = .VCOND_MASK (mask__8.12_67, { 18446744073709551615, ... }, 
vect__7.11_66);
  .MASK_LEN_STORE (vectp_out.16_74, 64B, { -1, ... }, _80, 0, vect__12.15_72);
  vectp_x.5_60 = vectp_x.5_59 + ivtmp_58;
  vectp_y.8_64 = vectp_y.8_63 + ivtmp_58;
  vectp_out.16_75 = vectp_out.16_74 + ivtmp_58;
  ivtmp_79 = ivtmp_78 - _80;
  ...
}

vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v0,0(a1)
  vle64.v v1,0(a2)
  slli a4,a5,3
  sub a3,a3,a5
  add a1,a1,a4
  add a2,a2,a4
  vadd.vv v1,v0,v1
  vmsgtu.vv   v0,v0,v1
  vmerge.vim  v1,v1,-1,v0
  vse64.v v1,0(a0)
  ...

After this patch:
void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _62 = .SELECT_VL (ivtmp_60, POLY_INT_CST [2, 2]);
  ivtmp_46 = _62 * 8;
  vect__4.7_49 = .MASK_LEN_LOAD (vectp_x.5_47, 64B, { -1, ... }, _62, 0);
  vect__6.10_53 = .MASK_LEN_LOAD (vectp_y.8_51, 64B, { -1, ... }, _62, 0);
  vect__12.11_54 = .SAT_ADD (vect__4.7_49, vect__6.10_53);
  .MASK_LEN_STORE (vectp_out.12_56, 64B, { -1, ... }, _62, 0, vect__12.11_54);
  ...
}

vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v1,0(a1)
  vle64.v v2,0(a2)
  slli a4,a5,3
  sub a3,a3,a5
  add a1,a1,a4
  add a2,a2,a4
  vsaddu.vv   v1,v1,v2
  vse64.v v1,0(a0)
  ...

To limit the patch size for review, only the unsigned version of
usadd<mode>3 is involved here. The signed version will be covered
in follow-up patch(es).

The below test suites are passed for this patch.
* The riscv fully regression tests.
* The aarch64 fully regression tests.
* The x86 fully regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* config/riscv/autovec.md (usadd3): New pattern expand
for unsigned SAT_ADD vector.
* config/riscv/riscv-protos.h (riscv_expand_usadd): New func
decl to expand usadd3 pattern.
(expand_vec_usadd): Ditto but for vector.
* config/riscv/riscv-v.cc (emit_vec_saddu): New func impl to
emit the vsadd insn.
(expand_vec_usadd): New func impl to expand usadd3 for
vector.
* config/riscv/riscv.cc (riscv_expand_usadd): New func impl
to expand usadd3 for scalar.
* config/riscv/riscv.md (usadd3): New pattern expand
for unsigned SAT_ADD scalar.
* config/riscv/vector.md: Allow VLS mode for vsaddu.
* internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD.
* internal-fn.def (SAT_ADD): Add new signed optab SAT_ADD.
* match.pd: Add unsigned SAT_ADD match and simplify.
* optabs.def (OPTAB_NL): Remove fixed-point limitation for us/ssadd.
   

[PATCH] RISC-V: Fix one unused variable in riscv_subset_list::parse

2024-03-30 Thread pan2 . li
From: Pan Li 

This patch would like to fix one unused variable as below:

../../gcc/common/config/riscv/riscv-common.cc: In static member function
'static riscv_subset_list* riscv_subset_list::parse(const char*, location_t)':
../../gcc/common/config/riscv/riscv-common.cc:1501:19: error: unused variable 
'itr'
  [-Werror=unused-variable]
 1501 |   riscv_subset_t *itr;

The code that consumed the variable was removed previously, but the
variable declaration itself was left behind.  Thus, we have an unused
variable here.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (riscv_subset_list::parse):
Remove unused var decl.

Signed-off-by: Pan Li 
---
 gcc/common/config/riscv/riscv-common.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 7095f303cbb..43b7549e3ec 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -1498,7 +1498,6 @@ riscv_subset_list::parse (const char *arch, location_t 
loc)
 return NULL;
 
   riscv_subset_list *subset_list = new riscv_subset_list (arch, loc);
-  riscv_subset_t *itr;
   const char *p = arch;
   p = subset_list->parse_base_ext (p);
   if (p == NULL)
-- 
2.34.1



[PATCH] RISC-V: Fix misspelled term builtin in error message

2024-03-30 Thread pan2 . li
From: Pan Li 

This patch would like to fix below misspelled term in error message.

../../gcc/config/riscv/riscv-vector-builtins.cc:4592:16: error:
misspelled term 'builtin function' in format; use 'built-in function' instead 
[-Werror=format-diag]
 4592 |   "builtin function %qE requires the V ISA extension", exp);

The below tests are passed for this patch.
* The riscv regression test on rvv.exp and riscv.exp.

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins.cc (expand_builtin): Take
the term built-in over builtin.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c:
Adjust test dg-error.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c:
Ditto.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-vector-builtins.cc   | 2 +-
 .../riscv/rvv/base/target_attribute_v_with_intrinsic-7.c| 2 +-
 .../riscv/rvv/base/target_attribute_v_with_intrinsic-8.c| 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index e07373d8b57..db9246eed2d 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -4589,7 +4589,7 @@ expand_builtin (unsigned int code, tree exp, rtx target)
 
   if (!TARGET_VECTOR)
 error_at (EXPR_LOCATION (exp),
- "builtin function %qE requires the V ISA extension", exp);
+ "built-in function %qE requires the V ISA extension", exp);
 
   return function_expander (rfn.instance, rfn.decl, exp, target).expand ();
 }
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c
index 520b2e59fae..a4cd67f4f95 100644
--- 
a/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c
@@ -5,5 +5,5 @@
 
 size_t test_1 (size_t vl)
 {
-  return __riscv_vsetvl_e8m4 (vl); /* { dg-error {builtin function 
'__riscv_vsetvl_e8m4\(vl\)' requires the V ISA extension} } */
+  return __riscv_vsetvl_e8m4 (vl); /* { dg-error {built-in function 
'__riscv_vsetvl_e8m4\(vl\)' requires the V ISA extension} } */
 }
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c
index 9032d9d0b43..06ed9a9eddc 100644
--- 
a/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c
@@ -19,5 +19,5 @@ test_2 ()
 size_t
 test_3 (size_t vl)
 {
-  return __riscv_vsetvl_e8m4 (vl); /* { dg-error {builtin function 
'__riscv_vsetvl_e8m4\(vl\)' requires the V ISA extension} } */
+  return __riscv_vsetvl_e8m4 (vl); /* { dg-error {built-in function 
'__riscv_vsetvl_e8m4\(vl\)' requires the V ISA extension} } */
 }
-- 
2.34.1



[PATCH v1] RISC-V: Allow RVV intrinsic for more function target

2024-03-26 Thread pan2 . li
From: Pan Li 

Previously, we allowed the target(("arch=+v")) for a function with
rv64gc build.  This patch would like to support more arch options as
below:
* zve32x
* zve32f
* zve64x
* zve64f
* zve64d
* zvfhmin
* zvfh

For example, we have sample code as below.
vfloat32m1_t
__attribute__((target("arch=+zve64f")))
test_9 (vfloat32m1_t a, vfloat32m1_t b, size_t vl)
{
  return __riscv_vfadd_vv_f32m1 (a, b, vl);
}

It will generate the asm code below when built with -O3 -march=rv64gc:
test_9:
vsetvli zero,a0,e32,m1,ta,ma
vfadd.vv v8,v8,v9
ret

Meanwhile, this patch introduces more error handling for the target
attribute.  Using arch=+zve32x with vfloat32m1_t produces the error message
"'vfloat32m1_t' requires the zve32f, zve64f or zve64d ISA extension",
and using arch=+zve32f with vfloat16m1_t produces the error message
"'vfloat16m1_t' requires the zvfhmin or zvfh ISA extension".

Below test are passed for this patch:
* The riscv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_pragma_intrinsic): Add INT and
FP vector element flags, invoke override option and mode adjust.
* config/riscv/riscv-protos.h (riscv_option_override): New extern
func decl.
* config/riscv/riscv-vector-builtins.cc (expand_builtin): Return
target rtx after error_at.
* config/riscv/riscv.cc (riscv_vector_int_type_p): New predicate
func to tell one tree type is integer or not.
(riscv_vector_float_type_p): New predicate func to tell one tree
type is float or not.
(riscv_vector_element_bitsize): New func to get the element bitsize
of a vector tree type.
(riscv_validate_vector_type): New func to validate the tree type
is valid on flags.
(riscv_return_value_is_vector_type_p): Leverage the func
riscv_validate_vector_type to do the tree type validation.
(riscv_arguments_is_vector_type_p): Ditto.
(riscv_override_options_internal): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-10.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-11.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-12.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-13.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-14.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-15.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-16.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-17.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-18.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-19.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-20.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-21.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-22.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-23.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-24.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-25.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-26.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-27.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-28.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-29.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-9.c: New 
test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-c.cc   |  30 +-
 gcc/config/riscv/riscv-protos.h   |   1 +
 gcc/config/riscv/riscv-vector-builtins.cc |   7 +-
 gcc/config/riscv/riscv.cc | 101 --
 .../target_attribute_v_with_intrinsic-10.c|  12 +++
 .../target_attribute_v_with_intrinsic-11.c|  26 +
 .../target_attribute_v_with_intrinsic-12.c|  33 ++
 .../target_attribute_v_with_intrinsic-13.c|  33 ++
 .../target_attribute_v_with_intrinsic-14.c|  40 +++
 .../target_attribute_v_with_intrinsic-15.c|  47 
 .../target_attribute_v_with_intrinsic-16.c|  12 +++
 .../target_attribute_v_with_intrinsic-17.c|  13 +++
 .../target_attribute_v_with_intrinsic-18.c|  13 +++
 .../target_attribute_v_with_intrinsic-19.c|  13 +++
 .../target_attribute_v_with_intrinsic-20.c|  13 +++
 .../target_attribute_v_with_intrinsic-21.c|  13 +++
 .../target_attribute_v_with_intrinsic-22.c|  13 +++
 .../target_attribute_v_with_intrinsic-23.c|  13 +++
 .../target_attribute_v_with_intrinsic-24.c  

[PATCH v1] RISC-V: Allow RVV intrinsic when function target("arch=+v")

2024-03-25 Thread pan2 . li
From: Pan Li 

This patch would like to allow the RVV intrinsic when function is
attributed as target("arch=+v") and build with rv64gc.  For example:

vint32m1_t
__attribute__((target("arch=+v")))
test_1 (vint32m1_t a, vint32m1_t b, size_t vl)
{
  return __riscv_vadd_vv_i32m1 (a, b, vl);
}

build with -march=rv64gc -mabi=lp64d -O3, we will have asm like below:
test_1:
  .option push
  .option arch, rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zicsr2p0_\
zifencei2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0
  vsetvli zero,a0,e32,m1,ta,ma
  vadd.vv v8,v8,v9
  ret

The riscv_vector.h must be included when leveraging intrinsic type(s) and
API(s), and the scope of this attribute should not exceed the function
body.  Meanwhile, to make rvv types and API(s) available for this attribute,
including riscv_vector.h will not report an error for now if v is not present
in march.

Below test are passed for this patch:
* The riscv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_pragma_intrinsic): Remove error
when V is disabled and init the RVV types and intrinsic APIs.
* config/riscv/riscv-vector-builtins.cc (expand_builtin): Report
error if V ext is disabled.
* config/riscv/riscv.cc (riscv_return_value_is_vector_type_p):
Ditto.
(riscv_arguments_is_vector_type_p): Ditto.
(riscv_vector_cc_function_p): Ditto.
* config/riscv/riscv_vector.h: Remove error if V is disable.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pragma-1.c: Remove.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-1.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-2.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-3.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-4.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-5.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-6.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c: New 
test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c: New 
test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-c.cc   | 18 +++
 gcc/config/riscv/riscv-vector-builtins.cc |  5 
 gcc/config/riscv/riscv.cc | 30 ---
 gcc/config/riscv/riscv_vector.h   |  4 ---
 .../gcc.target/riscv/rvv/base/pragma-1.c  |  4 ---
 .../target_attribute_v_with_intrinsic-1.c |  5 
 .../target_attribute_v_with_intrinsic-2.c | 18 +++
 .../target_attribute_v_with_intrinsic-3.c | 13 
 .../target_attribute_v_with_intrinsic-4.c | 10 +++
 .../target_attribute_v_with_intrinsic-5.c | 12 
 .../target_attribute_v_with_intrinsic-6.c | 12 
 .../target_attribute_v_with_intrinsic-7.c |  9 ++
 .../target_attribute_v_with_intrinsic-8.c | 23 ++
 13 files changed, 145 insertions(+), 18 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pragma-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-4.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-5.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-6.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c

diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index edb866d51e4..01314037461 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -201,14 +201,20 @@ riscv_pragma_intrinsic (cpp_reader *)
   if (strcmp (name, "vector") == 0
   || strcmp (name, "xtheadvector") == 0)
 {
-  if (!TARGET_VECTOR)
+  if (TARGET_VECTOR)
+   riscv_vector::handle_pragma_vector ();
+  else /* Indicates riscv_vector.h is included but v is missing in arch  */
{
- error ("%<#pragma riscv intrinsic%> option %qs needs 'V' or "
-"'XTHEADVECTOR' extension enabled",
-name);
- return;
+ /* To make the the rvv types and intrinsic API available for the
+target("arch=+v") attribute,  we need to temporally enable the
+TARGET_VECTOR, and disable it after all initialized.  */
+ target_flags |= MASK_VECTOR;
+
+ riscv_vector::init_builtins ();

[PATCH v4] RISC-V: Introduce gcc attribute riscv_rvv_vector_bits for RVV

2024-03-22 Thread pan2 . li
From: Pan Li 

This patch would like to introduce one new gcc attribute for RVV.
This attribute is used to define fixed-length variants of one
existing sizeless RVV types.

This attribute is valid if and only if -mrvv-vector-bits=zvl is given.  Its
only argument should be an integer constant whose value is determined
by the LMUL and the vector register bits in zvl*b.  For example:

typedef vint32m2_t fixed_vint32m2_t __attribute__((riscv_rvv_vector_bits(128)));

The above type definition is valid when -march=rv64gc_zve64d_zvl64b
(aka 2(m2) * 64 = 128 for vint32m2_t), and will report an error similar
to the one below when -march=rv64gcv_zvl128b.

"error: invalid RVV vector size '128', expected size is '256' based on
LMUL of type and '-mrvv-vector-bits=zvl'"

Meanwhile, a pre-define macro __riscv_v_fixed_vlen is introduced to
represent the fixed vlen in a RVV vector register.

For the vint*m*_t below operations are allowed.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.
* CMP: >, <, ==, !=, <=, >=
* ALU: +, -, *, /, %, &, |, ^, >>, <<, ~, -

The CMP will return vint*m*_t the same as aarch64 sve. For example:
typedef vint32m1_t fixed_vint32m1_t __attribute__((riscv_rvv_vector_bits(128)));
fixed_vint32m1_t less_than (fixed_vint32m1_t a, fixed_vint32m1_t b)
{
  return a < b;
}

For the vfloat*m*_t below operations are allowed.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.
* CMP: >, <, ==, !=, <=, >=
* ALU: +, -, *, /, -

The CMP will return vfloat*m*_t the same as aarch64 sve. For example:
typedef vfloat32m1_t fixed_vfloat32m1_t 
__attribute__((riscv_rvv_vector_bits(128)));
fixed_vfloat32m1_t less_than (fixed_vfloat32m1_t a, fixed_vfloat32m1_t b)
{
  return a < b;
}

For the vbool*_t types only the operations below are allowed, which
exclude the CMP and ALU. The CMP and ALU operations on vbool*_t are not
well defined currently.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.

The vint*x*m*_t tuple types are not supported in this patch, which is
compatible with clang.

This patch passed the below testsuites.
* The riscv fully regression tests.

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_cpu_cpp_builtins): Add pre-define
macro __riscv_v_fixed_vlen when zvl.
* config/riscv/riscv.cc (riscv_handle_rvv_vector_bits_attribute):
New static func to take care of the RVV types decorated by
the attributes.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-1.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-10.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-11.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-12.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-13.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-14.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-15.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-16.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-17.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-18.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-2.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-3.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-4.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-5.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-6.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-7.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-8.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-9.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits.h: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-c.cc   |   3 +
 gcc/config/riscv/riscv.cc |  87 +-
 .../riscv/rvv/base/riscv_rvv_vector_bits-1.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-10.c |  53 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-11.c |  76 
 .../riscv/rvv/base/riscv_rvv_vector_bits-12.c |  14 +++
 .../riscv/rvv/base/riscv_rvv_vector_bits-13.c |  10 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-14.c |  10 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-15.c |  10 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-16.c |  11 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-17.c |  10 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-18.c |  45 
 .../riscv/rvv/base/riscv_rvv_vector_bits-2.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-3.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-4.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-5.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-6.c  |   6 +
 

[PATCH v2] RISC-V: Bugfix ICE for __attribute__((target("arch=+v"))

2024-03-21 Thread pan2 . li
From: Pan Li 

This patch would like to fix one ICE for __attribute__((target("arch=+v"))
and likewise extension(s). Given we have sample code as below:

void __attribute__((target("arch=+v")))
test_2 (int *a, int *b, int *out, unsigned count)
{
  unsigned i;
  for (i = 0; i < count; i++)
   out[i] = a[i] + b[i];
}

It will ICE when built with -march=rv64gc -O3.

test.c: In function ‘test_2’:
test.c:4:1: internal compiler error: Floating point exception
4 | {
  | ^
0x1a5891b crash_signal
.../__RISC-V_BUILD__/../gcc/toplev.cc:319
0x7f0a7884251f ???
./signal/../sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c:0
0x1f51ba4 riscv_hard_regno_nregs
.../__RISC-V_BUILD__/../gcc/config/riscv/riscv.cc:8143
0x1967bb9 init_reg_modes_target()
.../__RISC-V_BUILD__/../gcc/reginfo.cc:471
0x13fc029 init_emit_regs()
.../__RISC-V_BUILD__/../gcc/emit-rtl.cc:6237
0x1a5b83d target_reinit()
.../__RISC-V_BUILD__/../gcc/toplev.cc:1936
0x35e374d save_target_globals()
.../__RISC-V_BUILD__/../gcc/target-globals.cc:92
0x35e381f save_target_globals_default_opts()
.../__RISC-V_BUILD__/../gcc/target-globals.cc:122
0x1f544cc riscv_save_restore_target_globals(tree_node*)
.../__RISC-V_BUILD__/../gcc/config/riscv/riscv.cc:9138
0x1f55c36 riscv_set_current_function
...

There are two reasons for this ICE.
1. The implied extension(s) of v are not well handled and the
   TARGET_MIN_VLEN is 0 which is not reinitialized.  Then the
   size / TARGET_MIN_VLEN will have DivideByZero.
2. The machine modes of the vector types will vary after
   the v extension is introduced.

This patch passed below testsuite:
1. The riscv fully regression test.

PR target/114352

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (riscv_subset_list::parse):
Replace implied, combine and check to func finalize.
(riscv_subset_list::finalize): New func impl to take care of
implied, combine ext and related checks.
* config/riscv/riscv-subset.h: Add func decl for finalize.
* config/riscv/riscv-target-attr.cc 
(riscv_target_attr_parser::parse_arch):
Finalize the ext before return succeed.
* config/riscv/riscv.cc (riscv_set_current_function): Reinit the
machine mode before when set cur function.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr114352-1.c: New test.
* gcc.target/riscv/rvv/base/pr114352-2.c: New test.

Signed-off-by: Pan Li 
---
 gcc/common/config/riscv/riscv-common.cc   | 31 ++
 gcc/config/riscv/riscv-subset.h   |  2 +
 gcc/config/riscv/riscv-target-attr.cc |  2 +
 gcc/config/riscv/riscv.cc |  4 ++
 .../gcc.target/riscv/rvv/base/pr114352-1.c| 58 +++
 .../gcc.target/riscv/rvv/base/pr114352-2.c| 27 +
 6 files changed, 114 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr114352-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr114352-2.c

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 440127a2af0..15d44245b3c 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -1428,16 +1428,7 @@ riscv_subset_list::parse (const char *arch, location_t 
loc)
   if (p == NULL)
 goto fail;
 
-  for (itr = subset_list->m_head; itr != NULL; itr = itr->next)
-{
-  subset_list->handle_implied_ext (itr->name.c_str ());
-}
-
-  /* Make sure all implied extensions are included. */
-  gcc_assert (subset_list->check_implied_ext ());
-
-  subset_list->handle_combine_ext ();
-  subset_list->check_conflict_ext ();
+  subset_list->finalize ();
 
   return subset_list;
 
@@ -1467,6 +1458,26 @@ riscv_subset_list::set_loc (location_t loc)
   m_loc = loc;
 }
 
+/* Make sure the implied or combined extension is included after add
+   a new std extension to subset list or likewise.  For exmaple as below,
+
+   void __attribute__((target("arch=+v"))) func () with -march=rv64gc.
+
+   The implied zvl128b and zve64d of the std v should be included.  */
+void
+riscv_subset_list::finalize ()
+{
+  riscv_subset_t *subset;
+
+  for (subset = m_head; subset != NULL; subset = subset->next)
+handle_implied_ext (subset->name.c_str ());
+
+  gcc_assert (check_implied_ext ());
+
+  handle_combine_ext ();
+  check_conflict_ext ();
+}
+
 /* Return the current arch string.  */
 
 std::string
diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h
index ae849e2a302..ec979040e8c 100644
--- a/gcc/config/riscv/riscv-subset.h
+++ b/gcc/config/riscv/riscv-subset.h
@@ -105,6 +105,8 @@ public:
   int match_score (riscv_subset_list *) const;
 
   void set_loc (location_t);
+
+  void finalize ();
 };
 
 extern const riscv_subset_list *riscv_current_subset_list (void);
diff --git a/gcc/config/riscv/riscv-target-attr.cc 
b/gcc/config/riscv/riscv-target-attr.cc
index 

[PATCH v1] RISC-V: Bugfix function target attribute pollution

2024-03-20 Thread pan2 . li
From: Pan Li 

This patch depends on below ICE fix.

https://gcc.gnu.org/pipermail/gcc-patches/2024-March/647915.html

The function target attribute should be on a per-function basis.
For example, we have 3 function as below:

void test_1 () {}

void __attribute__((target("arch=+v"))) test_2 () {}

void __attribute__((target("arch=+zfh"))) test_3 () {}

void test_4 () {}

The scope of the target attribute should not extend beyond the function
body.  Aka, test_3 cannot have the 'v' extension, and test_4 cannot have
either the 'v' or the 'zfh' extension.

Unfortunately, for now test_4 is able to leverage the 'v' and
the 'zfh' extensions, which is incorrect.  This patch would like to
fix the sticky attribute by introducing the commandline subset_list.
In parse_arch, we always clone from the cmdline_subset_list instead
of the current_subset_list.

Meanwhile, we correct the print information about arch like below.

.option arch, rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0_zbb1p0

The riscv_declare_function_name hook is always called after the hook
riscv_process_target_attr.  Thus, we introduce one hash_map to record
the 1:1 mapping from fndecl to its subset_list in advance.  Later,
riscv_declare_function_name is able to get the right information
about the arch.

Below test are passed for this patch
* The riscv fully regression test.

PR target/114352

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (struct riscv_func_target_info):
New struct for func decl and target name.
(struct riscv_func_target_hasher): New hasher for hash table mapping
from the fn_decl to fn_target_name.
(riscv_func_decl_hash): New func to compute the hash for fn_decl.
(riscv_func_target_hasher::hash): New func to impl hash interface.
(riscv_func_target_hasher::equal): New func to impl equal interface.
(riscv_cmdline_subset_list): New static var for cmdline subset list.
(riscv_func_target_table_lazy_init): New func to lazy init the func
target hash table.
(riscv_func_target_get): New func to get target name from hash table.
(riscv_func_target_put): New func to put target name into hash table.
(riscv_func_target_remove_and_destory): New func to remove target
info from the hash table and destroy it.
(riscv_parse_arch_string): Set the static var cmdline_subset_list.
* config/riscv/riscv-subset.h (riscv_cmdline_subset_list): New static
var for cmdline subset list.
(riscv_func_target_get): New func decl.
(riscv_func_target_put): Ditto.
(riscv_func_target_remove_and_destory): Ditto.
* config/riscv/riscv-target-attr.cc 
(riscv_target_attr_parser::parse_arch):
Take cmdline_subset_list instead of current_subset_list when clone.
(riscv_process_target_attr): Record the func target info to hash table.
(riscv_option_valid_attribute_p): Add new arg tree fndecl.
* config/riscv/riscv.cc (riscv_declare_function_name): Consume the
func target info and print the arch message.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr114352-3.c: New test.

Signed-off-by: Pan Li 
---
 gcc/common/config/riscv/riscv-common.cc   | 105 +++-
 gcc/config/riscv/riscv-subset.h   |   4 +
 gcc/config/riscv/riscv-target-attr.cc |  18 ++-
 gcc/config/riscv/riscv.cc |   7 +-
 .../gcc.target/riscv/rvv/base/pr114352-3.c| 113 ++
 5 files changed, 240 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr114352-3.c

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index d32bf147eca..76ec9bf846c 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -425,11 +425,108 @@ bool riscv_subset_list::parse_failed = false;
 
 static riscv_subset_list *current_subset_list = NULL;
 
+static riscv_subset_list *cmdline_subset_list = NULL;
+
+struct riscv_func_target_info
+{
+  tree fn_decl;
+  std::string fn_target_name;
+
+  riscv_func_target_info (const tree , const std::string _name)
+: fn_decl (decl), fn_target_name (target_name)
+  {
+  }
+};
+
+struct riscv_func_target_hasher : nofree_ptr_hash
+{
+  typedef tree compare_type;
+
+  static hashval_t hash (value_type);
+  static bool equal (value_type, const compare_type &);
+};
+
+static hash_table *func_target_table = NULL;
+
+static inline hashval_t riscv_func_decl_hash (tree fn_decl)
+{
+  inchash::hash h;
+
+  h.add_ptr (fn_decl);
+
+  return h.end ();
+}
+
+inline hashval_t
+riscv_func_target_hasher::hash (value_type value)
+{
+  return riscv_func_decl_hash (value->fn_decl);
+}
+
+inline bool
+riscv_func_target_hasher::equal (value_type value, const compare_type )
+{
+  return value->fn_decl == key;
+}
+
 const riscv_subset_list *riscv_current_subset_list ()
 {
   return 

[PATCH v1] RISC-V: Bugfix ICE for __attribute__((target("arch=+v"))

2024-03-18 Thread pan2 . li
From: Pan Li 

This patch fixes one ICE for __attribute__((target("arch=+v")))
and similar extension(s).  Given we have sample code as below:

void __attribute__((target("arch=+v")))
test_2 (int *a, int *b, int *out, unsigned count)
{
  unsigned i;
  for (i = 0; i < count; i++)
   out[i] = a[i] + b[i];
}

It will have ICE when build with -march=rv64gc -O3.

test.c: In function ‘test_2’:
test.c:4:1: internal compiler error: Floating point exception
4 | {
  | ^
0x1a5891b crash_signal
.../__RISC-V_BUILD__/../gcc/toplev.cc:319
0x7f0a7884251f ???
./signal/../sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c:0
0x1f51ba4 riscv_hard_regno_nregs
.../__RISC-V_BUILD__/../gcc/config/riscv/riscv.cc:8143
0x1967bb9 init_reg_modes_target()
.../__RISC-V_BUILD__/../gcc/reginfo.cc:471
0x13fc029 init_emit_regs()
.../__RISC-V_BUILD__/../gcc/emit-rtl.cc:6237
0x1a5b83d target_reinit()
.../__RISC-V_BUILD__/../gcc/toplev.cc:1936
0x35e374d save_target_globals()
.../__RISC-V_BUILD__/../gcc/target-globals.cc:92
0x35e381f save_target_globals_default_opts()
.../__RISC-V_BUILD__/../gcc/target-globals.cc:122
0x1f544cc riscv_save_restore_target_globals(tree_node*)
.../__RISC-V_BUILD__/../gcc/config/riscv/riscv.cc:9138
0x1f55c36 riscv_set_current_function
...

There are two reasons for this ICE.
1. The implied extension(s) of v are not well handled and the
   TARGET_MIN_VLEN is 0 which is not reinitialized.  Then the
   size / TARGET_MIN_VLEN will have DivideByZero.
2. The machine modes of the vector types will vary after
   the v extension is introduced.

This patch passed below testsuite:
1. The riscv fully regression test.

PR target/114352

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc
(riscv_subset_list::parse_single_ext): Add implied, combine
and conflict check after parse single extension.
* config/riscv/riscv.cc (riscv_set_current_function):
Reinit the machine modes when setting the current function.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr114352-1.c: New test.
* gcc.target/riscv/rvv/base/pr114352-2.c: New test.

Signed-off-by: Pan Li 
---
 gcc/common/config/riscv/riscv-common.cc   | 33 ---
 gcc/config/riscv/riscv.cc |  4 ++
 .../gcc.target/riscv/rvv/base/pr114352-1.c| 58 +++
 .../gcc.target/riscv/rvv/base/pr114352-2.c| 27 +
 4 files changed, 115 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr114352-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr114352-2.c

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 48efef40dfd..d32bf147eca 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -1375,20 +1375,39 @@ riscv_subset_list::parse_single_multiletter_ext (const 
char *p,
 const char *
 riscv_subset_list::parse_single_ext (const char *p, bool exact_single_p)
 {
+  const char *end_of_ext;
+
   switch (p[0])
 {
 case 'x':
-  return parse_single_multiletter_ext (p, "x", "non-standard extension",
-  exact_single_p);
+  end_of_ext = parse_single_multiletter_ext (p, "x",
+"non-standard extension",
+exact_single_p);
+  break;
 case 'z':
-  return parse_single_multiletter_ext (p, "z", "sub-extension",
-  exact_single_p);
+  end_of_ext = parse_single_multiletter_ext (p, "z", "sub-extension",
+exact_single_p);
+  break;
 case 's':
-  return parse_single_multiletter_ext (p, "s", "supervisor extension",
-  exact_single_p);
+  end_of_ext = parse_single_multiletter_ext (p, "s", "supervisor 
extension",
+exact_single_p);
+  break;
 default:
-  return parse_single_std_ext (p, exact_single_p);
+  end_of_ext = parse_single_std_ext (p, exact_single_p);
+  break;
 }
+
+  /* Make sure the implied or combined extension is included after add
+ a new std extension to subset list.  For exmaple as below,
+
+ void __attribute__((target("arch=+v"))) func () with -march=rv64gc.
+
+ The implied zvl128b and zve64d of the std v should be included.  */
+  handle_implied_ext (p);
+  handle_combine_ext ();
+  check_conflict_ext ();
+
+  return end_of_ext;
 }
 
 /* Parsing arch string to subset list, return NULL if parsing failed.  */
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 680c4a728e9..89acb94af10 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -9474,6 +9474,10 @@ riscv_set_current_function (tree decl)
   cl_target_option_restore (_options, 

[PATCH v1] RISC-V: Fix some code style issue(s) in riscv-c.cc [NFC]

2024-03-12 Thread pan2 . li
From: Pan Li 

Noticed some code style issue(s) when adding __riscv_v_fixed_vlen, including:

* Meaningless empty line.
* Line longer than 80 chars.
* Indentation with 3 space(s).
* Argument misalignment.

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_ext_version_value): Fix
code style greater than 80 chars.
(riscv_cpu_cpp_builtins): Fix useless empty line, indent
with 3 space(s) and argument unalignment.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-c.cc | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 3755ec0b8ef..7029ba88186 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -37,7 +37,8 @@ along with GCC; see the file COPYING3.  If not see
 static int
 riscv_ext_version_value (unsigned major, unsigned minor)
 {
-  return (major * RISCV_MAJOR_VERSION_BASE) + (minor * 
RISCV_MINOR_VERSION_BASE);
+  return (major * RISCV_MAJOR_VERSION_BASE)
++ (minor * RISCV_MINOR_VERSION_BASE);
 }
 
 /* Implement TARGET_CPU_CPP_BUILTINS.  */
@@ -110,7 +111,6 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
 case CM_MEDANY:
   builtin_define ("__riscv_cmodel_medany");
   break;
-
 }
 
   if (riscv_user_wants_strict_align)
@@ -142,9 +142,9 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
 riscv_ext_version_value (0, 12));
 }
 
-   if (TARGET_XTHEADVECTOR)
- builtin_define_with_int_value ("__riscv_th_v_intrinsic",
-riscv_ext_version_value (0, 11));
+  if (TARGET_XTHEADVECTOR)
+builtin_define_with_int_value ("__riscv_th_v_intrinsic",
+  riscv_ext_version_value (0, 11));
 
   /* Define architecture extension test macros.  */
   builtin_define_with_int_value ("__riscv_arch_test", 1);
-- 
2.34.1



[PATCH v3] RISC-V: Introduce gcc attribute riscv_rvv_vector_bits for RVV

2024-03-12 Thread pan2 . li
From: Pan Li 

Update in v3:
* Add pre-defined __riscv_v_fixed_vlen when zvl.

Update in v2:
* Cleanup some unused code.
* Fix some typo of commit log.

Original log:

This patch would like to introduce one new gcc attribute for RVV.
This attribute is used to define fixed-length variants of one
existing sizeless RVV types.

This attribute is valid if and only if mrvv-vector-bits=zvl; its only
argument should be an integer constant whose value is determined
by the LMUL and the vector register bits in zvl*b.  For example:

typedef vint32m2_t fixed_vint32m2_t __attribute__((riscv_rvv_vector_bits(128)));

The above type definition is valid when -march=rv64gc_zve64d_zvl64b
(aka 2(m2) * 64 = 128 for vint32m2_t), and will report an error when
-march=rv64gcv_zvl128b, similar to below.

"error: invalid RVV vector size '128', expected size is '256' based on
LMUL of type and '-mrvv-vector-bits=zvl'"

Meanwhile, a pre-define macro __riscv_v_fixed_vlen is introduced to
represent the fixed vlen in a RVV vector register.

For the vint*m*_t below operations are allowed.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.
* CMP: >, <, ==, !=, <=, >=
* ALU: +, -, *, /, %, &, |, ^, >>, <<, ~, -

For the vfloat*m*_t below operations are allowed.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.
* CMP: >, <, ==, !=, <=, >=
* ALU: +, -, *, /, -

For the vbool*_t types only below operations are allowed except
the CMP and ALU. The CMP and ALU operations on vbool*_t is not
well defined currently.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.

The vint*x*m*_t tuple types are not supported in this patch,
which is compatible with clang.

This patch passed the below testsuites.
* The riscv fully regression tests.

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_cpu_cpp_builtins): Add pre-define
macro __riscv_v_fixed_vlen when zvl.
* config/riscv/riscv.cc (riscv_handle_rvv_vector_bits_attribute):
New static func to take care of the RVV types decorated by
the attributes.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-1.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-10.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-11.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-12.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-13.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-14.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-15.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-16.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-17.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-2.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-3.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-4.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-5.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-6.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-7.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-8.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-9.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits.h: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-c.cc   |   3 +
 gcc/config/riscv/riscv.cc |  87 +-
 .../riscv/rvv/base/riscv_rvv_vector_bits-1.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-10.c |  53 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-11.c |  76 
 .../riscv/rvv/base/riscv_rvv_vector_bits-12.c |  14 +++
 .../riscv/rvv/base/riscv_rvv_vector_bits-13.c |  10 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-14.c |  10 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-15.c |  10 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-16.c |  11 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-17.c |  10 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-2.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-3.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-4.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-5.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-6.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-7.c  |  76 
 .../riscv/rvv/base/riscv_rvv_vector_bits-8.c  |  75 
 .../riscv/rvv/base/riscv_rvv_vector_bits-9.c  |  76 
 .../riscv/rvv/base/riscv_rvv_vector_bits.h| 108 ++
 20 files changed, 653 insertions(+), 2 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-10.c
 create mode 

[PATCH v2] VECT: Fix ICE for vectorizable LD/ST when both len and store are enabled

2024-03-09 Thread pan2 . li
From: Pan Li 

This patch would like to fix one ICE in vectorizable_store when both the
loop_masks and loop_lens are enabled.  The ICE looks like below when build
with "-march=rv64gcv -O3".

during GIMPLE pass: vect
test.c: In function ‘d’:
test.c:6:6: internal compiler error: in vectorizable_store, at
tree-vect-stmts.cc:8691
6 | void d() {
  |  ^
0x37a6f2f vectorizable_store
.../__RISC-V_BUILD__/../gcc/tree-vect-stmts.cc:8691
0x37b861c vect_analyze_stmt(vec_info*, _stmt_vec_info*, bool*,
_slp_tree*, _slp_instance*, vec*)
.../__RISC-V_BUILD__/../gcc/tree-vect-stmts.cc:13242
0x1db5dca vect_analyze_loop_operations
.../__RISC-V_BUILD__/../gcc/tree-vect-loop.cc:2208
0x1db885b vect_analyze_loop_2
.../__RISC-V_BUILD__/../gcc/tree-vect-loop.cc:3041
0x1dba029 vect_analyze_loop_1
.../__RISC-V_BUILD__/../gcc/tree-vect-loop.cc:3481
0x1dbabad vect_analyze_loop(loop*, vec_info_shared*)
.../__RISC-V_BUILD__/../gcc/tree-vect-loop.cc:3639
0x1e389d1 try_vectorize_loop_1
.../__RISC-V_BUILD__/../gcc/tree-vectorizer.cc:1066
0x1e38f3d try_vectorize_loop
.../__RISC-V_BUILD__/../gcc/tree-vectorizer.cc:1182
0x1e39230 execute
.../__RISC-V_BUILD__/../gcc/tree-vectorizer.cc:1298

There are two ways to reach the vectorizable LD/ST: one is the analysis and
the other is the transform.  We cannot have both the lens and the masks
enabled during transform, but it is valid during analysis.  Given the
transform doesn't require cost_vec, we can enable the assert only
when cost_vec is NULL.

Below testsuites are passed for this patch:
* The x86 bootstrap tests.
* The x86 fully regression tests.
* The aarch64 fully regression tests.
* The riscv full regression tests.

gcc/ChangeLog:

* tree-vect-stmts.cc (vectorizable_store): Enable the assert
during transform process.
(vectorizable_load): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr114195-1.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/base/pr114195-1.c | 15 +++
 gcc/tree-vect-stmts.cc | 18 ++
 2 files changed, 29 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr114195-1.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114195-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114195-1.c
new file mode 100644
index 000..a67b847112b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114195-1.c
@@ -0,0 +1,15 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
+
+long a, b;
+extern short c[];
+
+void d() {
+  for (int e = 0; e < 35; e += 2) {
+a = ({ a < 0 ? a : 0; });
+b = ({ b < 0 ? b : 0; });
+
+c[e] = 0;
+  }
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 14a3ffb5f02..e8617439a48 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8697,8 +8697,13 @@ vectorizable_store (vec_info *vinfo,
? _VINFO_LENS (loop_vinfo)
: NULL);
 
-  /* Shouldn't go with length-based approach if fully masked.  */
-  gcc_assert (!loop_lens || !loop_masks);
+  /* The vect_transform_stmt and vect_analyze_stmt will go here but there
+ are some difference here.  We cannot enable both the lens and masks
+ during transform but it is allowed during analysis.
+ Shouldn't go with length-based approach if fully masked.  */
+  if (cost_vec == NULL)
+/* The cost_vec is NULL during transfrom.  */
+gcc_assert ((!loop_lens || !loop_masks));
 
   /* Targets with store-lane instructions must not require explicit
  realignment.  vect_supportable_dr_alignment always returns either
@@ -10577,8 +10582,13 @@ vectorizable_load (vec_info *vinfo,
? _VINFO_LENS (loop_vinfo)
: NULL);
 
-  /* Shouldn't go with length-based approach if fully masked.  */
-  gcc_assert (!loop_lens || !loop_masks);
+  /* The vect_transform_stmt and vect_analyze_stmt will go here but there
+ are some difference here.  We cannot enable both the lens and masks
+ during transform but it is allowed during analysis.
+ Shouldn't go with length-based approach if fully masked.  */
+  if (cost_vec == NULL)
+/* The cost_vec is NULL during transfrom.  */
+gcc_assert ((!loop_lens || !loop_masks));
 
   /* Targets with store-lane instructions must not require explicit
  realignment.  vect_supportable_dr_alignment always returns either
-- 
2.34.1



[PATCH v1] VECT: Bugfix ICE for vectorizable_store when both len and mask

2024-03-07 Thread pan2 . li
From: Pan Li 

This patch would like to fix one ICE in vectorizable_store for both the
loop_masks and loop_lens.  The ICE looks like below with "-march=rv64gcv -O3".

during GIMPLE pass: vect
test.c: In function ‘d’:
test.c:6:6: internal compiler error: in vectorizable_store, at
tree-vect-stmts.cc:8691
6 | void d() {
  |  ^
0x37a6f2f vectorizable_store
.../__RISC-V_BUILD__/../gcc/tree-vect-stmts.cc:8691
0x37b861c vect_analyze_stmt(vec_info*, _stmt_vec_info*, bool*,
_slp_tree*, _slp_instance*, vec*)
.../__RISC-V_BUILD__/../gcc/tree-vect-stmts.cc:13242
0x1db5dca vect_analyze_loop_operations
.../__RISC-V_BUILD__/../gcc/tree-vect-loop.cc:2208
0x1db885b vect_analyze_loop_2
.../__RISC-V_BUILD__/../gcc/tree-vect-loop.cc:3041
0x1dba029 vect_analyze_loop_1
.../__RISC-V_BUILD__/../gcc/tree-vect-loop.cc:3481
0x1dbabad vect_analyze_loop(loop*, vec_info_shared*)
.../__RISC-V_BUILD__/../gcc/tree-vect-loop.cc:3639
0x1e389d1 try_vectorize_loop_1
.../__RISC-V_BUILD__/../gcc/tree-vectorizer.cc:1066
0x1e38f3d try_vectorize_loop
.../__RISC-V_BUILD__/../gcc/tree-vectorizer.cc:1182
0x1e39230 execute
.../__RISC-V_BUILD__/../gcc/tree-vectorizer.cc:1298

Given the masks and the lens cannot be enabled simultaneously when the loop
is using partial vectors, we need to ensure that one is disabled when we
would like to record the other in check_load_store_for_partial_vectors.  For
example, when we try to record the loop len, we need to check whether the
loop mask is disabled or not.

Below testsuites are passed for this patch:
* The x86 bootstrap tests.
* The x86 fully regression tests.
* The aarch64 fully regression tests.
* The riscv full regression tests.

PR target/114195

gcc/ChangeLog:

* tree-vect-stmts.cc (check_load_store_for_partial_vectors): Add
loop mask/len check before recording as they are mutually exclusive.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr114195-1.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/base/pr114195-1.c| 15 +++
 gcc/tree-vect-stmts.cc| 26 ++-
 2 files changed, 35 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr114195-1.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114195-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114195-1.c
new file mode 100644
index 000..b0c9d5b81b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114195-1.c
@@ -0,0 +1,15 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
+
+long a, b;
+extern short c[];
+
+void d() {
+  for (int e = 0; e < 35; e += 2) {
+a = ({ a < 0 ? a : 0; });
+b = ({ b < 0 ? b : 0; });
+
+c[e] = 0;
+  }
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 14a3ffb5f02..624947ed271 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1502,6 +1502,8 @@ check_load_store_for_partial_vectors (loop_vec_info 
loop_vinfo, tree vectype,
  gather_scatter_info *gs_info,
  tree scalar_mask)
 {
+  gcc_assert (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo));
+
   /* Invariant loads need no special support.  */
   if (memory_access_type == VMAT_INVARIANT)
 return;
@@ -1521,9 +1523,17 @@ check_load_store_for_partial_vectors (loop_vec_info 
loop_vinfo, tree vectype,
   internal_fn ifn
= (is_load ? vect_load_lanes_supported (vectype, group_size, true)
   : vect_store_lanes_supported (vectype, group_size, true));
-  if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
+
+  /* When the loop_vinfo using partial vector,  we cannot enable both
+the fully mask and length simultaneously.  Thus, make sure the
+other one is disabled when record one of them.
+The same as other place for both the vect_record_loop_len and
+vect_record_loop_mask.  */
+  if ((ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
+   && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
-  else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
+  else if ((ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
+   && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
   scalar_mask);
   else
@@ -1549,12 +1559,14 @@ check_load_store_for_partial_vectors (loop_vec_info 
loop_vinfo, tree vectype,
   if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
  gs_info->memory_type,
  gs_info->offset_vectype,
- 

[PATCH v2] RISC-V: Introduce gcc attribute riscv_rvv_vector_bits for RVV

2024-03-05 Thread pan2 . li
From: Pan Li 

Update in v2:
* Cleanup some unused code.
* Fix some typo of commit log.

Original log:

This patch would like to introduce one new gcc attribute for RVV.
This attribute is used to define fixed-length variants of one
existing sizeless RVV types.

This attribute is valid if and only if mrvv-vector-bits=zvl; its only
argument should be an integer constant whose value is determined
by the LMUL and the vector register bits in zvl*b.  For example:

typedef vint32m2_t fixed_vint32m2_t __attribute__((riscv_rvv_vector_bits(128)));

The above type definition is valid when -march=rv64gc_zve64d_zvl64b
(aka 2(m2) * 64 = 128 for vint32m2_t), and will report an error when
-march=rv64gcv_zvl128b, similar to below.

"error: invalid RVV vector size '128', expected size is '256' based on
LMUL of type and '-mrvv-vector-bits=zvl'"

For the vint*m*_t below operations are allowed.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.
* CMP: >, <, ==, !=, <=, >=
* ALU: +, -, *, /, %, &, |, ^, >>, <<, ~, -

For the vfloat*m*_t below operations are allowed.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.
* CMP: >, <, ==, !=, <=, >=
* ALU: +, -, *, /, -

For the vbool*_t types only below operations are allowed except
the CMP and ALU. The CMP and ALU operations on vbool*_t is not
well defined currently.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.

The vint*x*m*_t tuple types are not supported in this patch,
which is compatible with clang.

This patch passed the below testsuites.
* The riscv fully regression tests.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_handle_rvv_vector_bits_attribute):
New static func to take care of the RVV types decorated by
the attributes.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-1.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-10.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-11.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-12.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-2.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-3.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-4.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-5.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-6.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-7.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-8.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-9.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits.h: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc |  87 +-
 .../riscv/rvv/base/riscv_rvv_vector_bits-1.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-10.c |  53 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-11.c |  76 
 .../riscv/rvv/base/riscv_rvv_vector_bits-12.c |  14 +++
 .../riscv/rvv/base/riscv_rvv_vector_bits-2.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-3.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-4.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-5.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-6.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-7.c  |  76 
 .../riscv/rvv/base/riscv_rvv_vector_bits-8.c  |  75 
 .../riscv/rvv/base/riscv_rvv_vector_bits-9.c  |  76 
 .../riscv/rvv/base/riscv_rvv_vector_bits.h| 108 ++
 14 files changed, 599 insertions(+), 2 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-12.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-4.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-5.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-6.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-7.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-8.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-9.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits.h

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc

[PATCH v1] RISC-V: Introduce gcc attribute riscv_rvv_vector_bits for RVV

2024-03-05 Thread pan2 . li
From: Pan Li 

This patch would like to introduce one new gcc attribute for RVV.
This attribute is used to define fixed-length variants of one
existing sizeless RVV types.

This attribute is valid if and only if mrvv-vector-bits=zvl; its only
argument should be an integer constant whose value is determined
by the LMUL and the vector register bits in zvl*b.  For example:

typedef vint32m2_t fixed_vint32m2_t __attribute__((riscv_rvv_vector_bits(128)));

The above type definition is valid when -march=rv64gc_zve64d_zvl64b
(aka 2(m2) * 64 = 128 for vint32m2_t), and will report an error when
-march=rv64gcv_zvl128b, similar to below.

"error: invalid RVV vector size '128', expected size is '256' based on
LMUL of type and '-mrvv-vector-bits=zvl'"

For the vint*m*_t below operations are allowed.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.
* CMP: >, <, ==, !=, <=, >=
* ALU: +, -, *, /, %, &, |, ^, >>, <<, ~, -

For the vfloat*m*_t below operations are allowed.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.
* CMP: >, <, ==, !=, <=, >=
* ALU: +, -, *, /, -

For the vbool*_t types only below operations are allowed except
the CMP and ALU. The CMP and ALU operations on vbool*_t is not
well defined currently.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.

The vint*x*m*_t tuple types are not supported in this patch,
which is compatible with clang.

This patch passed the below testsuites.
* The riscv fully regression tests.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_handle_rvv_vector_bits_attribute):
New static func to take care of the RVV types decorated by
the attributes.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-1.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-10.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-11.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-12.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-2.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-3.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-4.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-5.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-6.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-7.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-8.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-9.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits.h: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc |  88 +-
 .../riscv/rvv/base/riscv_rvv_vector_bits-1.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-10.c |  53 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-11.c |  76 
 .../riscv/rvv/base/riscv_rvv_vector_bits-12.c |  14 +++
 .../riscv/rvv/base/riscv_rvv_vector_bits-2.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-3.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-4.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-5.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-6.c  |   6 +
 .../riscv/rvv/base/riscv_rvv_vector_bits-7.c  |  76 
 .../riscv/rvv/base/riscv_rvv_vector_bits-8.c  |  75 
 .../riscv/rvv/base/riscv_rvv_vector_bits-9.c  |  76 
 .../riscv/rvv/base/riscv_rvv_vector_bits.h| 108 ++
 14 files changed, 600 insertions(+), 2 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-12.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-4.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-5.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-6.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-7.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-8.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-9.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/riscv_rvv_vector_bits.h

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 56cd8d2c23f..fdbaf1633ac 100644
--- a/gcc/config/riscv/riscv.cc
+++ 

[PATCH v1] RISC-V: Cleanup unused code in riscv_v_adjust_bytesize [NFC]

2024-03-05 Thread pan2 . li
From: Pan Li 

Cleanup mode_size related code which is not used anymore. Below tests are
passed for this patch.

* The RVV full regression test.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_v_adjust_bytesize): Cleanup unused
mode_size related code.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 4 
 1 file changed, 4 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 56cd8d2c23f..691d967de29 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1527,10 +1527,6 @@ riscv_v_adjust_bytesize (machine_mode mode, int scale)
return BYTES_PER_RISCV_VECTOR;
 
   poly_int64 nunits = GET_MODE_NUNITS (mode);
-  poly_int64 mode_size = GET_MODE_SIZE (mode);
-
-  if (maybe_eq (mode_size, (uint16_t) -1))
-   mode_size = riscv_vector_chunks * scale;
 
   if (nunits.coeffs[0] > 8)
return exact_div (nunits, 8);
-- 
2.34.1



[PATCH v3] RISC-V: Introduce gcc option mrvv-vector-bits for RVV

2024-02-28 Thread pan2 . li
From: Pan Li 

This patch introduces a new gcc option for RVV that specifies the bit
size of one RVV vector register.  Valid arguments to
'-mrvv-vector-bits=' are:

* scalable
* zvl

The scalable will pick up the zvl*b in the march as the minimal vlen.
For example, the minimal vlen will be 512 when
march=rv64gcv_zvl512b and mrvv-vector-bits=scalable.

The zvl will pick up the zvl*b in the march as exactly vlen.
For example, the vlen will be 1024 exactly when
march=rv64gcv_zvl1024b and mrvv-vector-bits=zvl.

Given below sample:

void test_rvv_vector_bits ()
{
  vint32m1_t x;
  asm volatile ("def %0": "=vr"(x));
  asm volatile (""::: "v0",   "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
  "v8",   "v9", "v10", "v11", "v12", "v13", "v14", "v15",
  "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
  "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
  asm volatile ("use %0": : "vr"(x));
}

With -march=rv64gcv_zvl128b -mrvv-vector-bits=scalable we have (for min_vlen >= 
128)
  csrr    t0,vlenb
  sub sp,sp,t0
  def v1
  vs1r.v  v1,0(sp)
  vl1re32.v   v1,0(sp)
  use v1
  csrr    t0,vlenb
  add sp,sp,t0
  jr  ra

With -march=rv64gcv_zvl128b -mrvv-vector-bits=zvl we have (for vlen = 128)
  addi    sp,sp,-16
  def v1
  vs1r.v  v1,0(sp)
  vl1re32.v   v1,0(sp)
  use v1
  addi    sp,sp,16
  jr  ra

The tests below passed for this patch.

* The riscv full regression test.

gcc/ChangeLog:

* config/riscv/riscv-opts.h (enum rvv_vector_bits_enum): New enum for
different RVV vector bits.
* config/riscv/riscv.cc (riscv_convert_vector_bits): New func to
get the RVV vector bits, with given min_vlen.
(riscv_convert_vector_chunks): Combine the mrvv-vector-bits
option with min_vlen to RVV vector chunks.
(riscv_override_options_internal): Update comments and rename the
vector chunks.
* config/riscv/riscv.opt: Add option mrvv-vector-bits.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/rvv-vector-bits-1.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-2.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-3.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-4.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-5.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-6.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-opts.h |  8 +
 gcc/config/riscv/riscv.cc | 35 +++
 gcc/config/riscv/riscv.opt| 14 
 .../riscv/rvv/base/rvv-vector-bits-1.c|  7 
 .../riscv/rvv/base/rvv-vector-bits-2.c|  7 
 .../riscv/rvv/base/rvv-vector-bits-3.c|  9 +
 .../riscv/rvv/base/rvv-vector-bits-4.c|  9 +
 .../riscv/rvv/base/rvv-vector-bits-5.c| 17 +
 .../riscv/rvv/base/rvv-vector-bits-6.c| 17 +
 9 files changed, 116 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-6.c

diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 4edddbadc37..2a311c9d2a3 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -129,6 +129,14 @@ enum vsetvl_strategy_enum {
   VSETVL_OPT_NO_FUSION,
 };
 
+/* RVV vector bits for option -mrvv-vector-bits, default is scalable.  */
+enum rvv_vector_bits_enum {
+  /* scalable indicates taking the value of zvl*b as the minimal vlen.  */
+  RVV_VECTOR_BITS_SCALABLE,
+  /* zvl indicates taking the value of zvl*b as the exactly vlen.  */
+  RVV_VECTOR_BITS_ZVL,
+};
+
 #define TARGET_ZICOND_LIKE (TARGET_ZICOND || (TARGET_XVENTANACONDOPS && 
TARGET_64BIT))
 
 /* Bit of riscv_zvl_flags will set contintuly, N-1 bit will set if N-bit is
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 5e984ee2a55..b6b133210ff 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -8801,13 +8801,33 @@ riscv_init_machine_status (void)
   return ggc_cleared_alloc ();
 }
 
-/* Return the VLEN value associated with -march.
+static int
+riscv_convert_vector_bits (int min_vlen)
+{
+  int rvv_bits = 0;
+
+  switch (rvv_vector_bits)
+{
+  case RVV_VECTOR_BITS_ZVL:
+  case RVV_VECTOR_BITS_SCALABLE:
+   rvv_bits = min_vlen;
+   break;
+  default:
+   gcc_unreachable ();
+}
+
+  return rvv_bits;
+}
+
+/* Return the VLEN value associated with -march and 

[PATCH v3] RISC-V: Introduce gcc option mrvv-vector-bits for RVV

2024-02-28 Thread pan2 . li
From: Pan Li 

This patch introduces a new gcc option for RVV that specifies the bit
size of one RVV vector register.  Valid arguments to
'-mrvv-vector-bits=' are:

* scalable
* zvl

The scalable will pick up the zvl*b in the march as the minimal vlen.
For example, the minimal vlen will be 512 when
march=rv64gcv_zvl512b and mrvv-vector-bits=scalable.

The zvl will pick up the zvl*b in the march as exactly vlen.
For example, the vlen will be 1024 exactly when
march=rv64gcv_zvl1024b and mrvv-vector-bits=zvl.

Given below sample:

void test_rvv_vector_bits ()
{
  vint32m1_t x;
  asm volatile ("def %0": "=vr"(x));
  asm volatile (""::: "v0",   "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
  "v8",   "v9", "v10", "v11", "v12", "v13", "v14", "v15",
  "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
  "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
  asm volatile ("use %0": : "vr"(x));
}

With -march=rv64gcv_zvl128b -mrvv-vector-bits=scalable we have (for min_vlen >= 
128)
  csrr    t0,vlenb
  sub sp,sp,t0
  def v1
  vs1r.v  v1,0(sp)
  vl1re32.v   v1,0(sp)
  use v1
  csrr    t0,vlenb
  add sp,sp,t0
  jr  ra

With -march=rv64gcv_zvl128b -mrvv-vector-bits=zvl we have (for vlen = 128)
  addi    sp,sp,-16
  def v1
  vs1r.v  v1,0(sp)
  vl1re32.v   v1,0(sp)
  use v1
  addi    sp,sp,16
  jr  ra

The tests below passed for this patch.

* The riscv full regression test.

gcc/ChangeLog:

* config/riscv/riscv-opts.h (enum rvv_vector_bits_enum): New enum for
different RVV vector bits.
* config/riscv/riscv.cc (riscv_convert_vector_bits): New func to
get the RVV vector bits, with given min_vlen.
(riscv_convert_vector_chunks): Combine the mrvv-vector-bits
option with min_vlen to RVV vector chunks.
(riscv_override_options_internal): Update comments and rename the
vector chunks.
* config/riscv/riscv.opt: Add option mrvv-vector-bits.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/rvv-vector-bits-1.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-2.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-3.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-4.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-5.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-6.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-opts.h |  8 +
 gcc/config/riscv/riscv.cc | 35 +++
 gcc/config/riscv/riscv.opt| 14 
 .../riscv/rvv/base/rvv-vector-bits-1.c|  7 
 .../riscv/rvv/base/rvv-vector-bits-2.c|  7 
 .../riscv/rvv/base/rvv-vector-bits-3.c|  9 +
 .../riscv/rvv/base/rvv-vector-bits-4.c|  9 +
 .../riscv/rvv/base/rvv-vector-bits-5.c| 17 +
 .../riscv/rvv/base/rvv-vector-bits-6.c| 17 +
 9 files changed, 116 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-6.c

diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 4edddbadc37..eefd2f9e01c 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -129,6 +129,14 @@ enum vsetvl_strategy_enum {
   VSETVL_OPT_NO_FUSION,
 };
 
+/* RVV vector bits for option -mrvv-vector-bits
+   zvl indicates take the bits of zvl*b provided by march as vector bits.
+ */
+enum rvv_vector_bits_enum {
+  RVV_VECTOR_BITS_SCALABLE,
+  RVV_VECTOR_BITS_ZVL,
+};
+
 #define TARGET_ZICOND_LIKE (TARGET_ZICOND || (TARGET_XVENTANACONDOPS && 
TARGET_64BIT))
 
 /* Bit of riscv_zvl_flags will set contintuly, N-1 bit will set if N-bit is
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 5e984ee2a55..b6b133210ff 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -8801,13 +8801,33 @@ riscv_init_machine_status (void)
   return ggc_cleared_alloc ();
 }
 
-/* Return the VLEN value associated with -march.
+static int
+riscv_convert_vector_bits (int min_vlen)
+{
+  int rvv_bits = 0;
+
+  switch (rvv_vector_bits)
+{
+  case RVV_VECTOR_BITS_ZVL:
+  case RVV_VECTOR_BITS_SCALABLE:
+   rvv_bits = min_vlen;
+   break;
+  default:
+   gcc_unreachable ();
+}
+
+  return rvv_bits;
+}
+
+/* Return the VLEN value associated with -march and -mrvv-vector-bits.
TODO: So far we only support length-agnostic value. */
 static poly_uint16

[PATCH v2] RISC-V: Introduce gcc option mrvv-vector-bits for RVV

2024-02-27 Thread pan2 . li
From: Pan Li 

This patch introduces a new gcc option for RVV that specifies the bit
size of one RVV vector register.  Valid arguments to
'-mrvv-vector-bits=' are:

* zvl

The zvl will pick up the zvl*b from the march option. For example,
the mrvv-vector-bits will be 1024 when march=rv64gcv_zvl1024b.

The tests below passed for this patch.

* The riscv full regression test.

gcc/ChangeLog:

* config/riscv/riscv-opts.h (enum rvv_vector_bits_enum): New enum for
different RVV vector bits.
* config/riscv/riscv.cc (riscv_convert_vector_bits): New func to
get the RVV vector bits, with given min_vlen.
(riscv_convert_vector_chunks): Combine the mrvv-vector-bits
option with min_vlen to RVV vector chunks.
(riscv_override_options_internal): Update comments and rename the
vector chunks.
* config/riscv/riscv.opt: Add option mrvv-vector-bits.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/rvv-vector-bits-1.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-2.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-3.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-opts.h |  7 +
 gcc/config/riscv/riscv.cc | 31 +++
 gcc/config/riscv/riscv.opt| 11 +++
 .../riscv/rvv/base/rvv-vector-bits-1.c|  7 +
 .../riscv/rvv/base/rvv-vector-bits-2.c|  7 +
 .../riscv/rvv/base/rvv-vector-bits-3.c| 25 +++
 6 files changed, 82 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-3.c

diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 4edddbadc37..0162e00515b 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -129,6 +129,13 @@ enum vsetvl_strategy_enum {
   VSETVL_OPT_NO_FUSION,
 };
 
+/* RVV vector bits for option -mrvv-vector-bits
+   zvl indicates take the bits of zvl*b provided by march as vector bits.
+ */
+enum rvv_vector_bits_enum {
+  RVV_VECTOR_BITS_ZVL,
+};
+
 #define TARGET_ZICOND_LIKE (TARGET_ZICOND || (TARGET_XVENTANACONDOPS && 
TARGET_64BIT))
 
 /* Bit of riscv_zvl_flags will set contintuly, N-1 bit will set if N-bit is
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 5e984ee2a55..d18e5226bce 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -8801,13 +8801,32 @@ riscv_init_machine_status (void)
   return ggc_cleared_alloc ();
 }
 
-/* Return the VLEN value associated with -march.
+static int
+riscv_convert_vector_bits (int min_vlen)
+{
+  int rvv_bits = 0;
+
+  switch (rvv_vector_bits)
+{
+  case RVV_VECTOR_BITS_ZVL:
+   rvv_bits = min_vlen;
+   break;
+  default:
+   gcc_unreachable ();
+}
+
+  return rvv_bits;
+}
+
+/* Return the VLEN value associated with -march and -mrvv-vector-bits.
TODO: So far we only support length-agnostic value. */
 static poly_uint16
-riscv_convert_vector_bits (struct gcc_options *opts)
+riscv_convert_vector_chunks (struct gcc_options *opts)
 {
   int chunk_num;
   int min_vlen = TARGET_MIN_VLEN_OPTS (opts);
+  int rvv_bits = riscv_convert_vector_bits (min_vlen);
+
   if (min_vlen > 32)
 {
   /* When targetting minimum VLEN > 32, we should use 64-bit chunk size.
@@ -8826,7 +8845,7 @@ riscv_convert_vector_bits (struct gcc_options *opts)
   - TARGET_MIN_VLEN = 2048bit: [256,256]
   - TARGET_MIN_VLEN = 4096bit: [512,512]
   FIXME: We currently DON'T support TARGET_MIN_VLEN > 4096bit.  */
-  chunk_num = min_vlen / 64;
+  chunk_num = rvv_bits / 64;
 }
   else
 {
@@ -8848,7 +8867,7 @@ riscv_convert_vector_bits (struct gcc_options *opts)
   if (TARGET_VECTOR_OPTS_P (opts))
 {
   if (opts->x_riscv_autovec_preference == RVV_FIXED_VLMAX)
-   return (int) min_vlen / (riscv_bytes_per_vector_chunk * 8);
+   return (int) rvv_bits / (riscv_bytes_per_vector_chunk * 8);
   else
return poly_uint16 (chunk_num, chunk_num);
 }
@@ -8920,8 +8939,8 @@ riscv_override_options_internal (struct gcc_options *opts)
   if (TARGET_VECTOR && TARGET_BIG_ENDIAN)
 sorry ("Current RISC-V GCC does not support RVV in big-endian mode");
 
-  /* Convert -march to a chunks count.  */
-  riscv_vector_chunks = riscv_convert_vector_bits (opts);
+  /* Convert -march and -mrvv-vector-bits to a chunks count.  */
+  riscv_vector_chunks = riscv_convert_vector_chunks (opts);
 }
 
 /* Implement TARGET_OPTION_OVERRIDE.  */
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 20685c42aed..42ea8efd05d 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -607,3 +607,14 @@ Enum(stringop_strategy) String(vector) 

[PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val

2024-02-26 Thread pan2 . li
From: Pan Li 

A previous change allowed vector types in get_stored_val when the read
size is less than or equal to the store size.  Unfortunately, we failed
to adjust the validate_subreg part accordingly.  When the vector type's
size is less than that of a vector register, validate_subreg considers
it invalid.

Considering that validate_subreg is something of a can of worms and we
are in stage 4, we will fix the issue from the DSE side, and make sure
the subreg is valid for both the read_mode and store_mode before
performing the real gen_lowpart.

The tests below passed for this patch:

* The x86 bootstrap test.
* The x86 regression test.
* The riscv regression test.
* The aarch64 regression test.

gcc/ChangeLog:

* dse.cc (get_stored_val): Add validate_subreg check before
perform the gen_lowpart for rtl.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/ssa-fre-44.c: Add compile option to trigger
the ICE.
* gcc.target/riscv/rvv/base/bug-6.c: New test.

Signed-off-by: Pan Li 
---
 gcc/dse.cc|  4 +++-
 gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c|  2 +-
 .../gcc.target/riscv/rvv/base/bug-6.c | 22 +++
 3 files changed, 26 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c

diff --git a/gcc/dse.cc b/gcc/dse.cc
index edc7a1dfecf..1596da91da0 100644
--- a/gcc/dse.cc
+++ b/gcc/dse.cc
@@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode 
read_mode,
 copy_rtx (store_info->const_rhs));
   else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
 && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
-&& targetm.modes_tieable_p (read_mode, store_mode))
+&& targetm.modes_tieable_p (read_mode, store_mode)
+&& validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs),
+   subreg_lowpart_offset (read_mode, store_mode)))
 read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));
   else
 read_reg = extract_low_bits (read_mode, store_mode,
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
index f79b4c142ae..624a00a4f32 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-fre1" } */
+/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */
 
 struct A { float x, y; };
 struct B { struct A u; };
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
new file mode 100644
index 000..5bb00b8f587
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
@@ -0,0 +1,22 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
+
+struct A { float x, y; };
+struct B { struct A u; };
+
+extern void bar (struct A *);
+
+float
+f3 (struct B *x, int y)
+{
+  struct A p = {1.0f, 2.0f};
+  struct A *q = &x[y].u;
+
+  __builtin_memcpy (&q->x, &p.x, sizeof (float));
+  __builtin_memcpy (&q->y, &p.y, sizeof (float));
+
+  bar (&p);
+
+  return x[y].u.x + x[y].u.y;
+}
-- 
2.34.1



[PATCH v1] RTL: Bugfix ICE after allow vector type in DSE

2024-02-25 Thread pan2 . li
From: Pan Li 

A previous change allowed vector types in get_stored_val when the read
size is less than or equal to the store size.  Unfortunately, we failed
to adjust the validate_subreg part accordingly.  For vector types, we
don't need to require that the mode size be at least the vector
register size.

Thus, for example, gen_lowpart from E_V2SFmode to E_V4QImode returns
NULL_RTX (and of course ICEs after that) because the mode size is less
than the vector register size.  That also explains why gen_lowpart
from E_V8SFmode to E_V16QImode is valid here.

This patch removes the restriction for vector modes, to get rid of the
ICE in gen_lowpart caused by validate_subreg failing.

The tests below passed for this patch:

* The X86 bootstrap test.
* The full riscv regression tests.

gcc/ChangeLog:

* emit-rtl.cc (validate_subreg): Bypass register size check
if the mode is vector.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/ssa-fre-44.c: Add ftree-vectorize to trigger
the ICE.
* gcc.target/riscv/rvv/base/bug-6.c: New test.

Signed-off-by: Pan Li 
---
 gcc/emit-rtl.cc   |  3 ++-
 gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c|  2 +-
 .../gcc.target/riscv/rvv/base/bug-6.c | 22 +++
 3 files changed, 25 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c

diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
index 1856fa4884f..45c6301b487 100644
--- a/gcc/emit-rtl.cc
+++ b/gcc/emit-rtl.cc
@@ -934,7 +934,8 @@ validate_subreg (machine_mode omode, machine_mode imode,
 ;
   /* ??? Similarly, e.g. with (subreg:DF (reg:TI)).  Though store_bit_field
  is the culprit here, and not the backends.  */
-  else if (known_ge (osize, regsize) && known_ge (isize, osize))
+  else if (known_ge (isize, osize) && (known_ge (osize, regsize)
+|| (VECTOR_MODE_P (imode) || VECTOR_MODE_P (omode
 ;
   /* Allow component subregs of complex and vector.  Though given the below
  extraction rules, it's not always clear what that means.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
index f79b4c142ae..624a00a4f32 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-fre1" } */
+/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */
 
 struct A { float x, y; };
 struct B { struct A u; };
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
new file mode 100644
index 000..5bb00b8f587
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
@@ -0,0 +1,22 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
+
+struct A { float x, y; };
+struct B { struct A u; };
+
+extern void bar (struct A *);
+
+float
+f3 (struct B *x, int y)
+{
+  struct A p = {1.0f, 2.0f};
+  struct A *q = &x[y].u;
+
+  __builtin_memcpy (&q->x, &p.x, sizeof (float));
+  __builtin_memcpy (&q->y, &p.y, sizeof (float));
+
+  bar (&p);
+
+  return x[y].u.x + x[y].u.y;
+}
-- 
2.34.1



[PATCH v2] Draft|Internal-fn: Introduce internal fn saturation US_PLUS

2024-02-24 Thread pan2 . li
From: Pan Li 

Hi Richard & Tamar,

I tried DEF_INTERNAL_INT_EXT_FN as you suggested, mapping us_plus$a3
to the RTL representation (us_plus:m x y) in optabs.def and then
adding expand_US_PLUS in internal-fn.cc.  I am not sure whether my
understanding of DEF_INTERNAL_INT_EXT_FN is correct.

I am not sure if we still need DEF_INTERNAL_SIGNED_OPTAB_FN here, given
the RTL representation has (ss_plus:m x y) and (us_plus:m x y) already.

Note this patch is a draft for validation; no tests are involved here.

gcc/ChangeLog:

* builtins.def (BUILT_IN_US_PLUS): Add builtin def.
(BUILT_IN_US_PLUSIMAX): Ditto.
(BUILT_IN_US_PLUSL): Ditto.
(BUILT_IN_US_PLUSLL): Ditto.
(BUILT_IN_US_PLUSG): Ditto.
* config/riscv/riscv-protos.h (riscv_expand_us_plus): Add new
func decl for expanding us_plus.
* config/riscv/riscv.cc (riscv_expand_us_plus): Add new func
impl for expanding us_plus.
* config/riscv/riscv.md (us_plus3): Add new pattern impl
us_plus3.
* internal-fn.cc (expand_US_PLUS): Add new func impl to expand
US_PLUS.
* internal-fn.def (US_PLUS): Add new INT_EXT_FN.
* internal-fn.h (expand_US_PLUS): Add new func decl.
* match.pd: Add new simplify pattern for us_plus.
* optabs.def (OPTAB_NL): Add new OPTAB_NL to US_PLUS rtl.

Signed-off-by: Pan Li 
---
 gcc/builtins.def|  7 +
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv.cc   | 46 +
 gcc/config/riscv/riscv.md   | 11 
 gcc/internal-fn.cc  | 26 +++
 gcc/internal-fn.def |  3 +++
 gcc/internal-fn.h   |  1 +
 gcc/match.pd| 17 
 gcc/optabs.def  |  2 ++
 9 files changed, 114 insertions(+)

diff --git a/gcc/builtins.def b/gcc/builtins.def
index f6f3e104f6a..0777b912cfa 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -1055,6 +1055,13 @@ DEF_GCC_BUILTIN(BUILT_IN_POPCOUNTIMAX, 
"popcountimax", BT_FN_INT_UINTMAX
 DEF_GCC_BUILTIN(BUILT_IN_POPCOUNTL, "popcountl", BT_FN_INT_ULONG, 
ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN(BUILT_IN_POPCOUNTLL, "popcountll", 
BT_FN_INT_ULONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN(BUILT_IN_POPCOUNTG, "popcountg", BT_FN_INT_VAR, 
ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
+
+DEF_GCC_BUILTIN(BUILT_IN_US_PLUS, "us_plus", BT_FN_INT_UINT, 
ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN(BUILT_IN_US_PLUSIMAX, "us_plusimax", 
BT_FN_INT_UINTMAX, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN(BUILT_IN_US_PLUSL, "us_plusl", BT_FN_INT_ULONG, 
ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN(BUILT_IN_US_PLUSLL, "us_plusll", BT_FN_INT_ULONGLONG, 
ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN(BUILT_IN_US_PLUSG, "us_plusg", BT_FN_INT_VAR, 
ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
+
 DEF_EXT_LIB_BUILTIN(BUILT_IN_POSIX_MEMALIGN, "posix_memalign", 
BT_FN_INT_PTRPTR_SIZE_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
 DEF_GCC_BUILTIN(BUILT_IN_PREFETCH, "prefetch", 
BT_FN_VOID_CONST_PTR_VAR, ATTR_NOVOPS_LEAF_LIST)
 DEF_LIB_BUILTIN(BUILT_IN_REALLOC, "realloc", BT_FN_PTR_PTR_SIZE, 
ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LEAF_LIST)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 80efdf2b7e5..ba6086f1f25 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -132,6 +132,7 @@ extern void riscv_asm_output_external (FILE *, const tree, 
const char *);
 extern bool
 riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
 extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
+extern void riscv_expand_us_plus (rtx, rtx, rtx);
 
 #ifdef RTX_CODE
 extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool 
*invert_ptr = 0);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 4100abc9dd1..23f08974f07 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -10657,6 +10657,52 @@ riscv_vector_mode_supported_any_target_p (machine_mode)
   return true;
 }
 
+/* Emit insn for the saturation addu, aka (x + y) | - ((x + y) < x).  */
+void
+riscv_expand_us_plus (rtx dest, rtx x, rtx y)
+{
+  machine_mode mode = GET_MODE (dest);
+  rtx pmode_sum = gen_reg_rtx (Pmode);
+  rtx pmode_lt = gen_reg_rtx (Pmode);
+  rtx pmode_x = gen_lowpart (Pmode, x);
+  rtx pmode_y = gen_lowpart (Pmode, y);
+  rtx pmode_dest = gen_reg_rtx (Pmode);
+
+  /* Step-1: sum = x + y  */
+  if (mode == SImode && mode != Pmode)
+{ /* Take addw to avoid the sum truncate.  */
+  rtx simode_sum = gen_reg_rtx (SImode);
+  riscv_emit_binary (PLUS, simode_sum, x, y);
+  emit_move_insn (pmode_sum, gen_lowpart (Pmode, simode_sum));
+}
+  else
+riscv_emit_binary (PLUS, pmode_sum, pmode_x, pmode_y);
+
+  /* Step-1.1: truncate sum for HI and QI as we have no insn for add QI/HI.  

[PATCH v1] RISC-V: Introduce gcc option mrvv-vector-bits for RVV

2024-02-23 Thread pan2 . li
From: Pan Li 

This patch introduces a new gcc option for RVV that specifies the bit
size of one RVV vector register.  Valid arguments to
'-mrvv-vector-bits=' are:

* 64
* 128
* 256
* 512
* 1024
* 2048
* 4096
* 8192
* 16384
* 32768
* 65536
* scalable
* zvl

1. The scalable will be the default values which take min_vlen for
   the riscv_vector_chunks.
2. The zvl will pick up the zvl*b from the march option. For example,
   the mrvv-vector-bits will be 1024 when march=rv64gcv_zvl1024b.
3. Otherwise, it will take the value provided, and report an error if
   none of the valid values above is given.

This option may influence code generation when auto-vectorizing.  For example,

void test_rvv_vector_bits (int *a, int *b, int *out)
{
  for (int i = 0; i < 8; i++)
out[i] = a[i] + b[i];
}

It will generate code similar to below when built with
  -march=rv64gcv_zvl128b -mabi=lp64 -mrvv-vector-bits=zvl

test_rvv_vector_bits:
  ...
  vsetivli  zero,4,e32,m1,ta,ma
  vle32.v   v1,0(a0)
  vle32.v   v2,0(a1)
  vadd.vv   v1,v1,v2
  vse32.v   v1,0(a2)
  ...
  vle32.v   v1,0(a0)
  vle32.v   v2,0(a1)
  vadd.vv   v1,v1,v2
  vse32.v   v1,0(a2)

And it will become simpler, similar to below, when built with
  -march=rv64gcv_zvl128b -mabi=lp64 -mrvv-vector-bits=256

test_rvv_vector_bits:
  ...
  vsetivli  zero,8,e32,m2,ta,ma
  vle32.v   v2,0(a0)
  vle32.v   v4,0(a1)
  vadd.vv   v2,v2,v4
  vse32.v   v2,0(a2)

Passed the regression test of rvv.

gcc/ChangeLog:

* config/riscv/riscv-opts.h (enum rvv_vector_bits_enum): New enum for
different RVV vector bits.
* config/riscv/riscv.cc (riscv_convert_vector_bits): New func to
get the RVV vector bits, with given min_vlen.
(riscv_convert_vector_chunks): Combine the mrvv-vector-bits
option with min_vlen to RVV vector chunks.
(riscv_override_options_internal): Update comments and rename the
vector chunks.
* config/riscv/riscv.opt: Add option mrvv-vector-bits.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/rvv-vector-bits-1.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-2.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-3.c: New test.
* gcc.target/riscv/rvv/base/rvv-vector-bits-4.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-opts.h | 16 ++
 gcc/config/riscv/riscv.cc | 49 ---
 gcc/config/riscv/riscv.opt| 47 ++
 .../riscv/rvv/base/rvv-vector-bits-1.c|  6 +++
 .../riscv/rvv/base/rvv-vector-bits-2.c| 20 
 .../riscv/rvv/base/rvv-vector-bits-3.c| 25 ++
 .../riscv/rvv/base/rvv-vector-bits-4.c|  6 +++
 7 files changed, 163 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/rvv-vector-bits-4.c

diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 4edddbadc37..b2141190731 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -129,6 +129,22 @@ enum vsetvl_strategy_enum {
   VSETVL_OPT_NO_FUSION,
 };
 
+enum rvv_vector_bits_enum {
+  RVV_VECTOR_BITS_SCALABLE,
+  RVV_VECTOR_BITS_ZVL,
+  RVV_VECTOR_BITS_64 = 64,
+  RVV_VECTOR_BITS_128 = 128,
+  RVV_VECTOR_BITS_256 = 256,
+  RVV_VECTOR_BITS_512 = 512,
+  RVV_VECTOR_BITS_1024 = 1024,
+  RVV_VECTOR_BITS_2048 = 2048,
+  RVV_VECTOR_BITS_4096 = 4096,
+  RVV_VECTOR_BITS_8192 = 8192,
+  RVV_VECTOR_BITS_16384 = 16384,
+  RVV_VECTOR_BITS_32768 = 32768,
+  RVV_VECTOR_BITS_65536 = 65536,
+};
+
 #define TARGET_ZICOND_LIKE (TARGET_ZICOND || (TARGET_XVENTANACONDOPS && 
TARGET_64BIT))
 
 /* Bit of riscv_zvl_flags will set contintuly, N-1 bit will set if N-bit is
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 5e984ee2a55..366d7ece383 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -8801,13 +8801,50 @@ riscv_init_machine_status (void)
   return ggc_cleared_alloc ();
 }
 
-/* Return the VLEN value associated with -march.
+static int
+riscv_convert_vector_bits (int min_vlen)
+{
+  int rvv_bits = 0;
+
+  switch (rvv_vector_bits)
+{
+  case RVV_VECTOR_BITS_SCALABLE:
+  case RVV_VECTOR_BITS_ZVL:
+   rvv_bits = min_vlen;
+   break;
+  case RVV_VECTOR_BITS_64:
+  case RVV_VECTOR_BITS_128:
+  case RVV_VECTOR_BITS_256:
+  case RVV_VECTOR_BITS_512:
+  case RVV_VECTOR_BITS_1024:
+  case RVV_VECTOR_BITS_2048:
+  case RVV_VECTOR_BITS_4096:
+  case RVV_VECTOR_BITS_8192:
+  case RVV_VECTOR_BITS_16384:
+  case RVV_VECTOR_BITS_32768:
+  case RVV_VECTOR_BITS_65536:
+   rvv_bits = rvv_vector_bits;
+   

[PATCH v1] RISC-V: Upgrade RVV intrinsic version to 0.12

2024-02-20 Thread pan2 . li
From: Pan Li 

Upgrade the version of RVV intrinsic from 0.11 to 0.12.

PR target/114017

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_cpu_cpp_builtins): Upgrade
the version to 0.12.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/predef-__riscv_v_intrinsic.c: Update the
version to 0.12.
* gcc.target/riscv/rvv/base/pr114017-1.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-c.cc   |  2 +-
 .../riscv/predef-__riscv_v_intrinsic.c|  2 +-
 .../gcc.target/riscv/rvv/base/pr114017-1.c| 19 +++
 3 files changed, 21 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c

diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 3ef06dcfd2d..3755ec0b8ef 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -139,7 +139,7 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
 {
   builtin_define ("__riscv_vector");
   builtin_define_with_int_value ("__riscv_v_intrinsic",
-riscv_ext_version_value (0, 11));
+riscv_ext_version_value (0, 12));
 }
 
if (TARGET_XTHEADVECTOR)
diff --git a/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c 
b/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
index dbbedf54f87..07f1f159a8f 100644
--- a/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
+++ b/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
@@ -3,7 +3,7 @@
 
 int main () {
 
-#if __riscv_v_intrinsic != 11000
+#if __riscv_v_intrinsic != 12000
 #error "__riscv_v_intrinsic"
 #endif
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c
new file mode 100644
index 000..8eee7c68f71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+vuint8mf2_t
+test (vuint16m1_t val, size_t shift, size_t vl)
+{
+#if __riscv_v_intrinsic == 11000
+  #warning "RVV Intrinsics v0.11"
+  return __riscv_vnclipu (val, shift, vl);
+#endif
+
+#if __riscv_v_intrinsic == 12000
+  #warning "RVV Intrinsics v0.12" /* { dg-warning "RVV Intrinsics v0.12" } */
+  return __riscv_vnclipu (val, shift, 0, vl);
+#endif
+}
+
-- 
2.34.1



[PATCH v1] Internal-fn: Add new internal function SAT_ADDU

2024-02-17 Thread pan2 . li
From: Pan Li 

This patch adds the middle-end representation for the unsigned
saturation add, i.e. the result of the add is set to the maximum value
of the type on overflow.  It matches a pattern similar to the one below.

SAT_ADDU (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))

Take uint8_t as example, we will have:

* SAT_ADDU (1, 254)   => 255.
* SAT_ADDU (1, 255)   => 255.
* SAT_ADDU (2, 255)   => 255.
* SAT_ADDU (255, 255) => 255.

The patch also implements SAT_ADDU in the riscv backend as a
sample.  Given the example below:

uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
}

Before this patch:

uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  _Bool _2;
  long unsigned int _3;
  long unsigned int _4;
  uint64_t _7;
  long unsigned int _10;
  __complex__ long unsigned int _11;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
  _1 = REALPART_EXPR <_11>;
  _10 = IMAGPART_EXPR <_11>;
  _2 = _10 != 0;
  _3 = (long unsigned int) _2;
  _4 = -_3;
  _7 = _1 | _4;
  return _7;
;;succ:   EXIT

}

After this patch:

uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  uint64_t _7;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _7 = .SAT_ADDU (x_5(D), y_6(D)); [tail call]
  return _7;
;;succ:   EXIT

}

Then we will have the middle-end representation like .SAT_ADDU after
this patch.

PR target/51492
PR target/112600

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_expand_saturation_addu):
New func decl for the SAT_ADDU expand.
* config/riscv/riscv.cc (riscv_expand_saturation_addu): New func
impl for the SAT_ADDU expand.
* config/riscv/riscv.md (sat_addu_3): New pattern to impl
the standard name SAT_ADDU.
* doc/md.texi: Add doc for SAT_ADDU.
* internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADDU.
* internal-fn.def (SAT_ADDU): Add SAT_ADDU.
* match.pd: Add simplify pattern patch for SAT_ADDU.
* optabs.def (OPTAB_D): Add sat_addu_optab.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_addu-1.c: New test.
* gcc.target/riscv/sat_addu-2.c: New test.
* gcc.target/riscv/sat_addu-3.c: New test.
* gcc.target/riscv/sat_addu-4.c: New test.
* gcc.target/riscv/sat_addu-run-1.c: New test.
* gcc.target/riscv/sat_addu-run-2.c: New test.
* gcc.target/riscv/sat_addu-run-3.c: New test.
* gcc.target/riscv/sat_addu-run-4.c: New test.
* gcc.target/riscv/sat_arith.h: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-protos.h   |  1 +
 gcc/config/riscv/riscv.cc | 46 +
 gcc/config/riscv/riscv.md | 11 +
 gcc/doc/md.texi   | 11 +
 gcc/internal-fn.cc|  1 +
 gcc/internal-fn.def   |  1 +
 gcc/match.pd  | 22 +
 gcc/optabs.def|  2 +
 gcc/testsuite/gcc.target/riscv/sat_addu-1.c   | 18 +++
 gcc/testsuite/gcc.target/riscv/sat_addu-2.c   | 20 
 gcc/testsuite/gcc.target/riscv/sat_addu-3.c   | 17 +++
 gcc/testsuite/gcc.target/riscv/sat_addu-4.c   | 16 ++
 .../gcc.target/riscv/sat_addu-run-1.c | 42 
 .../gcc.target/riscv/sat_addu-run-2.c | 42 
 .../gcc.target/riscv/sat_addu-run-3.c | 42 
 .../gcc.target/riscv/sat_addu-run-4.c | 49 +++
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 15 ++
 17 files changed, 356 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_addu-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_addu-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_addu-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_addu-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_addu-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_addu-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_addu-run-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_addu-run-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_arith.h

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index ae1685850ac..f201b2384f9 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -132,6 +132,7 @@ extern void riscv_asm_output_external (FILE *, const tree, 
const char *);
 extern bool
 riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
 extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
+extern void riscv_expand_saturation_addu (rtx, rtx, rtx);
 
 #ifdef RTX_CODE
 extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool 
*invert_ptr = 0);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 

[PATCH v1] RISC-V: Fix misspelled term args in error_at message

2024-02-10 Thread pan2 . li
From: Pan Li 

When building with "-Werror=format-diag", the misspelled term 'args'
is reported as shown below.  This patch fixes it by using the term
'arguments' instead.

../../gcc/config/riscv/riscv-vector-builtins.cc: In function 'tree_node*
riscv_vector::resolve_overloaded_builtin(location_t, unsigned int, tree,
vec*)':
../../gcc/config/riscv/riscv-vector-builtins.cc:4633:65: error:
misspelled term 'args' in format; use 'arguments' instead
[-Werror=format-diag]
 4633 | error_at (loc, "no matching function call to %qE with empty
  args", fndecl);

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins.cc (resolve_overloaded_builtin):
Replace args to arguments for misspelled term.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr113766-1.c: Adjust the test cases.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-vector-builtins.cc |   3 +-
 .../gcc.target/riscv/rvv/base/pr113766-1.c| 126 +-
 2 files changed, 65 insertions(+), 64 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index efcdc8f1767..c5881a501d1 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -4630,7 +4630,8 @@ resolve_overloaded_builtin (location_t loc, unsigned int 
code, tree fndecl,
 
  Here we report error when overloaded function with empty args.  */
   if (rfun->overloaded_p && arglist->length () == 0)
-error_at (loc, "no matching function call to %qE with empty args", fndecl);
+error_at (loc, "no matching function call to %qE with empty arguments",
+ fndecl);
 
   hashval_t hash = rfun->overloaded_hash (*arglist);
   registered_function *rfn
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr113766-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr113766-1.c
index fd674a8895c..9e911e31117 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr113766-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr113766-1.c
@@ -6,96 +6,96 @@
 void
 test ()
 {
-  __riscv_vand ();  /* { dg-error {no matching function call to 
'__riscv_vand' with empty args} } */
-  __riscv_vand_tu ();   /* { dg-error {no matching function call to 
'__riscv_vand_tu' with empty args} } */
-  __riscv_vand_tumu (); /* { dg-error {no matching function call to 
'__riscv_vand_tumu' with empty args} } */
+  __riscv_vand ();  /* { dg-error {no matching function call to 
'__riscv_vand' with empty arguments} } */
+  __riscv_vand_tu ();   /* { dg-error {no matching function call to 
'__riscv_vand_tu' with empty arguments} } */
+  __riscv_vand_tumu (); /* { dg-error {no matching function call to 
'__riscv_vand_tumu' with empty arguments} } */
 
-  __riscv_vcompress (); /* { dg-error {no matching function call to 
'__riscv_vcompress' with empty args} } */
-  __riscv_vcompress_tu ();  /* { dg-error {no matching function call to 
'__riscv_vcompress_tu' with empty args} } */
+  __riscv_vcompress (); /* { dg-error {no matching function call to 
'__riscv_vcompress' with empty arguments} } */
+  __riscv_vcompress_tu ();  /* { dg-error {no matching function call to 
'__riscv_vcompress_tu' with empty arguments} } */
 
-  __riscv_vcpop (); /* { dg-error {no matching function call to 
'__riscv_vcpop' with empty args} } */
+  __riscv_vcpop (); /* { dg-error {no matching function call to 
'__riscv_vcpop' with empty arguments} } */
 
-  __riscv_vdiv ();  /* { dg-error {no matching function call to 
'__riscv_vdiv' with empty args} } */
-  __riscv_vdiv_tu ();   /* { dg-error {no matching function call to 
'__riscv_vdiv_tu' with empty args} } */
-  __riscv_vdiv_tumu (); /* { dg-error {no matching function call to 
'__riscv_vdiv_tumu' with empty args} } */
+  __riscv_vdiv ();  /* { dg-error {no matching function call to 
'__riscv_vdiv' with empty arguments} } */
+  __riscv_vdiv_tu ();   /* { dg-error {no matching function call to 
'__riscv_vdiv_tu' with empty arguments} } */
+  __riscv_vdiv_tumu (); /* { dg-error {no matching function call to 
'__riscv_vdiv_tumu' with empty arguments} } */
 
-  __riscv_vfabs (); /* { dg-error {no matching function call to 
'__riscv_vfabs' with empty args} } */
-  __riscv_vfabs_tu ();  /* { dg-error {no matching function call to 
'__riscv_vfabs_tu' with empty args} } */
-  __riscv_vfabs_tumu ();/* { dg-error {no matching function call to 
'__riscv_vfabs_tumu' with empty args} } */
+  __riscv_vfabs (); /* { dg-error {no matching function call to 
'__riscv_vfabs' with empty arguments} } */
+  __riscv_vfabs_tu ();  /* { dg-error {no matching function call to 
'__riscv_vfabs_tu' with empty arguments} } */
+  __riscv_vfabs_tumu ();/* { dg-error {no matching function call to 
'__riscv_vfabs_tumu' with empty arguments} } */
 
-  __riscv_vfadd ();

[PATCH v1] RISC-V: Bugfix for RVV overloaded intrinsic ICE in function checker

2024-02-07 Thread pan2 . li
From: Pan Li 

There is another corner case, similar to the example below:

void test (void)
{
  __riscv_vaadd ();
}

We report an error when an overloaded function is called with empty args.  For example:

test.c: In function 'foo':
test.c:8:3: error: no matching function call to '__riscv_vaadd' with empty args
8 |   __riscv_vaadd ();
  |   ^~~~

Unfortunately, another ICE similar to the one below occurs after the
above message.  The underlying function checker is built with zero args,
which breaks some of its assumptions, for example that the count of
args is not less than 2.

ice.c: In function ‘foo’:
ice.c:8:3: internal compiler error: in require_immediate, at
config/riscv/riscv-vector-builtins.cc:4252
8 |   __riscv_vaadd ();
  |   ^
0x20b36ac riscv_vector::function_checker::require_immediate(unsigned
int, long, long) const
.../__RISC-V_BUILD__/../gcc/config/riscv/riscv-vector-builtins.cc:4252
0x20b890c riscv_vector::alu_def::check(riscv_vector::function_checker&) const

.../__RISC-V_BUILD__/../gcc/config/riscv/riscv-vector-builtins-shapes.cc:387
0x20b38d7 riscv_vector::function_checker::check()
.../__RISC-V_BUILD__/../gcc/config/riscv/riscv-vector-builtins.cc:4315
0x20b4876 riscv_vector::check_builtin_call(unsigned int, vec,
.../__RISC-V_BUILD__/../gcc/config/riscv/riscv-vector-builtins.cc:4605
0x2069393 riscv_check_builtin_call
.../__RISC-V_BUILD__/../gcc/config/riscv/riscv-c.cc:227

The tests below pass with this patch.

* The riscv regression tests.

PR target/113766

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-shapes.cc (struct alu_def): Make
sure the c.arg_num is >= 2 before checking.
(struct build_frm_base): Ditto.
(struct narrow_alu_def): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr113766-1.c: Add new cases.

Signed-off-by: Pan Li 
---
 .../riscv/riscv-vector-builtins-shapes.cc   | 17 +
 .../gcc.target/riscv/rvv/base/pr113766-1.c  | 16 
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc 
b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
index 8e90b17a94b..c5ffcc1f2c4 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
@@ -383,7 +383,10 @@ struct alu_def : public build_base
 /* Check whether rounding mode argument is a valid immediate.  */
 if (c.base->has_rounding_mode_operand_p ())
   {
-   if (!c.any_type_float_p ())
+   /* Some invalid overload intrinsic like below will have zero for
+  c.arg_num ().  Thus, make sure arg_num is big enough here.
+  __riscv_vaadd () will make c.arg_num () == 0.  */
+   if (!c.any_type_float_p () && c.arg_num () >= 2)
  return c.require_immediate (c.arg_num () - 2, VXRM_RNU, VXRM_ROD);
/* TODO: We will support floating-point intrinsic modeling
   rounding mode in the future.  */
@@ -411,8 +414,11 @@ struct build_frm_base : public build_base
   {
 gcc_assert (c.any_type_float_p ());
 
-/* Check whether rounding mode argument is a valid immediate.  */
-if (c.base->has_rounding_mode_operand_p ())
+/* Check whether rounding mode argument is a valid immediate.
+   Some invalid overload intrinsic like below will have zero for
+   c.arg_num ().  Thus, make sure arg_num is big enough here.
+   __riscv_vaadd () will make c.arg_num () == 0.  */
+if (c.base->has_rounding_mode_operand_p () && c.arg_num () >= 2)
   {
unsigned int frm_num = c.arg_num () - 2;
 
@@ -679,7 +685,10 @@ struct narrow_alu_def : public build_base
 /* Check whether rounding mode argument is a valid immediate.  */
 if (c.base->has_rounding_mode_operand_p ())
   {
-   if (!c.any_type_float_p ())
+   /* Some invalid overload intrinsic like below will have zero for
+  c.arg_num ().  Thus, make sure arg_num is big enough here.
+  __riscv_vaadd () will make c.arg_num () == 0.  */
+   if (!c.any_type_float_p () && c.arg_num () >= 2)
  return c.require_immediate (c.arg_num () - 2, VXRM_RNU, VXRM_ROD);
/* TODO: We will support floating-point intrinsic modeling
   rounding mode in the future.  */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr113766-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr113766-1.c
index bd4943b0b7e..fd674a8895c 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr113766-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr113766-1.c
@@ -82,4 +82,20 @@ test ()
 
   __riscv_vfredosum (); /* { dg-error {no matching function call to 
'__riscv_vfredosum' with empty args} } */
   __riscv_vfredosum_tu ();  /* { dg-error {no matching function call to 
'__riscv_vfredosum_tu' with empty args} } */
+
+  __riscv_vaadd (); /* { dg-error {no matching function call to 
'__riscv_vaadd' 

[PATCH v1] RISC-V: Bugfix for RVV overloaded intrinsic ICE when empty args

2024-02-06 Thread pan2 . li
From: Pan Li 

There is one corner case, similar to the example below:

void test (void)
{
  __riscv_vfredosum_tu ();
}

It will hit an ICE because of the implementation details of overloaded
functions in gcc.  According to the rvv intrinsic doc, there is no such
overloaded function with empty args.  Unfortunately, we register the
empty args function as overloaded to avoid conflicts.  Thus, there is
actually one registered function after NULL_TREE is returned to the
middle-end, which finally results in an ICE when expanding.  For example:

1. First we register void __riscv_vfredmax () as the overloaded function.
2. Then resolve_overloaded_builtin (this func) returns NULL_TREE.
3. The function registered in step 1 bypasses the args check as it has empty args.
4. Finally, we fall into expand_builtin with empty args and hit the ICE.

Here we report an error when an overloaded function is called with empty args.  For example:

test.c: In function 'foo':
test.c:8:3: error: no matching function call to '__riscv_vfredosum_tu' with 
empty args
8 |   __riscv_vfredosum_tu();
  |   ^~~~

The tests below pass with this patch.

* The riscv regression tests.

PR target/113766

gcc/ChangeLog:

* config/riscv/riscv-protos.h (resolve_overloaded_builtin): Adjust
the signature of func.
* config/riscv/riscv-c.cc (riscv_resolve_overloaded_builtin): Ditto.
* config/riscv/riscv-vector-builtins.cc (resolve_overloaded_builtin): 
Make
overloaded func with empty args error.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr113766-1.c: New test.
* gcc.target/riscv/rvv/base/pr113766-2.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-c.cc   |  3 +-
 gcc/config/riscv/riscv-protos.h   |  2 +-
 gcc/config/riscv/riscv-vector-builtins.cc | 23 -
 .../gcc.target/riscv/rvv/base/pr113766-1.c| 85 +++
 .../gcc.target/riscv/rvv/base/pr113766-2.c| 48 +++
 5 files changed, 155 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr113766-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr113766-2.c

diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 2e306057347..94c3871c760 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -250,7 +250,8 @@ riscv_resolve_overloaded_builtin (unsigned int 
uncast_location, tree fndecl,
 case RISCV_BUILTIN_GENERAL:
   break;
 case RISCV_BUILTIN_VECTOR:
-  new_fndecl = riscv_vector::resolve_overloaded_builtin (subcode, arglist);
+  new_fndecl = riscv_vector::resolve_overloaded_builtin (loc, subcode,
+fndecl, arglist);
   break;
 default:
   gcc_unreachable ();
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index b3f0bdb9924..ae1685850ac 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -560,7 +560,7 @@ gimple *gimple_fold_builtin (unsigned int, 
gimple_stmt_iterator *, gcall *);
 rtx expand_builtin (unsigned int, tree, rtx);
 bool check_builtin_call (location_t, vec, unsigned int,
   tree, unsigned int, tree *);
-tree resolve_overloaded_builtin (unsigned int, vec *);
+tree resolve_overloaded_builtin (location_t, unsigned int, tree, vec *);
 bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
 bool legitimize_move (rtx, rtx *);
 void emit_vlmax_vsetvl (machine_mode, rtx);
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index 403e1021fd1..efcdc8f1767 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -4606,7 +4606,8 @@ check_builtin_call (location_t location, vec, 
unsigned int code,
 }
 
 tree
-resolve_overloaded_builtin (unsigned int code, vec *arglist)
+resolve_overloaded_builtin (location_t loc, unsigned int code, tree fndecl,
+   vec *arglist)
 {
   if (code >= vec_safe_length (registered_functions))
 return NULL_TREE;
@@ -4616,12 +4617,26 @@ resolve_overloaded_builtin (unsigned int code, 
vec *arglist)
   if (!rfun || !rfun->overloaded_p)
 return NULL_TREE;
 
+  /* According to the rvv intrinisc doc, we have no such overloaded function
+ with empty args.  Unfortunately, we register the empty args function as
+ overloaded for avoiding conflict.  Thus, there will actual one register
+ function after return NULL_TREE back to the middle-end, and finally result
+ in ICE when expanding.  For example:
+
+ 1. First we registered void __riscv_vfredmax () as the overloaded 
function.
+ 2. Then resolve_overloaded_builtin (this func) return NULL_TREE.
+ 3. The functions register in step 1 bypass the args check as empty args.
+ 4. Finally, fall into expand_builtin with empty args and meet ICE.
+
+ Here we report error 

[PATCH v1] RISC-V: Cleanup the comments for the psabi

2024-01-30 Thread pan2 . li
From: Pan Li 

This patch cleans up some comments that are out of date or
incorrect.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_get_arg_info): Cleanup comments.
(riscv_pass_by_reference): Ditto.
(riscv_fntype_abi): Ditto.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 529ef5e84b7..7713ad26c8d 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -5067,8 +5067,7 @@ riscv_get_arg_info (struct riscv_arg_info *info, const 
CUMULATIVE_ARGS *cum,
   info->gpr_offset = cum->num_gprs;
   info->fpr_offset = cum->num_fprs;
 
-  /* When disable vector_abi or scalable vector argument is anonymous, this
- argument is passed by reference.  */
+  /* Passed by reference when the scalable vector argument is anonymous.  */
   if (riscv_v_ext_mode_p (mode) && !named)
 return NULL_RTX;
 
@@ -5265,8 +5264,9 @@ riscv_pass_by_reference (cumulative_args_t cum_v, const 
function_arg_info )
  so we can avoid the call to riscv_get_arg_info in this case.  */
   if (cum != NULL)
 {
-  /* Don't pass by reference if we can use a floating-point register.  */
   riscv_get_arg_info (, cum, arg.mode, arg.type, arg.named, false);
+
+  /* Don't pass by reference if we can use a floating-point register.  */
   if (info.num_fprs)
return false;
 
@@ -5279,9 +5279,9 @@ riscv_pass_by_reference (cumulative_args_t cum_v, const 
function_arg_info )
return false;
 }
 
-  /* When vector abi disabled(without --param=riscv-vector-abi option) or
- scalable vector argument is anonymous or cannot be passed through vector
- registers, this argument is passed by reference. */
+  /* Passed by reference when:
+ 1. The scalable vector argument is anonymous.
+ 2. Args cannot be passed through vector registers.  */
   if (riscv_v_ext_mode_p (arg.mode))
 return true;
 
@@ -5392,12 +5392,9 @@ riscv_arguments_is_vector_type_p (const_tree fntype)
 static const predefined_function_abi &
 riscv_fntype_abi (const_tree fntype)
 {
-  /* Implementing an experimental vector calling convention, the proposal
- can be viewed at the bellow link:
-   https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/389
-
- You can enable this feature via the `--param=riscv-vector-abi` compiler
- option.  */
+  /* Implement the vector calling convention.  For more details please
+ reference the below link.
+ https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/389  */
   if (riscv_return_value_is_vector_type_p (fntype)
  || riscv_arguments_is_vector_type_p (fntype))
 return riscv_v_abi ();
-- 
2.34.1



[PATCH v2] RISC-V: Bugfix for vls mode aggregated in GPR calling convention

2024-01-30 Thread pan2 . li
From: Pan Li 

According to the issue as below.

https://hub.fgit.cf/riscv-non-isa/riscv-elf-psabi-doc/pull/416

When the mode size of vls integer mode is no larger than 2 * XLEN, we
will use the gprs for both the args and the return values, instead of
passing by reference.  For example the below code:

typedef short v8hi __attribute__ ((vector_size (16)));

v8hi __attribute__((noinline))
add (v8hi a, v8hi b)
{
  v8hi r = a + b;
  return r;
}

Before this patch:
add:
  vsetivli zero,8,e16,m1,ta,ma
  vle16.v  v1,0(a1) <== arg by reference
  vle16.v  v2,0(a2) <== arg by reference
  vadd.vv  v1,v1,v2
  vse16.v  v1,0(a0) <== return by reference
  ret

After this patch:
add:
  addi sp,sp,-32
  sd   a0,0(sp)  <== arg by register a0 - a3
  sd   a1,8(sp)
  sd   a2,16(sp)
  sd   a3,24(sp)
  addi a5,sp,16
  vsetivli zero,8,e16,m1,ta,ma
  vle16.v  v2,0(sp)
  vle16.v  v1,0(a5)
  vadd.vv  v1,v1,v2
  vse16.v  v1,0(sp)
  ld   a0,0(sp)  <== return by a0 - a1.
  ld   a1,8(sp)
  addi sp,sp,32
  jr   ra

For vls floating point, we follow the same rules as integer and pass by
gpr or by reference.  However, the above code can be simplified by
vmv to avoid reading/writing the values to the stack.  We will prepare
another patch for that as it is outside the scope of this bugfix.

The riscv regression passed for this patch.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_v_vls_mode_aggregate_gpr_count): New 
function to
calculate the gpr count required by vls mode.
(riscv_v_vls_to_gpr_mode): New function convert vls mode to gpr mode.
(riscv_pass_vls_aggregate_in_gpr): New function to return the rtx of gpr
for vls mode.
(riscv_get_arg_info): Add vls mode handling.
(riscv_pass_by_reference): Return false if arg info has no zero gpr 
count.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls/def.h: Add new helper macro.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-10.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-6.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-7.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-8.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-9.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-6.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc |  75 +
 .../rvv/autovec/vls/calling-convention-1.c| 154 ++
 .../rvv/autovec/vls/calling-convention-10.c   |  51 ++
 .../rvv/autovec/vls/calling-convention-2.c| 142 
 .../rvv/autovec/vls/calling-convention-3.c| 130 +++
 .../rvv/autovec/vls/calling-convention-4.c| 118 ++
 .../rvv/autovec/vls/calling-convention-5.c| 141 
 .../rvv/autovec/vls/calling-convention-6.c| 129 +++
 .../rvv/autovec/vls/calling-convention-7.c| 118 ++
 .../rvv/autovec/vls/calling-convention-8.c|  43 +
 .../rvv/autovec/vls/calling-convention-9.c|  51 ++
 .../autovec/vls/calling-convention-run-1.c|  55 +++
 .../autovec/vls/calling-convention-run-2.c|  55 +++
 .../autovec/vls/calling-convention-run-3.c|  55 +++
 .../autovec/vls/calling-convention-run-4.c|  55 +++
 .../autovec/vls/calling-convention-run-5.c|  55 +++
 .../autovec/vls/calling-convention-run-6.c|  55 +++
 .../gcc.target/riscv/rvv/autovec/vls/def.h|  74 +
 18 files changed, 1556 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-4.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-5.c
 create mode 100644 

[PATCH v1] RISC-V: Bugfix for vls integer mode calling convention

2024-01-23 Thread pan2 . li
From: Pan Li 

According to the issue as below.

https://hub.fgit.cf/riscv-non-isa/riscv-elf-psabi-doc/pull/416

When the mode size of vls integer mode is no larger than 2 * XLEN, we
will use the gpr/fpr for both the args and the return values, instead
of passing by reference.  For example the below code:

typedef short v8hi __attribute__ ((vector_size (16)));

v8hi __attribute__((noinline))
add (v8hi a, v8hi b)
{
  v8hi r = a + b;
  return r;
}

Before this patch:
add:
  vsetivli zero,8,e16,m1,ta,ma
  vle16.v  v1,0(a1) <== arg by reference
  vle16.v  v2,0(a2) <== arg by reference
  vadd.vv  v1,v1,v2
  vse16.v  v1,0(a0) <== return by reference
  ret

After this patch:
add:
  addi sp,sp,-32
  sd   a0,0(sp)  <== arg by register a0 - a3
  sd   a1,8(sp)
  sd   a2,16(sp)
  sd   a3,24(sp)
  addi a5,sp,16
  vsetivli zero,8,e16,m1,ta,ma
  vle16.v  v2,0(sp)
  vle16.v  v1,0(a5)
  vadd.vv  v1,v1,v2
  vse16.v  v1,0(sp)
  ld   a0,0(sp)  <== return by a0 - a1.
  ld   a1,8(sp)
  addi sp,sp,32
  jr   ra

For vls floating point, things get more complicated.  We follow
the rules below.

1. Vls element count <= 2 and vls size <= 2 * xlen, go fpr.
2. Vls size <= 2 * xlen, go gpr.
3. Vls size > 2 * xlen, go reference.

One exception is V2DF mode: we treat vls mode as aggregated and we will
have TFmode here.  Unfortunately, emit_move_multi_word cannot take
care of TFmode elegantly, so we go to gpr for V2DF mode.

The riscv regression passed for this patch.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_v_ext_vector_or_tuple_mode_p):
New predicate function for vector or tuple vector.
(riscv_v_vls_mode_aggregate_reg_count): New function to
calculate the gpr/fpr count required by vls mode.
(riscv_gpr_unit_size): New function to get gpr in bytes.
(riscv_fpr_unit_size): New function to get fpr in bytes.
(riscv_v_vls_to_gpr_mode): New function convert vls mode to gpr mode.
(riscv_v_vls_to_fpr_mode): New function convert vls mode to fpr mode.
(riscv_pass_vls_aggregate_in_gpr_or_fpr): New function to return
the rtx of gpr/fpr for vls mode.
(riscv_mode_pass_by_reference_p): New predicate function to
indicate the mode will be passed by reference or not.
(riscv_get_arg_info): Add vls mode handling.
(riscv_pass_by_reference): Return false if arg info has no zero
gpr count.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls/def.h: Add helper macros.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-10.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-6.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-7.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-8.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-9.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-6.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 185 +-
 .../rvv/autovec/vls/calling-convention-1.c| 154 +++
 .../rvv/autovec/vls/calling-convention-10.c   |  51 +
 .../rvv/autovec/vls/calling-convention-2.c| 142 ++
 .../rvv/autovec/vls/calling-convention-3.c| 130 
 .../rvv/autovec/vls/calling-convention-4.c| 118 +++
 .../rvv/autovec/vls/calling-convention-5.c| 141 +
 .../rvv/autovec/vls/calling-convention-6.c| 129 
 .../rvv/autovec/vls/calling-convention-7.c| 120 
 .../rvv/autovec/vls/calling-convention-8.c|  43 
 .../rvv/autovec/vls/calling-convention-9.c|  51 +
 .../autovec/vls/calling-convention-run-1.c|  55 ++
 .../autovec/vls/calling-convention-run-2.c|  55 ++
 .../autovec/vls/calling-convention-run-3.c|  55 ++
 .../autovec/vls/calling-convention-run-4.c|  55 ++
 .../autovec/vls/calling-convention-run-5.c|  55 ++
 .../autovec/vls/calling-convention-run-6.c|  55 ++
 .../gcc.target/riscv/rvv/autovec/vls/def.h|  74 +++
 18 files changed, 1665 insertions(+), 3 

[PATCH v1] RISC-V: Fix asm checks regression due to recent middle-end change

2024-01-17 Thread pan2 . li
From: Pan Li 

The recent middle-end change results in some asm check failures.
This patch fixes the asm checks by adjusting the expected counts.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls/shift-1.c: Fix asm check
count.
* gcc.target/riscv/rvv/autovec/vls/shift-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/shift-3.c: Ditto.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-1.c | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-2.c | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-1.c
index e57a0b6bdf3..cb5a1dbc9ff 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-1.c
@@ -53,5 +53,5 @@ DEF_OP_VV (shift, 128, int64_t, >>)
 DEF_OP_VV (shift, 256, int64_t, >>)
 DEF_OP_VV (shift, 512, int64_t, >>)
 
-/* { dg-final { scan-assembler-times 
{vsra\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 39 } } */
+/* { dg-final { scan-assembler-times 
{vsra\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 42 } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-2.c
index 9d1fa64232c..e626a52c2d8 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-2.c
@@ -53,5 +53,5 @@ DEF_OP_VV (shift, 128, uint64_t, >>)
 DEF_OP_VV (shift, 256, uint64_t, >>)
 DEF_OP_VV (shift, 512, uint64_t, >>)
 
-/* { dg-final { scan-assembler-times 
{vsrl\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 39 } } */
+/* { dg-final { scan-assembler-times 
{vsrl\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 42 } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c
index 8de1b9c0c41..244bee02e55 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c
@@ -53,5 +53,5 @@ DEF_OP_VV (shift, 128, int64_t, <<)
 DEF_OP_VV (shift, 256, int64_t, <<)
 DEF_OP_VV (shift, 512, int64_t, <<)
 
-/* { dg-final { scan-assembler-times 
{vsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 46 } } */
+/* { dg-final { scan-assembler-times 
{vsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 47 } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-- 
2.34.1



[PATCH v1] RISC-V: Update the comments of riscv_v_ext_mode_p [NFC]

2024-01-11 Thread pan2 . li
From: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_v_ext_mode_p): Update the
comments of predicate func riscv_v_ext_mode_p.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index df9799d9c5e..f829014a589 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1361,7 +1361,10 @@ riscv_v_ext_vls_mode_p (machine_mode mode)
   return false;
 }
 
-/* Return true if it is either RVV vector mode or RVV tuple mode.  */
+/* Return true if it is either of below modes.
+   1. RVV vector mode.
+   2. RVV tuple mode.
+   3. RVV vls mode.  */
 
 static bool
 riscv_v_ext_mode_p (machine_mode mode)
-- 
2.34.1



[PATCH v5] LOOP-UNROLL: Leverage HAS_SIGNED_ZERO for var expansion

2024-01-11 Thread pan2 . li
From: Pan Li 

insert_var_expansion_initialization relies on HONOR_SIGNED_ZEROS
to choose the initial value of the expansion variables, so they
are initialized to +0.0f instead of -0.0f when -fno-signed-zeros
is in effect.  However, we should always keep -0.0f here because:

* -0.0f is always the correct initial value.
* We need to support targets that always honor signed zeros.

Thus, we need to leverage MODE_HAS_SIGNED_ZEROS instead of
HONOR_SIGNED_ZEROS when initializing.  Then the target/backend can
decide whether to honor no-signed-zeros or not.

We also remove the testcase pr30957-1.c, as it relies on undefined
behavior: whether the return value is positive or negative zero.

The below tests are passed for this patch:

* The riscv regression tests.
* The aarch64 regression tests.
* The x86 bootstrap and regression tests.

gcc/ChangeLog:

* loop-unroll.cc (insert_var_expansion_initialization): Leverage
MODE_HAS_SIGNED_ZEROS for expansion variable initialization.

gcc/testsuite/ChangeLog:

* gcc.dg/pr30957-1.c: Remove.

Signed-off-by: Pan Li 
---
 gcc/loop-unroll.cc   |  4 ++--
 gcc/testsuite/gcc.dg/pr30957-1.c | 36 
 2 files changed, 2 insertions(+), 38 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.dg/pr30957-1.c

diff --git a/gcc/loop-unroll.cc b/gcc/loop-unroll.cc
index 4176a21e308..bfdfe6c2bb7 100644
--- a/gcc/loop-unroll.cc
+++ b/gcc/loop-unroll.cc
@@ -1855,7 +1855,7 @@ insert_var_expansion_initialization (struct var_to_expand 
*ve,
   rtx var, zero_init;
   unsigned i;
   machine_mode mode = GET_MODE (ve->reg);
-  bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
+  bool has_signed_zero_p = MODE_HAS_SIGNED_ZEROS (mode);
 
   if (ve->var_expansions.length () == 0)
 return;
@@ -1869,7 +1869,7 @@ insert_var_expansion_initialization (struct var_to_expand 
*ve,
 case MINUS:
   FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
 {
- if (honor_signed_zero_p)
+ if (has_signed_zero_p)
zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
  else
zero_init = CONST0_RTX (mode);
diff --git a/gcc/testsuite/gcc.dg/pr30957-1.c b/gcc/testsuite/gcc.dg/pr30957-1.c
deleted file mode 100644
index 564410913ab..000
--- a/gcc/testsuite/gcc.dg/pr30957-1.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do run { xfail { mmix-*-* } } } */
-/* We don't (and don't want to) perform this optimisation on soft-float 
targets,
-   where each addition is a library call.  /
-/* { dg-require-effective-target hard_float } */
-/* -fassociative-math requires -fno-trapping-math and -fno-signed-zeros. */
-/* { dg-options "-O2 -funroll-loops -fassociative-math -fno-trapping-math 
-fno-signed-zeros -fvariable-expansion-in-unroller -fdump-rtl-loop2_unroll" } */
-
-extern void abort (void);
-extern void exit (int);
-
-float __attribute__((noinline))
-foo (float d, int n)
-{
-  unsigned i;
-  float accum = d;
-
-  for (i = 0; i < n; i++)
-accum += d;
-
-  return accum;
-}
-
-int
-main ()
-{
-  /* When compiling standard compliant we expect foo to return -0.0.  But the
- variable expansion during unrolling optimization (for this testcase 
enabled
- by non-compliant -fassociative-math) instantiates copy(s) of the
- accumulator which it initializes with +0.0.  Hence we expect that foo
- returns +0.0.  */
-  if (__builtin_copysignf (1.0, foo (0.0 / -5.0, 10)) != 1.0)
-abort ();
-  exit (0);
-}
-
-/* { dg-final { scan-rtl-dump "Expanding Accumulator" "loop2_unroll" { xfail 
mmix-*-* } } } */
-- 
2.34.1



[PATCH v4] LOOP-UNROLL: Leverage HAS_SIGNED_ZERO for var expansion

2024-01-10 Thread pan2 . li
From: Pan Li 

insert_var_expansion_initialization relies on HONOR_SIGNED_ZEROS
to choose the initial value of the expansion variables, so they
are initialized to +0.0f instead of -0.0f when -fno-signed-zeros
is in effect.  However, we should always keep -0.0f here because:

* -0.0f is always the correct initial value.
* We need to support targets that always honor signed zeros.

Thus, we need to leverage MODE_HAS_SIGNED_ZEROS instead of
HONOR_SIGNED_ZEROS when initializing.  Then the target/backend can
decide whether to honor no-signed-zeros or not.

The below tests are passed for this patch:

* The riscv regression tests.
* The aarch64 regression tests.
* The x86 bootstrap and regression tests.

gcc/ChangeLog:

* loop-unroll.cc (insert_var_expansion_initialization): Leverage
MODE_HAS_SIGNED_ZEROS for expansion variable initialization.

gcc/testsuite/ChangeLog:

* gcc.dg/pr30957-1.c: Adjust tests cases for different scenarios.

Signed-off-by: Pan Li 
---
 gcc/loop-unroll.cc   |  4 +--
 gcc/testsuite/gcc.dg/pr30957-1.c | 48 
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/gcc/loop-unroll.cc b/gcc/loop-unroll.cc
index 4176a21e308..bfdfe6c2bb7 100644
--- a/gcc/loop-unroll.cc
+++ b/gcc/loop-unroll.cc
@@ -1855,7 +1855,7 @@ insert_var_expansion_initialization (struct var_to_expand 
*ve,
   rtx var, zero_init;
   unsigned i;
   machine_mode mode = GET_MODE (ve->reg);
-  bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
+  bool has_signed_zero_p = MODE_HAS_SIGNED_ZEROS (mode);
 
   if (ve->var_expansions.length () == 0)
 return;
@@ -1869,7 +1869,7 @@ insert_var_expansion_initialization (struct var_to_expand 
*ve,
 case MINUS:
   FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
 {
- if (honor_signed_zero_p)
+ if (has_signed_zero_p)
zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
  else
zero_init = CONST0_RTX (mode);
diff --git a/gcc/testsuite/gcc.dg/pr30957-1.c b/gcc/testsuite/gcc.dg/pr30957-1.c
index 564410913ab..6a9d3d87932 100644
--- a/gcc/testsuite/gcc.dg/pr30957-1.c
+++ b/gcc/testsuite/gcc.dg/pr30957-1.c
@@ -20,16 +20,52 @@ foo (float d, int n)
   return accum;
 }
 
+float __attribute__((noinline))
+get_minus_zero()
+{
+  return 0.0 / -5.0;
+}
+
 int
 main ()
 {
-  /* When compiling standard compliant we expect foo to return -0.0.  But the
- variable expansion during unrolling optimization (for this testcase 
enabled
- by non-compliant -fassociative-math) instantiates copy(s) of the
- accumulator which it initializes with +0.0.  Hence we expect that foo
- returns +0.0.  */
-  if (__builtin_copysignf (1.0, foo (0.0 / -5.0, 10)) != 1.0)
+  /* The variable expansion in unroll requires option unsafe-math-optimizations
+ (aka -fno-signed-zeros, -fno-trapping-math, -fassociative-math
+ and -freciprocal-math).
+
+ When loop like above will have expansion after unrolling as below:
+
+ accum_1 += d_1;
+ accum_2 += d_2;
+ accum_3 += d_3;
+ ...
+
+ The accum_1, accum_2 and accum_3 need to be initialized. Given the
+ floating-point we have
+ +0.0f + -0.0f = +0.0f.
+
+ Thus, we should initialize the accum_* to -0.0 for correctness.  But
+ the things become more complicated when no-signed-zeros, as well as VLA
+ vectorizer mode which doesn't trigger variable expansion. Then we have:
+
+ Case 1: Trigger variable expansion but target doesn't honor 
no-signed-zero.
+   minus_zero will be -0.0f and foo (minus_zero, 10) will be -0.0f.
+ Case 2: Trigger variable expansion but target does honor no-signed-zero.
+   minus_zero will be +0.0f and foo (minus_zero, 10) will be +0.0f.
+ Case 3: No variable expansion but target doesn't honor no-signed-zero.
+   minus_zero will be -0.0f and foo (minus_zero, 10) will be -0.0f.
+ Case 4: No variable expansion but target does honor no-signed-zero.
+   minus_zero will be +0.0f and foo (minus_zero, 10) will be +0.0f.
+
+ The test case covers above 4 cases for running.
+ */
+  float minus_zero = get_minus_zero ();
+  float a = __builtin_copysignf (1.0, minus_zero);
+  float b = __builtin_copysignf (1.0, foo (minus_zero, 10));
+
+  if (a != b)
 abort ();
+
   exit (0);
 }
 
-- 
2.34.1



[PATCH v3] RISC-V: Bugfix for doesn't honor no-signed-zeros option

2024-01-02 Thread pan2 . li
From: Pan Li 

According to the semantics of the no-signed-zeros option, a backend
like RISC-V should treat minus zero -0.0f as plus zero 0.0f.

Consider below example with option -fno-signed-zeros.

void
test (float *a)
{
  *a = -0.0;
}

We will generate code as below, which doesn't treat the minus zero
as plus zero.

test:
  lui  a5,%hi(.LC0)
  flw  fa5,%lo(.LC0)(a5)
  fsw  fa5,0(a0)
  ret

.LC0:
  .word -2147483648 // aka -0.0 (0x80000000 in hex)

This patch fixes the bug and treats minus zero -0.0 as plus
zero, aka +0.0. Thus after this patch we will have the asm code
below for the above sample code.

test:
  sw zero,0(a0)
  ret

This patch also fixes the run failure of the test case pr30957-1.c. The
below tests are passed for this patch.

* The riscv regression tests.
* The pr30957-1.c run tests.

gcc/ChangeLog:

* config/riscv/constraints.md: Leverage func 
riscv_float_const_zero_rtx_p
for predicating the rtx is const zero float or not.
* config/riscv/predicates.md: Ditto.
* config/riscv/riscv.cc (riscv_const_insns): Ditto.
(riscv_float_const_zero_rtx_p): New func impl for predicating the rtx is
const zero float or not.
(riscv_const_zero_rtx_p): New func impl for predicating the rtx
is const zero (both int and fp) or not.
* config/riscv/riscv-protos.h (riscv_float_const_zero_rtx_p):
New func decl.
(riscv_const_zero_rtx_p): Ditto.
* config/riscv/riscv.md: Making sure the operand[1] of movfp is
CONST0_RTX when the operand[1] is const zero float.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/no-signed-zeros-0.c: New test.
* gcc.target/riscv/no-signed-zeros-1.c: New test.
* gcc.target/riscv/no-signed-zeros-2.c: New test.
* gcc.target/riscv/no-signed-zeros-3.c: New test.
* gcc.target/riscv/no-signed-zeros-4.c: New test.
* gcc.target/riscv/no-signed-zeros-5.c: New test.
* gcc.target/riscv/no-signed-zeros-run-0.c: New test.
* gcc.target/riscv/no-signed-zeros-run-1.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/constraints.md   |  2 +-
 gcc/config/riscv/predicates.md|  2 +-
 gcc/config/riscv/riscv-protos.h   |  2 +
 gcc/config/riscv/riscv.cc | 35 -
 gcc/config/riscv/riscv.md | 49 ---
 .../gcc.target/riscv/no-signed-zeros-0.c  | 26 ++
 .../gcc.target/riscv/no-signed-zeros-1.c  | 28 +++
 .../gcc.target/riscv/no-signed-zeros-2.c  | 26 ++
 .../gcc.target/riscv/no-signed-zeros-3.c  | 28 +++
 .../gcc.target/riscv/no-signed-zeros-4.c  | 26 ++
 .../gcc.target/riscv/no-signed-zeros-5.c  | 28 +++
 .../gcc.target/riscv/no-signed-zeros-run-0.c  | 36 ++
 .../gcc.target/riscv/no-signed-zeros-run-1.c  | 36 ++
 13 files changed, 314 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/no-signed-zeros-0.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/no-signed-zeros-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/no-signed-zeros-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/no-signed-zeros-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/no-signed-zeros-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/no-signed-zeros-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/no-signed-zeros-run-0.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/no-signed-zeros-run-1.c

diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index de4359af00d..db1d5e1385f 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -108,7 +108,7 @@ (define_constraint "DnS"
 (define_constraint "G"
   "@internal"
   (and (match_code "const_double")
-   (match_test "op == CONST0_RTX (mode)")))
+   (match_test "riscv_float_const_zero_rtx_p (op)")))
 
 (define_memory_constraint "A"
   "An address that is held in a general-purpose register."
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index b87a6900841..b428d842101 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -78,7 +78,7 @@ (define_predicate "sleu_operand"
 
 (define_predicate "const_0_operand"
   (and (match_code "const_int,const_wide_int,const_double,const_vector")
-   (match_test "op == CONST0_RTX (GET_MODE (op))")))
+   (match_test "riscv_const_zero_rtx_p (op)")))
 
 (define_predicate "const_1_operand"
   (and (match_code "const_int,const_wide_int,const_vector")
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 31049ef7523..fcf30e084a3 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -131,6 +131,8 @@ extern void riscv_asm_output_external (FILE *, const tree, 
const char *);
 extern bool
 riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, 

  1   2   >