From: Pan Li <[email protected]>
Hi Richard & Tamar,
I tried DEF_INTERNAL_INT_EXT_FN as you suggested: the patch maps
us_plus$a3 to the RTL representation (us_plus:m x y) in optabs.def
and then adds expand_US_PLUS in internal-fn.cc. I am not sure whether
my understanding of DEF_INTERNAL_INT_EXT_FN is correct.
I am not sure if we still need DEF_INTERNAL_SIGNED_OPTAB_FN here, given
the RTL representation has (ss_plus:m x y) and (us_plus:m x y) already.
Note that this patch is a draft for validation; no tests are involved here.
gcc/ChangeLog:
* builtins.def (BUILT_IN_US_PLUS): Add builtin def.
(BUILT_IN_US_PLUSIMAX): Ditto.
(BUILT_IN_US_PLUSL): Ditto.
(BUILT_IN_US_PLUSLL): Ditto.
(BUILT_IN_US_PLUSG): Ditto.
* config/riscv/riscv-protos.h (riscv_expand_us_plus): Add new
func decl for expanding us_plus.
* config/riscv/riscv.cc (riscv_expand_us_plus): Add new func
impl for expanding us_plus.
* config/riscv/riscv.md (us_plus<mode>3): Add new pattern impl
us_plus<mode>3.
* internal-fn.cc (expand_US_PLUS): Add new func impl to expand
US_PLUS.
* internal-fn.def (US_PLUS): Add new INT_EXT_FN.
* internal-fn.h (expand_US_PLUS): Add new func decl.
* match.pd: Add new simplify pattern for us_plus.
* optabs.def (OPTAB_NL): Add new OPTAB_NL to US_PLUS rtl.
Signed-off-by: Pan Li <[email protected]>
---
gcc/builtins.def | 7 +++++
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv.cc | 46 +++++++++++++++++++++++++++++++++
gcc/config/riscv/riscv.md | 11 ++++++++
gcc/internal-fn.cc | 26 +++++++++++++++++++
gcc/internal-fn.def | 3 +++
gcc/internal-fn.h | 1 +
gcc/match.pd | 17 ++++++++++++
gcc/optabs.def | 2 ++
9 files changed, 114 insertions(+)
diff --git a/gcc/builtins.def b/gcc/builtins.def
index f6f3e104f6a..0777b912cfa 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -1055,6 +1055,13 @@ DEF_GCC_BUILTIN (BUILT_IN_POPCOUNTIMAX,
"popcountimax", BT_FN_INT_UINTMAX
DEF_GCC_BUILTIN (BUILT_IN_POPCOUNTL, "popcountl", BT_FN_INT_ULONG,
ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_POPCOUNTLL, "popcountll",
BT_FN_INT_ULONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_POPCOUNTG, "popcountg", BT_FN_INT_VAR,
ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
+/* Builtins for us_plus, one per operand width plus a type-generic form.
+   NOTE(review): the type codes below are the same ones used by the
+   POPCOUNT entries above (BT_FN_INT_UINT and friends), whose names suggest
+   a single unsigned argument and an int result.  us_plus is declared as a
+   binary function in internal-fn.def, so these prototypes presumably need
+   two-operand, unsigned-result type codes -- confirm before merging.  */
+DEF_GCC_BUILTIN (BUILT_IN_US_PLUS, "us_plus", BT_FN_INT_UINT,
ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_US_PLUSIMAX, "us_plusimax",
BT_FN_INT_UINTMAX, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_US_PLUSL, "us_plusl", BT_FN_INT_ULONG,
ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_US_PLUSLL, "us_plusll", BT_FN_INT_ULONGLONG,
ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_US_PLUSG, "us_plusg", BT_FN_INT_VAR,
ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
+
DEF_EXT_LIB_BUILTIN (BUILT_IN_POSIX_MEMALIGN, "posix_memalign",
BT_FN_INT_PTRPTR_SIZE_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
DEF_GCC_BUILTIN (BUILT_IN_PREFETCH, "prefetch",
BT_FN_VOID_CONST_PTR_VAR, ATTR_NOVOPS_LEAF_LIST)
DEF_LIB_BUILTIN (BUILT_IN_REALLOC, "realloc", BT_FN_PTR_PTR_SIZE,
ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LEAF_LIST)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 80efdf2b7e5..ba6086f1f25 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -132,6 +132,7 @@ extern void riscv_asm_output_external (FILE *, const tree,
const char *);
extern bool
riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
+extern void riscv_expand_us_plus (rtx, rtx, rtx);
#ifdef RTX_CODE
extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool
*invert_ptr = 0);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 4100abc9dd1..23f08974f07 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -10657,6 +10657,52 @@ riscv_vector_mode_supported_any_target_p (machine_mode)
return true;
}
+/* Emit insns for the unsigned saturating add DEST = sat_addu (X, Y), using
+   the branchless form (x + y) | - ((x + y) < x).
+
+   DEST, X and Y are expected to be registers of the same integer mode; the
+   mode is taken from DEST.  All arithmetic is carried out in Pmode:
+
+     - When the mode is SImode and narrower than Pmode, the sum is produced
+       by an SImode add and then viewed in Pmode.
+     - For QImode and HImode there is no narrow add, so the Pmode sum is
+       reduced to the mode width with a left/logical-right shift pair.  The
+       copy of X used by the overflow compare is zero-extended the same way:
+       the upper bits of a Pmode lowpart of a narrower register are not
+       defined, and leaving them in place could make the unsigned compare
+       report a carry that did not happen.
+
+   The final value is moved into DEST as the low part of the Pmode result.  */
+void
+riscv_expand_us_plus (rtx dest, rtx x, rtx y)
+{
+  machine_mode mode = GET_MODE (dest);
+  rtx pmode_sum = gen_reg_rtx (Pmode);
+  rtx pmode_lt = gen_reg_rtx (Pmode);
+  rtx pmode_x = gen_lowpart (Pmode, x);
+  rtx pmode_y = gen_lowpart (Pmode, y);
+  rtx pmode_dest = gen_reg_rtx (Pmode);
+
+  /* Step-1: sum = x + y.  */
+  if (mode == SImode && mode != Pmode)
+    {
+      /* Take addw to avoid the sum truncate.  */
+      rtx simode_sum = gen_reg_rtx (SImode);
+      riscv_emit_binary (PLUS, simode_sum, x, y);
+      emit_move_insn (pmode_sum, gen_lowpart (Pmode, simode_sum));
+    }
+  else
+    riscv_emit_binary (PLUS, pmode_sum, pmode_x, pmode_y);
+
+  /* Step-1.1: truncate sum for HI and QI as we have no insn for add QI/HI,
+     and bring x into the same zero-extended form for the compare below.  */
+  if (mode == HImode || mode == QImode)
+    {
+      int mode_bits = GET_MODE_BITSIZE (mode).to_constant ();
+      int shift_bits = GET_MODE_BITSIZE (Pmode) - mode_bits;
+      rtx pmode_zext_x = gen_reg_rtx (Pmode);
+
+      gcc_assert (shift_bits > 0);
+
+      riscv_emit_binary (ASHIFT, pmode_sum, pmode_sum, GEN_INT (shift_bits));
+      riscv_emit_binary (LSHIFTRT, pmode_sum, pmode_sum, GEN_INT (shift_bits));
+
+      /* The add above does not care about the upper bits of x because the
+         sum is truncated, but the unsigned compare does.  Use a fresh
+         register so the original operand is left untouched.  */
+      riscv_emit_binary (ASHIFT, pmode_zext_x, pmode_x, GEN_INT (shift_bits));
+      riscv_emit_binary (LSHIFTRT, pmode_zext_x, pmode_zext_x,
+                         GEN_INT (shift_bits));
+      pmode_x = pmode_zext_x;
+    }
+
+  /* Step-2: lt = sum < x.  */
+  riscv_emit_binary (LTU, pmode_lt, pmode_sum, pmode_x);
+
+  /* Step-3: lt = -lt, i.e. all ones on overflow and zero otherwise.  */
+  riscv_emit_unary (NEG, pmode_lt, pmode_lt);
+
+  /* Step-4: pmode_dest = sum | lt.  */
+  riscv_emit_binary (IOR, pmode_dest, pmode_lt, pmode_sum);
+
+  /* Step-5: dest = pmode_dest.  */
+  emit_move_insn (dest, gen_lowpart (mode, pmode_dest));
+}
+
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 3f7a023d941..eaa9867023c 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3841,6 +3841,17 @@ (define_insn "*large_load_address"
[(set_attr "type" "load")
(set (attr "length") (const_int 8))])
+;; Named expander for the unsigned saturating add.  Operand 0 is the
+;; destination and operands 1 and 2 are the addends; all three are register
+;; operands iterated over ANYI.  The condition string is empty, and the
+;; whole expansion is delegated to riscv_expand_us_plus, so no RTL template
+;; is emitted here (DONE).
+(define_expand "us_plus<mode>3"
+ [(match_operand:ANYI 0 "register_operand")
+ (match_operand:ANYI 1 "register_operand")
+ (match_operand:ANYI 2 "register_operand")]
+ ""
+ {
+ riscv_expand_us_plus (operands[0], operands[1], operands[2]);
+ DONE;
+ }
+)
+
(include "bitmanip.md")
(include "crypto.md")
(include "sync.md")
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index a07f25f3aee..a7341a57ffa 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -5177,3 +5177,29 @@ expand_POPCOUNT (internal_fn fn, gcall *stmt)
emit_move_insn (plhs, cmp);
}
}
+
+/* Expand the IFN_US_PLUS call STMT, the unsigned saturating add of its two
+   arguments, through the us_plus optab handler for the mode of the first
+   argument.  The result is stored in the location of the call's lhs.
+   FN is not used; it is kept so the signature matches the other
+   expand_* internal function expanders.  */
+void
+expand_US_PLUS (internal_fn fn, gcall *stmt)
+{
+  tree lhs = gimple_call_lhs (stmt);
+  tree rhs_0 = gimple_call_arg (stmt, 0);
+  tree rhs_1 = gimple_call_arg (stmt, 1);
+
+  /* US_PLUS is declared ECF_CONST, so a call whose result is unused has
+     nothing to expand.  This also avoids dereferencing a null lhs below.  */
+  if (!lhs)
+    return;
+
+  do_pending_stack_adjust ();
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  rtx op_0 = expand_normal (rhs_0);
+  rtx op_1 = expand_normal (rhs_1);
+
+  class expand_operand ops[3];
+
+  create_output_operand (&ops[0], target, TYPE_MODE (TREE_TYPE (lhs)));
+  /* The addends are only read by the pattern, so they must be registered
+     as input operands rather than as outputs.  */
+  create_input_operand (&ops[1], op_0, TYPE_MODE (TREE_TYPE (rhs_0)));
+  create_input_operand (&ops[2], op_1, TYPE_MODE (TREE_TYPE (rhs_1)));
+
+  insn_code code = optab_handler (us_plus_optab,
+                                  TYPE_MODE (TREE_TYPE (rhs_0)));
+  expand_insn (code, 3, ops);
+
+  /* The expander may have picked a different register for the result.  */
+  if (!rtx_equal_p (target, ops[0].value))
+    emit_move_insn (target, ops[0].value);
+}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index c14d30365c1..b1d7b5a0307 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -447,6 +447,9 @@ DEF_INTERNAL_INT_FN (FFS, ECF_CONST | ECF_NOTHROW, ffs,
unary)
DEF_INTERNAL_INT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary)
DEF_INTERNAL_INT_EXT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary)
+/* Binary integer ops. */
+DEF_INTERNAL_INT_EXT_FN (US_PLUS, ECF_CONST | ECF_NOTHROW, us_plus, binary)
+
DEF_INTERNAL_FN (GOMP_TARGET_REV, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL)
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index bccee1c3e09..46e404b4a49 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -263,6 +263,7 @@ extern void expand_DIVMODBITINT (internal_fn, gcall *);
extern void expand_FLOATTOBITINT (internal_fn, gcall *);
extern void expand_BITINTTOFLOAT (internal_fn, gcall *);
extern void expand_POPCOUNT (internal_fn, gcall *);
+extern void expand_US_PLUS (internal_fn, gcall *);
extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
diff --git a/gcc/match.pd b/gcc/match.pd
index c5b6540f939..f45fd58ad23 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -10265,3 +10265,20 @@ and,
}
(if (full_perm_p)
(vec_perm (op@3 @0 @1) @3 @2))))))
+
+#if GIMPLE
+
+/* Unsigned saturation add, aka:
+ SAT_ADDU = (X + Y) | - ((X + Y) < X) or
+ SAT_ADDU = (X + Y) | - ((X + Y) < Y).
+ Both spellings are handled by the single pattern below because the plus
+ (and the outer bit_ior) are matched commutatively via :c.  The captured
+ sum @2 must be the very expression that is compared against @0.
+ The rewrite to IFN_US_PLUS is done only when optimizing, when the result
+ type is integral, when @0 is unsigned and both operands have the result
+ type, and when direct_internal_fn_supported_p accepts IFN_US_PLUS for
+ that type. */
+(simplify
+ (bit_ior:c (plus:c@2 @0 @1) (negate (convert (lt @2 @0))))
+ (if (optimize
+ && INTEGRAL_TYPE_P (type)
+ && TYPE_UNSIGNED (TREE_TYPE (@0))
+ && types_match (type, TREE_TYPE (@0))
+ && types_match (type, TREE_TYPE (@1))
+ && direct_internal_fn_supported_p (IFN_US_PLUS, type,
OPTIMIZE_FOR_BOTH))
+ (IFN_US_PLUS @0 @1)))
+
+#endif
diff --git a/gcc/optabs.def b/gcc/optabs.def
index ad14f9328b9..5855c4e0834 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -179,6 +179,8 @@ OPTAB_NL(clrsb_optab, "clrsb$a2", CLRSB, "clrsb", '2',
gen_int_libfunc)
OPTAB_NL(popcount_optab, "popcount$a2", POPCOUNT, "popcount", '2',
gen_int_libfunc)
OPTAB_NL(parity_optab, "parity$a2", PARITY, "parity", '2', gen_int_libfunc)
+OPTAB_NL(us_plus_optab, "us_plus$a3", US_PLUS, "us_plus", '3', gen_int_libfunc)
+
/* Comparison libcalls for integers MUST come in pairs, signed/unsigned. */
OPTAB_NL(cmp_optab, NULL, UNKNOWN, "cmp", '2', gen_int_fp_fixed_libfunc)
OPTAB_NL(ucmp_optab, NULL, UNKNOWN, "ucmp", '2', gen_int_libfunc)
--
2.34.1