From: Mihailo Stojanovic <mihailo.stojano...@rt-rk.com> This adds the custom MIPS-specific modulo by power of two expander, which uses a modified algorithm, tailored to MIPS instruction set.
gcc/ * config/mips/mips-protos.h (mips_expand_mod_pow2): New prototype. * config/mips/mips.cc (mips_rtx_costs): Don't force power of two constants into registers during modulo operations. Modify the cost modulo by power of two. (mips_expand_mod_pow2): New expander for modulo by power of two of 64-bit values on 32-bit targets. * config/mips/mips.md (define_expand "<u>mod<mode>3"): Separate define_expand for "<u>mod<mode>3" from the define_insn and call the new expander for 64-bit values on 32-bit targets. (define_insn "*<u>mod<mode>3"): Add * to the pattern name to avoid clash with the define_expand pattern. gcc/testsuite/ * gcc.target/mips/mod-pow2.c: New test. Cherry-picked e683ed1717b3f689c959c738a764174fdcdc7998 from https://github.com/MIPS/gcc Signed-off-by: Mihailo Stojanovic <mistojano...@wavecomp.com> Signed-off-by: Faraz Shahbazker <fshahbaz...@wavecomp.com> Signed-off-by: Aleksandar Rakic <aleksandar.ra...@htecgroup.com> --- gcc/config/mips/mips-protos.h | 2 + gcc/config/mips/mips.cc | 144 ++++++++++++++++++- gcc/config/mips/mips.md | 31 +++- gcc/testsuite/gcc.target/mips/mod-pow2.c | 176 +++++++++++++++++++++++ 4 files changed, 350 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/mips/mod-pow2.c diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h index 435b2e7e179..5782cd5d1b7 100644 --- a/gcc/config/mips/mips-protos.h +++ b/gcc/config/mips/mips-protos.h @@ -401,4 +401,6 @@ extern void mips_split_msa_subreg_move (rtx, rtx); extern const char *mips_output_compare (const char *fpcmp, const char *fcond, const char *fmt, const char *fpcc_mode, bool swap); +extern bool mips_expand_mod_pow2 (rtx, rtx, rtx); + #endif /* ! GCC_MIPS_PROTOS_H */ diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc index d23c30a43be..19d428e6ed6 100644 --- a/gcc/config/mips/mips.cc +++ b/gcc/config/mips/mips.cc @@ -5292,6 +5292,19 @@ mips_rtx_costs (rtx x, machine_mode mode, int outer_code, return true; } + /* Don't force the constant into register during modulo by power of two. + This is needed so that the MIPS-specific modulo pattern will be + selected during the expand phase. */ + if (!TARGET_64BIT + && !TARGET_MIPS16 + && outer_code == MOD + && mode == DImode + && (exact_log2 (INTVAL (x)) > 0)) + { + *total = 0; + return true; + } + if (TARGET_MIPS16) { cost = mips16_constant_cost (outer_code, INTVAL (x)); @@ -5615,8 +5628,20 @@ mips_rtx_costs (rtx x, machine_mode mode, int outer_code, } /* Fall through. */ - case SQRT: case MOD: + /* Modulo by power of two produces (at most) nine instructions. */ + if (CONST_INT_P (XEXP (x, 1)) + && exact_log2 (INTVAL (XEXP (x, 1))) > 0 + && !TARGET_64BIT + && !TARGET_MIPS16 + && mode == DImode) + { + *total = COSTS_N_INSNS (9); + return true; + } + /* Fall through. */ + + case SQRT: if (float_mode_p) { *total = mips_fp_div_cost (mode); @@ -26107,6 +26132,123 @@ mips_prune_insertions_deletions (struct edge_list* edge_list, sbitmap_free (ifcv_blocks); sbitmap_free (insertions); } + +/* Expand modulo by power of two of DImode values on 32-bit targets. */ + +bool +mips_expand_mod_pow2 (rtx target, rtx op1, rtx op2) +{ + HOST_WIDE_INT val, reg_width; + rtx out_low, out_high; + rtx in_low, in_high; + rtx at, temp; + rtx comp, cond_operands[4]; + + gcc_assert (GET_CODE (op2) == CONST_INT); + + val = INTVAL (op2); + + int logd = exact_log2 (val); + + if (logd <= 0) + return false; + + /* Extract lower and upper words of DImode source and destination. */ + out_low = mips_subword (target, 0); + out_high = mips_subword (target, 1); + + in_low = mips_subword (op1, 0); + in_high = mips_subword (op1, 1); + + at = gen_reg_rtx (SImode); + temp = gen_reg_rtx (SImode); + + reg_width = GET_MODE_BITSIZE (SImode); + + /* Divisor equals 2. */ + if (logd == 1) + { + mips_emit_binary (AND, at, in_low, const1_rtx); + mips_emit_binary (ASHIFT, temp, in_low, + gen_int_mode (reg_width - 1, SImode)); + mips_emit_binary (AND, temp, in_high, temp); + mips_emit_binary (ASHIFTRT, out_high, temp, + gen_int_mode (reg_width - 1, SImode)); + mips_emit_binary (IOR, out_low, out_high, at); + + return true; + } + /* Divisor fits into 32 bits. */ + else if (logd <= reg_width) + { + mips_emit_binary (ASHIFTRT, at, in_high, + gen_int_mode (reg_width - 1, SImode)); + + if (logd == reg_width) + mips_emit_move (out_low, in_low); + else if (ISA_HAS_EXT_INS || logd <= 16) + mips_emit_binary (AND, out_low, in_low, + gen_int_mode ((1 << logd) - 1, SImode)); + else + { + mips_emit_binary (ASHIFT, out_low, in_low, + gen_int_mode (reg_width - logd, SImode)); + mips_emit_binary (LSHIFTRT, out_low, out_low, + gen_int_mode (reg_width - logd, SImode)); + } + + comp = gen_rtx_EQ (VOIDmode, out_low, const0_rtx); + cond_operands[0] = at; + cond_operands[1] = comp; + cond_operands[2] = const0_rtx; + cond_operands[3] = at; + + mips_expand_conditional_move (cond_operands); + mips_emit_move (out_high, at); + if (logd < reg_width) + { + mips_emit_binary (ASHIFT, at, at, gen_int_mode (logd, SImode)); + mips_emit_binary (IOR, out_low, out_low, at); + } + + return true; + } + /* Divisor is wider than 32 bits. */ + else if (logd <= 2 * reg_width - 1) + { + mips_emit_binary (ASHIFTRT, at, in_high, + gen_int_mode (reg_width - 1, SImode)); + mips_emit_move (temp, in_low); + + if (ISA_HAS_EXT_INS || logd <= 16) + mips_emit_binary (AND, out_high, in_high, + gen_int_mode ((1 << (logd - reg_width)) - 1, SImode)); + else + { + mips_emit_binary (ASHIFT, out_high, in_high, + gen_int_mode (2 * reg_width - logd, SImode)); + mips_emit_binary (LSHIFTRT, out_high, out_high, + gen_int_mode (2 * reg_width - logd, SImode)); + } + mips_emit_move (out_low, temp); + mips_emit_binary (IOR, temp, temp, out_high); + + comp = gen_rtx_EQ (VOIDmode, temp, const0_rtx); + cond_operands[0] = at; + cond_operands[1] = comp; + cond_operands[2] = const0_rtx; + cond_operands[3] = at; + + mips_expand_conditional_move (cond_operands); + mips_emit_binary (ASHIFT, at, at, + gen_int_mode (logd - reg_width, SImode)); + mips_emit_binary (IOR, out_high, out_high, at); + + return true; + } + + return false; +} /* Initialize the GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index 1243f20f344..0c93ce17ae4 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -821,6 +821,10 @@ ;; modes. (define_mode_iterator GPR2 [SI (DI "TARGET_64BIT")]) +;; This mode iterator is used in the same way as GPR, except that it allows +;; 64-bit patterns on 32-bit targets. +(define_mode_iterator GPR3 [SI DI]) + (define_mode_iterator MOVEP1 [SI SF]) (define_mode_iterator MOVEP2 [SI SF]) (define_mode_iterator JOIN_MODE [HI @@ -3098,7 +3102,31 @@ [(set_attr "type" "idiv3") (set_attr "mode" "<MODE>")]) -(define_insn "<u>mod<mode>3" +(define_expand "<u>mod<mode>3" + [(set (match_operand:GPR3 0 "register_operand") + (any_mod:GPR3 (match_operand:GPR3 1 "register_operand") + (match_operand:GPR3 2 "nonmemory_operand")))] + "(TARGET_LOONGSON_2EF || TARGET_LOONGSON_EXT || ISA_HAS_R6<D>DIV) + || (!TARGET_64BIT && !TARGET_MIPS16 && <MODE>mode == DImode)" +{ + /* In case of DImode signed modulo with power of 2 on 32-bit targets, call + the custom expander. */ + if (<CODE> == MOD + && !TARGET_64BIT + && !TARGET_MIPS16 + && GET_CODE (operands[2]) == CONST_INT) + { + if (mips_expand_mod_pow2 (operands[0], operands[1], operands[2])) + DONE; + FAIL; + } + /* Else, forbid the expand of DImode modulo on 32-bit targets. */ + else if ((!TARGET_64BIT && <MODE>mode == DImode) + || GET_CODE (operands[2]) == CONST_INT) + FAIL; +}) + +(define_insn "*<u>mod<mode>3" [(set (match_operand:GPR 0 "register_operand" "=&d") (any_mod:GPR (match_operand:GPR 1 "register_operand" "d") (match_operand:GPR 2 "register_operand" "d")))] @@ -3113,7 +3141,6 @@ } [(set_attr "type" "idiv3") (set_attr "mode" "<MODE>")]) - ;; ;; .................... ;; diff --git a/gcc/testsuite/gcc.target/mips/mod-pow2.c b/gcc/testsuite/gcc.target/mips/mod-pow2.c new file mode 100644 index 00000000000..4fee299a777 --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/mod-pow2.c @@ -0,0 +1,176 @@ +/* { dg-do run } */ +/* { dg-options "-mabi=32 (REQUIRES_STDLIB)" } */ + +/* Test modulo by power of two expansion. */ +#include <stdlib.h> + +#define ARGPASTE(X, Y) X ## Y +#define ARGPASTE2(X) ARGPASTE (mod_, X) + +/* Create 64 distinct functions which calculate the modulo by power of two */ +#define MOD(LOG) \ + __attribute__((noinline)) long long ARGPASTE2 (__COUNTER__) (long long x) \ + { return x % (1LL << (LOG)); } +#define MOD_1(x) \ + MOD (x) \ + MOD (x + 1) +#define MOD_2(x) \ + MOD_1 (x) \ + MOD_1 (x + 2) +#define MOD_4(x) \ + MOD_2 (x) \ + MOD_2 (x + 4) +#define MOD_8(x) \ + MOD_4 (x) \ + MOD_4 (x + 8) +#define MOD_16(x) \ + MOD_8 (x) \ + MOD_8 (x + 16) +#define MOD_32(x) \ + MOD_16 (x) \ + MOD_16 (x + 32) + +MOD_32 (0) + +#define TEST_NUM_POS 0x7fffffffffffffff +#define TEST_NUM_NEG 0xffffffffffffffff + +/* Test the modulo by power of two with a positive number. */ +#define TEST_MOD_POS(LOG) \ + ARGPASTE2 (LOG) (TEST_NUM_POS) == (TEST_NUM_POS & ((1LL << (LOG)) - 1)) \ + ? (void) 0 : abort () + +/* Test the modulo by power of two with a negative number. */ +#define TEST_MOD_NEG(LOG) \ + ARGPASTE2 (LOG) (TEST_NUM_NEG) == -1LL ? (void) 0 : abort() + +int main() { + TEST_MOD_POS(1); + TEST_MOD_POS(2); + TEST_MOD_POS(3); + TEST_MOD_POS(4); + TEST_MOD_POS(5); + TEST_MOD_POS(6); + TEST_MOD_POS(7); + TEST_MOD_POS(8); + TEST_MOD_POS(9); + TEST_MOD_POS(10); + TEST_MOD_POS(11); + TEST_MOD_POS(12); + TEST_MOD_POS(13); + TEST_MOD_POS(14); + TEST_MOD_POS(15); + TEST_MOD_POS(16); + TEST_MOD_POS(17); + TEST_MOD_POS(18); + TEST_MOD_POS(19); + TEST_MOD_POS(20); + TEST_MOD_POS(21); + TEST_MOD_POS(22); + TEST_MOD_POS(23); + TEST_MOD_POS(24); + TEST_MOD_POS(25); + TEST_MOD_POS(26); + TEST_MOD_POS(27); + TEST_MOD_POS(28); + TEST_MOD_POS(29); + TEST_MOD_POS(30); + TEST_MOD_POS(31); + TEST_MOD_POS(32); + TEST_MOD_POS(33); + TEST_MOD_POS(34); + TEST_MOD_POS(35); + TEST_MOD_POS(36); + TEST_MOD_POS(37); + TEST_MOD_POS(38); + TEST_MOD_POS(39); + TEST_MOD_POS(40); + TEST_MOD_POS(41); + TEST_MOD_POS(42); + TEST_MOD_POS(43); + TEST_MOD_POS(44); + TEST_MOD_POS(45); + TEST_MOD_POS(46); + TEST_MOD_POS(47); + TEST_MOD_POS(48); + TEST_MOD_POS(49); + TEST_MOD_POS(50); + TEST_MOD_POS(51); + TEST_MOD_POS(52); + TEST_MOD_POS(53); + TEST_MOD_POS(54); + TEST_MOD_POS(55); + TEST_MOD_POS(56); + TEST_MOD_POS(57); + TEST_MOD_POS(58); + TEST_MOD_POS(59); + TEST_MOD_POS(60); + TEST_MOD_POS(61); + TEST_MOD_POS(62); + + TEST_MOD_NEG(1); + TEST_MOD_NEG(2); + TEST_MOD_NEG(3); + TEST_MOD_NEG(4); + TEST_MOD_NEG(5); + TEST_MOD_NEG(6); + TEST_MOD_NEG(7); + TEST_MOD_NEG(8); + TEST_MOD_NEG(9); + TEST_MOD_NEG(10); + TEST_MOD_NEG(11); + TEST_MOD_NEG(12); + TEST_MOD_NEG(13); + TEST_MOD_NEG(14); + TEST_MOD_NEG(15); + TEST_MOD_NEG(16); + TEST_MOD_NEG(17); + TEST_MOD_NEG(18); + TEST_MOD_NEG(19); + TEST_MOD_NEG(20); + TEST_MOD_NEG(21); + TEST_MOD_NEG(22); + TEST_MOD_NEG(23); + TEST_MOD_NEG(24); + TEST_MOD_NEG(25); + TEST_MOD_NEG(26); + TEST_MOD_NEG(27); + TEST_MOD_NEG(28); + TEST_MOD_NEG(29); + TEST_MOD_NEG(30); + TEST_MOD_NEG(31); + TEST_MOD_NEG(32); + TEST_MOD_NEG(33); + TEST_MOD_NEG(34); + TEST_MOD_NEG(35); + TEST_MOD_NEG(36); + TEST_MOD_NEG(37); + TEST_MOD_NEG(38); + TEST_MOD_NEG(39); + TEST_MOD_NEG(40); + TEST_MOD_NEG(41); + TEST_MOD_NEG(42); + TEST_MOD_NEG(43); + TEST_MOD_NEG(44); + TEST_MOD_NEG(45); + TEST_MOD_NEG(46); + TEST_MOD_NEG(47); + TEST_MOD_NEG(48); + TEST_MOD_NEG(49); + TEST_MOD_NEG(50); + TEST_MOD_NEG(51); + TEST_MOD_NEG(52); + TEST_MOD_NEG(53); + TEST_MOD_NEG(54); + TEST_MOD_NEG(55); + TEST_MOD_NEG(56); + TEST_MOD_NEG(57); + TEST_MOD_NEG(58); + TEST_MOD_NEG(59); + TEST_MOD_NEG(60); + TEST_MOD_NEG(61); + TEST_MOD_NEG(62); + TEST_MOD_NEG(63); + + exit (0); +} -- 2.34.1