From: Robert Suchanek <robert.sucha...@imgtec.com> - Bugfix [MIPS16E2]: the split of moves of negative constants should exclude the zero constant.
- Add support for every style of ZEB/ZEH generation that has been tried: An earlier attempt to improve generation of ZEB/ZEH led to a chaotic effect of sometimes generating the instructions and sometimes retaining the ANDI 0xffff. Occasional generation of LHU/LBU also appeared where the original value was not already in memory. Performance results show wild and unexpected variation which appears to correlate with the way in which ZEH/ZEB handling is or is not implemented. Support all forms tried so far with a hidden option defaulting to the preferred method. - Check to see if it is safe to use the SAVE/RESTORE instruction in a function. - Add interaptiv-mr2 architecture with COPYW/UCOPYW. - Add -muse-copyw-ucopyw option (hidden from help). - Disable tests at -O0 due to introducing a frame: SAVE/RESTORE end up introducing a frame owing to saving more data than strictly necessary. gcc/ * config/mips/mips.cc (mips_option_override): Set default for TARGET_USE_COPYW_UCOPYW. * config/mips/mips.h (ISA_HAS_COPY): Update to reference TARGET_USE_COPYW_UCOPYW. * config/mips/mips.opt (-muse-copyw-ucopyw): New hidden option. * config/mips/mips-cpus.def: Set PTF_AVOID_BRANCHLIKELY_ALWAYS flag for interAptiv-mr2 CPU. gcc/testsuite/ * gcc.target/mips/iamr2.c: New test. * gcc.target/mips/memcpy-3.c: New test. * gcc.target/mips/memcpy-4.c: Likewise. * gcc.target/mips/mips.exp: Accept -muse-copyw-ucopyw and isa=interaptiv-mr2. (mips-dg-init): Add memcpy option. * gcc.target/mips/r10k-cache-barrier-9.c: Skip test for -O0. * gcc.target/mips/stack-1.c: Likewise.
Cherry-picked 01dbcc401881f2e4ed063fe43406f8670e4e0cac, 34e4b01b6e6afea14f51c093520c58e7eb3ddb66, 3475f16f5ce9d1247758f5d3a858af5163116d71 and aecf341540d1462145eaf47e3cfa7e7780ee7adc from https://github.com/MIPS/gcc Signed-off-by: Robert Suchanek <robert.sucha...@imgtec.com> Signed-off-by: Matthew Fortune <matthew.fort...@imgtec.com> Signed-off-by: Mihailo Stojanovic <mistojano...@wavecomp.com> Signed-off-by: Faraz Shahbazker <fshahbaz...@wavecomp.com> Signed-off-by: Aleksandar Rakic <aleksandar.ra...@htecgroup.com> --- gcc/config/mips/24k.md | 68 ++-- gcc/config/mips/mips-cpus.def | 2 + gcc/config/mips/mips-protos.h | 3 +- gcc/config/mips/mips-tables.opt | 57 ++-- gcc/config/mips/mips.cc | 300 ++++++++++++++++-- gcc/config/mips/mips.h | 35 +- gcc/config/mips/mips.md | 94 +++++- gcc/config/mips/mips.opt | 6 + gcc/config/mips/predicates.md | 2 +- gcc/doc/invoke.texi | 2 +- gcc/testsuite/gcc.target/mips/iamr2.c | 51 +++ gcc/testsuite/gcc.target/mips/memcpy-3.c | 14 + gcc/testsuite/gcc.target/mips/memcpy-4.c | 14 + gcc/testsuite/gcc.target/mips/mips.exp | 6 + .../gcc.target/mips/r10k-cache-barrier-9.c | 1 + gcc/testsuite/gcc.target/mips/stack-1.c | 1 + 16 files changed, 557 insertions(+), 99 deletions(-) create mode 100644 gcc/testsuite/gcc.target/mips/iamr2.c create mode 100644 gcc/testsuite/gcc.target/mips/memcpy-3.c create mode 100644 gcc/testsuite/gcc.target/mips/memcpy-4.c diff --git a/gcc/config/mips/24k.md b/gcc/config/mips/24k.md index 1d09c929ab4..8e49456eac0 100644 --- a/gcc/config/mips/24k.md +++ b/gcc/config/mips/24k.md @@ -41,7 +41,7 @@ ;; 1. 
Loads: lb, lbu, lh, lhu, ll, lw, lwl, lwr, lwpc, lwxs (define_insn_reservation "r24k_int_load" 2 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "load")) "r24k_iss+r24k_ixu_arith") @@ -53,7 +53,7 @@ ;; (movn/movz is not matched, we'll need to split condmov to ;; differentiate between integer/float moves) (define_insn_reservation "r24k_int_arith" 1 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "arith,const,logical,move,nop,shift,signext,slt")) "r24k_iss+r24k_ixu_arith") @@ -61,13 +61,13 @@ ;; 3. Links: bgezal, bgezall, bltzal, bltzall, jal, jalr, jalx ;; 3a. jr/jalr consumer (define_insn_reservation "r24k_int_jump" 1 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "call,jump")) "r24k_iss+r24k_ixu_arith") ;; 3b. branch consumer (define_insn_reservation "r24k_int_branch" 1 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "branch")) "r24k_iss+r24k_ixu_arith") @@ -75,38 +75,38 @@ ;; 4. 
MDU: fully pipelined multiplier ;; mult - delivers result to hi/lo in 1 cycle (pipelined) (define_insn_reservation "r24k_int_mult" 1 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "imul")) "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)") ;; madd, msub - delivers result to hi/lo in 1 cycle (pipelined) (define_insn_reservation "r24k_int_madd" 1 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "imadd")) "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)") ;; mul - delivers result to gpr in 5 cycles (define_insn_reservation "r24k_int_mul3" 5 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "imul3")) "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)*5") ;; mfhi, mflo, mflhxu - deliver result to gpr in 5 cycles (define_insn_reservation "r24k_int_mfhilo" 5 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "mfhi,mflo")) "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)") ;; mthi, mtlo, mtlhx - deliver result to hi/lo, thence madd, handled as bypass (define_insn_reservation "r24k_int_mthilo" 1 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "mthi,mtlo")) "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)") ;; div - default to 36 cycles for 32bit operands. Faster for 24bit, 16bit and ;; 8bit, but is tricky to identify. (define_insn_reservation "r24k_int_div" 36 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "idiv")) "r24k_iss+(r24k_mul3a+r24k_mul3b+r24k_mul3c)*36") @@ -114,21 +114,21 @@ ;; 5. 
Cop: cfc1, di, ei, mfc0, mtc0 ;; (Disabled until we add proper cop0 support) ;;(define_insn_reservation "r24k_int_cop" 3 -;; (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") +;; (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") ;; (eq_attr "type" "cop0")) ;; "r24k_iss+r24k_ixu_arith") ;; 6. Store (define_insn_reservation "r24k_int_store" 1 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "store")) "r24k_iss+r24k_ixu_arith") ;; 7. Multiple instructions (define_insn_reservation "r24k_int_multi" 1 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "multi")) "r24k_iss+r24k_ixu_arith+r24k_fpu_arith+(r24k_mul3a+r24k_mul3b+r24k_mul3c)") @@ -137,14 +137,14 @@ ;; rtls. They do not really affect scheduling latency, (blockage affects ;; scheduling via log links, but not used here). (define_insn_reservation "r24k_int_unknown" 0 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "unknown,atomic,syncloop")) "r24k_iss") ;; 9. 
Prefetch (define_insn_reservation "r24k_int_prefetch" 1 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "prefetch,prefetchx")) "r24k_iss+r24k_ixu_arith") @@ -211,31 +211,31 @@ ;; packrl, pick, preceq, preceu, precequ, precrq, precrqu, raddu, rddsp, repl, ;; replv, shll, shllv, shra, shrav, shrl, shrlv, subq, subu, wrdsp (define_insn_reservation "r24k_dsp_alu" 2 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "dspalu,dspalusat")) "r24k_iss+r24k_ixu_arith") ;; dpaq_s, dpau, dpsq_s, dpsu, maq_s, mulsaq (define_insn_reservation "r24k_dsp_mac" 1 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "dspmac")) "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)") ;; dpaq_sa, dpsq_sa, maq_sa (define_insn_reservation "r24k_dsp_mac_sat" 1 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "dspmacsat")) "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)") ;; extp, extpdp, extpdpv, extpv, extr, extrv (define_insn_reservation "r24k_dsp_acc_ext" 5 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "accext")) "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)") ;; mthlip, shilo, shilov (define_insn_reservation "r24k_dsp_acc_mod" 1 - (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1") + (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2") (eq_attr "type" "accmod")) "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)") @@ -435,37 +435,37 @@ ;; fadd, fabs, fneg (define_insn_reservation "r24kf1_1_fadd" 4 - (and (eq_attr "cpu" "24kf1_1") + (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2") (eq_attr "type" "fadd,fabs,fneg")) "r24kf1_1_fpu_iss") ;; fmove, fcmove (define_insn_reservation "r24kf1_1_fmove" 4 - (and (eq_attr "cpu" "24kf1_1") + (and (eq_attr "cpu" 
"24kf1_1,interaptiv_mr2") (eq_attr "type" "fmove,condmove")) "r24kf1_1_fpu_iss") ;; fload (define_insn_reservation "r24kf1_1_fload" 3 - (and (eq_attr "cpu" "24kf1_1") + (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2") (eq_attr "type" "fpload,fpidxload")) "r24kf1_1_fpu_iss") ;; fstore (define_insn_reservation "r24kf1_1_fstore" 1 - (and (eq_attr "cpu" "24kf1_1") + (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2") (eq_attr "type" "fpstore")) "r24kf1_1_fpu_iss") ;; fmul, fmadd (define_insn_reservation "r24kf1_1_fmul_sf" 4 - (and (eq_attr "cpu" "24kf1_1") + (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2") (and (eq_attr "type" "fmul,fmadd") (eq_attr "mode" "SF"))) "r24kf1_1_fpu_iss") (define_insn_reservation "r24kf1_1_fmul_df" 5 - (and (eq_attr "cpu" "24kf1_1") + (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2") (and (eq_attr "type" "fmul,fmadd") (eq_attr "mode" "DF"))) "r24kf1_1_fpu_iss,r24k_fpu_arith") @@ -473,27 +473,27 @@ ;; fdiv, fsqrt, frsqrt (define_insn_reservation "r24kf1_1_fdiv_sf" 17 - (and (eq_attr "cpu" "24kf1_1") + (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2") (and (eq_attr "type" "fdiv,fsqrt,frsqrt") (eq_attr "mode" "SF"))) "r24kf1_1_fpu_iss,(r24k_fpu_arith*13)") (define_insn_reservation "r24kf1_1_fdiv_df" 32 - (and (eq_attr "cpu" "24kf1_1") + (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2") (and (eq_attr "type" "fdiv,fsqrt") (eq_attr "mode" "DF"))) "r24kf1_1_fpu_iss,(r24k_fpu_arith*28)") ;; frsqrt (define_insn_reservation "r24kf1_1_frsqrt_df" 35 - (and (eq_attr "cpu" "24kf1_1") + (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2") (and (eq_attr "type" "frsqrt") (eq_attr "mode" "DF"))) "r24kf1_1_fpu_iss,(r24k_fpu_arith*30)") ;; fcmp (define_insn_reservation "r24kf1_1_fcmp" 2 - (and (eq_attr "cpu" "24kf1_1") + (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2") (eq_attr "type" "fcmp")) "r24kf1_1_fpu_iss") @@ -502,28 +502,28 @@ ;; fcvt (cvt.d.s, cvt.[sd].[wl]) (define_insn_reservation "r24kf1_1_fcvt_i2f_s2d" 4 - (and (eq_attr "cpu" "24kf1_1") + (and (eq_attr "cpu" 
"24kf1_1,interaptiv_mr2") (and (eq_attr "type" "fcvt") (eq_attr "cnv_mode" "I2S,I2D,S2D"))) "r24kf1_1_fpu_iss") ;; fcvt (cvt.s.d) (define_insn_reservation "r24kf1_1_fcvt_s2d" 6 - (and (eq_attr "cpu" "24kf1_1") + (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2") (and (eq_attr "type" "fcvt") (eq_attr "cnv_mode" "D2S"))) "r24kf1_1_fpu_iss") ;; fcvt (cvt.[wl].[sd], etc) (define_insn_reservation "r24kf1_1_fcvt_f2i" 5 - (and (eq_attr "cpu" "24kf1_1") + (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2") (and (eq_attr "type" "fcvt") (eq_attr "cnv_mode" "S2I,D2I"))) "r24kf1_1_fpu_iss") ;; fxfer (mfc1, mfhc1, mtc1, mthc1) (define_insn_reservation "r24kf1_1_fxfer" 2 - (and (eq_attr "cpu" "24kf1_1") + (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2") (eq_attr "type" "mfc,mtc")) "r24kf1_1_fpu_iss") diff --git a/gcc/config/mips/mips-cpus.def b/gcc/config/mips/mips-cpus.def index 50843480b03..b52a609e12a 100644 --- a/gcc/config/mips/mips-cpus.def +++ b/gcc/config/mips/mips-cpus.def @@ -145,6 +145,8 @@ MIPS_CPU ("1004kf", PROCESSOR_24KF2_1, MIPS_ISA_MIPS32R2, 0) MIPS_CPU ("1004kf1_1", PROCESSOR_24KF1_1, MIPS_ISA_MIPS32R2, 0) MIPS_CPU ("interaptiv", PROCESSOR_24KF2_1, MIPS_ISA_MIPS32R2, 0) +MIPS_CPU ("interaptiv-mr2", PROCESSOR_INTERAPTIV_MR2, MIPS_ISA_MIPS32R3, + PTF_AVOID_BRANCHLIKELY_ALWAYS) /* MIPS32 Release 5 processors. 
*/ MIPS_CPU ("p5600", PROCESSOR_P5600, MIPS_ISA_MIPS32R5, (PTF_AVOID_BRANCHLIKELY_SPEED diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h index c514c0711de..6b8f2370752 100644 --- a/gcc/config/mips/mips-protos.h +++ b/gcc/config/mips/mips-protos.h @@ -242,7 +242,8 @@ extern bool mips_get_pic_call_symbol (rtx *, int); extern void mips_set_return_address (rtx, rtx); extern bool mips_move_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int); extern bool mips_store_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int); -extern bool mips_expand_block_move (rtx, rtx, rtx); +extern bool mips_expand_block_move (rtx, rtx, rtx, rtx); +extern bool mips16_expand_copy (rtx, rtx, rtx, rtx); extern void mips_expand_synci_loop (rtx, rtx); extern void mips_init_cumulative_args (CUMULATIVE_ARGS *, tree); diff --git a/gcc/config/mips/mips-tables.opt b/gcc/config/mips/mips-tables.opt index c26009cfb5c..9f5da087bbd 100644 --- a/gcc/config/mips/mips-tables.opt +++ b/gcc/config/mips/mips-tables.opt @@ -634,83 +634,86 @@ EnumValue Enum(mips_arch_opt_value) String(interaptiv) Value(85) Canonical EnumValue -Enum(mips_arch_opt_value) String(p5600) Value(86) Canonical +Enum(mips_arch_opt_value) String(interaptiv-mr2) Value(86) Canonical EnumValue -Enum(mips_arch_opt_value) String(m5100) Value(87) Canonical +Enum(mips_arch_opt_value) String(p5600) Value(87) Canonical EnumValue -Enum(mips_arch_opt_value) String(m5101) Value(88) Canonical +Enum(mips_arch_opt_value) String(m5100) Value(88) Canonical EnumValue -Enum(mips_arch_opt_value) String(m6201) Value(89) Canonical +Enum(mips_arch_opt_value) String(m5101) Value(89) Canonical EnumValue -Enum(mips_arch_opt_value) String(5kc) Value(90) Canonical +Enum(mips_arch_opt_value) String(m6201) Value(90) Canonical EnumValue -Enum(mips_arch_opt_value) String(r5kc) Value(90) +Enum(mips_arch_opt_value) String(5kc) Value(91) Canonical EnumValue -Enum(mips_arch_opt_value) String(5kf) Value(91) Canonical +Enum(mips_arch_opt_value) 
String(r5kc) Value(91) EnumValue -Enum(mips_arch_opt_value) String(r5kf) Value(91) +Enum(mips_arch_opt_value) String(5kf) Value(92) Canonical EnumValue -Enum(mips_arch_opt_value) String(20kc) Value(92) Canonical +Enum(mips_arch_opt_value) String(r5kf) Value(92) EnumValue -Enum(mips_arch_opt_value) String(r20kc) Value(92) +Enum(mips_arch_opt_value) String(20kc) Value(93) Canonical EnumValue -Enum(mips_arch_opt_value) String(sb1) Value(93) Canonical +Enum(mips_arch_opt_value) String(r20kc) Value(93) EnumValue -Enum(mips_arch_opt_value) String(sb1a) Value(94) Canonical +Enum(mips_arch_opt_value) String(sb1) Value(94) Canonical EnumValue -Enum(mips_arch_opt_value) String(sr71000) Value(95) Canonical +Enum(mips_arch_opt_value) String(sb1a) Value(95) Canonical EnumValue -Enum(mips_arch_opt_value) String(sr71k) Value(95) +Enum(mips_arch_opt_value) String(sr71000) Value(96) Canonical EnumValue -Enum(mips_arch_opt_value) String(xlr) Value(96) Canonical +Enum(mips_arch_opt_value) String(sr71k) Value(96) EnumValue -Enum(mips_arch_opt_value) String(loongson3a) Value(97) Canonical +Enum(mips_arch_opt_value) String(xlr) Value(97) Canonical EnumValue -Enum(mips_arch_opt_value) String(gs464) Value(98) Canonical +Enum(mips_arch_opt_value) String(loongson3a) Value(98) Canonical EnumValue -Enum(mips_arch_opt_value) String(gs464e) Value(99) Canonical +Enum(mips_arch_opt_value) String(gs464) Value(99) Canonical EnumValue -Enum(mips_arch_opt_value) String(gs264e) Value(100) Canonical +Enum(mips_arch_opt_value) String(gs464e) Value(100) Canonical EnumValue -Enum(mips_arch_opt_value) String(octeon) Value(101) Canonical +Enum(mips_arch_opt_value) String(gs264e) Value(101) Canonical EnumValue -Enum(mips_arch_opt_value) String(octeon+) Value(102) Canonical +Enum(mips_arch_opt_value) String(octeon) Value(102) Canonical EnumValue -Enum(mips_arch_opt_value) String(octeon2) Value(103) Canonical +Enum(mips_arch_opt_value) String(octeon+) Value(103) Canonical EnumValue -Enum(mips_arch_opt_value) 
String(octeon3) Value(104) Canonical +Enum(mips_arch_opt_value) String(octeon2) Value(104) Canonical EnumValue -Enum(mips_arch_opt_value) String(xlp) Value(105) Canonical +Enum(mips_arch_opt_value) String(octeon3) Value(105) Canonical EnumValue -Enum(mips_arch_opt_value) String(i6400) Value(106) Canonical +Enum(mips_arch_opt_value) String(xlp) Value(106) Canonical EnumValue -Enum(mips_arch_opt_value) String(i6500) Value(107) Canonical +Enum(mips_arch_opt_value) String(i6400) Value(107) Canonical EnumValue -Enum(mips_arch_opt_value) String(p6600) Value(108) Canonical +Enum(mips_arch_opt_value) String(i6500) Value(108) Canonical + +EnumValue +Enum(mips_arch_opt_value) String(p6600) Value(109) Canonical diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc index d9c913f2e23..9808fda286c 100644 --- a/gcc/config/mips/mips.cc +++ b/gcc/config/mips/mips.cc @@ -106,7 +106,7 @@ along with GCC; see the file COPYING3. If not see to save and restore registers, and to allocate and deallocate the top part of the frame. */ #define MIPS_MAX_FIRST_STACK_STEP \ - (!TARGET_COMPRESSION ? 0x7ff0 \ + (!TARGET_COMPRESSION && !TARGET_USE_SAVE_RESTORE ? 0x7ff0 \ : TARGET_MICROMIPS || GENERATE_MIPS16E_SAVE_RESTORE ? 0x7f8 \ : TARGET_64BIT ? 
0x100 : 0x400) @@ -1413,6 +1413,19 @@ static const struct mips_rtx_cost_data 1, /* branch_cost */ 4 /* memory_latency */ }, + { /* INTERAPTIV_MR2 (identical to 24KF1_1) */ + COSTS_N_INSNS (4), /* fp_add */ + COSTS_N_INSNS (4), /* fp_mult_sf */ + COSTS_N_INSNS (5), /* fp_mult_df */ + COSTS_N_INSNS (17), /* fp_div_sf */ + COSTS_N_INSNS (32), /* fp_div_df */ + COSTS_N_INSNS (5), /* int_mult_si */ + COSTS_N_INSNS (5), /* int_mult_di */ + COSTS_N_INSNS (41), /* int_div_si */ + COSTS_N_INSNS (41), /* int_div_di */ + 1, /* branch_cost */ + 4 /* memory_latency */ + }, { /* Loongson-2E */ DEFAULT_COSTS }, @@ -1752,7 +1765,7 @@ static const struct mips_rtx_cost_data COSTS_N_INSNS (68), /* int_div_di */ 1, /* branch_cost */ 4 /* memory_latency */ - } + } }; static rtx mips_find_pic_call_symbol (rtx_insn *, rtx, bool); @@ -2454,7 +2467,10 @@ mips_build_lower (struct mips_integer_op *codes, unsigned HOST_WIDE_INT value) /* Either this is a simple LUI/ORI pair, or clearing the lowest 16 bits gives a value with at least 17 trailing zeros. */ i = mips_build_integer (codes, high); - codes[i].code = IOR; + if (ISA_HAS_MIPS16E2 && (value & 0x8000) == 0) + codes[i].code = PLUS; + else + codes[i].code = IOR; codes[i].value = value & 0xffff; } return i + 1; @@ -4660,7 +4676,7 @@ mips_rewrite_small_data_p (rtx x, enum mips_symbol_context context) /* Return true if OP refers to small data symbols directly, not through a LO_SUM. CONTEXT is the context in which X appears. */ -static int +static bool mips_small_data_pattern_1 (rtx x, enum mips_symbol_context context) { subrtx_var_iterator::array_type array; @@ -4760,6 +4776,11 @@ mips16_constant_cost (int code, HOST_WIDE_INT x) return COSTS_N_INSNS (1); return -1; + case IOR: + if (ISA_HAS_MIPS16E2 && SMALL_OPERAND_UNSIGNED (x)) + return COSTS_N_INSNS (1); + return -1; + case LEU: /* Like LE, but reject the always-true case. 
*/ if (x == -1) @@ -6060,6 +6081,15 @@ mips_split_move_insn (rtx dest, rtx src, rtx insn) /* Return the appropriate instructions to move SRC into DEST. Assume that SRC is operand 1 and DEST is operand 0. */ +bool +mips_constant_pool_symbol_in_sdata (rtx x, enum mips_symbol_context context) +{ + enum mips_symbol_type symbol_type; + return (mips_symbolic_constant_p (x, context, &symbol_type) + && symbol_type == SYMBOL_GP_RELATIVE + && CONSTANT_POOL_ADDRESS_P (x)); +} + const char * mips_output_move (rtx insn, rtx dest, rtx src) { @@ -6234,7 +6264,13 @@ mips_output_move (rtx insn, rtx dest, rtx src) } if (src_code == HIGH) - return (TARGET_MIPS16 && !ISA_HAS_MIPS16E2) ? "#" : "lui\t%0,%h1"; + { + if (mips_constant_pool_symbol_in_sdata (XEXP (src, 0), + SYMBOL_CONTEXT_MEM)) + return "move\t%0,$28"; + + return (TARGET_MIPS16 && !ISA_HAS_MIPS16E2) ? "#" : "lui\t%0,%h1"; + } if (CONST_GP_P (src)) return "move\t%0,%1"; @@ -9133,6 +9169,10 @@ mips_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, return false; if (align < BITS_PER_WORD) return size < UNITS_PER_WORD; + /* It is more profitable to use COPYW for at least 2 words. */ + if (ISA_HAS_COPY + && align >= BITS_PER_WORD && size >= 2 * UNITS_PER_WORD) + return false; return size <= MIPS_MAX_MOVE_BYTES_STRAIGHT; } @@ -9202,7 +9242,8 @@ mips_store_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align) Assume that the areas do not overlap. 
*/ static void -mips_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) +mips_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length, + HOST_WIDE_INT alignment ATTRIBUTE_UNUSED) { HOST_WIDE_INT offset, delta; unsigned HOST_WIDE_INT bits; @@ -9302,6 +9343,7 @@ mips_adjust_block_mem (rtx mem, HOST_WIDE_INT length, static void mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, + HOST_WIDE_INT alignment, HOST_WIDE_INT bytes_per_iter) { rtx_code_label *label; @@ -9325,7 +9367,7 @@ mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, emit_label (label); /* Emit the loop body. */ - mips_block_move_straight (dest, src, bytes_per_iter); + mips_block_move_straight (dest, src, bytes_per_iter, alignment); /* Move on to the next block. */ mips_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter)); @@ -9340,36 +9382,176 @@ mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, /* Mop up any left-over bytes. */ if (leftover) - mips_block_move_straight (dest, src, leftover); + mips_block_move_straight (dest, src, leftover, alignment); else /* Temporary fix for PR79150. */ emit_insn (gen_nop ()); } +/* Expand a cpymemsi instruction using the mips16 copy instruction. */ + +bool +mips16_expand_copy (rtx dest, rtx src, rtx length, rtx alignment) +{ + rtx base_dest, base_src; + rtx temp; + HOST_WIDE_INT offset_dest, offset_src; + int word_count, byte_count, offset = 0; + rtx first_dest = dest, first_src = src; + rtx xdest = XEXP (dest, 0); + rtx xsrc = XEXP (src, 0); + int align = INTVAL (alignment); + bool word_by_pieces_p = false; + + if (!ISA_HAS_COPY) + return false; + + gcc_assert (!TARGET_64BIT); + gcc_assert (MEM_P (src) && MEM_P (dest)); + + if (!CONST_INT_P (length)) + return false; + + byte_count = INTVAL (length); + + if (byte_count > (mips_movmem_limit == -1 + ? 
MIPS_MAX_MOVE_BYTES_STRAIGHT + : mips_movmem_limit)) + return false; + + if (byte_count >= MIPS_MAX_MOVE_BYTES_STRAIGHT + && align < 4) + return false; + + word_count = byte_count / UNITS_PER_WORD; + byte_count = byte_count % UNITS_PER_WORD; + + mips_split_plus (xdest, &base_dest, &offset_dest); + mips_split_plus (xsrc, &base_src, &offset_src); + + /* In some cases, it's better to move by pieces rather than generating + COPYW/UCOPYW: + 1. Copying 4 bytes when both dest and src are aligned but base+offset is + likely to be squashed. + 2. Copying 4 bytes when the lowest alignment is 2-bytes iff the offsets + are not the same or multiples of 16 bytes. */ + + /* Case (1). */ + if (word_count == 1 + && MEM_ALIGN (dest) >= 4 * BITS_PER_UNIT + && MEM_ALIGN (src) >= 4 * BITS_PER_UNIT + && (offset_dest >= 0 || offset_src >= 0)) + word_by_pieces_p = true; + + /* Case (2). */ + if (word_count == 1 && align >= 2 + && !(offset_src == offset_dest && offset_src % 16 != 0)) + word_by_pieces_p = true; + + if (word_by_pieces_p) + { + rtx src2 = adjust_address (src, BLKmode, offset); + rtx dest2 = adjust_address (dest, BLKmode, offset); + move_by_pieces (dest2, src2, 4, INTVAL (alignment), RETURN_BEGIN); + offset += 4; + word_count = 0; + } + + if (word_count > 0 && !REG_P (XEXP (dest, 0))) + { + rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0)); + first_dest = replace_equiv_address (first_dest, dest_reg); + } + + if (word_count > 0 && !REG_P (XEXP (src, 0))) + { + rtx src_reg = copy_addr_to_reg (XEXP (src, 0)); + first_src = replace_equiv_address (first_src, src_reg); + } + + while (word_count > 0) + { + int new_word_count, new_offset; + rtx adj_src, adj_dest; + + new_offset = offset; + new_word_count = word_count >= 4 ? 4 : word_count; + + /* Using a COPYW dst,src,*,1 instruction causes the core to stall + so we generate a lw/sw sequence to get around this core bug. 
*/ + if (new_word_count == 1 && align >= 4) + { + temp = gen_reg_rtx (SImode); + adj_src = adjust_address (first_src, Pmode, new_offset); + adj_dest = adjust_address (first_dest, Pmode, new_offset); + mips_emit_move (temp, adj_src); + mips_emit_move (adj_dest, temp); + } + else + { + adj_src = adjust_address (first_src, BLKmode, new_offset); + adj_dest = adjust_address (first_dest, BLKmode, new_offset); + set_mem_size (adj_src, new_word_count * 4); + set_mem_size (adj_dest, new_word_count * 4); + emit_insn (gen_mips16_copy (adj_dest, adj_src, GEN_INT (new_offset), + GEN_INT (new_word_count), alignment)); + } + + offset += new_word_count * 4; + word_count = word_count >= 4 ? word_count - 4 : 0; + + if (offset > 496) + { + rtx dest_reg = copy_addr_to_reg (XEXP (adj_dest, 0)); + rtx src_reg = copy_addr_to_reg (XEXP (adj_src, 0)); + first_dest = replace_equiv_address (first_dest, dest_reg); + first_src = replace_equiv_address (first_src, src_reg); + offset = 0; + } + } + + if (byte_count > 0) + { + rtx src2 = adjust_address (src, BLKmode, offset); + rtx dest2 = adjust_address (dest, BLKmode, offset); + move_by_pieces (dest2, src2, byte_count, align, RETURN_BEGIN); + } + + return true; +} + /* Expand a cpymemsi instruction, which copies LENGTH bytes from - memory reference SRC to memory reference DEST. */ + memory reference SRC to memory reference DEST. The lowest alignment + of SRC and DEST is specified by ALIGNMENT. 
*/ bool -mips_expand_block_move (rtx dest, rtx src, rtx length) +mips_expand_block_move (rtx dest, rtx src, rtx length, rtx alignment) { if (!CONST_INT_P (length)) return false; + if (TARGET_MIPS16 && !ISA_HAS_COPY) + return false; + if (mips_isa_rev >= 6 && !ISA_HAS_UNALIGNED_ACCESS - && (MEM_ALIGN (src) < MIPS_MIN_MOVE_MEM_ALIGN - || MEM_ALIGN (dest) < MIPS_MIN_MOVE_MEM_ALIGN)) + && !(INTVAL (alignment) * BITS_PER_UNIT >= MIPS_MIN_MOVE_MEM_ALIGN + || ISA_HAS_COPY)) return false; if (mips_movmem_limit == -1 || INTVAL (length) < mips_movmem_limit) { - if (INTVAL (length) <= MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER) + if (ISA_HAS_COPY) + return mips16_expand_copy (dest, src, length, alignment); + else if (INTVAL (length) <= MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER) { - mips_block_move_straight (dest, src, INTVAL (length)); + mips_block_move_straight (dest, src, INTVAL (length), + INTVAL (alignment)); return true; } else if (optimize) { mips_block_move_loop (dest, src, INTVAL (length), + INTVAL (alignment), MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER); return true; } @@ -12287,6 +12469,8 @@ mips_compute_frame_info (void) struct mips_frame_info *frame; HOST_WIDE_INT offset, size; unsigned int regno, i; + int global_reg_used; + int local_reg_used; /* Skip re-computing the frame info after reload completed. */ if (reload_completed) @@ -12401,10 +12585,61 @@ mips_compute_frame_info (void) frame->mask |= 1 << (EH_RETURN_DATA_REGNO (i) - GP_REG_FIRST); } + /* The SAVE and RESTORE instructions have two ranges of registers: + $a3-$a0 and $s2-$s8. If we save one register in the range, we must + save all later registers too. This can cause problems if the user has + placed a global value into a register that falls into one of these + ranges and the function uses a callee saved register that also in the + same range. In this case the global value could be accidently saved + and restored on function entry and exit which means any changes made to + its value in the function will be lost. 
+ + The code below checks for this case, and if it is found it turns off + the use of the SAVE/RESTORE instruction in this function. + + This approach is not optimal because it should really just check that + the number of the register used for the global value occurs before + one of the callee saved registers. However as the use of forcing global + values into a register is small it is fine to use the unoptimal version + of the code for the moment. */ + cfun->machine->safe_to_use_save_restore = true; + + global_reg_used = 0; + local_reg_used = 0; + + for (i = 0 ; i < ARRAY_SIZE (mips16e_s2_s8_regs) ; i++) + { + regno = mips16e_s2_s8_regs[i]; + if (global_regs[regno]) + global_reg_used = 1; + + if (BITSET_P (frame->mask, regno)) + local_reg_used = 1; + } + + if (global_reg_used && local_reg_used) + cfun->machine->safe_to_use_save_restore = false; + + global_reg_used = 0; + local_reg_used = 0; + + for (i = 0 ; i < ARRAY_SIZE (mips16e_a0_a3_regs) ; i++) + { + regno = mips16e_a0_a3_regs[i]; + if (global_regs[regno]) + global_reg_used = 1; + + if (BITSET_P (frame->mask, regno)) + local_reg_used = 1; + } + + if (global_reg_used && local_reg_used) + cfun->machine->safe_to_use_save_restore = false; + /* The MIPS16e SAVE and RESTORE instructions have two ranges of registers: $a3-$a0 and $s2-$s8. If we save one register in the range, we must save all later registers too. 
*/ - if (GENERATE_MIPS16E_SAVE_RESTORE) + if (GENERATE_MIPS16E_SAVE_RESTORE && cfun->machine->safe_to_use_save_restore) { mips16e_mask_registers (&frame->mask, mips16e_s2_s8_regs, ARRAY_SIZE (mips16e_s2_s8_regs), &frame->num_gp); @@ -13495,7 +13730,9 @@ mips_expand_prologue (void) HOST_WIDE_INT step1; step1 = MIN (size, MIPS_MAX_FIRST_STACK_STEP); - if (GENERATE_MIPS16E_SAVE_RESTORE) + if (GENERATE_MIPS16E_SAVE_RESTORE + && !cfun->machine->interrupt_handler_p + && cfun->machine->safe_to_use_save_restore) { HOST_WIDE_INT offset; unsigned int mask, regno; @@ -13945,7 +14182,9 @@ mips_expand_epilogue (bool sibcall_p) emit_insn (gen_blockage ()); mips_epilogue.cfa_restore_sp_offset = step2; - if (GENERATE_MIPS16E_SAVE_RESTORE && frame->mask != 0) + if (GENERATE_MIPS16E_SAVE_RESTORE && frame->mask != 0 + && !cfun->machine->interrupt_handler_p + && cfun->machine->safe_to_use_save_restore) { unsigned int regno, mask; HOST_WIDE_INT offset; @@ -21555,6 +21794,32 @@ mips_option_override (void) "-mcompact-branches=never"); } + /* Enable the use of interAptiv MIPS32 SAVE/RESTORE instructions. */ + if (TARGET_USE_SAVE_RESTORE == -1) + { + if (TARGET_INTERAPTIV_MR2) + TARGET_USE_SAVE_RESTORE = 1; + else + TARGET_USE_SAVE_RESTORE = 0; + } + else if (TARGET_USE_SAVE_RESTORE + && !TARGET_INTERAPTIV_MR2) + error ("unsupported combination: %qs %s", + mips_arch_info->name, "-muse-save-restore"); + + /* Enable the use of interAptiv MIPS16 COPYW/UCOPYW instructions. */ + if (TARGET_USE_COPYW_UCOPYW == -1) + { + if (TARGET_INTERAPTIV_MR2) + TARGET_USE_COPYW_UCOPYW = 1; + else + TARGET_USE_COPYW_UCOPYW = 0; + } + else if (TARGET_USE_COPYW_UCOPYW + && !TARGET_INTERAPTIV_MR2) + error ("unsupported combination: %qs %s", + mips_arch_info->name, "-muse-copyw_ucopyw"); + /* Require explicit relocs for MIPS R6 onwards. This enables simplification of the compact branch and jump support through the backend. 
*/ if (!TARGET_EXPLICIT_RELOCS && mips_isa_rev >= 6) @@ -24221,7 +24486,6 @@ mips_bit_clear_p (enum machine_mode mode, unsigned HOST_WIDE_INT m) return false; } - /* Initialize the GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index 70a7b2032dc..b727074bf53 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -265,7 +265,11 @@ struct mips_cpu_info { /* Generate mips16e code. Default 16bit ASE for mips32* and mips64* */ #define GENERATE_MIPS16E (TARGET_MIPS16 && mips_isa >= MIPS_ISA_MIPS32) /* Generate mips16e register save/restore sequences. */ -#define GENERATE_MIPS16E_SAVE_RESTORE (GENERATE_MIPS16E && mips_abi == ABI_32) +#define GENERATE_MIPS16E_SAVE_RESTORE ((GENERATE_MIPS16E \ + || (TARGET_USE_SAVE_RESTORE \ + && !TARGET_MICROMIPS \ + && TARGET_SOFT_FLOAT)) \ + && mips_abi == ABI_32) /* True if we're generating a form of MIPS16 code in which general text loads are allowed. */ @@ -319,6 +323,7 @@ struct mips_cpu_info { || mips_arch == PROCESSOR_SB1A) #define TARGET_SR71K (mips_arch == PROCESSOR_SR71000) #define TARGET_XLP (mips_arch == PROCESSOR_XLP) +#define TARGET_INTERAPTIV_MR2 (mips_arch == PROCESSOR_INTERAPTIV_MR2) /* Scheduling target defines. 
*/ #define TUNE_20KC (mips_tune == PROCESSOR_20KC) @@ -431,6 +436,8 @@ struct mips_cpu_info { for (p = macro; *p != 0; p++) \ if (*p == '+') \ *p = 'P'; \ + else if (*p == '-') \ + *p = '_'; \ else \ *p = TOUPPER (*p); \ \ @@ -844,7 +851,7 @@ struct mips_cpu_info { %{march=mips32r2|march=m4k|march=4ke*|march=4ksd|march=24k* \ |march=34k*|march=74k*|march=m14k*|march=1004k* \ |march=interaptiv: -mips32r2} \ - %{march=mips32r3: -mips32r3} \ + %{march=mips32r3|march=interaptiv-mr2: -mips32r3} \ %{march=mips32r5|march=p5600|march=m5100|march=m5101: -mips32r5} \ %{march=mips32r6|march=m6201: -mips32r6} \ %{march=mips64|march=5k*|march=20k*|march=sb1*|march=sr71000 \ @@ -963,10 +970,12 @@ struct mips_cpu_info { #define MIPS_ASE_DSP_SPEC \ "%{!mno-dsp: \ %{march=24ke*|march=34kc*|march=34kf*|march=34kx*|march=1004k* \ - |march=interaptiv: -mdsp} \ + |march=interaptiv*: -mdsp} \ %{march=74k*|march=m14ke*: %{!mno-dspr2: -mdspr2 -mdsp}}}" \ "%{!mforbidden-slots: \ - %{mips32r6|mips64r6:%{mmicromips:-mno-forbidden-slots}}}" + %{mips32r6|mips64r6:%{mmicromips:-mno-forbidden-slots}}}" \ + "%{!mno-mips16e2: \ + %{march=interaptiv-mr2: -mmips16e2}}" #define MIPS_ASE_LOONGSON_MMI_SPEC \ "%{!mno-loongson-mmi: \ @@ -1334,6 +1343,10 @@ struct mips_cpu_info { #define ISA_HAS_MIPS16E2 (TARGET_MIPS16 && TARGET_MIPS16E2 \ && !TARGET_64BIT) +/* The interAptiv MR2 COPYW/UCOPYW instructions are available. */ +#define ISA_HAS_COPY (TARGET_MIPS16 && TARGET_INTERAPTIV_MR2 \ + && TARGET_USE_COPYW_UCOPYW) + /* True if the result of a load is not available to the next instruction. A nop will then be needed between instructions like "lw $4,..." and "addiu $4,$4,1". */ @@ -1501,6 +1514,7 @@ struct mips_cpu_info { %{mtune=*}" \ FP_ASM_SPEC "\ %{mmips16e2} \ +%{mmips16-copy:-mmips16cp} \ %(subtarget_asm_spec)" /* Extra switches sometimes passed to the linker. */ @@ -2733,7 +2747,6 @@ typedef struct mips_args { do not truncate the shift amount at all. 
*/ #define SHIFT_COUNT_TRUNCATED (!TARGET_LOONGSON_MMI) - /* Specify the machine mode that pointers have. After generation of rtl, the compiler makes no further distinction between pointers and any other objects of this machine mode. */ @@ -3143,7 +3156,9 @@ while (0) /* The maximum number of bytes that can be copied by one iteration of a cpymemsi loop; see mips_block_move_loop. */ #define MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER \ - (UNITS_PER_WORD * 4) + (ISA_HAS_COPY \ + ? UNITS_PER_WORD * 4 * 4 \ + : UNITS_PER_WORD * 4) /* The maximum number of bytes that can be copied by a straight-line implementation of cpymemsi; see mips_block_move_straight. We want @@ -3174,7 +3189,9 @@ while (0) #define MOVE_RATIO(speed) \ (HAVE_cpymemsi \ - ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \ + ? (ISA_HAS_COPY \ + ? MIPS_MAX_MOVE_BYTES_STRAIGHT / 4 / MOVE_MAX \ + : MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX) \ : MIPS_CALL_RATIO / 2) /* For CLEAR_RATIO, when optimizing for size, give a better estimate @@ -3435,6 +3452,10 @@ struct GTY(()) machine_function { /* True if the function should generate hazard barrier return. */ bool use_hazard_barrier_return_p; + + /* True if we are safe to use SAVE/RESTORE instruction in the + prologue/epilogue. 
*/ + bool safe_to_use_save_restore; }; #endif diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index b1e55428682..21f31a5595a 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -35,6 +35,7 @@ 74kf2_1 74kf1_1 74kf3_2 + interaptiv_mr2 loongson_2e loongson_2f gs464 @@ -4332,7 +4333,7 @@ (sign_extract:GPR (match_operand:BLK 1 "memory_operand") (match_operand 2 "const_int_operand") (match_operand 3 "const_int_operand")))] - "ISA_HAS_LWL_LWR" + "ISA_HAS_LWL_LWR || ISA_HAS_MIPS16E2" { if (mips_expand_ext_as_unaligned_load (operands[0], operands[1], INTVAL (operands[2]), @@ -4369,7 +4370,7 @@ (zero_extract:GPR (match_operand:BLK 1 "memory_operand") (match_operand 2 "const_int_operand") (match_operand 3 "const_int_operand")))] - "ISA_HAS_LWL_LWR" + "ISA_HAS_LWL_LWR || ISA_HAS_MIPS16E2" { if (mips_expand_ext_as_unaligned_load (operands[0], operands[1], INTVAL (operands[2]), @@ -4445,7 +4446,7 @@ (match_operand 1 "const_int_operand") (match_operand 2 "const_int_operand")) (match_operand:GPR 3 "reg_or_0_operand"))] - "ISA_HAS_LWL_LWR" + "ISA_HAS_LWL_LWR || ISA_HAS_MIPS16E2" { if (mips_expand_ins_as_unaligned_store (operands[0], operands[3], INTVAL (operands[1]), @@ -4891,7 +4892,7 @@ (define_insn "*movdi_32bit_mips16" [(set (match_operand:DI 0 "nonimmediate_operand" "=d,y,d,d,d,d,m,*d") - (match_operand:DI 1 "move_operand" "d,d,y,K,N,m,d,*x"))] + (match_operand:DI 1 "move_operand" "d,d,y,i,N,m,d,*x"))] "!TARGET_64BIT && TARGET_MIPS16 && (register_operand (operands[0], DImode) || register_operand (operands[1], DImode))" @@ -4919,6 +4920,74 @@ [(set_attr "move_type" "move,move,move,const,constN,const,loadpool,load,store,mflo") (set_attr "mode" "DI")]) +;; Operand 0 is the register containing the destination address +;; Operand 1 is the register containing the source address +;; Operand 2 is a byte offset to use for both the source and dest addresses +;; Operand 3 is the number of words to copy (1,2,3, or 4) +;; Operand 4 is a constant integer 
value for the known alignment. + +(define_expand "mips16_copy" + [(parallel + [(set (match_operand 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (use (match_operand 3 "" "")) + (use (match_operand 4 "" "")) + (clobber (reg:SI 12)) + (clobber (reg:SI 13)) + (clobber (reg:SI 14)) + (clobber (reg:SI 15))])] + "ISA_HAS_COPY" + { + /* Using a COPYW dst,src,*,1 instruction causes the core to stall + so we can not use mips16_copy in this case. */ + gcc_assert (!(INTVAL (operands[3]) == 1 && INTVAL (operands[4]) >= 4)); + }) + +(define_insn "" + [(set (mem:BLK (match_operand:SI 0 "register_operand" "d")) + (mem:BLK (match_operand:SI 1 "register_operand" "d"))) + (use (match_operand:SI 2 "const_int_operand")) + (use (match_operand:SI 3 "const_int_operand")) + (use (match_operand:SI 4 "const_int_operand")) + (clobber (reg:SI 12)) + (clobber (reg:SI 13)) + (clobber (reg:SI 14)) + (clobber (reg:SI 15))] + "ISA_HAS_COPY" + { + if (INTVAL (operands[4]) < 4) + return "ucopyw\t%0,%1,%2,%3"; + else + return "copyw\t%0,%1,%2,%3"; + } + [(set_attr "move_type" "store") + (set_attr "mode" "SI") + (set_attr "extended_mips16" "yes")]) + +(define_insn "mips16_copy_ofs" + [(set (mem:BLK (plus:SI (match_operand:SI 0 "register_operand" "d") + (match_operand:SI 2 "const_int_operand"))) + (mem:BLK (plus:SI (match_operand:SI 1 "register_operand" "d") + (match_dup 2)))) + (use (match_dup 2)) + (use (match_operand:SI 3 "const_int_operand")) + (use (match_operand:SI 4 "const_int_operand")) + (clobber (reg:SI 12)) + (clobber (reg:SI 13)) + (clobber (reg:SI 14)) + (clobber (reg:SI 15))] + "ISA_HAS_COPY" + { + if (INTVAL (operands[4]) < 4) + return "ucopyw\t%0,%1,%2,%3"; + else + return "copyw\t%0,%1,%2,%3"; + } + [(set_attr "move_type" "store") + (set_attr "mode" "SI") + (set_attr "extended_mips16" "yes")]) + ;; On the mips16, we can split ld $r,N($r) into an add and a load, ;; when the original load is a 4 byte instruction but the add and the ;; load are 2 2 byte 
instructions. @@ -5426,7 +5495,11 @@ (define_split [(set (match_operand 0 "d_operand") (match_operand 1 "const_int_operand"))] - "TARGET_MIPS16 && reload_completed && INTVAL (operands[1]) < 0" + "TARGET_MIPS16 && reload_completed + && (ISA_HAS_MIPS16E2 + ? SMALL_OPERAND_UNSIGNED (-INTVAL (operands[1])) + && INTVAL (operands[1]) != 0 + : INTVAL (operands[1]) < 0)" [(set (match_dup 2) (match_dup 3)) (set (match_dup 2) @@ -5842,12 +5915,12 @@ (match_operand:BLK 1 "general_operand")) (use (match_operand:SI 2 "")) (use (match_operand:SI 3 "const_int_operand"))])] - "!TARGET_MIPS16 && !TARGET_MEMCPY" + "(!TARGET_MIPS16 || ISA_HAS_COPY) && !TARGET_MEMCPY" { - if (mips_expand_block_move (operands[0], operands[1], operands[2])) + if (mips_expand_block_move (operands[0], operands[1], + operands[2], operands[3])) DONE; - else - FAIL; + FAIL; }) ;; @@ -7779,7 +7852,8 @@ && mips16e_save_restore_pattern_p (operands[0], INTVAL (operands[2]), NULL)" { return mips16e_output_save_restore (operands[0], INTVAL (operands[2])); } [(set_attr "type" "arith") - (set_attr "extended_mips16" "yes")]) + (set_attr "extended_mips16" "yes") + (set_attr "can_delay" "no")]) ;; Thread-Local Storage diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt index a4b93de924d..c5a3addbc55 100644 --- a/gcc/config/mips/mips.opt +++ b/gcc/config/mips/mips.opt @@ -559,3 +559,9 @@ mfunc-opt-list=FILE Use to specify per function optimizations. 
mblockmov-limit= Target RejectNegative Undocumented Joined UInteger Var(mips_movmem_limit) Init(-1) + +muse-save-restore +Target Undocumented Var(TARGET_USE_SAVE_RESTORE) Init(-1) + +muse-copyw-ucopyw +Target Undocumented Var(TARGET_USE_COPYW_UCOPYW) Init(-1) diff --git a/gcc/config/mips/predicates.md b/gcc/config/mips/predicates.md index a64900d25ef..31cc57af435 100644 --- a/gcc/config/mips/predicates.md +++ b/gcc/config/mips/predicates.md @@ -578,7 +578,7 @@ (and (match_code "ne") (not (match_test "TARGET_MIPS16"))))) (define_predicate "small_data_pattern" - (and (match_code "set,parallel,unspec,unspec_volatile,prefetch") + (and (match_code "set,parallel,prefetch") (match_test "mips_small_data_pattern_p (op)"))) (define_predicate "mem_noofs_operand" diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index cd84cafafd5..d607f8e430c 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -28195,7 +28195,7 @@ The processor names are: @samp{74kc}, @samp{74kf2_1}, @samp{74kf1_1}, @samp{74kf3_2}, @samp{1004kc}, @samp{1004kf2_1}, @samp{1004kf1_1}, @samp{i6400}, @samp{i6500}, -@samp{interaptiv}, +@samp{interaptiv}, @samp{interaptiv-mr2}, @samp{loongson2e}, @samp{loongson2f}, @samp{loongson3a}, @samp{gs464}, @samp{gs464e}, @samp{gs264e}, @samp{m4k}, diff --git a/gcc/testsuite/gcc.target/mips/iamr2.c b/gcc/testsuite/gcc.target/mips/iamr2.c new file mode 100644 index 00000000000..40e425ddcd9 --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/iamr2.c @@ -0,0 +1,51 @@ +/* { dg-options "-march=interaptiv-mr2 -mno-abicalls -mgpopt -G8 -mabi=32 -mips16 -mmips16e2 -mno-memcpy" } */ + +/* Test UCOPYW. */ + +/* { dg-final { scan-assembler "test01:.*\tucopyw\t.*\tucopyw\t.*test01\n" } } */ +/* { dg-final { scan-assembler-times "\tucopyw\t" 2 } } */ +struct node01 +{ + int i; + int j; + int k; + int l; + int a; + int b; + int c; + int d; +} __attribute__ ((packed)); + +struct node01 dst01; +struct node01 src01; + +void +test01 (void) +{ + dst01 = src01; +} + +/* Test COPYW. 
*/ + +/* { dg-final { scan-assembler "test02:.*\tcopyw\t.*\tcopyw\t.*test02\n" } } */ +/* { dg-final { scan-assembler-times "\tcopyw\t" 2 } } */ +struct node02 +{ + int i; + int j; + int k; + int l; + int a; + int b; + int c; + int d; +}; + +struct node02 dst02; +struct node02 src02; + +void +test02 (void) +{ + dst02 = src02; +} diff --git a/gcc/testsuite/gcc.target/mips/memcpy-3.c b/gcc/testsuite/gcc.target/mips/memcpy-3.c new file mode 100644 index 00000000000..cf1b073c9ae --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/memcpy-3.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "isa=interaptiv-mr2 -mno-memcpy (-mips16)" } */ +/* { dg-skip-if "code quality test" { *-*-* } { "" } { "" } } */ + +char * ref = "123456789012"; + +__attribute__((mips16)) +void +f1 (int *p) +{ + __builtin_memcpy (p, ref, 12); +} + +/* { dg-final { scan-assembler "\tucopyw\t" } } */ diff --git a/gcc/testsuite/gcc.target/mips/memcpy-4.c b/gcc/testsuite/gcc.target/mips/memcpy-4.c new file mode 100644 index 00000000000..fc4f3761c9a --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/memcpy-4.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "isa=interaptiv-mr2 -mno-memcpy -mno-use-copyw-ucopyw (-mips16)" } */ +/* { dg-skip-if "code quality test" { *-*-* } { "" } { "" } } */ + +char * ref = "123456789012"; + +__attribute__((mips16)) +void +f1 (int *p) +{ + __builtin_memcpy (p, ref, 12); +} + +/* { dg-final { scan-assembler-not "\tucopyw\t" } } */ diff --git a/gcc/testsuite/gcc.target/mips/mips.exp b/gcc/testsuite/gcc.target/mips/mips.exp index b0825ca4339..3907fe2a778 100644 --- a/gcc/testsuite/gcc.target/mips/mips.exp +++ b/gcc/testsuite/gcc.target/mips/mips.exp @@ -289,6 +289,7 @@ foreach option { long-calls lxc1-sxc1 madd4 + memcpy paired-single plt shared @@ -303,6 +304,7 @@ foreach option { loongson-ext loongson-ext2 mips16e2 + use-copyw-ucopyw } { lappend mips_option_groups $option "-m(no-|)$option" } @@ -1135,6 +1137,10 @@ proc mips-dg-options { args } { if { 
![regexp {^-march=p5600} $arch] } { set arch "-march=p5600" } + } elseif { [string equal $spec "isa=interaptiv-mr2"] } { + if { ![regexp {^-march=interaptiv-mr2} $arch] } { + set arch "-march=interaptiv-mr2" + } } else { if { ![regexp {^(isa(?:|_rev))(=|<=|>=)([0-9]*)$} \ $spec dummy prop relation value nocpus] } { diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-9.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-9.c index 2516b663ca1..103dd82caa1 100644 --- a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-9.c +++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-9.c @@ -1,4 +1,5 @@ /* { dg-options "-mr10k-cache-barrier=store -G8 -w" } */ +/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */ /* Test that out-of-range stores to components of static objects are protected by a cache barrier. */ diff --git a/gcc/testsuite/gcc.target/mips/stack-1.c b/gcc/testsuite/gcc.target/mips/stack-1.c index 5f25c21a0a9..227e6c9201b 100644 --- a/gcc/testsuite/gcc.target/mips/stack-1.c +++ b/gcc/testsuite/gcc.target/mips/stack-1.c @@ -1,3 +1,4 @@ +/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */ /* { dg-final { scan-assembler "\td?addiu\t(\\\$sp,)?\\\$sp,\[1-9\]" } } */ /* { dg-final { scan-assembler "\tlw\t" } } */ /* { dg-final { scan-assembler-not "\td?addiu\t(\\\$sp,)?\\\$sp,\[1-9\].*\tlw\t" } } */ -- 2.34.1