This patch replaces the scattered scheduling-state globals (s390_sched_state, current_side) with a single state struct, drops the S390_SCHED_STATE_NORMAL/S390_SCHED_STATE_CRACKED pseudo-states, and flips the execution side when an instruction *ends* a dispatch group (cracked, expanded, group-alone, end-group, or third normal insn) instead of when one starts a group.
--- gcc/config/s390/s390.c | 158 ++++++++++++++++++----------------------- 1 file changed, 68 insertions(+), 90 deletions(-) diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 15926ec88cd..249df00268a 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -348,7 +348,6 @@ static rtx_insn *last_scheduled_insn; static int last_scheduled_unit_distance[MAX_SCHED_UNITS]; #define NUM_SIDES 2 -static int current_side = 1; /* Estimate of number of cycles a long-running insn occupies an execution unit. */ @@ -14261,17 +14260,15 @@ s390_bb_fallthru_entry_likely (basic_block bb) return true; } -/* The s390_sched_state variable tracks the state of the current or - the last instruction group. - - 0,1,2 number of instructions scheduled in the current group - 3 the last group is complete - normal insns - 4 the last group was a cracked/expanded insn */ - -static int s390_sched_state = 0; +struct s390_sched_state +{ + /* Number of insns in the group. */ + int group_state; + /* Execution side of the group. */ + int side; +} s390_sched_state; -#define S390_SCHED_STATE_NORMAL 3 -#define S390_SCHED_STATE_CRACKED 4 +static struct s390_sched_state sched_state = {0, 1}; #define S390_SCHED_ATTR_MASK_CRACKED 0x1 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2 @@ -14369,14 +14366,14 @@ s390_is_longrunning (rtx_insn *insn) /* Return the scheduling score for INSN. The higher the score the better. The score is calculated from the OOO scheduling attributes - of INSN and the scheduling state s390_sched_state. */ + of INSN and the scheduling state sched_state. 
*/ static int s390_sched_score (rtx_insn *insn) { unsigned int mask = s390_get_sched_attrmask (insn); int score = 0; - switch (s390_sched_state) + switch (sched_state.group_state) { case 0: /* Try to put insns into the first slot which would otherwise @@ -14408,21 +14405,6 @@ s390_sched_score (rtx_insn *insn) if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0) score += 10; break; - case S390_SCHED_STATE_NORMAL: - /* Prefer not cracked insns if the last was not cracked. */ - if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0 - && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0) - score += 5; - if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0) - score += 10; - break; - case S390_SCHED_STATE_CRACKED: - /* Try to keep cracked insns together to prevent them from - interrupting groups. */ - if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0 - || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0) - score += 5; - break; } if (s390_tune >= PROCESSOR_2964_Z13) @@ -14442,46 +14424,46 @@ s390_sched_score (rtx_insn *insn) score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE); - int other_side = 1 - current_side; + int other_side = 1 - sched_state.side; /* Try to delay long-running insns when side is busy. 
*/ if (s390_is_longrunning (insn)) { if (s390_tune == PROCESSOR_2964_Z13) { - if (get_attr_z13_unit_fxd (insn) && fxd_longrunning[current_side] - && fxd_longrunning[other_side] <= fxd_longrunning[current_side]) + if (get_attr_z13_unit_fxd (insn) && fxd_longrunning[sched_state.side] + && fxd_longrunning[other_side] <= fxd_longrunning[sched_state.side]) score = MAX (0, score - 10); if (get_attr_z13_unit_fxd (insn) - && fxd_longrunning[other_side] >= fxd_longrunning[current_side]) + && fxd_longrunning[other_side] >= fxd_longrunning[sched_state.side]) score += 10; - if (get_attr_z13_unit_fpd (insn) && fpd_longrunning[current_side] - && fpd_longrunning[other_side] <= fpd_longrunning[current_side]) + if (get_attr_z13_unit_fpd (insn) && fpd_longrunning[sched_state.side] + && fpd_longrunning[other_side] <= fpd_longrunning[sched_state.side]) score = MAX (0, score - 10); if (get_attr_z13_unit_fpd (insn) - && fpd_longrunning[other_side] >= fpd_longrunning[current_side]) + && fpd_longrunning[other_side] >= fpd_longrunning[sched_state.side]) score += 10; } if (s390_tune == PROCESSOR_3906_Z14) { - if (get_attr_z14_unit_fxd (insn) && fxd_longrunning[current_side] - && fxd_longrunning[other_side] <= fxd_longrunning[current_side]) + if (get_attr_z14_unit_fxd (insn) && fxd_longrunning[sched_state.side] + && fxd_longrunning[other_side] <= fxd_longrunning[sched_state.side]) score = MAX (0, score - 10); if (get_attr_z14_unit_fxd (insn) - && fxd_longrunning[other_side] >= fxd_longrunning[current_side]) + && fxd_longrunning[other_side] >= fxd_longrunning[sched_state.side]) score += 10; - if (get_attr_z14_unit_fpd (insn) && fpd_longrunning[current_side] - && fpd_longrunning[other_side] <= fpd_longrunning[current_side]) + if (get_attr_z14_unit_fpd (insn) && fpd_longrunning[sched_state.side] + && fpd_longrunning[other_side] <= fpd_longrunning[sched_state.side]) score = MAX (0, score - 10); if (get_attr_z14_unit_fpd (insn) - && fpd_longrunning[other_side] >= fpd_longrunning[current_side]) + 
&& fpd_longrunning[other_side] >= fpd_longrunning[sched_state.side]) score += 10; } } @@ -14554,7 +14536,7 @@ s390_sched_reorder (FILE *file, int verbose, if (verbose > 5) { fprintf (file, "ready list ooo attributes - sched state: %d\n", - s390_sched_state); + sched_state.group_state); for (i = last_index; i >= 0; i--) { @@ -14605,7 +14587,7 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more) { last_scheduled_insn = insn; - bool starts_group = false; + bool ends_group = false; if (s390_tune >= PROCESSOR_2827_ZEC12 && reload_completed @@ -14613,37 +14595,21 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more) { unsigned int mask = s390_get_sched_attrmask (insn); - if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0 - || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0 - || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0) - starts_group = true; + /* Longrunning and side bookkeeping. */ + for (int i = 0; i < 2; i++) + { + fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1); + fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1); + } - if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0 - || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0) - s390_sched_state = S390_SCHED_STATE_CRACKED; - else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0 - || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0) - s390_sched_state = S390_SCHED_STATE_NORMAL; - else + unsigned latency = insn_default_latency (insn); + if (s390_is_longrunning (insn)) { - /* Only normal insns are left (mask == 0). 
*/ - switch (s390_sched_state) - { - case 0: - starts_group = true; - /* fallthrough */ - case 1: - case 2: - s390_sched_state++; - break; - case S390_SCHED_STATE_NORMAL: - starts_group = true; - s390_sched_state = 1; - break; - case S390_SCHED_STATE_CRACKED: - s390_sched_state = S390_SCHED_STATE_NORMAL; - break; - } + if (get_attr_z13_unit_fxd (insn) + || get_attr_z14_unit_fxd (insn)) + fxd_longrunning[sched_state.side] = latency; + else + fpd_longrunning[sched_state.side] = latency; } if (s390_tune >= PROCESSOR_2964_Z13) @@ -14661,24 +14627,29 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more) last_scheduled_unit_distance[i]++; } - /* If this insn started a new group, the side flipped. */ - if (starts_group) - current_side = current_side ? 0 : 1; - - for (int i = 0; i < 2; i++) + if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0 + || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0 + || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0 + || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0) { - fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1); - fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1); + sched_state.group_state = 0; + ends_group = true; } - - unsigned latency = insn_default_latency (insn); - if (s390_is_longrunning (insn)) + else { - if (get_attr_z13_unit_fxd (insn) - || get_attr_z14_unit_fxd (insn)) - fxd_longrunning[current_side] = latency; - else - fpd_longrunning[current_side] = latency; + switch (sched_state.group_state) + { + case 0: + sched_state.group_state++; + break; + case 1: + sched_state.group_state++; + break; + case 2: + sched_state.group_state++; + ends_group = true; + break; + } } if (verbose > 5) @@ -14707,7 +14678,7 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more) fprintf (file, " %d", j); fprintf (file, ")"); } - fprintf (file, " sched state: %d\n", s390_sched_state); + fprintf (file, " sched state: %d\n", sched_state.group_state); if (s390_tune >= PROCESSOR_2964_Z13) { @@ -14721,6 
+14692,13 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more) fprintf (file, "\n"); } } + + /* If this insn ended a group, the next will be on the other side. */ + if (ends_group) + { + sched_state.group_state = 0; + sched_state.side = sched_state.side ? 0 : 1; + } } if (GET_CODE (PATTERN (insn)) != USE @@ -14741,7 +14719,7 @@ s390_sched_init (FILE *file ATTRIBUTE_UNUSED, /* If the next basic block is most likely entered via a fallthru edge we keep the last sched state. Otherwise we start a new group. The scheduler traverses basic blocks in "instruction stream" ordering - so if we see a fallthru edge here, s390_sched_state will be of its + so if we see a fallthru edge here, sched_state will be of its source block. current_sched_info->prev_head is the insn before the first insn of the @@ -14751,7 +14729,7 @@ s390_sched_init (FILE *file ATTRIBUTE_UNUSED, ? NEXT_INSN (current_sched_info->prev_head) : NULL; basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL; if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb)) - s390_sched_state = 0; + sched_state.group_state = 0; } /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates -- 2.17.0