The attached patch fixes PR rtl-optimization/77541: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77541
The patch was successfully bootstrapped and tested on x86-64. Committed as rev. 242848.
Index: ChangeLog =================================================================== --- ChangeLog (revision 242713) +++ ChangeLog (working copy) @@ -1,3 +1,11 @@ +2016-11-24 Vladimir Makarov <vmaka...@redhat.com> + + PR rtl-optimization/77541 + * lra-constraints.c (struct input_reload): Add field match_p. + (get_reload_reg): Check modes of input reloads to generate unique + value reload pseudo. + (match_reload): Add input reload pseudo for the current insn. + 2016-11-22 Alexander Monakov <amona...@ispras.ru> * internal-fn.c (expand_GOMP_SIMT_LANE): New. Index: lra-constraints.c =================================================================== --- lra-constraints.c (revision 242713) +++ lra-constraints.c (working copy) @@ -529,6 +529,8 @@ init_curr_operand_mode (void) /* Structure describes input reload of the current insns. */ struct input_reload { + /* True for input reload of matched operands. */ + bool match_p; /* Reloaded value. */ rtx input; /* Reload pseudo used. */ @@ -563,6 +565,7 @@ get_reload_reg (enum op_type type, machi { int i, regno; enum reg_class new_class; + bool unique_p = false; if (type == OP_OUT) { @@ -574,39 +577,53 @@ get_reload_reg (enum op_type type, machi e.g. volatile memory. */ if (! side_effects_p (original)) for (i = 0; i < curr_insn_input_reloads_num; i++) - if (rtx_equal_p (curr_insn_input_reloads[i].input, original) - && in_class_p (curr_insn_input_reloads[i].reg, rclass, &new_class)) - { - rtx reg = curr_insn_input_reloads[i].reg; - regno = REGNO (reg); - /* If input is equal to original and both are VOIDmode, - GET_MODE (reg) might be still different from mode. - Ensure we don't return *result_reg with wrong mode. 
*/ - if (GET_MODE (reg) != mode) - { - if (in_subreg_p) - continue; - if (GET_MODE_SIZE (GET_MODE (reg)) < GET_MODE_SIZE (mode)) - continue; - reg = lowpart_subreg (mode, reg, GET_MODE (reg)); - if (reg == NULL_RTX || GET_CODE (reg) != SUBREG) - continue; - } - *result_reg = reg; - if (lra_dump_file != NULL) - { - fprintf (lra_dump_file, " Reuse r%d for reload ", regno); - dump_value_slim (lra_dump_file, original, 1); - } - if (new_class != lra_get_allocno_class (regno)) - lra_change_class (regno, new_class, ", change to", false); - if (lra_dump_file != NULL) - fprintf (lra_dump_file, "\n"); - return false; - } - *result_reg = lra_create_new_reg (mode, original, rclass, title); + { + if (! curr_insn_input_reloads[i].match_p + && rtx_equal_p (curr_insn_input_reloads[i].input, original) + && in_class_p (curr_insn_input_reloads[i].reg, rclass, &new_class)) + { + rtx reg = curr_insn_input_reloads[i].reg; + regno = REGNO (reg); + /* If input is equal to original and both are VOIDmode, + GET_MODE (reg) might be still different from mode. + Ensure we don't return *result_reg with wrong mode. */ + if (GET_MODE (reg) != mode) + { + if (in_subreg_p) + continue; + if (GET_MODE_SIZE (GET_MODE (reg)) < GET_MODE_SIZE (mode)) + continue; + reg = lowpart_subreg (mode, reg, GET_MODE (reg)); + if (reg == NULL_RTX || GET_CODE (reg) != SUBREG) + continue; + } + *result_reg = reg; + if (lra_dump_file != NULL) + { + fprintf (lra_dump_file, " Reuse r%d for reload ", regno); + dump_value_slim (lra_dump_file, original, 1); + } + if (new_class != lra_get_allocno_class (regno)) + lra_change_class (regno, new_class, ", change to", false); + if (lra_dump_file != NULL) + fprintf (lra_dump_file, "\n"); + return false; + } + /* If we have an input reload with a different mode, make sure it + will get a different hard reg. 
*/ + else if (REG_P (original) + && REG_P (curr_insn_input_reloads[i].input) + && REGNO (original) == REGNO (curr_insn_input_reloads[i].input) + && (GET_MODE (original) + != GET_MODE (curr_insn_input_reloads[i].input))) + unique_p = true; + } + *result_reg = (unique_p + ? lra_create_new_reg_with_unique_value + : lra_create_new_reg) (mode, original, rclass, title); lra_assert (curr_insn_input_reloads_num < LRA_MAX_INSN_RELOADS); curr_insn_input_reloads[curr_insn_input_reloads_num].input = original; + curr_insn_input_reloads[curr_insn_input_reloads_num].match_p = false; curr_insn_input_reloads[curr_insn_input_reloads_num++].reg = *result_reg; return true; } @@ -1002,6 +1019,12 @@ match_reload (signed char out, signed ch lra_emit_move (copy_rtx (new_in_reg), in_rtx); *before = get_insns (); end_sequence (); + /* Add the new pseudo to consider values of subsequent input reload + pseudos. */ + lra_assert (curr_insn_input_reloads_num < LRA_MAX_INSN_RELOADS); + curr_insn_input_reloads[curr_insn_input_reloads_num].input = in_rtx; + curr_insn_input_reloads[curr_insn_input_reloads_num].match_p = true; + curr_insn_input_reloads[curr_insn_input_reloads_num++].reg = new_in_reg; for (i = 0; (in = ins[i]) >= 0; i++) { lra_assert Index: testsuite/ChangeLog =================================================================== --- testsuite/ChangeLog (revision 242713) +++ testsuite/ChangeLog (working copy) @@ -1,3 +1,8 @@ +2016-11-24 Vladimir Makarov <vmaka...@redhat.com> + + PR rtl-optimization/77541 + * gcc.target/i386/pr77541.c: New. 
+ 2016-11-22 Jakub Jelinek <ja...@redhat.com> PR target/78451 Index: testsuite/gcc.target/i386/pr77541.c =================================================================== --- testsuite/gcc.target/i386/pr77541.c (revision 0) +++ testsuite/gcc.target/i386/pr77541.c (working copy) @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -Wno-psabi" } */ + +#define MAGIC 0x0706050403020100 + +typedef unsigned long long u64; +typedef unsigned __int128 v64u128 __attribute__ ((vector_size (64))); + +v64u128 __attribute__ ((noinline, noclone)) +foo (u64 x1, u64 x2, u64 x3, u64 x4, v64u128 x5) +{ + (void)x1, (void)x2; + x4 >>= x4 & 63; + return x3 + x4 + x5; +} + +int +main () +{ + v64u128 x = foo (0, 0, 0, MAGIC, (v64u128) {}); + if (x[0] != MAGIC || x[1] != MAGIC || x[2] != MAGIC || x[3] != MAGIC) + __builtin_abort(); + return 0; +}