As Rainer reported at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67421
Also, as described at https://gcc.gnu.org/ml/gcc-patches/2015-08/msg01147.html This patch relax the restriction on wide left shift. Previously we always honor target private pattern, so when the following check be true, we cancel the transformation. have_insn_for (ASHIFT, mode) While it's better to do a cost on generated instruction sequences to decided whether it's beneficial to honor backend pattern. Normally the generic transformation will be better. I haven't used GEN_FCN to invoke gen_* directly, instead I reused "expand_variable_shift" to let it handle all the left work. wide-shift-64 pass on sparc under the option "-mv8plus -mcpu=v9" now, and arm32 also generate better code for wide-shift-64. OK for trunk? 2015-09-03 Jiong. Wang <jiong.w...@arm.com> gcc/ PR rtl-optimization/67421 * expr.c (expand_expr_real_2): Cost instrcution sequences when doing left wide shift tranformation. -- Regards, Jiong
diff --git a/gcc/expr.c b/gcc/expr.c index ee0c1f9..cf28f44 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -8892,7 +8892,6 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, && ! unsignedp && mode == GET_MODE_WIDER_MODE (word_mode) && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode) - && ! have_insn_for (ASHIFT, mode) && TREE_CONSTANT (treeop1) && TREE_CODE (treeop0) == SSA_NAME) { @@ -8908,6 +8907,7 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, && ((TREE_INT_CST_LOW (treeop1) + GET_MODE_BITSIZE (rmode)) >= GET_MODE_BITSIZE (word_mode))) { + rtx_insn *seq, *seq_old; unsigned int high_off = subreg_highpart_offset (word_mode, mode); rtx low = lowpart_subreg (word_mode, op0, mode); @@ -8918,6 +8918,7 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, - TREE_INT_CST_LOW (treeop1)); tree rshift = build_int_cst (TREE_TYPE (treeop1), ramount); + start_sequence (); /* dest_high = src_low >> (word_size - C). */ temp = expand_variable_shift (RSHIFT_EXPR, word_mode, low, rshift, dest_high, unsignedp); @@ -8930,7 +8931,28 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, if (temp != dest_low) emit_move_insn (dest_low, temp); + seq = get_insns (); + end_sequence (); temp = target ; + + if (have_insn_for (ASHIFT, mode)) + { + bool speed_p = optimize_insn_for_speed_p (); + start_sequence (); + rtx ret_old = expand_variable_shift (code, mode, op0, + treeop1, target, + unsignedp); + + seq_old = get_insns (); + end_sequence (); + if (seq_cost (seq, speed_p) + >= seq_cost (seq_old, speed_p)) + { + seq = seq_old; + temp = ret_old; + } + } + emit_insn (seq); } } }