Should lower-subreg be disabled for IBM long double TFmode? On powerpc64-linux, this testcase
long double ld_abs (long double x) { return __builtin_fabsl (x); }
compiled with -m64 -O2 -S generates the horrible code shown on the left. The code on the right is ideal, as generated by gcc-4.2. We regressed with gcc-4.3.0, i.e. with lower-subreg.

.L.ld_abs:                      .L.ld_abs:
        fabs 0,1                        fmr 0,1
        stfd 2,-32(1)                   fabs 1,1
        fcmpu 7,1,0                     fcmpu 7,0,1
        ori 2,2,0                       beqlr 7
        ld 10,-32(1)                    fneg 2,2
        mr 9,10                         blr
        beq 7,.L2
        std 10,-24(1)
        ori 2,2,0
        lfd 13,-24(1)
        fneg 13,13
        stfd 13,-24(1)
        ori 2,2,0
        ld 9,-24(1)
.L2:
        stfd 0,-32(1)
        std 9,-8(1)
        ori 2,2,0
        ld 8,-32(1)
        lfd 2,-8(1)
        std 8,-16(1)
        ori 2,2,0
        lfd 1,-16(1)
        blr

It isn't hard to see why we are going wrong. IBM long double is really a two element array of double, and the rs6000 backend uses subregs to access the elements. The problem is that lower-subreg lowers to word_mode, so we get DImode. word_mode makes sense for most targets where subregs of FP modes might be used to narrow an access for bit-twiddling operations on the sign bit. It doesn't make sense for us. We want DFmode for FP operations. An example is the expander used by the testcase.

(define_expand "abstf2_internal"
  [(set (match_operand:TF 0 "gpc_reg_operand" "")
        (match_operand:TF 1 "gpc_reg_operand" ""))
   (set (match_dup 3) (match_dup 5))
   (set (match_dup 5) (abs:DF (match_dup 5)))
   (set (match_dup 4) (compare:CCFP (match_dup 3) (match_dup 5)))
   (set (pc) (if_then_else (eq (match_dup 4) (const_int 0))
                           (label_ref (match_operand 2 "" ""))
                           (pc)))
   (set (match_dup 6) (neg:DF (match_dup 6)))]
  "!TARGET_IEEEQUAD && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LONG_DOUBLE_128"
  "
{
  const int hi_word = LONG_DOUBLE_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
  const int lo_word = LONG_DOUBLE_WORDS_BIG_ENDIAN ? 
GET_MODE_SIZE (DFmode) : 0;
  operands[3] = gen_reg_rtx (DFmode);
  operands[4] = gen_reg_rtx (CCFPmode);
  operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
  operands[6] = simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word);
}")

The following patch disables lower-subreg for double-double TFmode. Bootstrap and regression tests are OK, but I'm a little unsure whether this is the right thing to do.

	* rs6000.c (TARGET_INIT_LOWER_SUBREG): Define.
	(rs6000_init_lower_subreg): New function.
	* lower-subreg.c (init_lower_subreg): Call targetm.init_lower_subreg.
	* target.def (init_lower_subreg): New.
	* doc/tm.texi.in (TARGET_INIT_LOWER_SUBREG): Document.
	* doc/tm.texi: Regenerate.

Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c (revision 199781)
+++ gcc/config/rs6000/rs6000.c (working copy)
@@ -59,6 +59,7 @@
 #include "opts.h"
 #include "tree-vectorizer.h"
 #include "dumpfile.h"
+#include "lower-subreg.h"
 #if TARGET_XCOFF
 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
 #endif
@@ -1317,6 +1318,8 @@
 #define TARGET_RTX_COSTS rs6000_rtx_costs
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
+#undef TARGET_INIT_LOWER_SUBREG
+#define TARGET_INIT_LOWER_SUBREG rs6000_init_lower_subreg
 
 #undef TARGET_DWARF_REGISTER_SPAN
 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
@@ -26865,6 +26955,20 @@
   return ret;
 }
 
+static void
+rs6000_init_lower_subreg (void *data)
+{
+  if (!TARGET_IEEEQUAD
+      && TARGET_HARD_FLOAT
+      && (TARGET_FPRS || TARGET_E500_DOUBLE)
+      && TARGET_LONG_DOUBLE_128)
+    {
+      struct target_lower_subreg *info = (struct target_lower_subreg *) data;
+      info->x_choices[0].move_modes_to_split[TFmode] = false;
+      info->x_choices[1].move_modes_to_split[TFmode] = false;
+    }
+}
+
 /* Returns a code for a target-specific builtin that implements
    reciprocal of the function, or NULL_TREE if not available. 
*/
 
Index: gcc/lower-subreg.c
===================================================================
--- gcc/lower-subreg.c (revision 199781)
+++ gcc/lower-subreg.c (working copy)
@@ -39,6 +39,7 @@
 #include "tree-pass.h"
 #include "df.h"
 #include "lower-subreg.h"
+#include "target.h"
 
 #ifdef STACK_GROWS_DOWNWARD
 # undef STACK_GROWS_DOWNWARD
@@ -287,6 +288,9 @@
   if (LOG_COSTS)
     fprintf (stderr, "\nSpeed costs\n===========\n\n");
   compute_costs (true, &rtxes);
+
+  if (targetm.init_lower_subreg)
+    targetm.init_lower_subreg (this_target_lower_subreg);
 }
 
 static bool
Index: gcc/target.def
===================================================================
--- gcc/target.def (revision 199781)
+++ gcc/target.def (working copy)
@@ -2926,6 +2926,12 @@
  void, (int *code, rtx *op0, rtx *op1, bool op0_preserve_value),
  default_canonicalize_comparison)
 
+/* Allow modification of subreg choices. */
+DEFHOOK
+(init_lower_subreg,
+ "",
+ void, (void *data), NULL)
+
 DEFHOOKPOD
 (atomic_test_and_set_trueval,
  "This value should be set if the result written by\
Index: gcc/doc/tm.texi.in
===================================================================
--- gcc/doc/tm.texi.in (revision 199781)
+++ gcc/doc/tm.texi.in (working copy)
@@ -6375,6 +6375,12 @@
 registers on machines with lots of registers.
 @end deftypefn
 
+@hook TARGET_INIT_LOWER_SUBREG
+This hook allows modification of the choices the lower_subreg pass
+will make for particular subreg modes. @var{data} is a pointer to a
+@code{struct target_lower_subreg}.
+@end deftypefn
+
 @node Scheduling
 @section Adjusting the Instruction Scheduler
 
-- 
Alan Modra
Australia Development Lab, IBM