On Wed, Nov 16, 2016 at 04:15:10PM +0000, Andrew Stubbs wrote: > On 16/11/16 13:10, Michael Meissner wrote: > >Yeah, SFmode and DFmode should not have the TARGET_{S,D}F_FPR checks. > > So, I can safely resolve my initial problem by simply removing them? > And that wouldn't break the other use of that predicate? > > >But a secondary problem is the early clobber in the match_scratch. > > So, the FPR_FUSION insn works because operands 1 and 2 cannot > conflict, which means the early-clobber is not necessary, but the > GPR_FUSION insn cannot work because there's no way to ensure that > operands 1 and 2 don't conflict without also specifying that > operands 0 and 2 don't conflict, which they commonly do. > > We could fix it, for now, by adding new patterns that fit both cases > (given that the register numbers are known at peephole time). > > Or, we could disable the peephole in the case where this would occur > (as my original patch does, albeit bluntly).
I'm starting to test this patch now (it is currently in stage3 on LE power8; I still need to build the BE power8 and BE power7 versions when I get into the office shortly, and to build SPEC 2017 with it for PR 78101): [gcc] 2016-11-16 Michael Meissner <meiss...@linux.vnet.ibm.com> PR target/78101 * config/rs6000/predicates.md (fusion_addis_mem_combo_load): Add the appropriate checks for SFmode/DFmode loads/stores in GPR registers. (fusion_addis_mem_combo_store): Likewise. * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): Rename fusion_fpr_* to fusion_vsx_* and add in support for ISA 3.0 scalar d-form instructions for traditional Altivec registers. (emit_fusion_p9_load): Likewise. (emit_fusion_p9_store): Likewise. * config/rs6000/rs6000.md (p9 fusion store peephole2): Remove early clobber from scratch register. Do not match if the register being stored is the scratch register. (fusion_vsx_<P:mode>_<FPR_FUSION:mode>_load): Rename fusion_fpr_* to fusion_vsx_* and add in support for ISA 3.0 scalar d-form instructions for traditional Altivec registers. (fusion_fpr_<P:mode>_<FPR_FUSION:mode>_load): Likewise. (fusion_vsx_<P:mode>_<FPR_FUSION:mode>_store): Likewise. (fusion_fpr_<P:mode>_<FPR_FUSION:mode>_store): Likewise. [gcc/testsuite] 2016-11-16 Michael Meissner <meiss...@linux.vnet.ibm.com> PR target/78101 * gcc.target/powerpc/fusion4.c: New test. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/predicates.md =================================================================== --- gcc/config/rs6000/predicates.md (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 242456) +++ gcc/config/rs6000/predicates.md (.../gcc/config/rs6000) (working copy) @@ -1844,7 +1844,7 @@ (define_predicate "fusion_gpr_mem_load" ;; Match a GPR load (lbz, lhz, lwz, ld) that uses a combined address in the ;; memory field with both the addis and the memory offset. Sign extension ;; is not handled here, since lha and lwa are not fused. -;; With extended fusion, also match a FPR load (lfd, lfs) and float_extend +;; With P9 fusion, also match a fpr/vector load and float_extend (define_predicate "fusion_addis_mem_combo_load" (match_code "mem,zero_extend,float_extend") { @@ -1873,11 +1873,15 @@ (define_predicate "fusion_addis_mem_comb break; case SFmode: - case DFmode: if (!TARGET_P9_FUSION) return 0; break; + case DFmode: + if ((!TARGET_POWERPC64 && !TARGET_DF_FPR) || !TARGET_P9_FUSION) + return 0; + break; + default: return 0; } @@ -1920,6 +1924,7 @@ (define_predicate "fusion_addis_mem_comb case QImode: case HImode: case SImode: + case SFmode: break; case DImode: @@ -1927,13 +1932,8 @@ (define_predicate "fusion_addis_mem_comb return 0; break; - case SFmode: - if (!TARGET_SF_FPR) - return 0; - break; - case DFmode: - if (!TARGET_DF_FPR) + if (!TARGET_POWERPC64 && !TARGET_DF_FPR) return 0; break; Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 242456) +++ gcc/config/rs6000/rs6000.c (.../gcc/config/rs6000) (working copy) @@ -3441,28 +3441,28 @@ rs6000_init_hard_regno_mode_ok (bool glo static const struct fuse_insns addis_insns[] = { { SFmode, DImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_di_sf_load, - CODE_FOR_fusion_fpr_di_sf_store }, + CODE_FOR_fusion_vsx_di_sf_load, + 
CODE_FOR_fusion_vsx_di_sf_store }, { SFmode, SImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_si_sf_load, - CODE_FOR_fusion_fpr_si_sf_store }, + CODE_FOR_fusion_vsx_si_sf_load, + CODE_FOR_fusion_vsx_si_sf_store }, { DFmode, DImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_di_df_load, - CODE_FOR_fusion_fpr_di_df_store }, + CODE_FOR_fusion_vsx_di_df_load, + CODE_FOR_fusion_vsx_di_df_store }, { DFmode, SImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_si_df_load, - CODE_FOR_fusion_fpr_si_df_store }, + CODE_FOR_fusion_vsx_si_df_load, + CODE_FOR_fusion_vsx_si_df_store }, { DImode, DImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_di_di_load, - CODE_FOR_fusion_fpr_di_di_store }, + CODE_FOR_fusion_vsx_di_di_load, + CODE_FOR_fusion_vsx_di_di_store }, { DImode, SImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_si_di_load, - CODE_FOR_fusion_fpr_si_di_store }, + CODE_FOR_fusion_vsx_si_di_load, + CODE_FOR_fusion_vsx_si_di_store }, { QImode, DImode, RELOAD_REG_GPR, CODE_FOR_fusion_gpr_di_qi_load, @@ -3522,6 +3522,14 @@ rs6000_init_hard_regno_mode_ok (bool glo reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load; reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store; + + if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR) + { + reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX] + = addis_insns[i].load; + reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX] + = addis_insns[i].store; + } } } @@ -39817,6 +39825,15 @@ emit_fusion_p9_load (rtx reg, rtx mem, r else gcc_unreachable (); } + else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR) + { + if (mode == SFmode) + load_string = "lxssp"; + else if (mode == DFmode || mode == DImode) + load_string = "lxsd"; + else + gcc_unreachable (); + } else if (INT_REGNO_P (r)) { switch (mode) @@ -39895,6 +39912,15 @@ emit_fusion_p9_store (rtx mem, rtx reg, else gcc_unreachable (); } + else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR) + { + if (mode == SFmode) + store_string = "stxssp"; + else if (mode == DFmode || mode == DImode) + 
store_string = "stxsd"; + else + gcc_unreachable (); + } else if (INT_REGNO_P (r)) { switch (mode) Index: gcc/config/rs6000/rs6000.md =================================================================== --- gcc/config/rs6000/rs6000.md (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 242456) +++ gcc/config/rs6000/rs6000.md (.../gcc/config/rs6000) (working copy) @@ -13438,7 +13438,8 @@ (define_peephole2 (set (match_operand:SFDF 2 "offsettable_mem_operand" "") (match_operand:SFDF 3 "toc_fusion_or_p9_reg_operand" ""))] "TARGET_P9_FUSION && peep2_reg_dead_p (2, operands[0]) - && fusion_p9_p (operands[0], operands[1], operands[2], operands[3])" + && fusion_p9_p (operands[0], operands[1], operands[2], operands[3]) + && !rtx_equal_p (operands[0], operands[3])" [(const_int 0)] { expand_fusion_p9_store (operands); @@ -13496,7 +13497,7 @@ (define_insn "fusion_gpr_<P:mode>_<GPR_F (unspec:GPR_FUSION [(match_operand:GPR_FUSION 1 "int_reg_operand" "r")] UNSPEC_FUSION_P9)) - (clobber (match_operand:P 2 "base_reg_operand" "=&b"))] + (clobber (match_operand:P 2 "base_reg_operand" "=b"))] "TARGET_P9_FUSION" { return emit_fusion_p9_store (operands[0], operands[1], operands[2]); @@ -13504,8 +13505,8 @@ (define_insn "fusion_gpr_<P:mode>_<GPR_F [(set_attr "type" "store") (set_attr "length" "8")]) -(define_insn "fusion_fpr_<P:mode>_<FPR_FUSION:mode>_load" - [(set (match_operand:FPR_FUSION 0 "fpr_reg_operand" "=d") +(define_insn "fusion_vsx_<P:mode>_<FPR_FUSION:mode>_load" + [(set (match_operand:FPR_FUSION 0 "vsx_register_operand" "=dwb") (unspec:FPR_FUSION [(match_operand:FPR_FUSION 1 "fusion_addis_mem_combo_load" "wF")] UNSPEC_FUSION_P9)) @@ -13517,10 +13518,10 @@ (define_insn "fusion_fpr_<P:mode>_<FPR_F [(set_attr "type" "fpload") (set_attr "length" "8")]) -(define_insn "fusion_fpr_<P:mode>_<FPR_FUSION:mode>_store" +(define_insn "fusion_vsx_<P:mode>_<FPR_FUSION:mode>_store" [(set (match_operand:FPR_FUSION 0 "fusion_addis_mem_combo_store" "=wF") 
(unspec:FPR_FUSION - [(match_operand:FPR_FUSION 1 "fpr_reg_operand" "d")] + [(match_operand:FPR_FUSION 1 "vsx_register_operand" "dwb")] UNSPEC_FUSION_P9)) (clobber (match_operand:P 2 "base_reg_operand" "=b"))] "TARGET_P9_FUSION" Index: gcc/testsuite/gcc.target/powerpc/fusion4.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/fusion4.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/fusion4.c (.../gcc/testsuite/gcc.target/powerpc) (revision 242499) @@ -0,0 +1,13 @@ +/* { dg-do compile { target { powerpc*-*-* && ilp32 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */ +/* { dg-options "-mcpu=power7 -mtune=power9 -O3 -msoft-float -m32" } */ + +#define LARGE 0x12345 + +float fusion_float_read (float *p){ return p[LARGE]; } + +void fusion_float_write (float *p, float f){ p[LARGE] = f; } + +/* { dg-final { scan-assembler "store fusion, type SF" } } */