When I patched the compiler for PR 58587 to not set -mvsx-timode automatically for ISA 2.06 sytems (power7/power8), there was an unexpected problem between ISA 2.07 quad memory support and not allowing TImode variables in VSX registers.
The movti insn in the -mno-vsx-timode case, did not have quad memory support enabled, and issued a split for quad memory load/stores, while the insn splitter said the insn could be done by a quad memory load/store, and did not split the insn. This patch allows TImode values to have register + offset form in the -mno-vsx-timode (-mvsx-timode currently limits TImode addresses to be a single register, so that it can be used with both quad memory and VSX load -- this is one more thing that needs to be addressed with secondary reload support), and it fixes the non VSX-timode insn to add quad memory support. I have done a bootstrap and run make check with no regressions. I have built all of Spec 2006 which showed the initial problem and it all built successfully. The only code changes that are due to these patches are due to load and store quad allowing reg+offset addressing on 64-bit power8. Are these patches ok to install? [gcc] 2013-10-11 Michael Meissner <meiss...@linux.vnet.ibm.com> PR target/58673 * config/rs6000/rs6000.c (rs6000_legitimate_address_p): Only restrict TImode addresses to single indirect registers if both -mquad-memory and -mvsx-timode are used. (rs6000_output_move_128bit): Use quad_load_store_p to determine if we should emit load/store quad. Remove using %y for quad memory addresses. * config/rs6000/rs6000.md (mov<mode>_ppc64, TI/PTImode): Add constraints to allow load/store quad on machines where TImode is not allowed in VSX registers. [gcc/testsuite] 2013-10-11 Michael Meissner <meiss...@linux.vnet.ibm.com> PR target/58673 * gcc.target/powerpc/pr58673-1.c: New file to test whether -mquad-word + -mno-vsx-timode causes errors. * gcc.target/powerpc/pr58673-2.c: Likewise. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 203389) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -7182,12 +7182,12 @@ rs6000_legitimate_address_p (enum machin if (reg_offset_p && legitimate_constant_pool_address_p (x, mode, reg_ok_strict)) return 1; - /* For TImode, if we have load/store quad, only allow register indirect - addresses. This will allow the values to go in either GPRs or VSX - registers without reloading. The vector types would tend to go into VSX - registers, so we allow REG+REG, while TImode seems somewhat split, in that - some uses are GPR based, and some VSX based. */ - if (mode == TImode && TARGET_QUAD_MEMORY) + /* For TImode, if we have load/store quad and TImode in VSX registers, only + allow register indirect addresses. This will allow the values to go in + either GPRs or VSX registers without reloading. The vector types would + tend to go into VSX registers, so we allow REG+REG, while TImode seems + somewhat split, in that some uses are GPR based, and some VSX based. */ + if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE) return 0; /* If not REG_OK_STRICT (before reload) let pass any stack offset. */ if (! reg_ok_strict @@ -16060,12 +16060,11 @@ rs6000_output_move_128bit (rtx operands[ { if (dest_gpr_p) { - if (TARGET_QUAD_MEMORY && (dest_regno & 1) == 0 - && quad_memory_operand (src, mode) - && !reg_overlap_mentioned_p (dest, src)) + if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src)) { /* lq/stq only has DQ-form, so avoid X-form that %y produces. */ - return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1"; + /* return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1"; */ + return "lq %0,%1"; } else return "#"; @@ -16095,11 +16094,11 @@ rs6000_output_move_128bit (rtx operands[ { if (src_gpr_p) { - if (TARGET_QUAD_MEMORY && (src_regno & 1) == 0 - && quad_memory_operand (dest, mode)) + if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src)) { /* lq/stq only has DQ-form, so avoid X-form that %y produces. */ - return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0"; + /* return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0"; */ + return "stq %1,%0"; } else return "#"; Index: gcc/config/rs6000/rs6000.md =================================================================== --- gcc/config/rs6000/rs6000.md (revision 203389) +++ gcc/config/rs6000/rs6000.md (working copy) @@ -10312,15 +10312,15 @@ (define_insn "*mov<mode>_string" (const_string "conditional")))]) (define_insn "*mov<mode>_ppc64" - [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r") - (match_operand:TI2 1 "input_operand" "r,Y,r,F"))] + [(set (match_operand:TI2 0 "nonimmediate_operand" "=wQ,Y,r,r,r,r") + (match_operand:TI2 1 "input_operand" "r,r,wQ,Y,r,F"))] "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode) && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode)))" { return rs6000_output_move_128bit (operands); } - [(set_attr "type" "store,load,*,*") + [(set_attr "type" "store,store,load,load,*,*") (set_attr "length" "8")]) (define_split Index: gcc/testsuite/gcc.target/powerpc/pr58673-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/pr58673-1.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/pr58673-1.c (revision 0) @@ -0,0 +1,78 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mcpu=power8 -m64 -O1" } */ + +enum typecode +{ + QIcode, QUcode, HIcode, HUcode, SIcode, SUcode, DIcode, DUcode, SFcode, + DFcode, XFcode, Pcode, Tcode, LAST_AND_UNUSED_TYPECODE +}; +enum bytecode_opcode +{ + neverneverland, drop, duplicate, over, setstackSI, adjstackSI, constQI, + constHI, constSI, constDI, constSF, constDF, constXF, constP, loadQI, + loadHI, loadSI, loadDI, loadSF, loadDF, loadXF, loadP, storeQI, storeHI, + storeSI, storeDI, storeSF, storeDF, storeXF, storeP, storeBLK, clearBLK, + addconstPSI, newlocalSI, localP, argP, convertQIHI, convertHISI, + convertSIDI, convertQISI, convertQUHU, convertHUSU, convertSUDU, + convertQUSU, convertSFDF, convertDFXF, convertHIQI, convertSIHI, + convertDISI, convertSIQI, convertSUQU, convertDFSF, convertXFDF, + convertSISF, convertSIDF, convertSIXF, convertSUSF, convertSUDF, + convertSUXF, convertDISF, convertDIDF, convertDIXF, convertDUSF, + convertDUDF, convertDUXF, convertSFSI, convertDFSI, convertXFSI, + convertSFSU, convertDFSU, convertXFSU, convertSFDI, convertDFDI, + convertXFDI, convertSFDU, convertDFDU, convertXFDU, convertPSI, + convertSIP, convertSIT, convertDIT, convertSFT, convertDFT, convertXFT, + convertPT, zxloadBI, sxloadBI, sstoreBI, addSI, addDI, addSF, addDF, + addXF, addPSI, subSI, subDI, subSF, subDF, subXF, subPP, mulSI, mulDI, + mulSU, mulDU, mulSF, mulDF, mulXF, divSI, divDI, divSU, divDU, divSF, + divDF, divXF, modSI, modDI, modSU, modDU, andSI, andDI, iorSI, iorDI, + xorSI, xorDI, lshiftSI, lshiftSU, lshiftDI, lshiftDU, rshiftSI, rshiftSU, + rshiftDI, rshiftDU, ltSI, ltSU, ltDI, ltDU, ltSF, ltDF, ltXF, ltP, leSI, + leSU, leDI, leDU, leSF, leDF, leXF, leP, geSI, geSU, geDI, geDU, geSF, + geDF, geXF, geP, gtSI, gtSU, gtDI, gtDU, gtSF, gtDF, gtXF, gtP, eqSI, + eqDI, eqSF, eqDF, eqXF, eqP, neSI, neDI, neSF, neDF, neXF, neP, negSI, + negDI, negSF, negDF, negXF, notSI, notDI, notT, predecQI, predecHI, + predecSI, predecDI, predecP, predecSF, predecDF, predecXF, predecBI, + preincQI, preincHI, preincSI, preincDI, preincP, preincSF, preincDF, + preincXF, preincBI, postdecQI, postdecHI, postdecSI, postdecDI, postdecP, + postdecSF, postdecDF, postdecXF, postdecBI, postincQI, postincHI, + postincSI, postincDI, postincP, postincSF, postincDF, postincXF, + postincBI, xjumpif, xjumpifnot, jump, jumpP, caseSI, caseSU, caseDI, + caseDU, call, returnP, ret, linenote, LAST_AND_UNUSED_OPCODE +}; +struct binary_operator +{ + enum bytecode_opcode opcode; + enum typecode arg0; +}; +static struct conversion_recipe +{ + unsigned char *opcodes; + int cost; +} +conversion_recipe[((int) LAST_AND_UNUSED_TYPECODE)][((int) + LAST_AND_UNUSED_TYPECODE)]; +static struct conversion_recipe +deduce_conversion (from, to) + enum typecode from, to; +{ + (conversion_recipe[(int) from][(int) to]. + opcodes ? 0 : (conversion_recipe[(int) from][(int) to] = + deduce_conversion (from, to), 0)); +} + +void +bc_expand_binary_operation (optab, resulttype, arg0, arg1) + struct binary_operator optab[]; +{ + int i, besti, cost, bestcost; + enum typecode resultcode, arg0code; + for (i = 0; optab[i].opcode != -1; ++i) + { + (conversion_recipe[(int) arg0code][(int) optab[i].arg0]. + opcodes ? 0 : (conversion_recipe[(int) arg0code][(int) optab[i].arg0] = + deduce_conversion (arg0code, optab[i].arg0), 0)); + } +} Index: gcc/testsuite/gcc.target/powerpc/pr58673-2.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/pr58673-2.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/pr58673-2.c (revision 0) @@ -0,0 +1,217 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mcpu=power8 -O3 -m64 -funroll-loops" } */ + +#include <stddef.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> + +typedef long unsigned int size_t; +typedef struct _IO_FILE FILE; +typedef float real; +typedef real rvec[3]; +typedef real matrix[3][3]; +typedef real tensor[3][3]; +enum +{ + F_BONDS, F_G96BONDS, F_MORSE, F_CUBICBONDS, F_CONNBONDS, F_HARMONIC, + F_ANGLES, F_G96ANGLES, F_PDIHS, F_RBDIHS, F_IDIHS, F_LJ14, F_COUL14, F_LJ, + F_BHAM, F_LJLR, F_DISPCORR, F_SR, F_LR, F_WPOL, F_POSRES, F_DISRES, + F_DISRESVIOL, F_ORIRES, F_ORIRESDEV, F_ANGRES, F_ANGRESZ, F_SHAKE, + F_SHAKENC, F_SETTLE, F_DUMMY2, F_DUMMY3, F_DUMMY3FD, F_DUMMY3FAD, + F_DUMMY3OUT, F_DUMMY4FD, F_EQM, F_EPOT, F_EKIN, F_ETOT, F_TEMP, F_PRES, + F_DVDL, F_DVDLKIN, F_NRE +}; +typedef union +{ + struct + { + } + bham; + struct + { + real rA, krA, rB, krB; + } + harmonic; +} +t_iparams; +typedef struct +{ + t_iparams *iparams; +} +t_idef; +typedef struct +{ +} +t_inputrec; +typedef struct +{ +} +t_commrec; +typedef struct +{ +} +t_forcerec; +typedef struct +{ +} +t_mdatoms; +typedef struct +{ +} +t_filenm; +enum +{ + eoPres, eoEpot, eoVir, eoDist, eoMu, eoForce, eoFx, eoFy, eoFz, eoPx, eoPy, + eoPz, eoPolarizability, eoDipole, eoObsNR, eoMemory = + eoObsNR, eoInter, eoUseVirial, eoNR +}; +extern char *eoNames[eoNR]; +typedef struct +{ + int bPrint; +} +t_coupl_LJ; +typedef struct +{ + int eObs; + t_iparams xi; +} +t_coupl_iparams; +typedef struct +{ + real act_value[eoObsNR]; + real av_value[eoObsNR]; + real ref_value[eoObsNR]; + int bObsUsed[eoObsNR]; + int nLJ, nBU, nQ, nIP; + t_coupl_LJ *tcLJ; +} +t_coupl_rec; +static void +pr_ff (t_coupl_rec * tcr, real time, t_idef * idef, t_commrec * cr, int nfile, + t_filenm fnm[]) +{ + static FILE *prop; + static FILE **out = ((void *) 0); + static FILE **qq = ((void *) 0); + static FILE **ip = ((void *) 0); + char buf[256]; + char *leg[] = { + "C12", "C6" + }; + char **raleg; + int i, j, index; + if ((prop == ((void *) 0)) && (out == ((void *) 0)) && (qq == ((void *) 0)) + && (ip == ((void *) 0))) + { + for (i = j = 0; (i < eoObsNR); i++) + { + if (tcr->bObsUsed[i]) + { + raleg[j++] = + (__extension__ + (__builtin_constant_p (eoNames[i]) + && ((size_t) (const void *) ((eoNames[i]) + 1) - + (size_t) (const void *) (eoNames[i]) == + 1) ? (((const char *) (eoNames[i]))[0] == + '\0' ? (char *) calloc ((size_t) 1, + (size_t) 1) : ( + { + size_t + __len + = + strlen + (eoNames + [i]) + + + 1; + char + *__retval + = + (char + *) + malloc + (__len); + __retval;} + )): __strdup (eoNames[i]))); + raleg[j++] = + (__extension__ + (__builtin_constant_p (buf) + && ((size_t) (const void *) ((buf) + 1) - + (size_t) (const void *) (buf) == + 1) ? (((const char *) (buf))[0] == + '\0' ? (char *) calloc ((size_t) 1, + (size_t) 1) : ( + { + size_t + __len + = + strlen + (buf) + + + 1; + char + *__retval + = + (char + *) + malloc + (__len); + __retval;} + )): __strdup (buf))); + } + } + if (tcr->nLJ) + { + for (i = 0; (i < tcr->nLJ); i++) + { + if (tcr->tcLJ[i].bPrint) + { + xvgr_legend (out[i], (sizeof (leg) / sizeof ((leg)[0])), + leg); + } + } + } + } +} + +void +do_coupling (FILE * log, int nfile, t_filenm fnm[], t_coupl_rec * tcr, real t, + int step, real ener[], t_forcerec * fr, t_inputrec * ir, + int bMaster, t_mdatoms * md, t_idef * idef, real mu_aver, + int nmols, t_commrec * cr, matrix box, tensor virial, + tensor pres, rvec mu_tot, rvec x[], rvec f[], int bDoIt) +{ + int i, j, ati, atj, atnr2, type, ftype; + real deviation[eoObsNR], prdev[eoObsNR], epot0, dist, rmsf; + real ff6, ff12, ffa, ffb, ffc, ffq, factor, dt, mu_ind; + int bTest, bPrint; + t_coupl_iparams *tip; + if (bPrint) + { + pr_ff (tcr, t, idef, cr, nfile, fnm); + } + for (i = 0; (i < eoObsNR); i++) + { + deviation[i] = + calc_deviation (tcr->av_value[i], tcr->act_value[i], + tcr->ref_value[i]); + prdev[i] = tcr->ref_value[i] - tcr->act_value[i]; + } + if (bPrint) + pr_dev (tcr, t, prdev, cr, nfile, fnm); + for (i = 0; (i < atnr2); i++) + { + factor = dt * deviation[tip->eObs]; + switch (ftype) + { + case F_BONDS: + if (fabs (tip->xi.harmonic.krA) > 1.2e-38) + idef->iparams[type].harmonic.krA *= + (1 + factor / tip->xi.harmonic.krA); + } + } +}