Hi!

On Wed, Dec 09, 2020 at 11:04:44AM -0600, acsaw...@linux.ibm.com wrote:
> This patch implements a RTL pass that looks for pc-relative loads of the
> address of an external variable using the PCREL_GOT relocation and a
> single load or store that uses that external address.

> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -509,7 +509,7 @@ or1k*-*-*)
>       ;;
>  powerpc*-*-*)
>       cpu_type=rs6000
> -     extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o 
> rs6000-call.o"
> +     extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o 
> rs6000-call.o pcrel-opt.o"

Make this fit on its line?  Just like extra_headers for example:

>       extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
>       extra_headers="${extra_headers} bmi2intrin.h bmiintrin.h"
>       extra_headers="${extra_headers} xmmintrin.h mm_malloc.h emmintrin.h"


> +/* This file implements a RTL pass that looks for pc-relative loads of the
> +   address of an external variable using the PCREL_GOT relocation and a 
> single
> +   load that uses that external address.  If that is found we create the
> +   PCREL_OPT relocation to possibly convert:
> +
> +     pld addr_reg,var@pcrel@got
> +
> +     <possibly other insns that do not use 'addr_reg' or 'data_reg'>
> +
> +     lwz data_reg,0(addr_reg)
> +
> +   into:
> +
> +     plwz data_reg,var@pcrel
> +
> +     <possibly other insns that do not use 'addr_reg' or 'data_reg'>
> +
> +     nop

The nop first seems much simpler, but you cannot replace a 4-byte insn
with an 8-byte one (without huge effort).  Pity.  Maybe mention that
somewhere?

> +   If the variable is not defined in the main program or the code using it is
> +   not in the main program, the linker puts the address in the .got section 
> and
> +   generates:
> +
> +             .section .got
> +     .Lvar_got:
> +             .dword var
> +
> +             .section .text
> +             pld addr_reg,.Lvar_got@pcrel
> +
> +             <possibly other insns that do not use 'addr_reg' or 'data_reg'>
> +
> +             lwz data_reg,0(addr_reg)

What is the advantage of this, over what we started with?

> +   We look for a single usage in the basic block where the external
> +   address is loaded.  Multiple uses or references in another basic block 
> will
> +   force us to not use the PCREL_OPT relocation.
> +
> +   We also optimize stores to the address of an external variable using the
> +   PCREL_GOT relocation and a single store that uses that external address.  
> If
> +   that is found we create the PCREL_OPT relocation to possibly convert:
> +
> +     pld addr_reg,var@pcrel@got
> +
> +     <possibly other insns that do not use 'addr_reg' or 'data_reg'>
> +
> +     stw data_reg,0(addr_reg)
> +
> +   into:
> +
> +     pstw data_reg,var@pcrel
> +
> +     <possibly other insns that do not use 'addr_reg' or 'data_reg'>
> +
> +     nop
> +
> +   If the variable is not defined in the main program or the code using it is
> +   not in the main program, the linker put the address in the .got section 
> and
> +   do:

"puts and does"?  Or "will put and will do"?

> +             .section .got
> +     .Lvar_got:
> +             .dword var
> +
> +             .section .text
> +             pld addr_reg,.Lvar_got@pcrel
> +
> +             <possibly other insns that do not use 'addr_reg' or 'data_reg'>
> +
> +             stw data_reg,0(addr_reg)
> +
> +   We only look for a single usage in the basic block where the external
> +   address is loaded.  Multiple uses or references in another basic block 
> will
> +   force us to not use the PCREL_OPT relocation.  */

That sounds like it is a restriction, but you cannot do better at all,
not without communicating a lot more info to the linker anyway.

> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "backend.h"
> +#include "rtl.h"
> +#include "tree.h"
> +#include "memmodel.h"
> +#include "expmed.h"
> +#include "optabs.h"
> +#include "recog.h"
> +#include "df.h"
> +#include "tm_p.h"
> +#include "ira.h"
> +#include "print-tree.h"
> +#include "varasm.h"
> +#include "explow.h"
> +#include "expr.h"
> +#include "output.h"
> +#include "tree-pass.h"
> +#include "rtx-vector-builder.h"
> +#include "print-rtl.h"
> +#include "insn-attr.h"
> +#include "insn-codes.h"

Do you need all these header files?  It looks quite reduced, and in the
right order, but did you check :-)

> +/* Various counters.  */
> +static struct {
> +  unsigned long extern_addrs;
> +  unsigned long loads;
> +  unsigned long adjacent_loads;
> +  unsigned long failed_loads;
> +  unsigned long stores;
> +  unsigned long adjacent_stores;
> +  unsigned long failed_stores;
> +} counters;

There is the whole statistics.[ch] you could use for this.  I've never
used it, no idea if there are actual advantages to it :-)

> +/* Return a marker to identify the PCREL_OPT load address and
> +   load/store instruction.  We use a unique integer which is appended
> +   to ".Lpcrel" to make the label.  */
> +
> +static rtx
> +pcrel_opt_next_marker (void)
> +{
> +  static unsigned int pcrel_opt_next_num;
> +
> +  pcrel_opt_next_num++;
> +  return GEN_INT (pcrel_opt_next_num);
> +}

You could just open-code it the whole two places you use this, that is
more straightforward.

> +/* Optimize a PC-relative load address to be used in a load.
> +
> +   If the sequence of insns is safe to use the PCREL_OPT optimization (i.e. 
> no
> +   additional references to the address register, the address register dies 
> at
> +   the load, and no references to the load), convert insns of the form:
> +
> +     (set (reg:DI addr)
> +          (symbol_ref:DI "ext_symbol"))
> +
> +     ...
> +
> +     (set (reg:<MODE> value)
> +          (mem:<MODE> (reg:DI addr)))
> +
> +   into:
> +
> +     (parallel [(set (reg:DI addr)
> +                     (unspec:<MODE> [(symbol_ref:DI "ext_symbol")
> +                                     (const_int label_num)
> +                                     (const_int 0)]
> +                                    UNSPEC_PCREL_OPT_LD_ADDR))
> +                (set (reg:DI data)
> +                     (unspec:DI [(const_int 0)]
> +                                UNSPEC_PCREL_OPT_LD_ADDR))])
> +
> +     ...
> +
> +     (parallel [(set (reg:<MODE>)
> +                     (unspec:<MODE> [(mem:<MODE> (reg:DI addr))
> +                                     (reg:DI data)
> +                                     (const_int label_num)]
> +                                    UNSPEC_PCREL_OPT_LD_RELOC))
> +                (clobber (reg:DI addr))])

What are the two (const_int 0) in that?  (As usual, mention that in the
code, not just to me :-) )  Also, why does this not simply use multiple
values as index (instead of UNSPEC_PCREL_OPT_LD_ADDR for all three
cases)?

The second of those has no inputs, what prevents this from being
optimised wrongly (say, a few of them can be CSE'd together)?

> +   If the register being loaded is the same register that was used to hold 
> the
> +   external address, we generate the following insn instead:
> +
> +     (set (reg:DI data)
> +          (unspec:DI [(symbol_ref:DI "ext_symbol")
> +                      (const_int label_num)
> +                      (const_int 1)]
> +                     UNSPEC_PCREL_OPT_LD_ADDR))
> +
> +   In the first insn, we set both the address of the external variable, and
> +   mark that the variable being loaded both are created in that insn, and are
> +   consumed in the second insn.  It doesn't matter what mode the register 
> that
> +   we will ultimately do the load into, so we use DImode.  We just need to 
> mark
> +   that both registers may be set in the first insn, and will be used in the
> +   second insn.

Fix up the language in this paragraph?  It breaks apart in a few places.

> +static void
> +pcrel_opt_load (rtx_insn *addr_insn,         /* insn loading address.  */
> +             rtx_insn *load_insn)            /* insn using address.  */

Write a comment before the function instead, explaining what it is, and
what the parameters are (and are for).  And say what it does if it
figures out it cannot optimise this access (namely, nothing; the
function name should probably include "maybe_" or similar).

> +  /* Make sure there are no references to the register being loaded
> +     between the two insns.  */
> +  rtx reg = SET_DEST (load_set);
> +  if (!register_operand (reg, GET_MODE (reg))
> +      || reg_used_between_p (reg, addr_insn, load_insn)
> +      || reg_set_between_p (reg, addr_insn, load_insn))
> +    return;

What is the register_operand check for?  Should the caller not check
that?  You have asserts for most other preconditions here.

(More on this code later).

> +  machine_mode reg_mode = GET_MODE (reg);
> +  machine_mode mem_mode = GET_MODE (mem);

Those two cannot be different?  Oh, you later adjust mem_mode if "mem"
is not a mem at all (and set "mem_inner" to the actual mem involved).
A bit confusing.

> +  /* LWA is a DS format instruction, but LWZ is a D format instruction.  We 
> use
> +     DImode for the mode to force checking whether the bottom 2 bits are 0.
> +     However FPR and vector registers uses the LFIWAX/LXSIWAX instructions
> +     which only have indexed forms.  */

... And why is that bad?  (Again, that needs to be a code comment).

Is this simply a premature optimisation, since you later check that this
is an offset access anyway?

Btw, don't explain why you cannot handle some situation, since most
likely you *can*, with enough effort; just say you do not handle it :-)

> +  /* If the address isn't a non-prefixed offsettable instruction, we can't do
> +     the optimization.  */
> +  if (!offsettable_non_prefixed_memory (reg, mem_mode, mem_inner))
> +    return;

This is a very misleading name.  Whether the address is offsettable is
inconsequential, but that is not what that function compute anyway: it
simply looks if this is some kind of D-mode instruction (or can be
implemented as one).  An "offsettable address" in GCC has a very
specific meaning, namely (from recog.c:offsettable_address_addr_space_p):

  Return 1 if Y is a memory address which contains no side effects
  and would remain valid for address space AS after the addition of
  a positive integer less than the size of that mode.

> +  /* Note whether the changes were sucessful or not.  */

(typo)

> +  if (apply_change_group ())
> +    {
> +      /* PCREL_OPT load optimization succeeded.  */
> +      counters.loads++;
> +      if (next_nonnote_insn (addr_insn) == load_insn)
> +     counters.adjacent_loads++;
> +
> +      if (dump_file)
> +     fprintf (dump_file,
> +              "PCREL_OPT load (addr insn = %d, use insn = %d).\n",
> +              INSN_UID (addr_insn),
> +              INSN_UID (load_insn));
> +
> +      df_analyze ();
> +    }
> +  else

The df_analyze needs a comment, or better: make the code mode obvious?
(Or both ;-) )

> +/* Optimize a PC-relative load address to be used in a store.
> +
> +   If the sequence of insns is safe to use the PCREL_OPT optimization (i.e. 
> no
> +   additional references to the address register, the address register dies 
> at
> +   the load, and no references to the load), convert insns of the form:

So you want to know that the register is set only once, and used only
once.  This can be much easier and much more obvious (and obviously
correct!) checked using DF.

> +  /*  Don't allow storing the address of the external variable.  Make sure 
> the
> +      value being stored wasn't updated.  */

That comment can use some work, itt is hard to see how it corresponds to
the code:

> +  if (!register_operand (reg, GET_MODE (reg))
> +      && reg_or_subregno (reg) != reg_or_subregno (addr_reg)
> +      && !reg_set_between_p (reg, addr_insn, store_insn))
> +    return;


> +  /* Update the store insn.  Add an explicit clobber of the external address
> +     register just to be sure there are no additional uses of the address
> +     register.

You could just do that in a separate statement then; but what is the
point really?  You could check if it is really dead here, *that* makes
sense perhaps.

> +     (parallel [(set (mem (addr_reg)
> +                     (unspec:<MODE> [(reg)
> +                                     (const_int label_num)]
> +                                    UNSPEC_PCREL_OPT_ST_RELOC))
> +               (clobber (reg:DI addr_reg))])  */



> +  /* Note whether changes succeeded or not.  */
> +  if (apply_change_group ())
> +    {
> +      /* PCREL_OPT store succeeded.  */
> +      counters.stores++;
> +      if (next_nonnote_insn (addr_insn) == store_insn)
> +     counters.adjacent_stores++;
> +
> +      if (dump_file)
> +     fprintf (dump_file,
> +              "PCREL_OPT store (addr insn = %d, use insn = %d).\n",
> +              INSN_UID (addr_insn),
> +              INSN_UID (store_insn));
> +
> +      df_analyze();
> +    }
> +  else
> +    {
> +      /* PCREL_OPT store failed.  */
> +      counters.failed_stores++;
> +      if (dump_file)
> +     fprintf (dump_file,
> +              "PCREL_OPT store failed (addr insn = %d, use insn = %d).\n",
> +              INSN_UID (addr_insn),
> +              INSN_UID (store_insn));
> +    }
> +
> +  return;
> +}

  if (!apply_change_group ())
    {
      failure stuff;
      return;
    }

  non-failure stuff;

Also, never end a function with  return;  without value, that is just
silly.

> +/* Return the register used as the base register of MEM.  We look for
> +   BSWAP and UNSPEC (which might be LFIWAX/LFIWZX/STFIWX) to exclude
> +   instructions that do not have a pc-relative form.  We don't
> +   explicitly look for lxvd2x (rotate or vec_select) because we do not
> +   expect to see that generated for p9 or newer anyway.  */

Then assert for that?

> +
> +static rtx
> +get_mem_base_reg (rtx mem)
> +{
> +  const char * fmt;
> +  /* If we have a zero_extend, etc., strip them.  */

This comment is misleading, the code here does many other things.

> +  while (!MEM_P (mem))
> +    {
> +      if (GET_CODE (mem) == BSWAP
> +       || GET_CODE (mem) == UNSPEC)
> +     return NULL_RTX;
> +      if (GET_RTX_LENGTH (GET_CODE (mem)) < 1)
> +     return NULL_RTX;

This is unsigned, so just write  != 0  if that is what you want.  But
why do this anyway?  Just check if the rtx_class is what you want, or
even just assert something?

> +      fmt = GET_RTX_FORMAT (GET_CODE (mem));
> +      if (fmt[0] != 'e')
> +     return NULL_RTX;

So this catches UNSPEC already.  So remove it from above (or also handle
things like UNSPEC_VOLATILE there!)

> +      mem = XEXP (mem, 0);
> +      if (mem == NULL_RTX )
> +     return NULL_RTX;

It is invalid RTL then.  Either just assert or let it ICE elsewhere?
It might not even get this far anyway.

> +  rtx addr_rtx;
> +  if (!MEM_SIZE_KNOWN_P (mem))
> +    return NULL_RTX;
> +
> +  addr_rtx = (XEXP (mem, 0));

So declare it here, not randomly some line earlier?

> +  if (GET_CODE (addr_rtx) == PRE_MODIFY)
> +    addr_rtx = XEXP (addr_rtx, 1);
> +
> +  while (GET_CODE (addr_rtx) == PLUS
> +      && CONST_INT_P (XEXP (addr_rtx, 1)))
> +    addr_rtx = XEXP (addr_rtx, 0);
> +
> +  return REG_P (addr_rtx) ? addr_rtx : NULL_RTX;

  if (!REG_P (addr_rtx))
    return NULL_RTX;

  return addr_rtx;

Don't obfuscate the control flow please.  ?: is great for golfing, but
for writing real code it is fine to be more verbose if that is clearer.

> +/* Check whether INSN contains an invalid reference to REGNO.  If TYPE is a
> +   load or store instruction, then we cannot allow any definitions of REGNO.
> +   If TYPE is a load instruction, then we cannot allow any uses either.  */
> +
> +static bool
> +insn_references_regno_p (rtx_insn *insn, unsigned int regno, enum attr_type 
> type)

(line too long)

That name suggests this function does something totally different from
what its comment says.

(And "invalid" is not a good word to use here -- what you want to say
here is something like "disallowed").

> +  /* Any uses of REGNO are invalid if we're attempting to optimize a load.  
> */

This needs explanation.

> +  /* The use instruction must be a single non-prefixed instruction.  */
> +  if (get_attr_length (use_insn) != 4)
> +    return;

This seems fragile.

> +  /* Check the insns between loading the address and its use to classify what
> +     type of insn it is.  */

What does this mean?  This is not what the code does.

> +  for (insn = NEXT_INSN (addr_insn);
> +       insn != use_insn;
> +       insn = NEXT_INSN (insn))
> +    {
> +      /* If we see things like labels, calls, etc., or we've reached the end
> +      of the block without seeing the load or store, then don't do the
> +      PCREL_OPT optimization.  */

You cannot have a label without having a new bb.

> +static unsigned int
> +pcrel_opt_pass (function *fun)
> +{
> +  basic_block bb;
> +  rtx_insn *insn, *curr_insn = 0;
> +
> +  memset ((char *) &counters, '\0', sizeof (counters));

memset (&counters, 0, sizeof counters);

Or simply
  counters = {0};

> +  /* Dataflow analysis for use-def chains.  */
> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
> +  df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
> +  df_note_add_problem ();

Why?  Is that used?

> +  df_analyze ();
> +  df_set_flags (DF_DEFER_INSN_RESCAN | DF_LR_RUN_DCE);

Does defer work here?  Can we ever consider an insn that has already
been modified?  What guarantees that?

> +  /* Look at each basic block to see if there is a load of an external
> +     variable's external address, and a single load/store using that external
> +     address.  */
> +  FOR_ALL_BB_FN (bb, fun)
> +    {
> +      FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
> +     {
> +       if (NONJUMP_INSN_P (insn)
> +           && single_set (insn)
> +           && get_attr_loads_extern_addr (insn) == LOADS_EXTERN_ADDR_YES)
> +         pcrel_opt_address (insn);
> +     }
> +    }

This loop (together with scanning insns manually in pcrel_opt_address)
is one of the many places this code is (potentially) quadratic time.

> +  if (dump_file)
> +    {
> +      fprintf (dump_file,
> +            "\n# of load(s) of an address of an external symbol = %lu\n",
> +            counters.extern_addrs);

Number of loads is always plural -- "number of load" is incorrect.

> +  df_remove_problem (df_chain);
> +  df_process_deferred_rescans ();
> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS | DF_LR_RUN_DCE);
> +  df_analyze ();

Why do another df_analyze here?  Or the df_set_flags, even.

> --- /dev/null
> +++ b/gcc/config/rs6000/pcrel-opt.md
> @@ -0,0 +1,386 @@
> +;; Machine description for the PCREL_OPT optimization.
> +;; Copyright (C) 2020 Free Software Foundation, Inc.

(Fix the copyright dates).

> +;; Modes that are supported for PCREL_OPT
> +(define_mode_iterator PO [QI HI SI DI TI SF DF KF
> +                       V1TI V2DI V4SI V8HI V16QI V2DF V4SF
> +                       (TF "TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD")])

"PO"?  Please use a more meaningful name.  All such names are global
(everything in an MD is).

> +;; Scalar types that can be optimized by loading them into floating point
> +;; or Altivec registers.
> +(define_mode_iterator PO_FP [DI DF SF])
> +
> +;; Load instructions to load up scalar floating point or 64-bit integer 
> values
> +;; into floating point registers or Altivec registers.
> +(define_mode_attr PO_FPR_LD [(DI "lfd")       (DF "lfd")  (SF "lfs")])

(stray tab)

> +(define_mode_attr PO_AVX_LD [(DI "lxsd") (DF "lxsd") (SF "lxssp")])

"AVX" is an x86 thing.  AltiVec is called VMX if you want a short name.
It's perhaps easier to just describe this as all VSX registers (you just
have different opcodes for the low and high 32 regs, but exactly the
same semantics).

> +
> +;; PCREL_OPT load operation of scalar DF/DI/SF into vector registers.
> +(define_insn "*pcrel_opt_ld<mode>_vsx"
> +  [(set (match_operand:PO_FP 0 "vsx_register_operand" "+d,v")
> +     (unspec:PO_FP [(match_operand:PO_FP 1 "d_form_memory" "o,o")

"o" is always wrong, we have no insns that can take any "o".  You do not
want "o" here anyway, you want to say this is D-form I presume.  So just
say that!  Some of thses are D, some are DS.  It works the way this is
written because these insns can only be generated by the pcrelopt code
(the unspec helps guarantee that).  So just use "m"?

> +(define_insn "*pcrel_opt_stdi"
> +  [(set (match_operand:DI 0 "d_form_memory" "=o,o,o")
> +     (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r,d,v")
> +                 (match_operand 2 "const_int_operand" "n,n,n")]
> +                UNSPEC_PCREL_OPT_ST_RELOC))
> +   (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))]
> +  "TARGET_PCREL_OPT && TARGET_POWERPC64"
> +{
> +  output_pcrel_opt_reloc (operands[2]);
> +  switch (which_alternative)
> +    {
> +    case 0: return "std %1,%0";
> +    case 1: return "stfd %1,%0";
> +    case 2: return "stxsd %1,%0";
> +    default: gcc_unreachable ();
> +    }
> +}
> +  [(set_attr "type" "store,fpstore,fpstore")])

Wrong indent (needs a newline after the colons).
Is there no better way to do this (less manual)?

> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -1876,3 +1876,26 @@ (define_predicate "prefixed_memory"
>  {
>    return address_is_prefixed (XEXP (op, 0), mode, NON_PREFIXED_DEFAULT);
>  })
> +
> +;; Return true if the operand is a valid memory operand with an offsettable
> +;; address that could be merged with the load of a PC-relative external 
> address
> +;; with the PCREL_OPT optimization.

This has nothing to do with offsettable addresses.  An offsettable
address X for accessing something with mode M is one where X + size of M
is also a valid address.  Just write "some D-form" when that is what you
need here.

>  We don't check here whether or not the
> +;; offset needs to be used in a DS-FORM (bottom 2 bits 0) or DQ-FORM (bottom 
> 4
> +;; bits 0) instruction.
> +(define_predicate "d_form_memory"
> +  (match_code "mem")
> +{
> +  if (!memory_operand (op, mode))
> +    return false;
> +
> +  rtx addr = XEXP (op, 0);
> +
> +  if (REG_P (addr) || SUBREG_P (addr))
> +    return true;

That is not correct; you need to check that it is a SUBREG of a REG.
You shouldn't see subregs of mem after reload, but if someone starts
using this predicate for something else there will be unwelcome
surprises.

> +  if (GET_CODE (addr) != PLUS)
> +    return false;
> +
> +  return (base_reg_operand (XEXP (addr, 0), Pmode)
> +       && satisfies_constraint_I (XEXP (addr, 1)));

Why?!?  You already checked this is a memory_operand right at the start.
The only thing you need to check is that it is not an indexed operand
(via indexed_address, perhaps).

> +  /* Pass to add the appropriate vector swaps on power8 little endian 
> systems.
> +     The power8 does not have instructions that automaticaly do the byte 
> swaps
> +     for loads and stores.  */
>    INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);

That should be a separate patch.  Please remove this from this patch.

> +  if (!TARGET_PCREL && TARGET_PCREL_OPT)
> +    {
> +      if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL_OPT) != 0)
> +     error ("%qs requires %qs", "-mpcrel-opt", "-mpcrel");
> +
> +     rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
> +    }

Please just make the new flag not do anything if pcrel is not enabled
(but having the pass not run in that case).

> +/* Return true if an REG with a given MODE is loaded from or stored into a 
> MEM
> +   location uses a non-prefixed offsettable address.  This is used to 
> validate

Please fix that sentence.  Also, offsettable is nonsense.

> @@ -25771,11 +25854,34 @@ void
>  rs6000_asm_output_opcode (FILE *stream)
>  {
>    if (next_insn_prefixed_p)
> -    fprintf (stream, "p");
> +    {
> +      fprintf (stream, "p");
> +
> +      /* Reset flag in case there are separate insn lines in the sequence, so
> +      the 'p' is only emited for the first line.  This shows up when we are

Spelling.

> +;; Whether an insn loads an external address for the PCREL_OPT optimizaton.
> +(define_attr "loads_extern_addr" "no,yes"
> +  (const_string "no"))

It should be called loads_external_address if that is what it does.  Not
random characters removed from the name.

It really needs a better name than this.  It seems to be about loading
the value of a global symbol or such, instead?

> --- a/gcc/config/rs6000/t-rs6000
> +++ b/gcc/config/rs6000/t-rs6000
> @@ -23,6 +23,10 @@ TM_H += $(srcdir)/config/rs6000/rs6000-cpus.def
>  TM_H += $(srcdir)/config/rs6000/rs6000-modes.h
>  PASSES_EXTRA += $(srcdir)/config/rs6000/rs6000-passes.def
>  
> +pcrel-opt.o: $(srcdir)/config/rs6000/pcrel-opt.c
> +     $(COMPILE) $<
> +     $(POSTCOMPILE)

The filename should start with rs6000-, to prevent collissions (in the
object dir this shares the space with everything from gcc/).

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-inc-di.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_pcrel } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */

Don't check for lp64 separately?  The pcrel test already does currently,
and if that ever changes we *want* this to be tested.


So many things wrong with this still.  But, we can reliably disable the
whole thing, and it of course improves performance a lot.

Aaron, could you please fix everything you think you can do easily, and
repost (noting the things you did *not* do?)  Thanks in advance!


Segher

Reply via email to