[PATCH 1/3] Power10: Add PCREL_OPT load support.

This patch adds support for optimizing power10 loads of an external variable to
eliminate loading the address of the variable, and then doing a subsequent load
using that address.

I have built compilers with and without this set of 3 patches, doing a
bootstrap build and make check in each case.  There were no regressions, and
the new tests passed.  Can I check these patches into the master branch for
GCC?  Because this is new functionality, I do not intend to back port these
patches to GCC 10 at this time.

gcc/
2020-08-18  Michael Meissner  <meiss...@linux.ibm.com>

        * config.gcc (powerpc*-*-*): Add pcrel-opt.o.
        (rs6000*-*-*): Add pcrel-opt.o.
        * config/rs6000/pcrel-opt.c: New file.
        * config/rs6000/pcrel-opt.md: New file.
        * config/rs6000/predicates.md (d_form_memory): New predicate.
        * config/rs6000/rs6000-cpus.def (OTHER_POWER10_MASKS): Add
        -mpcrel-opt.
        (POWERPC_MASKS): Add -mpcrel-opt.
        * config/rs6000/rs6000-passes.def: Add PCREL_OPT pass.
        * config/rs6000/rs6000-protos.h (reg_to_non_prefixed): New
        declaration.
        (make_pass_pcrel_opt): New declaration.
        * config/rs6000/rs6000.c (rs6000_option_override_internal): Add
        support for -mpcrel-opt.
        (rs6000_delegitimize_address): Add support for PCREL_OPT
        addresses.
        (print_operand, 'r' case): New operand for PCREL_OPT.
        (rs6000_opt_masks): Add -mpcrel-opt.
        (rs6000_asm_output_opcode): Reset flag to emit the initial 'p'
        after use.
        * config/rs6000/rs6000.md (loads_extern_addr attribute): New
        attribute.
        (isa attribute): Add pcrel_opt sub-case.
        (enabled attribute): Add support for pcrel_opt.
        (pcrel_extern_addr): Set loads_extern_addr attribute.
        (toplevel): Include pcrel-opt.md.
        * config/rs6000/rs6000.opt (-mpcrel-opt): New debug option.
        * config/rs6000/t-rs6000 (pcrel-opt.o): Add build rule.
        (MD_INCLUDES): Add pcrel-opt.md.
---
 gcc/config.gcc                      |   6 +-
 gcc/config/rs6000/pcrel-opt.c       | 656 ++++++++++++++++++++++++++++++++++++
 gcc/config/rs6000/pcrel-opt.md      | 248 ++++++++++++++
 gcc/config/rs6000/predicates.md     |  23 ++
 gcc/config/rs6000/rs6000-cpus.def   |   2 +
 gcc/config/rs6000/rs6000-passes.def |   8 +
 gcc/config/rs6000/rs6000-protos.h   |   2 +
 gcc/config/rs6000/rs6000.c          |  40 ++-
 gcc/config/rs6000/rs6000.md         |  14 +-
 gcc/config/rs6000/rs6000.opt        |   4 +
 gcc/config/rs6000/t-rs6000          |   7 +-
 11 files changed, 1001 insertions(+), 9 deletions(-)
 create mode 100644 gcc/config/rs6000/pcrel-opt.c
 create mode 100644 gcc/config/rs6000/pcrel-opt.md

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 2370368..605d743 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -505,7 +505,7 @@ or1k*-*-*)
        ;;
 powerpc*-*-*)
        cpu_type=rs6000
-       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o rs6000-call.o"
+       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o rs6000-call.o pcrel-opt.o"
        extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
        extra_headers="${extra_headers} bmi2intrin.h bmiintrin.h"
        extra_headers="${extra_headers} xmmintrin.h mm_malloc.h emmintrin.h"
@@ -520,6 +520,7 @@ powerpc*-*-*)
        esac
        extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
        target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.c \$(srcdir)/config/rs6000/rs6000-call.c"
+       target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/pcrel-opt.c"
        ;;
 pru-*-*)
        cpu_type=pru
@@ -531,8 +532,9 @@ riscv*)
        ;;
 rs6000*-*-*)
        extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
-       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o rs6000-call.o"
+       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o rs6000-call.o pcrel-opt.o"
        target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.c \$(srcdir)/config/rs6000/rs6000-call.c"
+       target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/pcrel-opt.c"
        ;;
 sparc*-*-*)
        cpu_type=sparc
diff --git a/gcc/config/rs6000/pcrel-opt.c b/gcc/config/rs6000/pcrel-opt.c
new file mode 100644
index 0000000..10b4bc4
--- /dev/null
+++ b/gcc/config/rs6000/pcrel-opt.c
@@ -0,0 +1,656 @@
+/* Subroutines used to support the pc-relative linker optimization.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file implements a RTL pass that looks for pc-relative loads of the
+   address of an external variable using the PCREL_GOT relocation and a single
+   load that uses that external address.  If that is found we create the
+   PCREL_OPT relocation to possibly convert:
+
+       pld addr_reg,var@pcrel@got(0),1
+
+       <possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+       lwz data_reg,0(addr_reg)
+
+   into:
+
+       plwz data_reg,var@pcrel(0),1
+
+       <possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+       nop
+
+   If the variable is not defined in the main program or the code using it is
+   not in the main program, the linker puts the address in the .got section and
+   does:
+
+               .section .got
+       .Lvar_got:
+               .dword var
+
+               .section .text
+               pld addr_reg,.Lvar_got@pcrel(0),1
+
+               <possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+               lwz data_reg,0(addr_reg)
+
+   We only look for a single usage in the basic block where the external
+   address is loaded.  Multiple uses or references in another basic block will
+   force us to not use the PCREL_OPT relocation.  */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "memmodel.h"
+#include "expmed.h"
+#include "optabs.h"
+#include "recog.h"
+#include "df.h"
+#include "tm_p.h"
+#include "ira.h"
+#include "print-tree.h"
+#include "varasm.h"
+#include "explow.h"
+#include "expr.h"
+#include "output.h"
+#include "tree-pass.h"
+#include "rtx-vector-builder.h"
+#include "print-rtl.h"
+#include "insn-attr.h"
+#include "insn-codes.h"
+
+
+// Maximum number of insns to scan between the load address and the load that
+// uses that address.  This can be bumped up if desired.  If the insns are far
+// enough away, the PCREL_OPT optimization probably does not help, since the
+// load of the external address has probably completed by the time we do the
+// load of the variable at that address.
+const int MAX_PCREL_OPT_INSNS  = 10;
+
+/* Next PCREL_OPT label number.  */
+static unsigned int pcrel_opt_next_num;
+
+/* Various counters.  They are zeroed at the start of every run of the pass
+   and are only reported in the dump file.  */
+static struct {
+  // Number of insns loading an external address that the pass looked at.
+  unsigned long extern_addrs;
+  // Number of loads that were successfully converted to PCREL_OPT form.
+  unsigned long loads;
+  // Histogram of converted loads, indexed by (insns scanned - 1): entry 0
+  // counts loads directly following the address load, entry N counts loads
+  // with N other active insns in between.
+  unsigned long load_separation[MAX_PCREL_OPT_INSNS+1];
+} counters;
+
+
+// Optimize a PC-relative load address to be used in a load.
+//
+// If the sequence of insns is safe to use the PCREL_OPT optimization (i.e. no
+// additional references to the address register, the address register dies at
+// the load, and no references to the load), convert insns of the form:
+//
+//     (set (reg:DI addr)
+//          (symbol_ref:DI "ext_symbol"))
+//
+//     ...
+//
+//     (set (reg:<MODE> value)
+//          (mem:<MODE> (reg:DI addr)))
+//
+// into:
+//
+//     (parallel [(set (reg:DI addr)
+//                      (unspec:DI [(symbol_ref:DI "ext_symbol")
+//                                  (const_int label_num)]
+//                                 UNSPEC_PCREL_OPT_LD_ADDR))
+//                 (set (reg:DI data)
+//                      (unspec:DI [(const_int 0)]
+//                                UNSPEC_PCREL_OPT_LD_ADDR))])
+//
+//     ...
+//
+//     (parallel [(set (reg:<MODE> value)
+//                      (unspec:<MODE> [(mem:<MODE> (reg:DI addr))
+//                                     (reg:DI data)
+//                                      (const_int label_num)]
+//                                     UNSPEC_PCREL_OPT_LD_RELOC))
+//                 (clobber (reg:DI addr))])
+//
+// If the register being loaded is the same register that was used to hold the
+// external address, we generate the following insn instead:
+//
+//     (set (reg:DI data)
+//           (unspec:DI [(symbol_ref:DI "ext_symbol")
+//                       (const_int label_num)]
+//                     UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG))
+//
+// In the first insn, we set the register holding the address of the external
+// variable and also mark the register that will eventually be loaded as set,
+// so that both registers are created in that insn and consumed in the second
+// insn.  The mode of the register that is ultimately loaded does not matter
+// for this marker, so we use DImode.  We just need to record that both
+// registers may be set in the first insn and will be used in the second insn.
+//
+// The UNSPEC_PCREL_OPT_LD_ADDR insn will generate the load address plus
+// a definition of a label (.Lpcrel<n>), while the UNSPEC_PCREL_OPT_LD_RELOC
+// insn will generate the .reloc to tell the linker to tie the load address and
+// load using that address together.
+//
+//     pld b,ext_symbol@got@pcrel(0),1
+// .Lpcrel1:
+//
+//     ...
+//
+//     .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+//     lwz r,0(b)
+//
+// If ext_symbol is defined in another object file in the main program and we
+// are linking the main program, the linker will convert the above instructions
+// to:
+//
+//     plwz r,ext_symbol@pcrel(0),1
+//
+//     ...
+//
+//     nop
+//
+// Return true if the PCREL_OPT load optimization succeeded.
+
+static bool
+do_pcrel_opt_load (rtx_insn *addr_insn,                // insn loading address
+                  rtx_insn *load_insn)         // insn using address
+{
+  // The caller has already checked that ADDR_INSN is a plain SET of a base
+  // register from an external symbol, and that LOAD_INSN has a single set.
+  rtx addr_set = PATTERN (addr_insn);
+  rtx addr_reg = SET_DEST (addr_set);
+  rtx addr_symbol = SET_SRC (addr_set);
+  rtx load_set = single_set (load_insn);
+  rtx reg = SET_DEST (load_set);
+  rtx mem = SET_SRC (load_set);
+  machine_mode reg_mode = GET_MODE (reg);
+  machine_mode mem_mode = GET_MODE (mem);
+  rtx mem_inner = mem;
+  unsigned int reg_regno = reg_or_subregno (reg);
+
+  // LWA is a DS format instruction, but LWZ is a D format instruction.  We use
+  // DImode for the mode to force checking whether the bottom 2 bits are 0.
+  // However FPR and vector registers uses the LFIWAX instruction which is
+  // indexed only.
+  if (GET_CODE (mem) == SIGN_EXTEND && GET_MODE (XEXP (mem, 0)) == SImode)
+    {
+      if (!INT_REGNO_P (reg_regno))
+       return false;
+
+      mem_inner = XEXP (mem, 0);
+      mem_mode = DImode;
+    }
+
+  // For any other extension, look through it and use the mode of the memory
+  // operand itself when classifying the address.
+  else if (GET_CODE (mem) == SIGN_EXTEND
+          || GET_CODE (mem) == ZERO_EXTEND
+          || GET_CODE (mem) == FLOAT_EXTEND)
+    {
+      mem_inner = XEXP (mem, 0);
+      mem_mode = GET_MODE (mem_inner);
+    }
+
+  if (!MEM_P (mem_inner))
+    return false;
+
+  // If this is LFIWAX or similar instructions that are indexed only, we can't
+  // do the optimization.
+  enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
+  if (non_prefixed == NON_PREFIXED_X)
+    return false;
+
+  // The optimization will only work on non-prefixed offsettable loads.
+  rtx addr = XEXP (mem_inner, 0);
+  enum insn_form iform = address_to_insn_form (addr, mem_mode, non_prefixed);
+  if (iform != INSN_FORM_BASE_REG
+      && iform != INSN_FORM_D
+      && iform != INSN_FORM_DS
+      && iform != INSN_FORM_DQ)
+    return false;
+
+  // Allocate a new PC-relative label, and update the load external address
+  // insn.  When the address and data registers differ, the new pattern is:
+  //
+  // (parallel [(set (reg addr)
+  //                 (unspec [(symbol_ref addr_symbol)
+  //                          (const_int label_num)]
+  //                         UNSPEC_PCREL_OPT_LD_ADDR))
+  //            (set (reg data)
+  //                 (unspec [(const_int 0)]
+  //                        UNSPEC_PCREL_OPT_LD_ADDR))])
+  //
+  // When they are the same register, the single-set "same_reg" pattern is
+  // used instead.
+  //
+  // Note that the label number is consumed even if the optimization is
+  // backed out below; the counter is not rolled back.
+
+  ++pcrel_opt_next_num;
+  unsigned int addr_regno = reg_or_subregno (addr_reg);
+  rtx label_num = GEN_INT (pcrel_opt_next_num);
+  rtx reg_di = gen_rtx_REG (DImode, reg_regno);
+
+  PATTERN (addr_insn)
+    = ((addr_regno != reg_regno)
+       ? gen_pcrel_opt_ld_addr (addr_reg, addr_symbol, label_num, reg_di)
+       : gen_pcrel_opt_ld_addr_same_reg (addr_reg, addr_symbol, label_num));
+
+  // Revalidate the insn, backing out of the optimization if the insn is not
+  // supported.
+  INSN_CODE (addr_insn) = recog (PATTERN (addr_insn), addr_insn, 0);
+  if (INSN_CODE (addr_insn) < 0)
+    {
+      PATTERN (addr_insn) = addr_set;
+      INSN_CODE (addr_insn) = recog (PATTERN (addr_insn), addr_insn, 0);
+      return false;
+    }
+
+  // Update the load insn.  If the mem had a sign/zero/float extend, add that
+  // also after doing the UNSPEC.  Add an explicit clobber of the external
+  // address register just to make it clear that the address register dies.
+  // If the address register is also the register being loaded, the clobber
+  // is of a SCRATCH instead.
+  //
+  // (parallel [(set (reg:<MODE> data)
+  //                 (unspec:<MODE> [(mem (addr_reg))
+  //                                 (reg:DI data)
+  //                                 (const_int label_num)]
+  //                                UNSPEC_PCREL_OPT_LD_RELOC))
+  //            (clobber (reg:DI addr_reg))])
+
+  rtvec v_load = gen_rtvec (3, mem_inner, reg_di, label_num);
+  rtx new_load = gen_rtx_UNSPEC (GET_MODE (mem_inner), v_load,
+                                UNSPEC_PCREL_OPT_LD_RELOC);
+
+  // MEM and MEM_INNER only have different codes when MEM was an extension
+  // wrapped around the memory reference; re-apply that extension here.
+  if (GET_CODE (mem) != GET_CODE (mem_inner))
+    new_load = gen_rtx_fmt_e (GET_CODE (mem), reg_mode, new_load);
+
+  rtx old_load_set = PATTERN (load_insn);
+  rtx new_load_set = gen_rtx_SET (reg, new_load);
+  rtx load_clobber = gen_rtx_CLOBBER (VOIDmode,
+                                     (addr_regno == reg_regno
+                                      ? gen_rtx_SCRATCH (Pmode)
+                                      : addr_reg));
+  PATTERN (load_insn)
+    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, new_load_set, load_clobber));
+
+  // Revalidate the insn, backing out of the optimization if the insn is not
+  // supported.  Both insns are restored to their original patterns so the
+  // pair stays consistent.
+
+  INSN_CODE (load_insn) = recog (PATTERN (load_insn), load_insn, 0);
+  if (INSN_CODE (load_insn) < 0)
+    {
+      PATTERN (addr_insn) = addr_set;
+      INSN_CODE (addr_insn) = recog (PATTERN (addr_insn), addr_insn, 0);
+
+      PATTERN (load_insn) = old_load_set;
+      INSN_CODE (load_insn) = recog (PATTERN (load_insn), load_insn, 0);
+      return false;
+    }
+
+  return true;
+}
+
+
+/* Return the next real insn after INSN.  The scan gives up and returns NULL
+   as soon as it meets something that marks the end of straight-line code (a
+   call, jump, jump table, label or barrier) or when the insn chain runs
+   out.  */
+
+static rtx_insn *
+next_active_insn_in_basic_block (rtx_insn *insn)
+{
+  for (rtx_insn *scan = NEXT_INSN (insn); scan; scan = NEXT_INSN (scan))
+    {
+      /* Anything that transfers control or starts a new block stops the
+        search.  */
+      const bool stops_scan = (CALL_P (scan)
+                              || JUMP_P (scan)
+                              || JUMP_TABLE_DATA_P (scan)
+                              || LABEL_P (scan)
+                              || BARRIER_P (scan));
+      if (stops_scan)
+       return NULL;
+
+      /* Deleted insns, NOTEs and DEBUG_INSNs are stepped over.  */
+      if (scan->deleted () || !NONJUMP_INSN_P (scan))
+       continue;
+
+      /* A bare USE or CLOBBER is not a real insn either.  */
+      const enum rtx_code pat_code = GET_CODE (PATTERN (scan));
+      if (pat_code != USE && pat_code != CLOBBER)
+       return scan;
+    }
+
+  return NULL;
+}
+
+
+// Return true if INSN, which loads REG, is a single 4-byte instruction that
+// the PCREL_OPT optimization is able to handle.
+
+static bool
+is_single_instruction (rtx_insn *insn, rtx reg)
+{
+  // The destination must be a register (possibly wrapped in a SUBREG), and
+  // the insn must be exactly 4 bytes long.
+  const bool is_reg = REG_P (reg) || SUBREG_P (reg);
+  if (!is_reg || get_attr_length (insn) != 4)
+    return false;
+
+  // _Decimal128 and IBM extended double are always multiple instructions.
+  const machine_mode reg_mode = GET_MODE (reg);
+  const bool ibm_long_double = (reg_mode == TFmode && !TARGET_IEEEQUAD);
+  if (ibm_long_double || reg_mode == TDmode || reg_mode == IFmode)
+    return false;
+
+  // Values of 16 bytes or more are only accepted in VSX registers, which
+  // keeps PLQ/PSTQ instructions out of the optimization.
+  const unsigned int regno = reg_or_subregno (reg);
+  const unsigned int nbytes = GET_MODE_SIZE (reg_mode);
+  return nbytes < 16 || VSX_REGNO_P (regno);
+}
+
+
+// Given an insn that loads up a base register with the address of an
+// external symbol, see if we can optimize it with the PCREL_OPT optimization.
+//
+// The insns following ADDR_INSN are scanned (up to MAX_PCREL_OPT_INSNS - 1 of
+// them) for the first one that references the address register.  The
+// optimization is only attempted if that insn is a load, the address register
+// dies there, and no store was seen in between.
+
+static void
+do_pcrel_opt_addr (rtx_insn *addr_insn)
+{
+  int num_insns = 0;
+
+  // Do some basic validation.
+  rtx addr_set = PATTERN (addr_insn);
+  if (GET_CODE (addr_set) != SET)
+    return;
+
+  rtx addr_reg = SET_DEST (addr_set);
+  rtx addr_symbol = SET_SRC (addr_set);
+
+  if (!base_reg_operand (addr_reg, Pmode)
+      || !pcrel_external_address (addr_symbol, Pmode))
+    return;
+
+  // NOTE(review): had_load is updated in the loop below but never read in
+  // this function.
+  rtx_insn *insn = addr_insn;
+  bool looping = true;
+  bool had_load = false;       // whether intermediate insns had a load
+  bool had_store = false;      // whether intermediate insns had a store
+  bool is_load = false;                // whether the current insn is a load
+  bool is_store = false;       // whether the current insn is a store
+
+  // Check the following insns and see if it is a load or store that uses the
+  // external address.  If we can't do the optimization, just return.
+  while (looping)
+    {
+      is_load = is_store = false;
+
+      // Don't allow too many insns between the load of the external address
+      // and the eventual load or store.
+      if (++num_insns >= MAX_PCREL_OPT_INSNS)
+       return;
+
+      insn = next_active_insn_in_basic_block (insn);
+      if (!insn)
+       return;
+
+      // See if the current insn is a load or store
+      switch (get_attr_type (insn))
+       {
+         // While load of the external address is a 'load' for scheduling
+         // purposes, it should be safe to allow loading other external
+         // addresses between the load of the external address we are
+         // currently looking at and the load or store using that address.
+       case TYPE_LOAD:
+         if (get_attr_loads_extern_addr (insn) == LOADS_EXTERN_ADDR_YES)
+           break;
+         /* fall through */
+
+       case TYPE_FPLOAD:
+       case TYPE_VECLOAD:
+         is_load = true;
+         break;
+
+       case TYPE_STORE:
+       case TYPE_FPSTORE:
+       case TYPE_VECSTORE:
+         is_store = true;
+         break;
+
+         // Don't do the optimization through atomic operations.
+       case TYPE_LOAD_L:
+       case TYPE_STORE_C:
+       case TYPE_HTM:
+       case TYPE_HTMSIMPLE:
+         return;
+
+       default:
+         break;
+       }
+
+      // If the external address register was referenced, it must also die in
+      // the same insn.
+      if (reg_referenced_p (addr_reg, PATTERN (insn)))
+       {
+         if (!dead_or_set_p (insn, addr_reg))
+           return;
+
+         // This is the candidate insn; stop scanning.
+         looping = false;
+       }
+
+      // If it dies by being set without being referenced, exit.
+      else if (dead_or_set_p (insn, addr_reg))
+       return;
+
+      // If it isn't the insn we want, remember if there were loads or stores.
+      else
+       {
+         had_load |= is_load;
+         had_store |= is_store;
+       }
+    }
+
+  // If the insn does not use the external address, or the external address
+  // register does not die at this insn, we can't do the optimization.  (The
+  // loop above only exits normally when both conditions already hold, so
+  // this is a defensive recheck.)
+  if (!reg_referenced_p (addr_reg, PATTERN (insn))
+      || !dead_or_set_p (insn, addr_reg))
+    return;
+
+  rtx set = single_set (insn);
+  if (!set)
+    return;
+
+  // Optimize loads.  Nothing is done here when the candidate insn is a store.
+  if (is_load)
+    {
+      // If there were any stores in the insns between loading the external
+      // address and doing the load, turn off the optimization.
+      if (had_store)
+       return;
+
+      rtx reg = SET_DEST (set);
+      if (!is_single_instruction (insn, reg))
+       return;
+
+      // The source must be a MEM, optionally wrapped in a single
+      // sign/zero/float extension.
+      rtx mem = SET_SRC (set);
+      switch (GET_CODE (mem))
+       {
+       case MEM:
+         break;
+
+       case SIGN_EXTEND:
+       case ZERO_EXTEND:
+       case FLOAT_EXTEND:
+         if (!MEM_P (XEXP (mem, 0)))
+           return;
+         break;
+
+       default:
+         return;
+       }
+
+      // If the register being loaded was used or set between the load of
+      // the external address and the load using the address, we can't do
+      // the optimization.
+      if (reg_used_between_p (reg, addr_insn, insn)
+         || reg_set_between_p (reg, addr_insn, insn))
+       return;
+
+      // Process the load in detail.  On success, record how far apart the
+      // two insns were (index 0 means the load was the very next insn).
+      if (do_pcrel_opt_load (addr_insn, insn))
+       {
+         counters.loads++;
+         counters.load_separation[num_insns-1]++;
+       }
+    }
+
+  return;
+}
+
+
+// Optimize pcrel external variable references.
+//
+// This is the body of the PCREL_OPT pass for function FUN.  It sets up the
+// dataflow problems the helpers rely on, hands every insn that loads an
+// external address to do_pcrel_opt_addr, and then writes a summary of what
+// was done to the dump file (if there is one).  Always returns 0 (no extra
+// TODO flags).
+
+static unsigned int
+do_pcrel_opt_pass (function *fun)
+{
+  basic_block bb;
+  rtx_insn *insn, *curr_insn = 0;
+
+  // The counters are per-function; start from zero each time.
+  memset ((char *) &counters, '\0', sizeof (counters));
+
+  // Dataflow analysis for use-def chains.
+  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
+  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
+  df_note_add_problem ();
+  df_analyze ();
+  df_set_flags (DF_DEFER_INSN_RESCAN | DF_LR_RUN_DCE);
+
+  // Look at each basic block to see if there is a load of an external
+  // variable's external address, and a single load using that external
+  // address.
+  FOR_ALL_BB_FN (bb, fun)
+    {
+      FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
+       {
+         if (NONJUMP_INSN_P (insn) && single_set (insn)
+             && get_attr_loads_extern_addr (insn) == LOADS_EXTERN_ADDR_YES)
+           {
+             counters.extern_addrs++;
+             do_pcrel_opt_addr (insn);
+           }
+       }
+    }
+
+  // Insn patterns may have been rewritten; process the deferred rescans and
+  // recompute the dataflow information.
+  df_remove_problem (df_chain);
+  df_process_deferred_rescans ();
+  df_set_flags (DF_RD_PRUNE_DEAD_DEFS | DF_LR_RUN_DCE);
+  df_chain_add_problem (DF_UD_CHAIN);
+  df_note_add_problem ();
+  df_analyze ();
+
+  if (dump_file)
+    {
+      if (!counters.extern_addrs)
+       fprintf (dump_file, "\nNo external symbols were referenced\n");
+
+      else
+       {
+         fprintf (dump_file,
+                  "\n# of loads of an address of an external symbol = %lu\n",
+                  counters.extern_addrs);
+
+         if (!counters.loads)
+           fprintf (dump_file,
+                    "\nNo PCREL_OPT load optimizations were done\n");
+
+         else
+           {
+             fprintf (dump_file, "# of PCREL_OPT loads = %lu\n",
+                      counters.loads);
+
+             fprintf (dump_file, "# of adjacent PCREL_OPT loads = %lu\n",
+                      counters.load_separation[0]);
+
+             // Entry I of the histogram is the number of loads that had I
+             // other insns between them and the address load.  The format
+             // string must stay on one physical line: a string literal
+             // cannot contain a raw newline.
+             for (int i = 1; i < MAX_PCREL_OPT_INSNS; i++)
+               {
+                 if (counters.load_separation[i])
+                   fprintf (dump_file,
+                            "# of PCREL_OPT loads separated by %d insn%s = %lu\n",
+                            i, (i == 1) ? "" : "s",
+                            counters.load_separation[i]);
+               }
+           }
+       }
+
+      fprintf (dump_file, "\n");
+    }
+
+  return 0;
+}
+
+
+// Optimize pc-relative references for the new PCREL_OPT pass
+//
+// Static description of the pass.  It is an RTL pass named "pcrel_opt" with
+// no timevar, no required/provided/destroyed properties, and TODO_df_finish
+// as its only finish flag.
+const pass_data pass_data_pcrel_opt =
+{
+  RTL_PASS,                    // type
+  "pcrel_opt",                 // name
+  OPTGROUP_NONE,               // optinfo_flags
+  TV_NONE,                     // tv_id
+  0,                           // properties_required
+  0,                           // properties_provided
+  0,                           // properties_destroyed
+  0,                           // todo_flags_start
+  TODO_df_finish,              // todo_flags_finish
+};
+
+// The PCREL_OPT pass object handed to the pass manager.
+class pcrel_opt : public rtl_opt_pass
+{
+public:
+  pcrel_opt (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_pcrel_opt, ctxt)
+  {
+  }
+
+  ~pcrel_opt ()
+  {
+  }
+
+  // opt_pass methods:
+
+  // The pass only runs when optimizing and when both TARGET_PCREL and
+  // TARGET_PCREL_OPT are set.
+  virtual bool gate (function *)
+  {
+    if (!TARGET_PCREL || !TARGET_PCREL_OPT)
+      return false;
+
+    return optimize != 0;
+  }
+
+  // Run the optimization on FUN and pass back its TODO flags.
+  virtual unsigned int execute (function *fun)
+  {
+    const unsigned int todo = do_pcrel_opt_pass (fun);
+    return todo;
+  }
+
+  // Make a fresh copy of the pass in the same context.
+  opt_pass *clone ()
+  {
+    opt_pass *copy = new pcrel_opt (m_ctxt);
+    return copy;
+  }
+};
+
+// Factory for the PCREL_OPT pass: allocate a new pcrel_opt pass object in
+// context CTXT and return it.
+rtl_opt_pass *
+make_pass_pcrel_opt (gcc::context *ctxt)
+{
+  return new pcrel_opt (ctxt);
+}
diff --git a/gcc/config/rs6000/pcrel-opt.md b/gcc/config/rs6000/pcrel-opt.md
new file mode 100644
index 0000000..00a3bc4
--- /dev/null
+++ b/gcc/config/rs6000/pcrel-opt.md
@@ -0,0 +1,248 @@
+;; Machine description for the PCREL_OPT optimization.
+;; Copyright (C) 2020 Free Software Foundation, Inc.
+;; Contributed by Michael Meissner (meiss...@linux.ibm.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Support for the PCREL_OPT optimization.  PCREL_OPT looks for instances where
+;; an external variable is used only once, either for reading or for writing.
+;;
+;; If we are optimizing a single read, normally the code would look like:
+;;
+;;     (set (reg:DI <ptr>)
+;;          (symbol_ref:DI "<extern_addr>"))   # <data> is currently dead
+;;
+;;             ...     # insns do not need to be adjacent
+;;
+;;     (set (reg:SI <data>)
+;;          (mem:SI (reg:DI <ptr>)))           # <ptr> dies with this insn
+;;
+;; We transform this into:
+;;
+;;     (parallel [(set (reg:DI <ptr>)
+;;                     (unspec:DI [(symbol_ref:DI <extern_addr>)
+;;                                 (const_int <marker>)]
+;;                                UNSPEC_PCREL_OPT_LD_ADDR))
+;;                (set (reg:DI <data>)
+;;                     (unspec:DI [(const_int 0)]
+;;                                UNSPEC_PCREL_OPT_LD_ADDR))])
+;;
+;;     ...
+;;
+;;     (parallel [(set (reg:SI <data>)
+;;                (unspec:SI [(mem:SI (reg:DI <ptr>))
+;;                            (reg:DI <data>)
+;;                            (const_int <marker>)]
+;;                           UNSPEC_PCREL_OPT_LD_RELOC))
+;;                (clobber (reg:DI <ptr>))])
+;;
+;; The marker is an integer constant that links the load of the external
+;; address to the load of the actual variable.
+;;
+;; In the first insn, we set the register holding the address of the external
+;; variable and also mark the register that will eventually be loaded as set,
+;; so that both registers are created in that insn and consumed in the second
+;; insn.  The mode of the register that is ultimately loaded does not matter
+;; for this marker, so we use DImode.  We just need to record that both
+;; registers may be set in the first insn and will be used in the second insn.
+;;
+;; Since we use UNSPECs and link both the register holding the external
+;; address and the value being loaded, it should prevent other passes from
+;; modifying it.
+;;
+;; If the register being loaded is the same as the base register, we use an
+;; alternate form of the insns.
+;;
+;;     (set (reg:DI <data_ptr>)
+;;          (unspec:DI [(symbol_ref:DI <extern_addr>)
+;;                      (const_int <marker>)]
+;;                     UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG))
+;;
+;;     ...
+;;
+;;     (parallel [(set (reg:SI <data_ptr>)
+;;                (unspec:SI [(mem:SI (reg:DI <data_ptr>))
+;;                            (reg:DI <data_ptr>)
+;;                            (const_int <marker>)]
+;;                           UNSPEC_PCREL_OPT_LD_RELOC))
+;;                (clobber (scratch:DI))])
+
+(define_c_enum "unspec"
+  [UNSPEC_PCREL_OPT_LD_ADDR
+   UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG
+   UNSPEC_PCREL_OPT_LD_RELOC])
+
+;; Modes that are supported for PCREL_OPT
+(define_mode_iterator PO [QI HI SI DI TI SF DF KF
+                         V1TI V2DI V4SI V8HI V16QI V2DF V4SF
+                         (TF "TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD")])
+
+;; Vector modes for PCREL_OPT
+(define_mode_iterator PO_VECT [TI KF V1TI V2DI V4SI V8HI V16QI V2DF V4SF
+                              (TF "TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD")])
+
+;; Insn for loading the external address, where the register holding the
+;; address is not the same as the register that will be loaded with the data.
+;;
+;; Operand 0 is the (early-clobbered) base register that receives the address,
+;; operand 1 is the external symbol, and operand 2 is the marker number used to
+;; emit the .Lpcrel<n> label directly after the load.  Operand 3 is the DImode
+;; view of the register that the later PCREL_OPT load will fill; it is set
+;; here to a dummy UNSPEC so that it is seen as being set by this insn.
+(define_insn "pcrel_opt_ld_addr"
+  [(set (match_operand:DI 0 "base_reg_operand" "=&b,&b")
+       (unspec:DI [(match_operand:DI 1 "pcrel_external_address")
+                   (match_operand 2 "const_int_operand" "n,n")]
+                  UNSPEC_PCREL_OPT_LD_ADDR))
+   (set (match_operand:DI 3 "gpc_reg_operand" "=r,wa")
+       (unspec:DI [(const_int 0)]
+                  UNSPEC_PCREL_OPT_LD_ADDR))]
+  "TARGET_PCREL_OPT
+   && reg_or_subregno (operands[0]) != reg_or_subregno (operands[3])"
+  "ld %0,%a1\n.Lpcrel%2:"
+  [(set_attr "prefixed" "yes")
+   (set_attr "type" "load")
+   (set_attr "isa" "pcrel_opt")
+   (set_attr "loads_extern_addr" "yes")])
+
+;; Alternate form of loading up the external address that is the same register
+;; as the final load.  Only the address register is set, so there is no second
+;; dummy SET; operand 2 is again the marker number used for the .Lpcrel<n>
+;; label emitted after the load.
+(define_insn "pcrel_opt_ld_addr_same_reg"
+  [(set (match_operand:DI 0 "base_reg_operand" "=b")
+       (unspec:DI [(match_operand:DI 1 "pcrel_external_address")
+                   (match_operand 2 "const_int_operand" "n")]
+                  UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG))]
+  "TARGET_PCREL_OPT"
+  "ld %0,%a1\n.Lpcrel%2:"
+  [(set_attr "prefixed" "yes")
+   (set_attr "type" "load")
+   (set_attr "isa" "pcrel_opt")
+   (set_attr "loads_extern_addr" "yes")])
+
+;; PCREL_OPT modes that are optimized for loading or storing GPRs.
+(define_mode_iterator PO_GPR [QI HI SI DI SF DF])
+
+(define_mode_attr PO_GPR_LD [(QI "lbz")
+                            (HI "lhz")
+                            (SI "lwz")
+                            (SF "lwz")
+                            (DI "ld")
+                            (DF "ld")])
+
+;; PCREL_OPT load operation of GPRs.  Operand 4 (the register used to hold the
+;; address of the external symbol) is SCRATCH if the same register is used for
+;; the normal load.
+;;
+;; Operand 2 is the DImode view of the destination and is tied to operand 0
+;; by the "0" constraint.  Operand 3 is the marker number; it is printed with
+;; the %r operand code ahead of the load mnemonic (presumably emitting the
+;; .reloc that ties this load to the .Lpcrel<n> label -- the print_operand
+;; change is not visible here).  The insn condition requires that a non-SCRATCH
+;; operand 4 actually appears in the memory address.
+(define_insn "*pcrel_opt_ld<mode>_gpr"
+  [(parallel [(set (match_operand:PO_GPR 0 "int_reg_operand" "+r")
+                  (unspec:PO_GPR [(match_operand:PO_GPR 1 "d_form_memory" "o")
+                                  (match_operand:DI 2 "int_reg_operand" "0")
+                                  (match_operand 3 "const_int_operand" "n")]
+                                 UNSPEC_PCREL_OPT_LD_RELOC))
+             (clobber (match_scratch:DI 4 "=bX"))])]
+  "TARGET_PCREL_OPT
+   && (GET_CODE (operands[4]) == SCRATCH
+       || reg_mentioned_p (operands[4], operands[1]))"
+  "%r3<PO_GPR_LD> %0,%1"
+  [(set_attr "type" "load")
+   (set_attr "isa" "pcrel_opt")])
+
+;; PCREL_OPT load with sign/zero extension
+;;
+;; SImode memory extended into a wider GPR.  The extension wraps the UNSPEC,
+;; and <az> picks the "a" or "z" suffix of the load mnemonic according to the
+;; kind of extension.
+(define_insn "*pcrel_opt_ldsi_<u><mode>_gpr"
+  [(set (match_operand:EXTSI 0 "int_reg_operand" "+r")
+       (any_extend:EXTSI
+        (unspec:SI [(match_operand:SI 1 "d_form_memory" "o")
+                    (match_operand:DI 2 "int_reg_operand" "0")
+                    (match_operand 3 "const_int_operand" "n")]
+                    UNSPEC_PCREL_OPT_LD_RELOC)))
+   (clobber (match_scratch:DI 4 "=bX"))]
+  "TARGET_PCREL_OPT"
+  "%r3lw<az> %0,%1"
+  [(set_attr "type" "load")
+   (set_attr "isa" "pcrel_opt")])
+
+;; PCREL_OPT load of HImode memory with sign/zero extension into a wider GPR.
+;; Same operand layout as the SImode extending load above.
+(define_insn "*pcrel_opt_ldhi_<u><mode>_gpr"
+  [(set (match_operand:EXTHI 0 "int_reg_operand" "+r")
+       (any_extend:EXTHI
+        (unspec:HI [(match_operand:HI 1 "d_form_memory" "o")
+                    (match_operand:DI 2 "int_reg_operand" "0")
+                    (match_operand 3 "const_int_operand" "n")]
+                    UNSPEC_PCREL_OPT_LD_RELOC)))
+   (clobber (match_scratch:DI 4 "=bX"))]
+  "TARGET_PCREL_OPT"
+  "%r3lh<az> %0,%1"
+  [(set_attr "type" "load")
+   (set_attr "isa" "pcrel_opt")])
+
+;; PCREL_OPT load of QImode memory with zero extension into a wider GPR.  Only
+;; zero extension is matched here (the insn emitted is lbz).
+(define_insn "*pcrel_opt_ldqi_u<mode>_gpr"
+  [(set (match_operand:EXTQI 0 "int_reg_operand" "+r")
+       (zero_extend:EXTQI
+        (unspec:QI [(match_operand:QI 1 "d_form_memory" "o")
+                    (match_operand:DI 2 "int_reg_operand" "0")
+                    (match_operand 3 "const_int_operand" "n")]
+                    UNSPEC_PCREL_OPT_LD_RELOC)))
+   (clobber (match_scratch:DI 4 "=bX"))]
+  "TARGET_PCREL_OPT"
+  "%r3lbz %0,%1"
+  [(set_attr "type" "load")
+   (set_attr "isa" "pcrel_opt")])
+
+;; Scalar types that can be optimized by loading them into floating point
+;; or Altivec registers.
+(define_mode_iterator PO_FP [DI DF SF])
+
+;; Load instructions to load up scalar floating point or 64-bit integer values
+;; into floating point registers or Altivec registers.
+(define_mode_attr PO_FPR_LD [(DI "lfd")  (DF "lfd")  (SF "lfs")])
+(define_mode_attr PO_AVX_LD [(DI "lxsd") (DF "lxsd") (SF "lxssp")])
+
+;; PCREL_OPT load operation of scalar DF/DI/SF into vector registers.
+;;
+;; The first alternative ("d") uses the <PO_FPR_LD> instructions and the
+;; second ("v") uses the <PO_AVX_LD> instructions.  Unlike the GPR patterns,
+;; the clobbered address register (operand 4) is a plain base register operand
+;; rather than a match_scratch.
+(define_insn "*pcrel_opt_ld<mode>_vsx"
+  [(set (match_operand:PO_FP 0 "vsx_register_operand" "+d,v")
+       (unspec:PO_FP [(match_operand:PO_FP 1 "d_form_memory" "o,o")
+                      (match_operand:DI 2 "vsx_register_operand" "0,0")
+                      (match_operand 3 "const_int_operand" "n,n")]
+                      UNSPEC_PCREL_OPT_LD_RELOC))
+   (clobber (match_operand:DI 4 "base_reg_operand" "=b,b"))]
+  "TARGET_PCREL_OPT"
+  "@
+   %r3<PO_FPR_LD> %0,%1
+   %r3<PO_AVX_LD> %0,%1"
+  [(set_attr "type" "fpload")
+   (set_attr "isa" "pcrel_opt")])
+
+;; PCREL_OPT optimization extending SFmode to DFmode via a load.  The
+;; float_extend wraps the SFmode UNSPEC, and the same lfs/lxssp instructions
+;; as a plain SFmode load are emitted.
+(define_insn "*pcrel_opt_ldsf_df"
+  [(set (match_operand:DF 0 "vsx_register_operand" "+d,v")
+       (float_extend:DF
+        (unspec:SF [(match_operand:SF 1 "d_form_memory" "o,o")
+                    (match_operand:DI 2 "vsx_register_operand" "0,0")
+                    (match_operand 3 "const_int_operand" "n,n")]
+                   UNSPEC_PCREL_OPT_LD_RELOC)))
+   (clobber (match_operand:DI 4 "base_reg_operand" "=b,b"))]
+  "TARGET_PCREL_OPT"
+  "@
+   %r3lfs %0,%1
+   %r3lxssp %0,%1"
+  [(set_attr "type" "fpload")
+   (set_attr "isa" "pcrel_opt")])
+
+;; PCREL_OPT load operation of vector/float128 types into vector registers.
+;; A single lxv is emitted; %x0 prints operand 0 as a VSX register.
+(define_insn "*pcrel_opt_ld<mode>"
+  [(set (match_operand:PO_VECT 0 "vsx_register_operand" "+wa")
+       (unspec:PO_VECT [(match_operand:PO_VECT 1 "d_form_memory" "o")
+                        (match_operand:DI 2 "vsx_register_operand" "0")
+                        (match_operand 3 "const_int_operand" "n")]
+                       UNSPEC_PCREL_OPT_LD_RELOC))
+   (clobber (match_operand:DI 4 "base_reg_operand" "=b"))]
+  "TARGET_PCREL_OPT"
+  "%r3lxv %x0,%1"
+  [(set_attr "type" "vecload")
+   (set_attr "isa" "pcrel_opt")])
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 2709e46..38ae9cd 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1876,3 +1876,26 @@ (define_predicate "prefixed_memory"
 {
   return address_is_prefixed (XEXP (op, 0), mode, NON_PREFIXED_DEFAULT);
 })
+
+;; Return true if the operand is a valid memory operand whose address is a
+;; register (REG or SUBREG) or a base register plus a constant satisfying the
+;; 'I' constraint, i.e. one that could be merged with the load of a PC-relative
+;; external address by the PCREL_OPT optimization.  DS-FORM (bottom 2 bits 0)
+;; and DQ-FORM (bottom 4 bits 0) offset restrictions are not checked here.
+(define_predicate "d_form_memory"
+  (match_code "mem")
+{
+  if (!memory_operand (op, mode))
+    return false;
+
+  rtx addr = XEXP (op, 0);
+
+  if (REG_P (addr) || SUBREG_P (addr))
+    return true;
+
+  if (GET_CODE (addr) != PLUS)
+    return false;
+
+  return (base_reg_operand (XEXP (addr, 0), Pmode)
+         && satisfies_constraint_I (XEXP (addr, 1)));
+})
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index 8d2c1ff..d3f72d7 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -78,6 +78,7 @@
 /* Flags that need to be turned off if -mno-power10.  */
 #define OTHER_POWER10_MASKS    (OPTION_MASK_MMA                        \
                                 | OPTION_MASK_PCREL                    \
+                                | OPTION_MASK_PCREL_OPT                \
                                 | OPTION_MASK_PREFIXED)
 
 #define ISA_3_1_MASKS_SERVER   (ISA_3_0_MASKS_SERVER                   \
@@ -142,6 +143,7 @@
                                 | OPTION_MASK_P9_MISC                  \
                                 | OPTION_MASK_P9_VECTOR                \
                                 | OPTION_MASK_PCREL                    \
+                                | OPTION_MASK_PCREL_OPT                \
                                 | OPTION_MASK_POPCNTB                  \
                                 | OPTION_MASK_POPCNTD                  \
                                 | OPTION_MASK_POWERPC64                \
diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
index 5164c52..9b93fc7 100644
--- a/gcc/config/rs6000/rs6000-passes.def
+++ b/gcc/config/rs6000/rs6000-passes.def
@@ -24,4 +24,12 @@ along with GCC; see the file COPYING3.  If not see
    REPLACE_PASS (PASS, INSTANCE, TGT_PASS)
  */
 
+  /* Pass to add the appropriate vector swaps on power8 little endian systems.
+     The power8 does not have instructions that automatically do the byte swaps
+     for loads and stores.  */
   INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
+
+  /* Pass to do the PCREL_OPT optimization that combines the load of an
+     external symbol's address along with a single load or store using that
+     address as a base register.  */
+  INSERT_PASS_AFTER (pass_sched2, 1, pass_pcrel_opt);
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 28e859f..517713a 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -189,6 +189,7 @@ enum non_prefixed_form {
 
 extern enum insn_form address_to_insn_form (rtx, machine_mode,
                                            enum non_prefixed_form);
+extern enum non_prefixed_form reg_to_non_prefixed (rtx, machine_mode);
 extern bool prefixed_load_p (rtx_insn *);
 extern bool prefixed_store_p (rtx_insn *);
 extern bool prefixed_paddi_p (rtx_insn *);
@@ -305,6 +306,7 @@ namespace gcc { class context; }
 class rtl_opt_pass;
 
 extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
+extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
 extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
 extern bool rs6000_quadword_masked_address_p (const_rtx exp);
 extern rtx rs6000_gen_lvx (enum machine_mode, rtx, rtx);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index fe93cf6..6877de5 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1175,7 +1175,6 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
                                          machine_mode,
                                          secondary_reload_info *,
                                          bool);
-static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
 
 /* Hash table stuff for keeping track of TOC entries.  */
@@ -4316,6 +4315,16 @@ rs6000_option_override_internal (bool global_init_p)
       rs6000_isa_flags &= ~OPTION_MASK_MMA;
     }
 
+  /* -mpcrel-opt depends on -mpcrel.  Diagnose an explicit request, and in
+     either case turn the option off when -mpcrel is not enabled.  */
+  if (!TARGET_PCREL && TARGET_PCREL_OPT)
+    {
+      if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL_OPT) != 0)
+	error ("%qs requires %qs", "-mpcrel-opt", "-mpcrel");
+
+      rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
+    }
+
   if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
     rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
 
@@ -8515,7 +8522,10 @@ rs6000_delegitimize_address (rtx orig_x)
 {
   rtx x, y, offset;
 
-  if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
+  if (GET_CODE (orig_x) == UNSPEC
+      && (XINT (orig_x, 1) == UNSPEC_FUSION_GPR
+         || XINT (orig_x, 1) == UNSPEC_PCREL_OPT_LD_ADDR
+         || XINT (orig_x, 1) == UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG))
     orig_x = XVECEXP (orig_x, 0, 0);
 
   orig_x = delegitimize_mem_from_attrs (orig_x);
@@ -13197,6 +13207,19 @@ print_operand (FILE *file, rtx x, int code)
        fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
       return;
 
+    case 'r':
+      /* X is a label number for the PCREL_OPT optimization.  Emit the .reloc
+        to enable this optimization, unless the value is 0.  */
+      gcc_assert (CONST_INT_P (x));
+      if (UINTVAL (x) != 0)
+       {
+         unsigned int label_num = UINTVAL (x);
+         fprintf (file,
+                  ".reloc .Lpcrel%u-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%u-8)\n\t",
+                  label_num, label_num);
+       }
+      return;
+
     case 's':
       /* Low 5 bits of 32 - value */
       if (! INT_P (x))
@@ -23244,6 +23267,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
   { "mulhw",                   OPTION_MASK_MULHW,              false, true  },
   { "multiple",                        OPTION_MASK_MULTIPLE,           false, true  },
   { "pcrel",                   OPTION_MASK_PCREL,              false, true  },
+  { "pcrel-opt",               OPTION_MASK_PCREL_OPT,          false, true  },
   { "popcntb",                 OPTION_MASK_POPCNTB,            false, true  },
   { "popcntd",                 OPTION_MASK_POPCNTD,            false, true  },
   { "power8-fusion",           OPTION_MASK_P8_FUSION,          false, true  },
@@ -25368,7 +25392,7 @@ is_lfs_stfs_insn (rtx_insn *insn)
 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
    instruction format (D/DS/DQ) used for offset memory.  */
 
-static enum non_prefixed_form
+enum non_prefixed_form
 reg_to_non_prefixed (rtx reg, machine_mode mode)
 {
   /* If it isn't a register, use the defaults.  */
@@ -25591,7 +25615,15 @@ void
 rs6000_asm_output_opcode (FILE *stream)
 {
   if (next_insn_prefixed_p)
-    fprintf (stream, "p");
+    {
+      fprintf (stream, "p");
+
+      /* Reset flag in case there are separate insn lines in the sequence, so
+        the 'p' is only emitted for the first line.  This shows up when we are
+        doing the PCREL_OPT optimization, in that the label created with %r<n>
+        would have a leading 'p' printed.  */
+      next_insn_prefixed_p = false;
+    }
 
   return;
 }
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 43b620a..d9dd25f 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -292,6 +292,10 @@ (define_attr "prefixed" "no,yes"
 
        (const_string "no")))
 
+;; Whether an insn loads an external address for the PCREL_OPT optimization.
+(define_attr "loads_extern_addr" "no,yes"
+  (const_string "no"))
+
 ;; Return the number of real hardware instructions in a combined insn.  If it
 ;; is 0, just use the length / 4.
 (define_attr "num_insns" "" (const_int 0))
@@ -323,7 +327,7 @@ (define_attr "cpu"
   (const (symbol_ref "(enum attr_cpu) rs6000_tune")))
 
 ;; The ISA we implement.
-(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10"
+(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10,pcrel_opt"
   (const_string "any"))
 
 ;; Is this alternative enabled for the current CPU/ISA/etc.?
@@ -371,6 +375,10 @@ (define_attr "enabled" ""
      (and (eq_attr "isa" "p10")
          (match_test "TARGET_POWER10"))
      (const_int 1)
+
+     (and (eq_attr "isa" "pcrel_opt")
+         (match_test "TARGET_PCREL_OPT"))
+     (const_int 1)
     ] (const_int 0)))
 
 ;; If this instruction is microcoded on the CELL processor
@@ -10226,7 +10234,8 @@ (define_insn "*pcrel_extern_addr"
   "TARGET_PCREL"
   "ld %0,%a1"
   [(set_attr "prefixed" "yes")
-   (set_attr "type" "load")])
+   (set_attr "type" "load")
+   (set_attr "loads_extern_addr" "yes")])
 
 ;; TOC register handling.
 
@@ -14900,3 +14909,4 @@ (define_insn "*cmpeqb_internal"
 (include "dfp.md")
 (include "crypto.md")
 (include "htm.md")
+(include "pcrel-opt.md")
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 9d3e740..22d3af4 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -582,6 +582,10 @@ mpcrel
 Target Report Mask(PCREL) Var(rs6000_isa_flags)
 Generate (do not generate) pc-relative memory addressing.
 
+mpcrel-opt
+Target Undocumented Mask(PCREL_OPT) Var(rs6000_isa_flags)
+Generate (do not generate) pc-relative memory optimizations for externals.
+
 mmma
 Target Report Mask(MMA) Var(rs6000_isa_flags)
 Generate (do not generate) MMA instructions.
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 1ddb572..a617276 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -23,6 +23,11 @@ TM_H += $(srcdir)/config/rs6000/rs6000-cpus.def
 TM_H += $(srcdir)/config/rs6000/rs6000-modes.h
 PASSES_EXTRA += $(srcdir)/config/rs6000/rs6000-passes.def
 
+# Compile the PCREL_OPT source file; recipe lines must begin with a hard tab.
+pcrel-opt.o: $(srcdir)/config/rs6000/pcrel-opt.c
+	$(COMPILE) $<
+	$(POSTCOMPILE)
+
 rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c
 	$(COMPILE) $<
 	$(POSTCOMPILE)
@@ -86,4 +90,5 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \
        $(srcdir)/config/rs6000/mma.md \
        $(srcdir)/config/rs6000/crypto.md \
        $(srcdir)/config/rs6000/htm.md \
-       $(srcdir)/config/rs6000/dfp.md
+       $(srcdir)/config/rs6000/dfp.md \
+       $(srcdir)/config/rs6000/pcrel-opt.md
-- 
1.8.3.1


-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797

Reply via email to