This patch adds a new RTL pass that occurs before the final pass to implement
the PCREL_OPT optimization that is implemented by the linker.

Without this optimization, access to external symbols loads up the address from
a .GOT section and does the normal operation.  For example:

        extern unsigned int esym;

        /* ... */

        esym = 1;

would generate:

        pld 9,esym@got@pcrel
        li 10,1
        stw 10,0(9)

I.e. load the address of 'esym' into r9, and do a normal 'stw'.

With the PCREL_OPT optimization, the compiler would generate:

        li 9,1
        pld 10,esym@got@pcrel
.Lpcrel1:
        .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
        stw 9,0(10)

When the module is linked, if the object file is in the main program and the
'esym' variable is also in the main program, the linker will change the code
to:

        li 9,1
        pstw 9,esym@pcrel
        nop

If either the object file is in a shared library, or the variable 'esym' is in
a shared library, then the old code is used:

        li 9,1
        pld 10,esym.got@pcrel
        stw 9,0(10)

        .section .got
esym.got:
        .quad esym

When optimizing loads with PCREL_OPT, this patch makes sure that the register
being loaded is not live between the PLD instruction loading the address and
the normal load instruction.

Similarly, when optimizing stores with PCREL_OPT, this patch makes sure the
value being stored is live at the time the address is loaded and is still live
at the time the store is done.

If there is more than one reference to the external symbol in the basic block,
or the load of the address is in one basic block and the memory reference is in
another basic block, this pass does not optimize the reference to use
PCREL_OPT.  For example:

        extern unsigned int esym;

        void inc (void)
        {
          esym++;
        }

Generates:

        pld 10,esym@got@pcrel
        lwz 9,0(10)
        addi 9,9,1
        stw 9,0(10)

As with the other patches, I have bootstrapped the changes on a little endian
power8 system and there were no regressions.  Once the previous patches are
checked in, can I check this patch into the trunk?

[gcc]
2019-08-14   Michael Meissner  <meiss...@linux.ibm.com>

        * config/rs6000/pcrel.md: New file.
        * config/rs6000/predicates.md (one_reg_memory_operand): New
        predicate.
        * config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS_SERVER): Add
        -mpcrel-opt.
        (OTHER_FUTURE_MASKS): Add -mpcrel-opt.
        (POWERPC_MASKS): Add -mpcrel-opt.
        * config/rs6000/rs6000-passes.def: Add pc-relative optimization
        pass.
        * config/rs6000/rs6000-pcrel.c: New file.
        * config/rs6000/rs6000-prefixed.c (pcrel_opt_label_num): New
        static variable.
        (rs6000_final_prescan_insn): Add support for pc-relative
        optimization pass.
        (rs6000_asm_output_opcode): Add support for pc-relative
        optimization pass.
        * config/rs6000/rs6000-protos.h (rs6000_final_prescan_insn):
        Change calling signature.
        (make_pass_pcrel_opt): New declaration.
        * config/rs6000/rs6000.c (rs6000_option_override_internal): Add
        support for -mpcrel-opt.
        (rs6000_opt_masks): Add -mpcrel-opt.
        * config/rs6000/rs6000.h (FINAL_PRESCAN_INSN): Update
        rs6000_final_prescan_insn call.
        * config/rs6000/rs6000.md: Include pcrel.md.
        (pcrel_opt attribute): New RTL attribute.
        * config/rs6000/rs6000.opt (-mpcrel-opt): New option.
        * config/rs6000/t-rs6000 (rs6000-pcrel.o): Add build rule.
        (MD_INCLUDES): Add pcrel.md.
        * config.gcc (powerpc*-*-*): Add rs6000-pcrel.o.
        (rs6000*-*-*): Add rs6000-pcrel.o.

[gcc/testsuite]
2019-08-07   Michael Meissner  <meiss...@linux.ibm.com>

        * gcc.target/powerpc/pcrel-opt-di.c: New test.

Index: gcc/config/rs6000/pcrel.md
===================================================================
--- gcc/config/rs6000/pcrel.md  (revision 0)
+++ gcc/config/rs6000/pcrel.md  (working copy)
@@ -0,0 +1,563 @@
+;; PC relative support.
+;; Copyright (C) 2019 Free Software Foundation, Inc.
+;; Contributed by Peter Bergner <berg...@linux.ibm.com> and
+;;               Michael Meissner <meiss...@linux.ibm.com>
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;
+;; UNSPEC usage
+;;
+
+(define_c_enum "unspec"
+  [UNSPEC_PCREL_LD
+   UNSPEC_PCREL_ST
+  ])
+
+
+;; Optimize references to external variables to combine loading up the external
+;; address from the GOT and doing the load or store operation.
+;;
+;; A typical optimization looks like:
+;;
+;;             pld b,var@pcrel@got(0),1
+;;     100:
+;;             ...
+;;             .reloc 100b-8,R_PPC64_PCREL_OPT,0
+;;             lwz r,0(b)
+;;
+;; If 'var' is an external variable defined in another module in the main
+;; program, and the code is being linked for the main program, then the
+;; linker can optimize this to:
+;;
+;;             plwz r,var(0),1
+;;     100:
+;;             ...
+;;             nop
+;;
+;; If either the variable or the code being linked is defined in a shared
+;; library, then the linker puts the address in the GOT area, and the pld will
+;; load up the pointer, and then that pointer is used for the load or store.
+;; If there is more than one reference to the GOT pointer, the compiler will
+;; not do this optimization, and use the GOT pointer normally.
+;;
+;; Having the label after the pld instruction and using label-8 in the .reloc
+;; addresses the prefixed instruction properly.  If we put the label before the
+;; pld instruction, then the relocation might point to the NOP that is
+;; generated if the prefixed instruction is not aligned.
+;;
+;; We need to rewrite the normal GOT load operation before register allocation
+;; to include setting the eventual destination register for loads, or referring
+;; to the value being stored for store operations so that the proper register
+;; lifetime is set in case the optimization is done and the pld/lwz is
+;; converted to plwz/nop.
+
+(define_mode_iterator PO [QI HI SI DI SF DF
+                         V16QI V8HI V4SI V4SF V2DI V2DF V1TI KF
+                         (TF "FLOAT128_IEEE_P (TFmode)")])
+
+;; Vector types for pcrel optimization
+(define_mode_iterator POV [V16QI V8HI V4SI V4SF V2DI V2DF V1TI KF
+                          (TF "FLOAT128_IEEE_P (TFmode)")])
+
+;; Define the constraints for each mode for pcrel_opt.  The order of the
+;; constraints should have the most natural register class first.
+(define_mode_attr PO_constraint [(QI    "r,d,v")
+                                (HI    "r,d,v")
+                                (SI    "r,d,v")
+                                (DI    "r,d,v")
+                                (SF    "d,v,r")
+                                (DF    "d,v,r")
+                                (V16QI "wa,wn,wn")
+                                (V8HI  "wa,wn,wn")
+                                (V4SI  "wa,wn,wn")
+                                (V4SF  "wa,wn,wn")
+                                (V2DI  "wa,wn,wn")
+                                (V2DF  "wa,wn,wn")
+                                (V1TI  "wa,wn,wn")
+                                (KF    "wa,wn,wn")
+                                (TF    "wa,wn,wn")])
+
+;; Combiner pattern that combines the load of the GOT along with the load.  The
+;; first split pass before register allocation will split this into the load of
+;; the GOT that indicates the resultant value may be created if the PCREL_OPT
+;; relocation is done.
+;;
+;; The (set (match_dup 0)
+;;         (unspec:<MODE> [(const_int 0)] UNSPEC_PCREL_LD))
+;;
+;; Is to signal to the register allocator that the destination register may be
+;; set by the GOT operation (if the linker does the optimization).
+;;
+;; We need to set the "cost" explicitly so that the instruction length is not
+;; used.  We return the same cost as a normal load (4 if we are not optimizing
+;; for speed, 8 if we are optimizing for speed)
+
+(define_insn_and_split "*mov<mode>_pcrel_opt_load"
+  [(set (match_operand:PO 0 "gpc_reg_operand")
+       (match_operand:PO 1 "pcrel_external_mem_operand"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 2)
+                  (match_dup 3))
+             (set (match_dup 0)
+                  (unspec:<MODE> [(const_int 0)] UNSPEC_PCREL_LD))
+             (use (const_int 0))])
+   (parallel [(set (match_dup 0)
+                  (match_dup 4))
+             (use (match_dup 0))
+             (use (const_int 0))])]
+{
+  rtx mem = operands[1];
+  rtx got = gen_reg_rtx (DImode);
+
+  operands[2] = got;
+  operands[3] = XEXP (mem, 0);
+  operands[4] = change_address (mem, <MODE>mode, got);
+}
+  [(set_attr "type" "load")
+   (set_attr "length" "16")
+   (set (attr "cost")
+       (if_then_else (match_test "optimize_function_for_speed_p (cfun)")
+                     (const_string "8")
+                     (const_string "4")))
+   (set_attr "prefixed" "yes")])
+
+;; Zero extend combiner patterns
+(define_insn_and_split "*mov<mode>_pcrel_opt_zero_extend"
+  [(set (match_operand:DI 0 "gpc_reg_operand")
+       (zero_extend:DI
+        (match_operand:QHSI 1 "pcrel_external_mem_operand")))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 2)
+                  (match_dup 3))
+             (set (match_dup 0)
+                  (unspec:DI [(const_int 0)] UNSPEC_PCREL_LD))
+             (use (const_int 0))])
+   (parallel [(set (match_dup 0)
+                  (zero_extend:DI
+                   (match_dup 4)))
+             (use (match_dup 0))
+             (use (const_int 0))])]
+{
+  rtx mem = operands[1];
+  rtx got = gen_reg_rtx (DImode);
+
+  operands[2] = got;
+  operands[3] = XEXP (mem, 0);
+  operands[4] = change_address (mem, <MODE>mode, got);
+}
+  [(set_attr "type" "load")
+   (set_attr "length" "16")
+   (set (attr "cost")
+       (if_then_else (match_test "optimize_function_for_speed_p (cfun)")
+                     (const_string "8")
+                     (const_string "4")))
+   (set_attr "prefixed" "yes")])
+
+;; Sign extend combiner patterns
+(define_insn_and_split "*mov<mode>_pcrel_opt_sign_extend"
+  [(set (match_operand:DI 0 "gpc_reg_operand")
+       (sign_extend:DI
+        (match_operand:HSI 1 "pcrel_external_mem_operand")))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 2)
+                  (match_dup 3))
+             (set (match_dup 0)
+                  (unspec:DI [(const_int 0)] UNSPEC_PCREL_LD))
+             (use (const_int 0))])
+   (parallel [(set (match_dup 0)
+                  (sign_extend:DI
+                   (match_dup 4)))
+             (use (match_dup 0))
+             (use (const_int 0))])]
+{
+  rtx mem = operands[1];
+  rtx got = gen_reg_rtx (DImode);
+
+  operands[2] = got;
+  operands[3] = XEXP (mem, 0);
+  operands[4] = change_address (mem, <MODE>mode, got);
+}
+  [(set_attr "type" "load")
+   (set_attr "length" "16")
+   (set (attr "cost")
+       (if_then_else (match_test "optimize_function_for_speed_p (cfun)")
+                     (const_string "8")
+                     (const_string "4")))
+   (set_attr "prefixed" "yes")])
+
+;; Float extend combiner pattern
+(define_insn_and_split "*movdf_pcrel_opt_float_extend"
+  [(set (match_operand:DF 0 "gpc_reg_operand")
+       (float_extend:DF
+        (match_operand:SF 1 "pcrel_external_mem_operand")))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 2)
+                  (match_dup 3))
+             (set (match_dup 0)
+                  (unspec:DF [(const_int 0)] UNSPEC_PCREL_LD))
+             (use (const_int 0))])
+   (parallel [(set (match_dup 0)
+                  (float_extend:DF
+                   (match_dup 4)))
+             (use (match_dup 0))
+             (use (const_int 0))])]
+{
+  rtx mem = operands[1];
+  rtx got = gen_reg_rtx (DImode);
+
+  operands[2] = got;
+  operands[3] = XEXP (mem, 0);
+  operands[4] = change_address (mem, SFmode, got);
+}
+  [(set_attr "type" "load")
+   (set_attr "length" "16")
+   (set (attr "cost")
+       (if_then_else (match_test "optimize_function_for_speed_p (cfun)")
+                     (const_string "8")
+                     (const_string "4")))
+   (set_attr "prefixed" "yes")])
+
+;; Patterns to load up the GOT address that may be changed into the load of the
+;; actual variable.
+(define_insn "*mov<mode>_pcrel_opt_load_got"
+  [(set (match_operand:DI 0 "base_reg_operand" "=b,b,b")
+       (match_operand:DI 1 "pcrel_external_address"))
+   (set (match_operand:PO 2 "gpc_reg_operand" "=<PO_constraint>")
+       (unspec:PO [(const_int 0)] UNSPEC_PCREL_LD))
+   (use (match_operand:DI 3 "const_int_operand" "n,n,n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+{
+  return (INTVAL (operands[3])) ? "ld %0,%a1\n.Lpcrel%3:" : "ld %0,%a1";
+}
+  [(set_attr "type" "load")
+   (set_attr "length" "12")
+   (set_attr "pcrel_opt" "load_got")
+   (set (attr "cost")
+       (if_then_else (match_test "optimize_function_for_speed_p (cfun)")
+                     (const_string "8")
+                     (const_string "4")))
+   (set_attr "prefixed" "yes")])
+
+;; The secondary load insns that uses the GOT pointer that may become a NOP.
+(define_insn "*mov<mode>_pcrel_opt_load_mem"
+  [(set (match_operand:QHI 0 "gpc_reg_operand" "+r,wa")
+       (match_operand:QHI 1 "one_reg_memory_operand" "Q,Q"))
+   (use (match_operand:QHI 2 "gpc_reg_operand" "0,0"))
+   (use (match_operand:DI 3 "const_int_operand" "n,n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "@
+   l<wd>z %0,%1
+   lxsi<wd>zx %x0,%y1"
+  [(set_attr "type" "load,fpload")
+   (set_attr "pcrel_opt" "load,no")
+   (set_attr "prefixed" "no")])
+
+(define_insn "*movsi_pcrel_opt_load_mem"
+  [(set (match_operand:SI 0 "gpc_reg_operand" "+r,d,v")
+       (match_operand:SI 1 "one_reg_memory_operand" "Q,Q,Q"))
+   (use (match_operand:SI 2 "gpc_reg_operand" "0,0,0"))
+   (use (match_operand:DI 3 "const_int_operand" "n,n,n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "@
+   lwz %0,%1
+   lfiwzx %0,%y1
+   lxsiwzx %x0,%y1"
+  [(set_attr "type" "load,fpload,fpload")
+   (set_attr "pcrel_opt" "load,no,no")
+   (set_attr "prefixed" "no")])
+
+(define_insn "*movdi_pcrel_opt_load_mem"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "+r,d,v")
+       (match_operand:DI 1 "one_reg_memory_operand" "Q,Q,Q"))
+   (use (match_operand:DI 2 "gpc_reg_operand" "0,0,0"))
+   (use (match_operand:DI 3 "const_int_operand" "n,n,n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "@
+   ld %0,%1
+   lfd %0,%1
+   lxsd %0,%1"
+  [(set_attr "type" "load,fpload,fpload")
+   (set_attr "pcrel_opt" "load")
+   (set_attr "prefixed" "no")])
+
+(define_insn "*movsf_pcrel_opt_load_mem"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "+d,v,r")
+       (match_operand:SF 1 "one_reg_memory_operand" "Q,Q,Q"))
+   (use (match_operand:SF 2 "gpc_reg_operand" "0,0,0"))
+   (use (match_operand:DI 3 "const_int_operand" "n,n,n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "@
+   lfs %0,%1
+   lxssp %0,%1
+   lwz %0,%1"
+  [(set_attr "type" "fpload,fpload,load")
+   (set_attr "pcrel_opt" "load")
+   (set_attr "prefixed" "no")])
+
+(define_insn "*movdf_pcrel_opt_load_mem"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "+d,v,r")
+       (match_operand:DF 1 "one_reg_memory_operand" "Q,Q,Q"))
+   (use (match_operand:DF 2 "gpc_reg_operand" "0,0,0"))
+   (use (match_operand:DI 3 "const_int_operand" "n,n,n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "@
+   lfd %0,%1
+   lxsd %0,%1
+   ld %0,%1"
+  [(set_attr "type" "fpload,fpload,load")
+   (set_attr "pcrel_opt" "load")
+   (set_attr "prefixed" "no")])
+
+(define_insn "*mov<mode>_pcrel_opt_load_mem"
+  [(set (match_operand:POV 0 "gpc_reg_operand" "+wa")
+       (match_operand:POV 1 "one_reg_memory_operand" "Q"))
+   (use (match_operand:POV 2 "gpc_reg_operand" "0"))
+   (use (match_operand:DI 3 "const_int_operand" "n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "lxv %x0,%1"
+  [(set_attr "type" "vecload")
+   (set_attr "pcrel_opt" "load")
+   (set_attr "prefixed" "no")])
+
+;; Zero extend insns
+(define_insn "*mov<mode>_pcrel_opt_load_zero_extend2"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "+r,wa")
+       (zero_extend:DI
+        (match_operand:QHI 1 "one_reg_memory_operand" "Q,Q")))
+   (use (match_operand:DI 2 "gpc_reg_operand" "0,0"))
+   (use (match_operand:DI 3 "const_int_operand" "n,n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "@
+   l<wd>z %0,%1
+   lxsi<wd>zx %x0,%y1"
+  [(set_attr "type" "load,fpload")
+   (set_attr "pcrel_opt" "load,no")
+   (set_attr "prefixed" "no")])
+
+(define_insn "*movsi_pcrel_opt_load_zero_extend2"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "+r,d,v")
+       (zero_extend:DI
+        (match_operand:SI 1 "one_reg_memory_operand" "Q,Q,Q")))
+   (use (match_operand:DI 2 "gpc_reg_operand" "0,0,0"))
+   (use (match_operand:DI 3 "const_int_operand" "n,n,n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "@
+   lwz %0,%1
+   lfiwzx %0,%y1
+   lxsiwzx %x0,%y1"
+  [(set_attr "type" "load,fpload,fpload")
+   (set_attr "pcrel_opt" "load,no,no")
+   (set_attr "prefixed" "no")])
+
+;; Sign extend insns
+(define_insn "*movsi_pcrel_opt_load_sign_extend2"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "+r,d,v")
+       (sign_extend:DI
+        (match_operand:SI 1 "one_reg_memory_operand" "Q,Q,Q")))
+   (use (match_operand:DI 2 "gpc_reg_operand" "0,0,0"))
+   (use (match_operand:DI 3 "const_int_operand" "n,n,n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "@
+   lwa %0,%1
+   lfiwax %0,%y1
+   lxsiwax %x0,%y1"
+  [(set_attr "type" "load,fpload,fpload")
+   (set_attr "pcrel_opt" "load,no,no")
+   (set_attr "prefixed" "no")])
+
+(define_insn_and_split "*movhi_pcrel_opt_load_sign_extend2"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "+r,v")
+       (sign_extend:DI
+        (match_operand:HI 1 "one_reg_memory_operand" "Q,Q")))
+   (use (match_operand:DI 2 "gpc_reg_operand" "0,0"))
+   (use (match_operand:DI 3 "const_int_operand" "n,n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "@
+   lha %0,%1
+   #"
+  "&& reload_completed && altivec_register_operand (operands[0], DImode)"
+  [(parallel [(set (match_dup 4)
+                  (match_dup 1))
+             (use (match_dup 4))
+             (use (const_int 0))])
+   (set (match_dup 0)
+       (sign_extend:DI
+        (match_dup 4)))]
+{
+  operands[4] = gen_rtx_REG (HImode, REGNO (operands[0]));
+}
+  [(set_attr "type" "load,fpload")
+   (set_attr "pcrel_opt" "load,no")
+   (set_attr "length" "4,8")
+   (set_attr "prefixed" "no")])
+
+;; Floating point extend insn
+(define_insn "*movsf_pcrel_opt_load_float_extend2"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "+d,v")
+       (float_extend:DF
+        (match_operand:SF 1 "one_reg_memory_operand" "Q,Q")))
+   (use (match_operand:DF 2 "gpc_reg_operand" "0,0"))
+   (use (match_operand:DI 3 "const_int_operand" "n,n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "@
+   lfs %0,%1
+   lxssp %0,%1"
+  [(set_attr "type" "fpload")
+   (set_attr "pcrel_opt" "load")
+   (set_attr "prefixed" "no")])
+
+;; Store combiner insns that merge together loading up the address of the
+;; external variable and doing the store.  This is split in the first split
+;; pass before register allocation.
+;;
+;; We need to set the "cost" explicitly so that the instruction length is not
+;; used.  We return the same cost as a normal store (4).
+(define_insn_and_split "*mov<mode>_pcrel_opt_store"
+  [(set (match_operand:PO 0 "pcrel_external_mem_operand")
+       (match_operand:PO 1 "gpc_reg_operand"))]
+   "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64
+    && can_create_pseudo_p ()"
+   "#"
+   "&& 1"
+   [(set (match_dup 2)
+        (unspec:DI [(match_dup 1)
+                    (match_dup 3)
+                    (const_int 0)] UNSPEC_PCREL_ST))
+    (parallel [(set (match_dup 4)
+                   (match_dup 1))
+              (use (const_int 0))])]
+{
+  rtx mem = operands[0];
+  rtx addr = XEXP (mem, 0);
+  rtx got = gen_reg_rtx (DImode);
+
+  operands[2] = got;
+  operands[3] = addr;
+  operands[4] = change_address (mem, <MODE>mode, got);
+}
+  [(set_attr "type" "load")
+   (set_attr "length" "20")
+   (set_attr "pcrel_opt" "store_got")
+   (set_attr "cost" "4")
+   (set_attr "prefixed" "yes")])
+
+;; Load of the GOT address for a store operation that may be converted into a
+;; direct store.
+(define_insn "*mov<mode>_pcrel_opt_store_got"
+  [(set (match_operand:DI 0 "base_reg_operand" "=&b,&b,&b")
+       (unspec:DI [(match_operand:PO 1 "gpc_reg_operand" "<PO_constraint>")
+                   (match_operand:DI 2 "pcrel_external_address")
+                   (match_operand:DI 3 "const_int_operand" "n,n,n")]
+                  UNSPEC_PCREL_ST))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+{
+  return (INTVAL (operands[3])) ? "ld %0,%a2\n.Lpcrel%3:" : "ld %0,%a2";
+}
+  [(set_attr "type" "load")
+   (set_attr "length" "12")
+   (set_attr "pcrel_opt" "store_got")
+   (set_attr "cost" "4")
+   (set_attr "prefixed" "yes")])
+
+;; Secondary store instruction that uses the GOT pointer, and may be optimized
+;; into a NOP instruction.
+(define_insn "*mov<mode>_pcrel_opt_store_mem"
+  [(set (match_operand:QHI 0 "one_reg_memory_operand" "=Q,Q")
+       (match_operand:QHI 1 "gpc_reg_operand" "r,wa"))
+   (use (match_operand:DI 2 "const_int_operand" "n,n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "@
+  st<wd> %1,%0
+  stxsi<wd>x %x1,%y0"
+  [(set_attr "type" "store,fpstore")
+   (set_attr "pcrel_opt" "store,no")
+   (set_attr "prefixed" "no")])
+
+(define_insn "*movsi_pcrel_opt_store_mem"
+  [(set (match_operand:SI 0 "one_reg_memory_operand" "=Q,Q,Q")
+       (match_operand:SI 1 "gpc_reg_operand" "r,d,v"))
+   (use (match_operand:DI 2 "const_int_operand" "n,n,n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "@
+  stw %1,%0
+  stfiwx %1,%y0
+  stxsiwx %x1,%y0"
+  [(set_attr "type" "store,fpstore,fpstore")
+   (set_attr "pcrel_opt" "store,no,no")
+   (set_attr "prefixed" "no")])
+
+(define_insn "*movdi_pcrel_opt_store_mem"
+  [(set (match_operand:DI 0 "one_reg_memory_operand" "=Q,Q,Q")
+       (match_operand:DI 1 "gpc_reg_operand" "r,d,v"))
+   (use (match_operand:DI 2 "const_int_operand" "n,n,n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "@
+  std %1,%0
+  stfd %1,%0
+  stxsd %1,%0"
+  [(set_attr "type" "store,fpstore,fpstore")
+   (set_attr "pcrel_opt" "store")
+   (set_attr "prefixed" "no")])
+
+(define_insn "*movsf_pcrel_opt_store_mem"
+  [(set (match_operand:SF 0 "one_reg_memory_operand" "=Q,Q,Q")
+       (match_operand:SF 1 "gpc_reg_operand" "d,v,r"))
+   (use (match_operand:DI 2 "const_int_operand" "n,n,n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "@
+  stfs %1,%0
+  stxssp %1,%0
+  stw %1,%0"
+  [(set_attr "type" "fpstore,fpstore,store")
+   (set_attr "pcrel_opt" "store")
+   (set_attr "prefixed" "no")])
+
+(define_insn "*movdf_pcrel_opt_store_mem"
+  [(set (match_operand:DF 0 "one_reg_memory_operand" "=Q,Q,Q")
+       (match_operand:DF 1 "gpc_reg_operand" "d,v,r"))
+   (use (match_operand:DI 2 "const_int_operand" "n,n,n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "@
+  stfd %1,%0
+  stxsd %1,%0
+  std %1,%0"
+  [(set_attr "type" "fpstore,fpstore,store")
+   (set_attr "pcrel_opt" "store")
+   (set_attr "prefixed" "no")])
+
+(define_insn "*mov<mode>_pcrel_opt_store_mem"
+  [(set (match_operand:POV 0 "one_reg_memory_operand" "=Q")
+       (match_operand:POV 1 "gpc_reg_operand" "wa"))
+   (use (match_operand:DI 2 "const_int_operand" "n"))]
+  "TARGET_PCREL && TARGET_PCREL_OPT && TARGET_POWERPC64"
+  "stxv %x1,%0"
+  [(set_attr "type" "vecstore")
+   (set_attr "pcrel_opt" "store")
+   (set_attr "prefixed" "no")])
Index: gcc/config/rs6000/predicates.md
===================================================================
--- gcc/config/rs6000/predicates.md     (revision 274194)
+++ gcc/config/rs6000/predicates.md     (working copy)
@@ -775,6 +775,13 @@ (define_predicate "indexed_or_indirect_o
   return indexed_or_indirect_address (op, mode);
 })
 
+;; Return 1 if the operand uses a single register for the address.
+(define_predicate "one_reg_memory_operand"
+  (match_code "mem")
+{
+  return REG_P (XEXP (op, 0));
+})
+
 ;; Like indexed_or_indirect_operand, but also allow a GPR register if direct
 ;; moves are supported.
 (define_predicate "reg_or_indexed_operand"
Index: gcc/config/rs6000/rs6000-cpus.def
===================================================================
--- gcc/config/rs6000/rs6000-cpus.def   (revision 274194)
+++ gcc/config/rs6000/rs6000-cpus.def   (working copy)
@@ -79,10 +79,12 @@
 #define ISA_FUTURE_MASKS_SERVER        (ISA_3_0_MASKS_SERVER                   
\
                                 | OPTION_MASK_FUTURE                   \
                                 | OPTION_MASK_PCREL                    \
+                                | OPTION_MASK_PCREL_OPT                \
                                 | OPTION_MASK_PREFIXED_ADDR)
 
 /* Flags that need to be turned off if -mno-future.  */
 #define OTHER_FUTURE_MASKS     (OPTION_MASK_PCREL                      \
+                                | OPTION_MASK_PCREL_OPT                \
                                 | OPTION_MASK_PREFIXED_ADDR)
 
 /* Flags that need to be turned off if -mno-power9-vector.  */
@@ -138,6 +140,7 @@
                                 | OPTION_MASK_P9_MISC                  \
                                 | OPTION_MASK_P9_VECTOR                \
                                 | OPTION_MASK_PCREL                    \
+                                | OPTION_MASK_PCREL_OPT                \
                                 | OPTION_MASK_POPCNTB                  \
                                 | OPTION_MASK_POPCNTD                  \
                                 | OPTION_MASK_POWERPC64                \
Index: gcc/config/rs6000/rs6000-passes.def
===================================================================
--- gcc/config/rs6000/rs6000-passes.def (revision 274194)
+++ gcc/config/rs6000/rs6000-passes.def (working copy)
@@ -25,3 +25,12 @@ along with GCC; see the file COPYING3.
  */
 
   INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
+
+/* The pcrel_opt pass must be the last pass before the final pass.  This pass
+   combines references to external pc-relative variables with their use.
+   There must be only one reference to the external pointer loaded in order to
+   do the optimization.  Otherwise we load up the address (either via PADDI if
+   the label is local or via a PLD from the got section if it is defined in
+   another module) and use the value as a base pointer.  */
+
+  INSERT_PASS_BEFORE (pass_final, 1, pass_pcrel_opt);
Index: gcc/config/rs6000/rs6000-pcrel.c
===================================================================
--- gcc/config/rs6000/rs6000-pcrel.c    (revision 0)
+++ gcc/config/rs6000/rs6000-pcrel.c    (working copy)
@@ -0,0 +1,463 @@
+/* Subroutines used to support the pc-relative linker optimization.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file implements a RTL pass that looks for pc-relative loads of the
+   address of an external variable using the PCREL_GOT relocation and a single
+   load/store that uses that GOT pointer.  If that is found we create the
+   PCREL_OPT relocation to possibly convert:
+
+       pld b,var@pcrel@got(0),1
+
+       # possibly other instructions that do not use the base register 'b' or
+        # the result register 'r'.
+
+       lwz r,0(b)
+
+   into:
+
+       plwz r,var@pcrel(0),1
+
+       # possibly other instructions that do not use the base register 'b' or
+        # the result register 'r'.
+
+       nop
+
+   If the variable is not defined in the main program or the code using it is
+   not in the main program, the linker puts the address in the .got section and
+   does:
+
+       .section .got
+       .Lvar_got:      .dword var
+
+       .section .text
+       pld b,.Lvar_got@pcrel(0),1
+
+       # possibly other instructions that do not use the base register 'b' or
+        # the result register 'r'.
+
+       lwz r,0(b)
+
+   We only look for a single usage in the basic block where the GOT pointer is
+   loaded.  Multiple uses or references in another basic block will force us to
+   not use the PCREL_OPT relocation.
+
+   This file also contains the support function for prefixed memory to emit the
+   leading 'p' in front of prefixed instructions, and to create the necessary
+   relocations needed for PCREL_OPT.  */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "memmodel.h"
+#include "df.h"
+#include "tm_p.h"
+#include "ira.h"
+#include "print-tree.h"
+#include "varasm.h"
+#include "explow.h"
+#include "expr.h"
+#include "output.h"
+#include "tree-pass.h"
+#include "rtx-vector-builder.h"
+#include "print-rtl.h"
+#include "insn-attr.h"
+
+
+// Pass descriptor for the RTL pass that optimizes pc-relative references.
+const pass_data pass_data_pcrel =
+{
+  RTL_PASS,                    // type
+  "pcrel",                     // name
+  OPTGROUP_NONE,               // optinfo_flags
+  TV_NONE,                     // tv_id
+  0,                           // properties_required
+  0,                           // properties_provided
+  0,                           // properties_destroyed
+  0,                           // todo_flags_start
+  TODO_df_finish,              // todo_flags_finish
+};
+
+// The PCREL_OPT pass object and the per-basic-block state it tracks.
+class pcrel : public rtl_opt_pass
+{
+private:
+  // Function to optimize pc relative loads/stores
+  unsigned int do_pcrel_opt (function *);
+
+  // A GOT pointer used for a load
+  void load_got (rtx_insn *);
+
+  // A load insn that uses the GOT pointer
+  void load_insn (rtx_insn *);
+
+  // A GOT pointer used for a store
+  void store_got (rtx_insn *);
+
+  // A store insn that uses the GOT pointer
+  void store_insn (rtx_insn *);
+
+  // Counts of GOT loads, candidate loads/stores, and optimized loads/stores
+  unsigned long num_got_loads;
+  unsigned long num_got_stores;
+  unsigned long num_loads;
+  unsigned long num_stores;
+  unsigned long num_opt_loads;
+  unsigned long num_opt_stores;
+
+  // We record the GOT insn for each register that sets a GOT for a load or a
+  // store instruction.  The array is indexed by the GPR number.
+  rtx_insn *got_reg[32];
+
+public:
+  pcrel (gcc::context *ctxt)
+  : rtl_opt_pass (pass_data_pcrel, ctxt),
+    num_got_loads (0),
+    num_got_stores (0),
+    num_loads (0),
+    num_stores (0),
+    num_opt_loads (0),
+    num_opt_stores (0)
+  {}
+
+  ~pcrel (void)
+  {}
+
+  // opt_pass methods; gate needs TARGET_PCREL, TARGET_PCREL_OPT and optimize.
+  virtual bool gate (function *)
+  {
+    return TARGET_PCREL && TARGET_PCREL_OPT && optimize;
+  }
+
+  virtual unsigned int execute (function *fun)
+  {
+    return do_pcrel_opt (fun);
+  }
+
+  opt_pass *clone ()
+  {
+    return new pcrel (m_ctxt);
+  }
+};
+
+
+/* Return a fresh, non-zero marker used to create the backward pointing label
+   that links the load or store to the insn that loads the address of an
+   external label with PCREL_GOT.  This allows us to create the necessary
+   R_PPC64_PCREL_OPT relocation to link the two instructions.  */
+
+static rtx
+pcrel_marker (void)
+{
+  static unsigned int next_marker = 0;
+
+  next_marker += 1;
+  return GEN_INT (next_marker);
+}
+
+
+// Record INSN, a PCREL_OPT load GOT insn, in got_reg[] under the register
+// number of the GOT pointer that it sets, and count it.
+//
+// The PCREL_OPT LOAD_GOT insn looks like:
+//
+//     (parallel [(set (base) (addr))
+//                (set (reg)  (unspec [(const_int 0)] UNSPEC_PCREL_LD))
+//                (use (marker))])
+//
+// The base register is the GOT address, and the marker is a numeric label that
+// is created in this pass if the only use of the GOT load pointer is for a
+// single load.
+
+void
+pcrel::load_got (rtx_insn *insn)
+{
+  rtx pattern = PATTERN (insn);
+  rtx set = XVECEXP (pattern, 0, 0);
+  int got = REGNO (SET_DEST (set));
+
+  gcc_assert (IN_RANGE (got, FIRST_GPR_REGNO+1, LAST_GPR_REGNO));
+  got_reg[got] = insn;
+  num_got_loads++;
+}
+
+// See if this load is the only use of a previously recorded GOT pointer.  If
+// so, allocate a marker and store it in both the GOT insn and this insn.
+//
+// The PCREL_OPT LOAD insn looks like:
+//
+//     (parallel [(set (reg) (mem))
+//                (use (reg))
+//                (use (marker))])
+//
+// Between the reg and the memory might be a SIGN_EXTEND, ZERO_EXTEND, or
+// FLOAT_EXTEND:
+//
+//     (parallel [(set (reg) (sign_extend (mem)))
+//                (use (reg))
+//                (use (marker))])
+
+void
+pcrel::load_insn (rtx_insn *insn)
+{
+  num_loads++;
+
+  /* If the optimizer has changed the load instruction, just use the GOT
+     pointer as an address.  */
+  rtx pattern = PATTERN (insn);
+  if (GET_CODE (pattern) != PARALLEL || XVECLEN (pattern, 0) != 3)
+    return;
+
+  rtx set = XVECEXP (pattern, 0, 0);
+  if (GET_CODE (set) != SET
+      || GET_CODE (XVECEXP (pattern, 0, 1)) != USE
+      || GET_CODE (XVECEXP (pattern, 0, 2)) != USE)
+    return;
+
+  rtx dest = SET_DEST (set);
+  rtx src = SET_SRC (set);
+
+  if (!rtx_equal_p (dest, XEXP (XVECEXP (pattern, 0, 1), 0)))
+    return;
+
+  if (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND
+      || GET_CODE (src) == FLOAT_EXTEND)
+    src = XEXP (src, 0);
+
+  if (!MEM_P (src))
+    return;
+
+  rtx addr = XEXP (src, 0);
+  if (!REG_P (addr))
+    return;
+
+  int r = REGNO (addr);
+  if (!IN_RANGE (r, FIRST_GPR_REGNO+1, LAST_GPR_REGNO))
+    return;
+
+  rtx_insn *got_insn = got_reg[r];
+
+  // Optimize only if a LOAD_GOT insn set this register earlier in the block,
+  // the register is not used in between, and the register either dies at this
+  // insn or is overwritten by the loaded value.
+  if (got_insn
+      && get_attr_pcrel_opt (got_insn) == PCREL_OPT_LOAD_GOT
+      && !reg_used_between_p (addr, got_insn, insn)
+      && (find_reg_note (insn, REG_DEAD, addr) || rtx_equal_p (dest, addr)))
+    {
+      rtx marker = pcrel_marker ();
+      rtx got_use = XVECEXP (PATTERN (got_insn), 0, 2);
+      rtx insn_use = XVECEXP (pattern, 0, 2);
+
+      gcc_checking_assert (rtx_equal_p (XEXP (got_use, 0), const0_rtx));
+      gcc_checking_assert (rtx_equal_p (XEXP (insn_use, 0), const0_rtx));
+
+      XEXP (got_use, 0) = marker;
+      XEXP (insn_use, 0) = marker;
+      num_opt_loads++;
+    }
+
+  // Forget the GOT now that we've used it.
+  got_reg[r] = (rtx_insn *)0;
+}
+
+// Record INSN, a PCREL_OPT store GOT insn, in got_reg[] under the register
+// number of the GOT pointer that it sets, and count it.
+//
+// The PCREL_OPT STORE_GOT insn looks like:
+//
+//     (set (base)
+//          (unspec:DI [(src)
+//                      (addr)
+//                      (marker)] UNSPEC_PCREL_ST))
+//
+// The base register is the GOT address, and the marker is a numeric label that
+// is created in this pass or 0 to indicate there are other uses of the GOT
+// pointer.
+
+void
+pcrel::store_got (rtx_insn *insn)
+{
+  rtx pattern = PATTERN (insn);
+  int got = REGNO (SET_DEST (pattern));
+
+  gcc_checking_assert (IN_RANGE (got, FIRST_GPR_REGNO+1, LAST_GPR_REGNO));
+  got_reg[got] = insn;
+  num_got_stores++;
+}
+
+// See if this store is the only use of a previously recorded GOT pointer.  If
+// so, allocate a marker and store it in both the GOT insn and this insn.
+//
+// The PCREL_OPT STORE insn looks like:
+//
+//     (parallel [(set (mem) (reg))
+//                (use (marker))])
+
+void
+pcrel::store_insn (rtx_insn *insn)
+{
+  num_stores++;
+
+  /* If the optimizer has changed the store instruction, just use the GOT
+     pointer as an address.  */
+  rtx pattern = PATTERN (insn);
+  if (GET_CODE (pattern) != PARALLEL || XVECLEN (pattern, 0) != 2)
+    return;
+
+  rtx set = XVECEXP (pattern, 0, 0);
+  if (GET_CODE (set) != SET || GET_CODE (XVECEXP (pattern, 0, 1)) != USE)
+    return;
+
+  rtx dest = SET_DEST (set);
+
+  if (!MEM_P (dest))
+    return;
+
+  rtx addr = XEXP (dest, 0);
+  if (!REG_P (addr))
+    return;
+
+  int r = REGNO (addr);
+  if (!IN_RANGE (r, FIRST_GPR_REGNO+1, LAST_GPR_REGNO))
+    return;
+
+  rtx_insn *got_insn = got_reg[r];
+
+  // Optimize only if a STORE_GOT insn set this register earlier, the register
+  // is not used in between, and the register dies at this insn.
+  if (got_insn
+      && get_attr_pcrel_opt (got_insn) == PCREL_OPT_STORE_GOT
+      && !reg_used_between_p (addr, got_insn, insn)
+      && find_reg_note (insn, REG_DEAD, addr))
+    {
+      rtx marker = pcrel_marker ();
+      rtx got_src = SET_SRC (PATTERN (got_insn));
+      rtx insn_use = XVECEXP (pattern, 0, 1);
+
+      gcc_checking_assert (rtx_equal_p (XVECEXP (got_src, 0, 2), const0_rtx));
+      gcc_checking_assert (rtx_equal_p (XEXP (insn_use, 0), const0_rtx));
+
+      XVECEXP (got_src, 0, 2) = marker;
+      XEXP (insn_use, 0) = marker;
+      num_opt_stores++;
+    }
+
+  // Forget the GOT now
+  got_reg[r] = (rtx_insn *)0;
+}
+
+// Optimize pcrel external variable references in FUN.  Always returns 0.
+
+unsigned int
+pcrel::do_pcrel_opt (function *fun)
+{
+  basic_block bb;
+  rtx_insn *insn, *curr_insn = 0;
+
+  // Dataflow analysis; load_insn/store_insn test REG_DEAD notes (df_note).
+  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
+  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
+  df_note_add_problem ();
+  df_analyze ();
+  df_set_flags (DF_DEFER_INSN_RESCAN | DF_LR_RUN_DCE);
+
+  // Look at each basic block to see if there is a load of an external
+  // variable's GOT address, and a single load/store using that GOT address.
+  FOR_ALL_BB_FN (bb, fun)
+    {
+      bool clear_got_p = true;
+
+      FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
+       {
+         if (clear_got_p)
+           {
+             memset ((void *) &got_reg[0], 0, sizeof (got_reg));
+             clear_got_p = false;
+           }
+
+         if (NONJUMP_INSN_P (insn))
+           {
+             rtx pattern = PATTERN (insn);
+             if (GET_CODE (pattern) == SET || GET_CODE (pattern) == PARALLEL)
+               {
+                 switch (get_attr_pcrel_opt (insn))
+                   {
+                   case PCREL_OPT_NO:
+                     break;
+
+                   case PCREL_OPT_LOAD_GOT:
+                     load_got (insn);
+                     break;
+
+                   case PCREL_OPT_LOAD:
+                     load_insn (insn);
+                     break;
+
+                   case PCREL_OPT_STORE_GOT:
+                     store_got (insn);
+                     break;
+
+                   case PCREL_OPT_STORE:
+                     store_insn (insn);
+                     break;
+
+                   default:
+                     gcc_unreachable ();
+                   }
+               }
+           }
+
+         /* Don't let the GOT load be moved before a label, jump, or call and
+            the dependent load/store after the label, jump, or call.  */
+         else if (JUMP_P (insn) || CALL_P (insn) || LABEL_P (insn))
+           clear_got_p = true;
+       }
+    }
+
+  // Rebuild ud chains.
+  df_remove_problem (df_chain);
+  df_process_deferred_rescans ();
+  df_set_flags (DF_RD_PRUNE_DEAD_DEFS | DF_LR_RUN_DCE);
+  df_chain_add_problem (DF_UD_CHAIN);
+  df_analyze ();
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "\npc-relative optimizations:\n");
+      fprintf (dump_file, "\tgot loads        = %lu\n", num_got_loads);
+      fprintf (dump_file, "\tpotential loads  = %lu\n", num_loads);
+      fprintf (dump_file, "\toptimized loads  = %lu\n", num_opt_loads);
+      fprintf (dump_file, "\tgot stores       = %lu\n", num_got_stores);
+      fprintf (dump_file, "\tpotential stores = %lu\n", num_stores);
+      fprintf (dump_file, "\toptimized stores = %lu\n\n", num_opt_stores);
+    }
+  return 0;
+}
+
+// Allocate a new pcrel pass object for context CTXT.
+rtl_opt_pass *
+make_pass_pcrel_opt (gcc::context *ctxt)
+{
+  return new pcrel (ctxt);
+}
Index: gcc/config/rs6000/rs6000-prefixed.c
===================================================================
--- gcc/config/rs6000/rs6000-prefixed.c (revision 274194)
+++ gcc/config/rs6000/rs6000-prefixed.c (working copy)
@@ -46,13 +46,39 @@
    instruction is printed out.  */
 static bool next_insn_prefixed_p;
 
+/* Numeric label that is the address of the GOT load instruction + 8 that we
+   link the R_PPC64_PCREL_OPT relocation to on the next instruction.  */
+static unsigned int pcrel_opt_label_num;
+
 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
    outputting the assembler code.  On the PowerPC, we remember if the current
-   insn is a prefixed insn where we need to emit a 'p' before the insn.  */
+   insn is a prefixed insn where we need to emit a 'p' before the insn.
+
+   In addition, if the insn is part of a pc-relative reference to an external
+   label optimization, this is recorded also.  */
 void
-rs6000_final_prescan_insn (rtx_insn *insn)
+rs6000_final_prescan_insn (rtx_insn *insn, rtx operands[], int noperands)
 {
   next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
+
+  enum attr_pcrel_opt pcrel_attr = get_attr_pcrel_opt (insn);
+
+  /* For the load and store instructions that are tied to a GOT pointer,
+     operand 3 contains the marker for loads and operand 2 contains the marker
+     for stores, so NOPERANDS must exceed that index.  If the marker is
+     non-zero, it is the numeric label where we load the address + 8.  */
+  if (pcrel_attr == PCREL_OPT_LOAD)
+    {
+      gcc_assert (noperands > 3);
+      pcrel_opt_label_num = INTVAL (operands[3]);
+    }
+  else if (pcrel_attr == PCREL_OPT_STORE)
+    {
+      gcc_assert (noperands > 2);
+      pcrel_opt_label_num = INTVAL (operands[2]);
+    }
+  else
+    pcrel_opt_label_num = 0;
   return;
 }
 
@@ -64,6 +90,13 @@ rs6000_final_prescan_insn (rtx_insn *ins
 void
 rs6000_asm_output_opcode (FILE *stream, const char *)
 {
+  if (pcrel_opt_label_num)
+    {
+      fprintf (stream, ".reloc .Lpcrel%u-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%u-8)\n\t",
+              pcrel_opt_label_num, pcrel_opt_label_num);
+      pcrel_opt_label_num = 0;
+    }
+
   if (next_insn_prefixed_p)
     {
       next_insn_prefixed_p = false;
Index: gcc/config/rs6000/rs6000-protos.h
===================================================================
--- gcc/config/rs6000/rs6000-protos.h   (revision 274194)
+++ gcc/config/rs6000/rs6000-protos.h   (working copy)
@@ -251,7 +251,7 @@ extern bool prefixed_load_p (rtx_insn *)
 extern bool prefixed_store_p (rtx_insn *);
 extern bool prefixed_paddi_p (rtx_insn *);
 extern void rs6000_asm_output_opcode (FILE *, const char *);
-void rs6000_final_prescan_insn (rtx_insn *);
+void rs6000_final_prescan_insn (rtx_insn *, rtx [], int);
 #endif
 
 extern unsigned char rs6000_class_max_nregs[][LIM_REG_CLASSES];
@@ -263,6 +263,7 @@ extern bool rs6000_linux_float_exception
 namespace gcc { class context; }
 class rtl_opt_pass;
 
+extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
 extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
 extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
 extern bool rs6000_quadword_masked_address_p (const_rtx exp);
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c  (revision 274194)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -4212,7 +4212,17 @@ rs6000_option_override_internal (bool gl
       if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
        error ("%qs requires %qs", "-mpcrel", "-mprefixed-addr");
 
-      rs6000_isa_flags &= ~OPTION_MASK_PCREL;
+      rs6000_isa_flags &= ~(OPTION_MASK_PCREL
+                           | OPTION_MASK_PCREL_OPT);
+    }
+
+  /* Check -mfuture debug switches.  */
+  if (!TARGET_PCREL && TARGET_PCREL_OPT)
+    {
+      if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL_OPT) != 0)
+       error ("%qs requires %qs", "-mpcrel-opt", "-mpcrel");
+
+      rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
     }
 
   /* Print the options after updating the defaults.  */
@@ -4353,7 +4363,8 @@ rs6000_option_override_internal (bool gl
       if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
        error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
 
-      rs6000_isa_flags &= ~OPTION_MASK_PCREL;
+      rs6000_isa_flags &= ~(OPTION_MASK_PCREL
+                           | OPTION_MASK_PCREL_OPT);
     }
 
   if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
@@ -23169,6 +23180,7 @@ static struct rs6000_opt_mask const rs60
   { "mulhw",                   OPTION_MASK_MULHW,              false, true  },
   { "multiple",                        OPTION_MASK_MULTIPLE,           false, true  },
   { "pcrel",                   OPTION_MASK_PCREL,              false, true  },
+  { "pcrel-opt",               OPTION_MASK_PCREL_OPT,          false, true  },
   { "popcntb",                 OPTION_MASK_POPCNTB,            false, true  },
   { "popcntd",                 OPTION_MASK_POPCNTD,            false, true  },
   { "power8-fusion",           OPTION_MASK_P8_FUSION,          false, true  },
Index: gcc/config/rs6000/rs6000.h
===================================================================
--- gcc/config/rs6000/rs6000.h  (revision 274194)
+++ gcc/config/rs6000/rs6000.h  (working copy)
@@ -2580,7 +2580,7 @@ typedef struct GTY(()) machine_function
 do                                                                     \
   {                                                                    \
     if (TARGET_PREFIXED_ADDR)                                          \
-      rs6000_final_prescan_insn (INSN);                                        \
+      rs6000_final_prescan_insn (INSN, OPERANDS, NOPERANDS);           \
   }                                                                    \
 while (0)
 
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md (revision 274194)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -269,6 +269,31 @@ (define_enum "insn_form" [unknown  ; Unkn
                          x             ; Indexed addressing
                          prefixed])    ; Prefixed instruction
 
+;; Whether this instruction is part of the two instruction sequence that
+;; supports PCREL_OPT optimizations, where the linker can change code of the
+;; form:
+;;
+;;             pld b,var@got@pcrel
+;;     100:
+;;             # possibly other instructions
+;;             .reloc 100b-8,R_PPC64_PCREL_OPT,0
+;;             lwz r,0(b)
+;;
+;; into the following if 'var' is in the main program:
+;;
+;;             plwz r,var@pcrel
+;;             # possibly other instructions
+;;             nop
+;;
+;; The states are:
+;;     no              -- insn is not involved with PCREL_OPT optimizations
+;;     load_got        -- insn loads up the GOT pointer for a load instruction
+;;     load            -- insn is an offsettable load that uses the GOT pointer
+;;     store_got       -- insn loads up the GOT pointer for a store instruction
+;;     store           -- insn is an offsettable store that uses the GOT pointer
+
+(define_attr "pcrel_opt" "no,load_got,load,store_got,store" (const_string "no"))
+
 ;; Whether an insn is a prefixed insn, and an initial 'p' should be printed
 ;; before the instruction.  A prefixed instruction has a prefix instruction
 ;; word that extends the immediate value of the instructions from 12-16 bits to
@@ -14543,6 +14568,7 @@ (define_insn "*cmp<mode>_hw"
 
 
 
+(include "pcrel.md")
 (include "sync.md")
 (include "vector.md")
 (include "vsx.md")
Index: gcc/config/rs6000/rs6000.opt
===================================================================
--- gcc/config/rs6000/rs6000.opt        (revision 274194)
+++ gcc/config/rs6000/rs6000.opt        (working copy)
@@ -577,3 +577,7 @@ Generate (do not generate) prefixed memo
 mpcrel
 Target Report Mask(PCREL) Var(rs6000_isa_flags)
 Generate (do not generate) pc-relative memory addressing.
+
+mpcrel-opt
+Target Undocumented Mask(PCREL_OPT) Var(rs6000_isa_flags)
+Generate (do not generate) pc-relative memory optimizations for externals.
Index: gcc/config/rs6000/t-rs6000
===================================================================
--- gcc/config/rs6000/t-rs6000  (revision 274194)
+++ gcc/config/rs6000/t-rs6000  (working copy)
@@ -47,6 +47,10 @@ rs6000-call.o: $(srcdir)/config/rs6000/r
        $(COMPILE) $<
        $(POSTCOMPILE)
 
+rs6000-pcrel.o: $(srcdir)/config/rs6000/rs6000-pcrel.c
+       $(COMPILE) $<
+       $(POSTCOMPILE)
+
 rs6000-prefixed.o: $(srcdir)/config/rs6000/rs6000-prefixed.c
        $(COMPILE) $<
        $(POSTCOMPILE)
@@ -83,6 +87,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs
        $(srcdir)/config/rs6000/predicates.md \
        $(srcdir)/config/rs6000/constraints.md \
        $(srcdir)/config/rs6000/darwin.md \
+       $(srcdir)/config/rs6000/pcrel.md \
        $(srcdir)/config/rs6000/sync.md \
        $(srcdir)/config/rs6000/vector.md \
        $(srcdir)/config/rs6000/vsx.md \
Index: gcc/config.gcc
===================================================================
--- gcc/config.gcc      (revision 274194)
+++ gcc/config.gcc      (working copy)
@@ -500,7 +500,7 @@ or1k*-*-*)
 powerpc*-*-*)
        cpu_type=rs6000
        extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o rs6000-call.o"
-       extra_objs="${extra_objs} rs6000-prefixed.o"
+       extra_objs="${extra_objs} rs6000-prefixed.o rs6000-pcrel.o"
        extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
        extra_headers="${extra_headers} bmi2intrin.h bmiintrin.h"
        extra_headers="${extra_headers} xmmintrin.h mm_malloc.h emmintrin.h"
@@ -516,6 +516,7 @@ powerpc*-*-*)
        extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
        target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.c \$(srcdir)/config/rs6000/rs6000-call.c"
        target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-prefixed.c"
+       target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel.c"
        ;;
 pru-*-*)
        cpu_type=pru
@@ -528,9 +529,10 @@ riscv*)
 rs6000*-*-*)
        extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
        extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o rs6000-call.o"
-       extra_objs="${extra_objs} rs6000-prefixed.o"
+       extra_objs="${extra_objs} rs6000-prefixed.o rs6000-pcrel.o"
        target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.c \$(srcdir)/config/rs6000/rs6000-call.c"
        target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-prefixed.c"
+       target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel.c"
        ;;
 sparc*-*-*)
        cpu_type=sparc
Index: gcc/testsuite/gcc.target/powerpc/pcrel-opt-di.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pcrel-opt-di.c     (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pcrel-opt-di.c     (working copy)
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Determine if the pc-relative optimization using the R_PPC64_PCREL_OPT
+   relocation is supported.  */
+
+#ifndef TYPE
+#define TYPE long
+#endif
+
+extern TYPE ext;
+
+/* A single load of 'ext'.  This should generate:
+               PLD 9,ext@got@pcrel
+       .Label:
+               .reloc .Label-8,R_PPC64_PCREL_OPT,.-(.Label-8)
+               LD 3,0(9)  */
+TYPE
+get_ext (void)
+{
+  return ext;
+}
+
+/* A single store to 'ext'.  This should generate:
+               PLD 9,ext@got@pcrel
+       .Label:
+               .reloc .Label-8,R_PPC64_PCREL_OPT,.-(.Label-8)
+               STD 3,0(9)  */
+
+void
+set_ext (TYPE a)
+{
+  ext = a;
+}
+
+/* Because it has two references to 'ext' (a load and a store), this should
+   not generate a R_PPC64_PCREL_OPT relocation.  Instead it should generate:
+               PLD 10,ext@got@pcrel
+               LD 9,0(10)
+               ADDI 9,9,1
+               STD 9,0(10)  */
+
+void
+inc_ext (void)
+{
+  ext++;
+}
+
+/* { dg-final { scan-assembler-times "ext@got@pcrel"     3 } } */
+/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */
+

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797

Reply via email to