This patch adds support for the paddis instruction that might be added to a
future PowerPC processor.
I have committed all of the patches in my backlog (dense math registers, other
-mcpu=future instructions, random bug fixes, support for _Float16 and
__bfloat16, and optimizations for vector logical operations on power10/power11)
into the IBM vendor branch:
vendors/ibm/gcc-17-future
2026-07-01 Michael Meissner <[email protected]>
gcc/
* config/rs6000/constraints.md (eU): New constraint.
(eV): Likewise.
* config/rs6000/predicates.md (paddis_operand): New predicate.
(paddis_paddi_operand): Likewise.
(add_cint_operand): Add paddis support.
(reg_or_add_cint_operand): Add support for adds that can be done with
paddis and paddi/addi.
(add_operand): Add support for adds that can be done with paddis, but
not paddis + paddi/addi.
* config/rs6000/rs6000.cc (num_insns_constant_gpr): Add support for adds
that can be done with paddis and also paddis combined with paddi/addi.
(print_operand): Add %B<n> for paddis support.
* config/rs6000/rs6000.h (TARGET_PADDIS): New macro.
(SIGNED_INTEGER_64BIT_P): Likewise.
* config/rs6000/rs6000.md (add<mode>3 define_expand): Add paddis
support.
(*add<mode>3 define_insn): Likewise.
(movdi_internal64): Likewise.
(movdi splitter): New splitter for paddis + paddi/addi.
* doc/md.texi (PowerPC constraints): Add eU and eV documentation.
gcc/testsuite/
* gcc.target/powerpc/prefixed-addis.c: New test.
---
gcc/config/rs6000/constraints.md | 10 ++
gcc/config/rs6000/predicates.md | 79 ++++++++++-
gcc/config/rs6000/rs6000.cc | 21 ++-
gcc/config/rs6000/rs6000.h | 12 ++
gcc/config/rs6000/rs6000.md | 129 ++++++++++++++----
gcc/doc/md.texi | 6 +
.../gcc.target/powerpc/prefixed-addis.c | 24 ++++
7 files changed, 245 insertions(+), 36 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/powerpc/prefixed-addis.c
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 0d1cde5bd4d..0169a7b8522 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -222,6 +222,16 @@ (define_constraint "eQ"
"An IEEE 128-bit constant that can be loaded into VSX registers."
(match_operand 0 "easy_vector_constant_ieee128"))
+(define_constraint "eU"
+ "@internal integer constant that can be loaded with paddis"
+ (and (match_code "const_int")
+ (match_operand 0 "paddis_operand")))
+
+(define_constraint "eV"
+ "A signed integer constant that paddis and paddi instructions generate."
+ (and (match_code "const_int")
+ (match_operand 0 "paddis_paddi_operand")))
+
;; Floating-point constraints. These two are defined so that insn
;; length attributes can be calculated exactly.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 737eafc2bc5..556ff755f5a 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -378,6 +378,68 @@ (define_predicate "cint34_operand"
return SIGNED_INTEGER_34BIT_P (INTVAL (op));
})
+;; Return 1 if op is a 64-bit constant that uses the paddis instruction
+(define_predicate "paddis_operand"
+ (match_code "const_int")
+{
+ if (!TARGET_PADDIS)
+ return false;
+
+ if (mode != VOIDmode && mode != DImode)
+ return false;
+
+ HOST_WIDE_INT value = INTVAL (op);
+
+ if (!SIGNED_INTEGER_64BIT_P (value))
+ return false;
+
+ /* If paddi alone can handle the number, don't return true. */
+ if (SIGNED_INTEGER_34BIT_P (value))
+ return false;
+
+ /* If the bottom 32-bits are non-zero, paddis alone can't handle it. */
+ if ((value & HOST_WIDE_INT_C(0xffffffff)) != 0)
+ return false;
+
+ return true;
+})
+
+;; Return 1 if op is a non-negative 64-bit constant that needs both paddis
+;; (upper 32 bits) and paddi/addi (lower 32 bits) to be created.  Don't
+;; generate paddi and paddis if we can do it via addis and rldicl.
+(define_predicate "paddis_paddi_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS)
+    return false;
+
+  if (mode != VOIDmode && mode != DImode)
+    return false;
+
+  HOST_WIDE_INT value = INTVAL (op);
+
+  if (!SIGNED_INTEGER_64BIT_P (value))
+    return false;
+
+  /* Don't worry about negative values at the moment.  */
+  if (value < 0)
+    return false;
+
+  /* If paddi alone can handle the number, don't return true.  */
+  if (SIGNED_INTEGER_34BIT_P (value))
+    return false;
+
+  /* Fail if addis/rldicl works; always test as DImode, never VOIDmode.  */
+  if (rs6000_is_valid_and_mask (op, DImode))
+    return false;
+
+  /* Only return true if we need both paddi and paddis.  */
+  if ((value & HOST_WIDE_INT_C (0xffffffff)) == 0)
+    return false;
+
+  return true;
+})
+
;; Return 1 if op is a register that is not special.
;; Disallow (SUBREG:SF (REG:SI)) and (SUBREG:SI (REG:SF)) on VSX systems where
;; you need to be careful in moving a SFmode to SImode and vice versa due to
@@ -568,18 +630,22 @@ (define_predicate "reg_or_zero_operand"
(ior (match_operand 0 "zero_constant")
(match_operand 0 "gpc_reg_operand")))
-;; Return 1 if op is a constant integer valid for addition with addis, addi.
+;; Return 1 if op is a constant integer valid for addition with addis,
+;; addi, paddi, or paddis.
(define_predicate "add_cint_operand"
(and (match_code "const_int")
- (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)
- + (mode == SImode ? 0x80000000 : 0x80008000))
- < (unsigned HOST_WIDE_INT) 0x100000000ll")))
+ (ior (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)
+ + (mode == SImode ? 0x80000000 : 0x80008000))
+ < (unsigned HOST_WIDE_INT) 0x100000000ll")
+ (match_operand 0 "cint34_operand")
+ (match_operand 0 "paddis_operand"))))
;; Return 1 if op is a constant integer valid for addition
;; or non-special register.
(define_predicate "reg_or_add_cint_operand"
(if_then_else (match_code "const_int")
- (match_operand 0 "add_cint_operand")
+ (ior (match_operand 0 "add_cint_operand")
+ (match_operand 0 "paddis_paddi_operand"))
(match_operand 0 "gpc_reg_operand")))
;; Return 1 if op is a constant integer valid for subtraction
@@ -1122,7 +1188,8 @@ (define_predicate "add_operand"
(if_then_else (match_code "const_int")
(match_test "satisfies_constraint_I (op)
|| satisfies_constraint_L (op)
- || satisfies_constraint_eI (op)")
+ || satisfies_constraint_eI (op)
+ || satisfies_constraint_eU (op)")
(match_operand 0 "gpc_reg_operand")))
;; Return 1 if the operand is either a non-special register, or 0, or -1.
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 0767c89df8b..d8ce482ca0f 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -6167,7 +6167,18 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
return 1;
- else if (TARGET_POWERPC64)
+ /* PADDIS support. */
+ else if (TARGET_PADDIS)
+ {
+ rtx num = GEN_INT (value);
+ if (paddis_operand (num, VOIDmode))
+ return 1; /* paddis alone. */
+
+ if (paddis_paddi_operand (num, VOIDmode))
+ return 2; /* paddis + paddi/addi. */
+ }
+
+ if (TARGET_POWERPC64)
{
int num_insns = 0;
rs6000_emit_set_long_const (nullptr, value, &num_insns);
@@ -14277,6 +14288,14 @@ print_operand (FILE *file, rtx x, int code)
fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
return;
+    case 'B':
+      /* Print the upper 32 bits of a 64-bit constant (paddis immediate).  */
+      if (!CONST_INT_P (x))
+	output_operand_lossage ("Not a constant.");
+      else
+	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 32);
+      return;
+
case 'D':
/* Like 'J' but get to the GT bit only. */
if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 3064db2a7e4..cccb839b489 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -574,6 +574,11 @@ extern int rs6000_vector_align[];
/* Whether we have XVRLW support. */
#define TARGET_XVRLW TARGET_FUTURE
+/* Whether we have PADDIS support. */
+#define TARGET_PADDIS (TARGET_FUTURE \
+ && TARGET_PREFIXED \
+ && TARGET_POWERPC64)
+
/* Whether the various reciprocal divide/square root estimate instructions
exist, and whether we should automatically generate code for the instruction
by default. */
@@ -2497,6 +2502,13 @@ typedef struct GTY(()) machine_function
#define SIGNED_INTEGER_16BIT_P(VALUE) SIGNED_INTEGER_NBIT_P (VALUE, 16)
#define SIGNED_INTEGER_34BIT_P(VALUE) SIGNED_INTEGER_NBIT_P (VALUE, 34)
+#if HOST_BITS_PER_WIDE_INT > 64
+#define SIGNED_INTEGER_64BIT_P(VALUE) SIGNED_INTEGER_NBIT_P (VALUE, 64)
+
+#else
+#define SIGNED_INTEGER_64BIT_P(VALUE) 1
+#endif
+
/* Like SIGNED_INTEGER_16BIT_P and SIGNED_INTEGER_34BIT_P, but with an extra
argument that gives a length to validate a range of addresses, to allow for
splitting insns into several insns, each of which has an offsettable
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 3759bdb1563..0dfe71ae0da 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -427,6 +427,7 @@ (define_attr "enabled" ""
(and (eq_attr "isa" "future")
(match_test "TARGET_FUTURE"))
(const_int 1)
+
] (const_int 0)))
;; If this instruction is microcoded on the CELL processor
@@ -1796,14 +1797,18 @@ (define_expand "add<mode>3"
(match_operand:SDI 2 "reg_or_add_cint_operand")))]
""
{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+
if (<MODE>mode == DImode && !TARGET_POWERPC64)
{
- rtx lo0 = gen_lowpart (SImode, operands[0]);
- rtx lo1 = gen_lowpart (SImode, operands[1]);
- rtx lo2 = gen_lowpart (SImode, operands[2]);
- rtx hi0 = gen_highpart (SImode, operands[0]);
- rtx hi1 = gen_highpart (SImode, operands[1]);
- rtx hi2 = gen_highpart_mode (SImode, DImode, operands[2]);
+ rtx lo0 = gen_lowpart (SImode, op0);
+ rtx lo1 = gen_lowpart (SImode, op1);
+ rtx lo2 = gen_lowpart (SImode, op2);
+ rtx hi0 = gen_highpart (SImode, op0);
+ rtx hi1 = gen_highpart (SImode, op1);
+ rtx hi2 = gen_highpart_mode (SImode, DImode, op2);
if (!reg_or_short_operand (lo2, SImode))
lo2 = force_reg (SImode, lo2);
@@ -1815,24 +1820,40 @@ (define_expand "add<mode>3"
DONE;
}
- if (CONST_INT_P (operands[2]) && !add_operand (operands[2], <MODE>mode))
+ if (CONST_INT_P (op2) && !add_operand (op2, <MODE>mode))
{
- rtx tmp = ((!can_create_pseudo_p ()
- || rtx_equal_p (operands[0], operands[1]))
- ? operands[0] : gen_reg_rtx (<MODE>mode));
+ rtx tmp = ((!can_create_pseudo_p () || rtx_equal_p (op0, op1))
+ ? op0
+ : gen_reg_rtx (<MODE>mode));
/* Adding a constant to r0 is not a valid insn, so use a different
strategy in that case. */
- if (reg_or_subregno (operands[1]) == 0 || reg_or_subregno (tmp) == 0)
+ if (reg_or_subregno (op1) == 0 || reg_or_subregno (tmp) == 0)
{
- if (operands[0] == operands[1])
+ if (op0 == op1)
FAIL;
- rs6000_emit_move (operands[0], operands[2], <MODE>mode);
- emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[0]));
+ rs6000_emit_move (op0, op2, <MODE>mode);
+ emit_insn (gen_add<mode>3 (op0, op1, op0));
+ DONE;
+ }
+
+ HOST_WIDE_INT val = INTVAL (op2);
+
+ /* If we have paddis, split the add into paddis and either addi or
+ paddi. However, if we can generate addis and rldicl, do that
+ instead of doing paddis/paddi. Emit the paddis first, just
+ in case this is a memory operation and we could fold the offset
+ into the memory operation. */
+
+ if (TARGET_PADDIS && paddis_paddi_operand (op2, <MODE>mode))
+ {
+ const HOST_WIDE_INT mask = HOST_WIDE_INT_C(0xffffffff);
+
+ emit_insn (gen_add<mode>3 (tmp, op1, GEN_INT (val & ~mask)));
+ emit_insn (gen_add<mode>3 (op0, tmp, GEN_INT (val & mask)));
DONE;
}
- HOST_WIDE_INT val = INTVAL (operands[2]);
HOST_WIDE_INT low = sext_hwi (val, 16);
HOST_WIDE_INT rest = trunc_int_for_mode (val - low, <MODE>mode);
@@ -1842,24 +1863,28 @@ (define_expand "add<mode>3"
/* The ordering here is important for the prolog expander.
When space is allocated from the stack, adding 'low' first may
produce a temporary deallocation (which would be bad). */
- emit_insn (gen_add<mode>3 (tmp, operands[1], GEN_INT (rest)));
- emit_insn (gen_add<mode>3 (operands[0], tmp, GEN_INT (low)));
+ emit_insn (gen_add<mode>3 (tmp, op1, GEN_INT (rest)));
+ emit_insn (gen_add<mode>3 (op0, tmp, GEN_INT (low)));
DONE;
}
})
(define_insn "*add<mode>3"
- [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r,r,r")
- (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b,b,b")
- (match_operand:GPR 2 "add_operand" "r,I,L,eI")))]
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r,r, r, r")
+ (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b,b, b, b")
+ (match_operand:GPR 2 "add_operand" "r,I,L,eI,eU")))]
""
"@
add %0,%1,%2
addi %0,%1,%2
addis %0,%1,%v2
- addi %0,%1,%2"
+ addi %0,%1,%2
+ paddis %0,%1,%B2"
[(set_attr "type" "add")
- (set_attr "isa" "*,*,*,p10")])
+ (set_attr "isa" "*,*,*,p10,future")
+ (set_attr "length" "*,*,*,*,12")
+ (set_attr "prefixed" "*,*,*,*,yes")
+ (set_attr "maybe_prefixed" "*,*,*,*,no")])
(define_insn "*addsi3_high"
[(set (match_operand:SI 0 "gpc_reg_operand" "=b")
@@ -9882,7 +9907,7 @@ (define_split
DONE;
})
-;; GPR store GPR load GPR move
+;; GPR store GPR load GPR move GPR paddis GPR paddis+paddi
;; GPR li GPR lis GPR pli GPR #
;; FPR store FPR load FPR move
;; AVX store AVX store AVX load AVX load VSX move
@@ -9892,7 +9917,7 @@ (define_split
;; VSX->GPR GPR->VSX
(define_insn "*movdi_internal64"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=YZ, r, r,
+ "=YZ, r, r, r, b,
r, r, r, r,
m, ^d, ^d,
wY, Z, $v, $v, ^wa,
@@ -9901,7 +9926,7 @@ (define_insn "*movdi_internal64"
r, *h, *h,
?r, ?wa")
(match_operand:DI 1 "input_operand"
- "r, YZ, r,
+ "r, YZ, r, eU, eV,
I, L, eI, nF,
^d, m, ^d,
^v, $v, wY, Z, ^wa,
@@ -9916,6 +9941,8 @@ (define_insn "*movdi_internal64"
std%U0%X0 %1,%0
ld%U1%X1 %0,%1
mr %0,%1
+ paddis %0,0,%B1
+ #
li %0,%1
lis %0,%v1
li %0,%1
@@ -9941,7 +9968,7 @@ (define_insn "*movdi_internal64"
mfvsrd %0,%x1
mtvsrd %x0,%1"
[(set_attr "type"
- "store, load, *,
+ "store, load, *, *, *,
*, *, *, *,
fpstore, fpload, fpsimple,
fpstore, fpstore, fpload, fpload, veclogical,
@@ -9951,7 +9978,7 @@ (define_insn "*movdi_internal64"
mfvsr, mtvsr")
(set_attr "size" "64")
(set_attr "length"
- "*, *, *,
+ "*, *, *, 12, 24,
*, *, *, 20,
*, *, *,
*, *, *, *, *,
@@ -9960,14 +9987,32 @@ (define_insn "*movdi_internal64"
*, *, *,
*, *")
(set_attr "isa"
- "*, *, *,
+ "*, *, *, future, future,
*, *, p10, *,
*, *, *,
p9v, p7v, p9v, p7v, *,
p9v, p9v, p7v, *, *,
p7v, p7v,
*, *, *,
- p8v, p8v")])
+ p8v, p8v")
+ (set_attr "prefixed"
+ "*, *, *, yes, yes,
+ *, *, *, *,
+ *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, *,
+ *, *,
+ *, *, *,
+ *, *")
+ (set_attr "maybe_prefixed"
+ "*, *, *, no, no,
+ *, *, *, *,
+ *, *, *,
+ *, *, *, *, *,
+ *, *, *, *, *,
+ *, *,
+ *, *, *,
+ *, *")])
; Some DImode loads are best done as a load of -1 followed by a mask
; instruction.
@@ -9985,6 +10030,32 @@ (define_split
(match_dup 1)))]
"")
+;; Split a constant that can be generated by a paddis and paddi into 2
+;; instructions. We can't split setting r0 since that would generate:
+;; paddis r0,0,upper
+;; paddi r0,r0,lower
+;;
+;; which gives the wrong value.
+
+(define_split
+ [(set (match_operand:DI 0 "base_reg_operand")
+ (match_operand:DI 1 "paddis_paddi_operand"))]
+ "TARGET_PADDIS"
+ [(set (match_dup 2)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:DI (match_dup 2)
+ (match_dup 4)))]
+{
+ HOST_WIDE_INT value = INTVAL (operands[1]);
+ const HOST_WIDE_INT mask = HOST_WIDE_INT_C (0xffffffff);
+ operands[2] = (can_create_pseudo_p ()
+ ? gen_reg_rtx (DImode)
+ : operands[0]);
+ operands[3] = GEN_INT (value & ~mask);
+ operands[4] = GEN_INT (value & mask);
+})
+
;; Split a load of a large constant into the appropriate five-instruction
;; sequence. Handle anything in a constant number of insns.
;; When non-easy constants can go in the TOC, this should use
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index f227353bd82..b22d9092ea2 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3368,6 +3368,12 @@ loaded to a VSX register with one prefixed instruction.
An IEEE 128-bit constant that can be loaded into a VSX register with
the @code{lxvkq} instruction.
+@item eU
+A signed integer constant that can be used with the paddis instruction.
+
+@item eV
+A signed integer constant that paddis and paddi instructions generate.
+
@ifset INTERNALS
@item G
A floating point constant that can be loaded into a register with one
diff --git a/gcc/testsuite/gcc.target/powerpc/prefixed-addis.c
b/gcc/testsuite/gcc.target/powerpc/prefixed-addis.c
new file mode 100644
index 00000000000..d08e3675f94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/prefixed-addis.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test whether the paddis instruction is generated, both on its own and
+ in combination with paddi. */
+
+#include <stddef.h>
+
+size_t
+prefix_addis_addi (size_t x)
+{
+ return x + 0x123456789ABCDEUL; /* paddis + paddi. */
+}
+
+size_t
+prefix_addis (size_t x)
+{
+ return x + 0x12345600000000UL; /* paddis. */
+}
+
+/* { dg-final { scan-assembler-times {\mpaddis\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mpaddi\M} 1 } } */
--
2.54.0
--
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: [email protected]