Hi All,
This patch depends on the following two patches, applied in this order:
1. mcpu=future:
https://gcc.gnu.org/pipermail/gcc-patches/2025-December/703739.html
2. future builtin infra:
https://gcc.gnu.org/pipermail/gcc-patches/2026-March/709782.html
Bootstrapped and regtested on powerpc64le-linux-gnu with no regressions.
Thanks and regards,
Kishan Parmar
Add support for vector uncompress and unpack instructions proposed in
RFC02691. These instructions may or may not be added to a future Power
processor, and the names of the builtins may change in the future.
The instructions are exposed through new builtin and intrinsic
interfaces and are enabled when compiling with -mcpu=future.
This patch adds RTL patterns for vector uncompress (nibble, byte, and
halfword) and unpack operations in altivec.md, along with the
corresponding builtin definitions in rs6000-builtins.def and overload
entries in rs6000-overload.def.
The following new builtins are provided:
vector unsigned short vec_uncompresshn (vector unsigned char, vector unsigned
int)
vector unsigned int vec_uncompresshb (vector unsigned short, vector unsigned
short)
vector unsigned long long vec_uncompresshh (vector unsigned int, vector
unsigned char)
vector unsigned short vec_uncompressln (vector unsigned char, vector unsigned
int)
vector unsigned int vec_uncompresslb (vector unsigned short, vector unsigned
short)
vector unsigned long long vec_uncompresslh (vector unsigned int, vector
unsigned char)
vector signed char vec_unpack_hsn_to_byte (vector unsigned long long)
vector signed char vec_unpack_lsn_to_byte (vector unsigned long long)
vector unsigned char vec_unpack_int4_to_bf16 (vector unsigned short, const
int<2>)
vector unsigned char vec_unpack_int8_to_bf16 (vector unsigned short, const
int<1>)
vector float vec_unpack_int4_to_fp32 (vector unsigned int, const int<3>)
vector float vec_unpack_int8_to_fp32 (vector unsigned int, const int<2>)
gcc/ChangeLog:
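* config/rs6000/altivec.md (UNSPEC_VUCMPRHN, UNSPEC_VUCMPRLN)
(UNSPEC_VUCMPRHB, UNSPEC_VUCMPRLB, UNSPEC_VUCMPRHH, UNSPEC_VUCMPRLH)
(UNSPEC_VUPKINT4TOBF16, UNSPEC_VUPKINT8TOBF16, UNSPEC_VUPKINT4TOFP32)
(UNSPEC_VUPKINT8TOFP32): New unspecs.
(altivec_vupkhsntob): New define_insn for vupk[lh]sntob.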
(altivec_vupklsntob): Likewise.
(altivec_vupkint4tobf16): New define_insn vupkint4tobf16.
(altivec_vupkint8tobf16): New define_insn vupkint8tobf16.
(altivec_vupkint4tofp32): New define_insn vupkint4tofp32.
(altivec_vupkint8tofp32): New define_insn vupkint8tofp32.
(vu_hl): New attribute.
(vu_lh): Likewise.
(vucmpr_splt_val): Likewise.
(VUCMPR_N): New int iterator.
(VUCMPR_B): Likewise.
(VUCMPR_H): Likewise.
(altivec_vucmpr<vu_hl>n): New define_expand.
(altivec_vucmpr<vu_hl>n_direct): New define_insn for vucmpr<vu_hl>n.
(altivec_vucmpr<vu_hl>b): New define_expand.
(altivec_vucmpr<vu_hl>b_direct): New define_insn for vucmpr<vu_hl>b.
(altivec_vucmpr<vu_hl>h): New define_expand.
(altivec_vucmpr<vu_hl>h_direct): New define_insn for vucmpr<vu_hl>h.
* config/rs6000/rs6000-builtins.def: Add vector uncompress and unpack
builtins under [future].
* config/rs6000/rs6000-overload.def: Add vec_uncompress* and vec_unpack*
interfaces.
gcc/testsuite/ChangeLog:
* gcc.target/powerpc/future-vucmpr.c: New test.
* gcc.target/powerpc/future-vupk.c: New test.
---
gcc/config/rs6000/altivec.md | 182 ++++++++++++++++++
gcc/config/rs6000/rs6000-builtins.def | 38 ++++
gcc/config/rs6000/rs6000-overload.def | 48 +++++
.../gcc.target/powerpc/future-vucmpr.c | 60 ++++++
.../gcc.target/powerpc/future-vupk.c | 48 +++++
5 files changed, 376 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/powerpc/future-vucmpr.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/future-vupk.c
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 129f56245cd..e40ed7b442d 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -171,6 +171,16 @@
UNSPEC_SLDB
UNSPEC_SRDB
UNSPEC_VECTOR_SHIFT
+ UNSPEC_VUCMPRHN
+ UNSPEC_VUCMPRLN
+ UNSPEC_VUCMPRHB
+ UNSPEC_VUCMPRLB
+ UNSPEC_VUCMPRHH
+ UNSPEC_VUCMPRLH
+ UNSPEC_VUPKINT4TOBF16
+ UNSPEC_VUPKINT8TOBF16
+ UNSPEC_VUPKINT4TOFP32
+ UNSPEC_VUPKINT8TOFP32
])
(define_c_enum "unspecv"
@@ -4826,3 +4836,175 @@
(match_dup 3)]
UNSPEC_BCD_ADD_SUB)
(match_dup 4)))])])
+
+;; Vector unpack instructions for future.
+
+(define_insn "altivec_vupkhsntob"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V2DI 1 "register_operand" "v")]
+ UNSPEC_VUNPACK_HI_SIGN))]
+ "TARGET_FUTURE"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "vupkhsntob %0, %1";
+ else
+ return "vupklsntob %0, %1";
+}
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vupklsntob"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V2DI 1 "register_operand" "v")]
+ UNSPEC_VUNPACK_LO_SIGN))]
+ "TARGET_FUTURE"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "vupklsntob %0, %1";
+ else
+ return "vupkhsntob %0, %1";
+}
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vupkint4tobf16"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:QI 2 "const_0_to_3_operand" "i")]
+ UNSPEC_VUPKINT4TOBF16))]
+ "TARGET_FUTURE"
+ "vupkint4tobf16 %0, %1, %2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vupkint8tobf16"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:QI 2 "const_0_to_1_operand" "i")]
+ UNSPEC_VUPKINT8TOBF16))]
+ "TARGET_FUTURE"
+ "vupkint8tobf16 %0, %1, %2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vupkint4tofp32"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:QI 2 "const_0_to_7_operand" "i")]
+ UNSPEC_VUPKINT4TOFP32))]
+ "TARGET_FUTURE"
+ "vupkint4tofp32 %0, %1, %2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vupkint8tofp32"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:QI 2 "const_0_to_3_operand" "i")]
+ UNSPEC_VUPKINT8TOFP32))]
+ "TARGET_FUTURE"
+ "vupkint8tofp32 %0, %1, %2"
+ [(set_attr "type" "vecperm")])
+
+(define_int_attr vu_hl [(UNSPEC_VUCMPRHN "h") (UNSPEC_VUCMPRLN "l")
+ (UNSPEC_VUCMPRHB "h") (UNSPEC_VUCMPRLB "l")
+ (UNSPEC_VUCMPRHH "h") (UNSPEC_VUCMPRLH "l")])
+
+(define_int_attr vu_lh [(UNSPEC_VUCMPRHN "l") (UNSPEC_VUCMPRLN "h")
+ (UNSPEC_VUCMPRHB "l") (UNSPEC_VUCMPRLB "h")
+ (UNSPEC_VUCMPRHH "l") (UNSPEC_VUCMPRLH "h")])
+
+(define_int_attr vucmpr_splt_val [(UNSPEC_VUCMPRHN "3") (UNSPEC_VUCMPRLN "2")
+                                  (UNSPEC_VUCMPRHB "7") (UNSPEC_VUCMPRLB "6")
+                                  (UNSPEC_VUCMPRHH "15") (UNSPEC_VUCMPRLH "14")])
+
+;; Vector uncompress instructions for future.
+
+;; Vector Uncompress Nibbles
+
+(define_int_iterator VUCMPR_N [UNSPEC_VUCMPRHN UNSPEC_VUCMPRLN])
+
+(define_expand "altivec_vucmpr<vu_hl>n"
+  [(set (match_operand:V8HI 0 "register_operand")
+        (unspec:V8HI [(match_operand:V16QI 1 "register_operand")
+                      (match_operand:V4SI 2 "register_operand")]
+         VUCMPR_N))]
+  "TARGET_FUTURE"
+  {
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_altivec_vucmpr<vu_hl>n_direct (operands[0], operands[1], operands[2]));
+    else /* LE: splat one word of operand 2, then emit the opposite h/l form.  */
+      {
+        rtx tmp = gen_reg_rtx (V4SImode);
+        emit_insn (gen_altivec_vspltw_direct (tmp, operands[2], GEN_INT (<vucmpr_splt_val>)));
+        emit_insn (gen_altivec_vucmpr<vu_lh>n_direct (operands[0], operands[1], tmp));
+      }
+    DONE;
+  })
+
+(define_insn "altivec_vucmpr<vu_hl>n_direct"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ VUCMPR_N))]
+ "TARGET_FUTURE"
+ "vucmpr<vu_hl>n %0, %1, %2"
+ [(set_attr "type" "vecperm")])
+
+
+;; Vector Uncompress Bytes
+
+(define_int_iterator VUCMPR_B [UNSPEC_VUCMPRHB UNSPEC_VUCMPRLB])
+
+(define_expand "altivec_vucmpr<vu_hl>b"
+  [(set (match_operand:V4SI 0 "register_operand")
+        (unspec:V4SI [(match_operand:V8HI 1 "register_operand")
+                      (match_operand:V8HI 2 "register_operand")]
+         VUCMPR_B))]
+  "TARGET_FUTURE"
+  {
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_altivec_vucmpr<vu_hl>b_direct (operands[0], operands[1], operands[2]));
+    else /* LE: splat one halfword of operand 2, then emit the opposite h/l form.  */
+      {
+        rtx tmp = gen_reg_rtx (V8HImode);
+        emit_insn (gen_altivec_vsplth_direct (tmp, operands[2], GEN_INT (<vucmpr_splt_val>)));
+        emit_insn (gen_altivec_vucmpr<vu_lh>b_direct (operands[0], operands[1], tmp));
+      }
+    DONE;
+  })
+
+(define_insn "altivec_vucmpr<vu_hl>b_direct"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ VUCMPR_B))]
+ "TARGET_FUTURE"
+ "vucmpr<vu_hl>b %0, %1, %2"
+ [(set_attr "type" "vecperm")])
+
+;; Vector Uncompress Halfwords
+
+(define_int_iterator VUCMPR_H [UNSPEC_VUCMPRHH UNSPEC_VUCMPRLH])
+
+(define_expand "altivec_vucmpr<vu_hl>h"
+  [(set (match_operand:V2DI 0 "register_operand")
+        (unspec:V2DI [(match_operand:V4SI 1 "register_operand")
+                      (match_operand:V16QI 2 "register_operand")]
+         VUCMPR_H))]
+  "TARGET_FUTURE"
+  {
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_altivec_vucmpr<vu_hl>h_direct (operands[0], operands[1], operands[2]));
+    else /* LE: splat one byte of operand 2, then emit the opposite h/l form.  */
+      {
+        rtx tmp = gen_reg_rtx (V16QImode);
+        emit_insn (gen_altivec_vspltb_direct (tmp, operands[2], GEN_INT (<vucmpr_splt_val>)));
+        emit_insn (gen_altivec_vucmpr<vu_lh>h_direct (operands[0], operands[1], tmp));
+      }
+    DONE;
+  })
+
+(define_insn "altivec_vucmpr<vu_hl>h_direct"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V16QI 2 "register_operand" "v")]
+ VUCMPR_H))]
+ "TARGET_FUTURE"
+ "vucmpr<vu_hl>h %0, %1, %2"
+ [(set_attr "type" "vecperm")])
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 7e5a4fb96e7..7ade43098f9 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3924,3 +3924,41 @@
void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
STXVP nothing {mma,pair}
+
+
+[future]
+ const vus __builtin_altivec_uncompresshn (vuc, vui);
+ VUCMPRHN altivec_vucmprhn {}
+
+ const vui __builtin_altivec_uncompresshb (vus, vus);
+ VUCMPRHB altivec_vucmprhb {}
+
+ const vull __builtin_altivec_uncompresshh (vui, vuc);
+ VUCMPRHH altivec_vucmprhh {}
+
+ const vus __builtin_altivec_uncompressln (vuc, vui);
+ VUCMPRLN altivec_vucmprln {}
+
+ const vui __builtin_altivec_uncompresslb (vus, vus);
+ VUCMPRLB altivec_vucmprlb {}
+
+ const vull __builtin_altivec_uncompresslh (vui, vuc);
+ VUCMPRLH altivec_vucmprlh {}
+
+ const vsc __builtin_altivec_unpack_hsn_to_byte (vull);
+ VUPKHSNTOB altivec_vupkhsntob {}
+
+ const vsc __builtin_altivec_unpack_lsn_to_byte (vull);
+ VUPKLSNTOB altivec_vupklsntob {}
+
+ const vuc __builtin_altivec_unpack_int4_to_bf16 (vus, const int<2>);
+ VUPKINT4TOBF16 altivec_vupkint4tobf16 {}
+
+ const vuc __builtin_altivec_unpack_int8_to_bf16 (vus, const int<1>);
+ VUPKINT8TOBF16 altivec_vupkint8tobf16 {}
+
+ const vf __builtin_altivec_unpack_int4_to_fp32 (vui, const int<3>);
+ VUPKINT4TOFP32 altivec_vupkint4tofp32 {}
+
+ const vf __builtin_altivec_unpack_int8_to_fp32 (vui, const int<2>);
+ VUPKINT8TOFP32 altivec_vupkint8tofp32 {}
diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def
index 5238c81b214..532e9c7a68a 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -5015,6 +5015,54 @@
vd __builtin_vsx_xxsldwi (vd, vd, const int);
XXSLDWI_2DF XXSLDWI_VD2
+[VEC_UCMPRHN, vec_uncompresshn, __builtin_vec_uncompresshn]
+ vus __builtin_vec_uncompresshn (vuc, vui);
+ VUCMPRHN
+
+[VEC_UCMPRHB, vec_uncompresshb, __builtin_vec_uncompresshb]
+  vui __builtin_vec_uncompresshb (vus, vus);
+    VUCMPRHB
+
+[VEC_UCMPRHH, vec_uncompresshh, __builtin_vec_uncompresshh]
+  vull __builtin_vec_uncompresshh (vui, vuc);
+    VUCMPRHH
+
+[VEC_UCMPRLN, vec_uncompressln, __builtin_vec_uncompressln]
+  vus __builtin_vec_uncompressln (vuc, vui);
+    VUCMPRLN
+
+[VEC_UCMPRLB, vec_uncompresslb, __builtin_vec_uncompresslb]
+  vui __builtin_vec_uncompresslb (vus, vus);
+    VUCMPRLB
+
+[VEC_UCMPRLH, vec_uncompresslh, __builtin_vec_uncompresslh]
+  vull __builtin_vec_uncompresslh (vui, vuc);
+    VUCMPRLH
+
+[VEC_UNPACK_HSN_TO_BYTE, vec_unpack_hsn_to_byte, __builtin_vec_unpack_hsn_to_byte]
+  vsc __builtin_vec_unpack_hsn_to_byte (vull);
+    VUPKHSNTOB
+
+[VEC_UNPACK_LSN_TO_BYTE, vec_unpack_lsn_to_byte, __builtin_vec_unpack_lsn_to_byte]
+  vsc __builtin_vec_unpack_lsn_to_byte (vull);
+    VUPKLSNTOB
+
+[VEC_UNPACK_INT4_TO_BF16, vec_unpack_int4_to_bf16, __builtin_vec_unpack_int4_to_bf16]
+  vuc __builtin_vec_unpack_int4_to_bf16 (vus, const int<2>);
+    VUPKINT4TOBF16
+
+[VEC_UNPACK_INT8_TO_BF16, vec_unpack_int8_to_bf16, __builtin_vec_unpack_int8_to_bf16]
+  vuc __builtin_vec_unpack_int8_to_bf16 (vus, const int<1>);
+    VUPKINT8TOBF16
+
+[VEC_UNPACK_INT4_TO_FP32, vec_unpack_int4_to_fp32, __builtin_vec_unpack_int4_to_fp32]
+  vf __builtin_vec_unpack_int4_to_fp32 (vui, const int<3>);
+    VUPKINT4TOFP32
+
+[VEC_UNPACK_INT8_TO_FP32, vec_unpack_int8_to_fp32, __builtin_vec_unpack_int8_to_fp32]
+  vf __builtin_vec_unpack_int8_to_fp32 (vui, const int<2>);
+    VUPKINT8TOFP32
+
; **************************************************************************
; **************************************************************************
diff --git a/gcc/testsuite/gcc.target/powerpc/future-vucmpr.c b/gcc/testsuite/gcc.target/powerpc/future-vucmpr.c
new file mode 100644
index 00000000000..58ffa67ebb1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/future-vucmpr.c
@@ -0,0 +1,60 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=future" } */
+
+#include <altivec.h>
+
+vector unsigned short
+test_uncompresshn (vector unsigned char x, vector unsigned int y)
+{
+  return vec_uncompresshn (x, y);
+}
+
+vector unsigned int
+test_uncompresshb (vector unsigned short x, vector unsigned short y)
+{
+  return vec_uncompresshb (x, y);
+}
+
+vector unsigned long long
+test_uncompresshh (vector unsigned int x, vector unsigned char y)
+{
+  return vec_uncompresshh (x, y);
+}
+
+vector unsigned short
+test_uncompressln (vector unsigned char x, vector unsigned int y)
+{
+  return vec_uncompressln (x, y);
+}
+
+vector unsigned int
+test_uncompresslb (vector unsigned short x, vector unsigned short y)
+{
+  return vec_uncompresslb (x, y);
+}
+
+vector unsigned long long
+test_uncompresslh (vector unsigned int x, vector unsigned char y)
+{
+  return vec_uncompresslh (x, y);
+}
+
+/* BE: direct instructions, no splats */
+
+/* { dg-final { scan-assembler-not "vspltw" { target { be } } } } */
+/* { dg-final { scan-assembler-not "vsplth" { target { be } } } } */
+/* { dg-final { scan-assembler-not "vspltb" { target { be } } } } */
+
+/* LE: splats must appear */
+
+/* { dg-final { scan-assembler-times "vspltw" 2 { target { le } } } } */
+/* { dg-final { scan-assembler-times "vsplth" 2 { target { le } } } } */
+/* { dg-final { scan-assembler-times "vspltb" 2 { target { le } } } } */
+
+/* { dg-final { scan-assembler-times "vucmprln" 1 } } */
+/* { dg-final { scan-assembler-times "vucmprlb" 1 } } */
+/* { dg-final { scan-assembler-times "vucmprlh" 1 } } */
+
+/* { dg-final { scan-assembler-times "vucmprhn" 1 } } */
+/* { dg-final { scan-assembler-times "vucmprhb" 1 } } */
+/* { dg-final { scan-assembler-times "vucmprhh" 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/future-vupk.c b/gcc/testsuite/gcc.target/powerpc/future-vupk.c
new file mode 100644
index 00000000000..fa4876dd7eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/future-vupk.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=future" } */
+
+#include <altivec.h>
+
+vector signed char
+test_unpack_hsn_to_byte (vector unsigned long long x)
+{
+  return vec_unpack_hsn_to_byte (x);
+}
+
+vector signed char
+test_unpack_lsn_to_byte (vector unsigned long long x)
+{
+  return vec_unpack_lsn_to_byte (x);
+}
+
+vector unsigned char
+test_unpack_int4_to_bf16 (vector unsigned short x)
+{
+  return vec_unpack_int4_to_bf16 (x, 0);
+}
+
+vector unsigned char
+test_unpack_int8_to_bf16 (vector unsigned short x)
+{
+  return vec_unpack_int8_to_bf16 (x, 0);
+}
+
+vector float
+test_unpack_int4_to_fp32 (vector unsigned int x)
+{
+  return vec_unpack_int4_to_fp32 (x, 0);
+}
+
+vector float
+test_unpack_int8_to_fp32 (vector unsigned int x)
+{
+  return vec_unpack_int8_to_fp32 (x, 0);
+}
+
+
+/* { dg-final { scan-assembler-times "vupkhsntob" 1 } } */
+/* { dg-final { scan-assembler-times "vupklsntob" 1 } } */
+/* { dg-final { scan-assembler-times "vupkint4tobf16" 1 } } */
+/* { dg-final { scan-assembler-times "vupkint8tobf16" 1 } } */
+/* { dg-final { scan-assembler-times "vupkint4tofp32" 1 } } */
+/* { dg-final { scan-assembler-times "vupkint8tofp32" 1 } } */
--
2.47.3