Hi All,

The following patch depends on these two patches, applied in this order:
1. mcpu=future: 
https://gcc.gnu.org/pipermail/gcc-patches/2025-December/703739.html
2. future builtin infra: 
https://gcc.gnu.org/pipermail/gcc-patches/2026-March/709782.html

Bootstrapped and regtested on powerpc64le-linux-gnu with no regressions.

Thanks and regards,
Kishan Parmar

Add support for vector uncompress and unpack instructions proposed in
RFC02691.  These instructions may or may not be added to a future Power
processor, and the names of the builtins may change in the future.

The instructions are exposed through new builtin and intrinsic
interfaces and are enabled when compiling with -mcpu=future.

This patch adds RTL patterns for vector uncompress (nibble, byte, and
halfword) and unpack operations in altivec.md, along with the
corresponding builtin definitions in rs6000-builtins.def and overload
entries in rs6000-overload.def.

The following new builtins are provided:

vector unsigned short vec_uncompresshn (vector unsigned char,
                                        vector unsigned int)
vector unsigned int vec_uncompresshb (vector unsigned short,
                                      vector unsigned short)
vector unsigned long long vec_uncompresshh (vector unsigned int,
                                            vector unsigned char)
vector unsigned short vec_uncompressln (vector unsigned char,
                                        vector unsigned int)
vector unsigned int vec_uncompresslb (vector unsigned short,
                                      vector unsigned short)
vector unsigned long long vec_uncompresslh (vector unsigned int,
                                            vector unsigned char)
vector signed char vec_unpack_hsn_to_byte (vector unsigned long long)
vector signed char vec_unpack_lsn_to_byte (vector unsigned long long)
vector unsigned char vec_unpack_int4_to_bf16 (vector unsigned short,
                                              const int<2>)
vector unsigned char vec_unpack_int8_to_bf16 (vector unsigned short,
                                              const int<1>)
vector float vec_unpack_int4_to_fp32 (vector unsigned int, const int<3>)
vector float vec_unpack_int8_to_fp32 (vector unsigned int, const int<2>)

gcc/ChangeLog:

        * config/rs6000/altivec.md (altivec_vupkhsntob): New define_insn for
        vupk[lh]sntob.
        (altivec_vupklsntob): Likewise.
        (altivec_vupkint4tobf16): New define_insn vupkint4tobf16.
        (altivec_vupkint8tobf16): New define_insn vupkint8tobf16.
        (altivec_vupkint4tofp32): New define_insn vupkint4tofp32.
        (altivec_vupkint8tofp32): New define_insn vupkint8tofp32.
        (vu_hl): New attribute.
        (vu_lh): Likewise.
        (vucmpr_splt_val): Likewise.
        (VUCMPR_N): New int iterator.
        (VUCMPR_B): Likewise.
        (VUCMPR_H): Likewise.
        (altivec_vucmpr<vu_hl>n): New define_expand.
        (altivec_vucmpr<vu_hl>n_direct): New define_insn for vucmpr<vu_hl>n.
        (altivec_vucmpr<vu_hl>b): New define_expand.
        (altivec_vucmpr<vu_hl>b_direct): New define_insn for vucmpr<vu_hl>b.
        (altivec_vucmpr<vu_hl>h): New define_expand.
        (altivec_vucmpr<vu_hl>h_direct): New define_insn for vucmpr<vu_hl>h.
        * config/rs6000/rs6000-builtins.def: Add vector uncompress and unpack
        builtins under [future].
        * config/rs6000/rs6000-overload.def: Add vec_uncompress* and vec_unpack*
        interfaces.

gcc/testsuite/ChangeLog:

        * gcc.target/powerpc/future-vucmpr.c: New test.
        * gcc.target/powerpc/future-vupk.c: New test.
---
 gcc/config/rs6000/altivec.md                  | 182 ++++++++++++++++++
 gcc/config/rs6000/rs6000-builtins.def         |  38 ++++
 gcc/config/rs6000/rs6000-overload.def         |  48 +++++
 .../gcc.target/powerpc/future-vucmpr.c        |  60 ++++++
 .../gcc.target/powerpc/future-vupk.c          |  48 +++++
 5 files changed, 376 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/future-vucmpr.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/future-vupk.c

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 129f56245cd..e40ed7b442d 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -171,6 +171,16 @@
    UNSPEC_SLDB
    UNSPEC_SRDB
    UNSPEC_VECTOR_SHIFT
+   UNSPEC_VUCMPRHN
+   UNSPEC_VUCMPRLN
+   UNSPEC_VUCMPRHB
+   UNSPEC_VUCMPRLB
+   UNSPEC_VUCMPRHH
+   UNSPEC_VUCMPRLH
+   UNSPEC_VUPKINT4TOBF16
+   UNSPEC_VUPKINT8TOBF16
+   UNSPEC_VUPKINT4TOFP32
+   UNSPEC_VUPKINT8TOFP32
 ])
 
 (define_c_enum "unspecv"
@@ -4826,3 +4836,175 @@
                                  (match_dup 3)]
                                 UNSPEC_BCD_ADD_SUB)
                    (match_dup 4)))])])
+
+;; Vector unpack instructions for future.
+
+;; Pattern behind __builtin_altivec_unpack_hsn_to_byte: takes a V2DI
+;; operand and produces a V16QI result via UNSPEC_VUNPACK_HI_SIGN.  The
+;; mnemonic follows the byte order: vupkhsntob is emitted for big-endian
+;; and vupklsntob for little-endian.
+(define_insn "altivec_vupkhsntob"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+        (unspec:V16QI [(match_operand:V2DI 1 "register_operand" "v")]
+                        UNSPEC_VUNPACK_HI_SIGN))]
+  "TARGET_FUTURE"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vupkhsntob %0, %1";
+  else
+    return "vupklsntob %0, %1";
+}
+  [(set_attr "type" "vecperm")])
+
+;; Pattern behind __builtin_altivec_unpack_lsn_to_byte: takes a V2DI
+;; operand and produces a V16QI result via UNSPEC_VUNPACK_LO_SIGN.  The
+;; mnemonic follows the byte order: vupklsntob is emitted for big-endian
+;; and vupkhsntob for little-endian.
+(define_insn "altivec_vupklsntob"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+        (unspec:V16QI [(match_operand:V2DI 1 "register_operand" "v")]
+                        UNSPEC_VUNPACK_LO_SIGN))]
+  "TARGET_FUTURE"
+{
+  if (BYTES_BIG_ENDIAN)
+    return "vupklsntob %0, %1";
+  else
+    return "vupkhsntob %0, %1";
+}
+  [(set_attr "type" "vecperm")])
+
+;; vupkint4tobf16: V8HI source and result.  Operand 2 must be a constant
+;; accepted by const_0_to_3_operand and is printed as the third operand.
+(define_insn "altivec_vupkint4tobf16"
+  [(set (match_operand:V8HI 0 "register_operand" "=v")
+        (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+                      (match_operand:QI 2 "const_0_to_3_operand" "i")]
+                      UNSPEC_VUPKINT4TOBF16))]
+  "TARGET_FUTURE"
+  "vupkint4tobf16 %0, %1, %2"
+  [(set_attr "type" "vecperm")])
+
+;; vupkint8tobf16: V8HI source and result.  Operand 2 must be a constant
+;; accepted by const_0_to_1_operand and is printed as the third operand.
+(define_insn "altivec_vupkint8tobf16"
+  [(set (match_operand:V8HI 0 "register_operand" "=v")
+        (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+                      (match_operand:QI 2 "const_0_to_1_operand" "i")]
+                      UNSPEC_VUPKINT8TOBF16))]
+  "TARGET_FUTURE"
+  "vupkint8tobf16 %0, %1, %2"
+  [(set_attr "type" "vecperm")])
+
+;; vupkint4tofp32: V4SI source and result.  Operand 2 must be a constant
+;; accepted by const_0_to_7_operand and is printed as the third operand.
+(define_insn "altivec_vupkint4tofp32"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+                      (match_operand:QI 2 "const_0_to_7_operand" "i")]
+                      UNSPEC_VUPKINT4TOFP32))]
+  "TARGET_FUTURE"
+  "vupkint4tofp32 %0, %1, %2"
+  [(set_attr "type" "vecperm")])
+
+;; vupkint8tofp32: V4SI source and result.  Operand 2 must be a constant
+;; accepted by const_0_to_3_operand and is printed as the third operand.
+(define_insn "altivec_vupkint8tofp32"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+                      (match_operand:QI 2 "const_0_to_3_operand" "i")]
+                      UNSPEC_VUPKINT8TOFP32))]
+  "TARGET_FUTURE"
+  "vupkint8tofp32 %0, %1, %2"
+  [(set_attr "type" "vecperm")])
+
+;; Mnemonic letter ("h" or "l") selected by each uncompress unspec.
+(define_int_attr vu_hl [(UNSPEC_VUCMPRHN "h") (UNSPEC_VUCMPRLN "l")
+                        (UNSPEC_VUCMPRHB "h") (UNSPEC_VUCMPRLB "l")
+                        (UNSPEC_VUCMPRHH "h") (UNSPEC_VUCMPRLH "l")])
+
+;; The opposite letter of vu_hl.  The little-endian path of the uncompress
+;; expanders uses it to select the other half's _direct pattern.
+(define_int_attr vu_lh [(UNSPEC_VUCMPRHN "l") (UNSPEC_VUCMPRLN "h")
+                        (UNSPEC_VUCMPRHB "l") (UNSPEC_VUCMPRLB "h")
+                        (UNSPEC_VUCMPRHH "l") (UNSPEC_VUCMPRLH "h")])
+
+;; Element index given to the vspltw/vsplth/vspltb splat that the
+;; little-endian path of the uncompress expanders applies to operand 2.
+(define_int_attr vucmpr_splt_val
+  [(UNSPEC_VUCMPRHN "3") (UNSPEC_VUCMPRLN "2")
+   (UNSPEC_VUCMPRHB "7") (UNSPEC_VUCMPRLB "6")
+   (UNSPEC_VUCMPRHH "15") (UNSPEC_VUCMPRLH "14")])
+
+;; Vector uncompress instructions for future.
+
+;; Vector Uncompress Nibbles
+
+(define_int_iterator VUCMPR_N [UNSPEC_VUCMPRHN UNSPEC_VUCMPRLN])
+
+;; Expander behind the uncompresshn/uncompressln builtins.  Big-endian
+;; targets use the same-named _direct insn with the operands unchanged.
+;; Little-endian targets first splat the word of operand 2 selected by
+;; vucmpr_splt_val into a fresh V4SI register and then use the opposite
+;; (vu_lh) _direct insn on that register.  Constraints are omitted: they
+;; have no effect in a define_expand.
+(define_expand "altivec_vucmpr<vu_hl>n"
+  [(set (match_operand:V8HI 0 "register_operand")
+        (unspec:V8HI [(match_operand:V16QI 1 "register_operand")
+                      (match_operand:V4SI 2 "register_operand")]
+                     VUCMPR_N))]
+  "TARGET_FUTURE"
+  {
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_altivec_vucmpr<vu_hl>n_direct (operands[0], operands[1],
+                                                    operands[2]));
+    else
+      {
+        rtx tmp = gen_reg_rtx (V4SImode);
+        emit_insn (gen_altivec_vspltw_direct (tmp, operands[2],
+                                              GEN_INT (<vucmpr_splt_val>)));
+        emit_insn (gen_altivec_vucmpr<vu_lh>n_direct (operands[0], operands[1],
+                                                      tmp));
+      }
+    DONE;
+  })
+
+;; Emits the vucmprhn or vucmprln instruction named by the unspec, passing
+;; the operands through in the order given (no endian adjustment here).
+(define_insn "altivec_vucmpr<vu_hl>n_direct"
+  [(set (match_operand:V8HI 0 "register_operand" "=v")
+        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
+                      (match_operand:V4SI 2 "register_operand" "v")]
+                     VUCMPR_N))]
+  "TARGET_FUTURE"
+  "vucmpr<vu_hl>n %0, %1, %2"
+  [(set_attr "type" "vecperm")])
+
+
+;; Vector Uncompress Bytes
+
+(define_int_iterator VUCMPR_B [UNSPEC_VUCMPRHB UNSPEC_VUCMPRLB])
+
+;; Expander behind the uncompresshb/uncompresslb builtins.  Big-endian
+;; targets use the same-named _direct insn with the operands unchanged.
+;; Little-endian targets first splat the halfword of operand 2 selected by
+;; vucmpr_splt_val into a fresh V8HI register and then use the opposite
+;; (vu_lh) _direct insn on that register.  Constraints are omitted: they
+;; have no effect in a define_expand.
+(define_expand "altivec_vucmpr<vu_hl>b"
+  [(set (match_operand:V4SI 0 "register_operand")
+        (unspec:V4SI [(match_operand:V8HI 1 "register_operand")
+                      (match_operand:V8HI 2 "register_operand")]
+                     VUCMPR_B))]
+  "TARGET_FUTURE"
+  {
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_altivec_vucmpr<vu_hl>b_direct (operands[0], operands[1],
+                                                    operands[2]));
+    else
+      {
+        rtx tmp = gen_reg_rtx (V8HImode);
+        emit_insn (gen_altivec_vsplth_direct (tmp, operands[2],
+                                              GEN_INT (<vucmpr_splt_val>)));
+        emit_insn (gen_altivec_vucmpr<vu_lh>b_direct (operands[0], operands[1],
+                                                      tmp));
+      }
+    DONE;
+  })
+
+;; Emits the vucmprhb or vucmprlb instruction named by the unspec, passing
+;; the operands through in the order given (no endian adjustment here).
+(define_insn "altivec_vucmpr<vu_hl>b_direct"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
+                      (match_operand:V8HI 2 "register_operand" "v")]
+                     VUCMPR_B))]
+  "TARGET_FUTURE"
+  "vucmpr<vu_hl>b %0, %1, %2"
+  [(set_attr "type" "vecperm")])
+
+;; Vector Uncompress Halfwords
+
+(define_int_iterator VUCMPR_H [UNSPEC_VUCMPRHH UNSPEC_VUCMPRLH])
+
+;; Expander behind the uncompresshh/uncompresslh builtins.  Big-endian
+;; targets use the same-named _direct insn with the operands unchanged.
+;; Little-endian targets first splat the byte of operand 2 selected by
+;; vucmpr_splt_val into a fresh V16QI register and then use the opposite
+;; (vu_lh) _direct insn on that register.  Constraints are omitted: they
+;; have no effect in a define_expand.
+(define_expand "altivec_vucmpr<vu_hl>h"
+  [(set (match_operand:V2DI 0 "register_operand")
+        (unspec:V2DI [(match_operand:V4SI 1 "register_operand")
+                      (match_operand:V16QI 2 "register_operand")]
+                     VUCMPR_H))]
+  "TARGET_FUTURE"
+  {
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_altivec_vucmpr<vu_hl>h_direct (operands[0], operands[1],
+                                                    operands[2]));
+    else
+      {
+        rtx tmp = gen_reg_rtx (V16QImode);
+        emit_insn (gen_altivec_vspltb_direct (tmp, operands[2],
+                                              GEN_INT (<vucmpr_splt_val>)));
+        emit_insn (gen_altivec_vucmpr<vu_lh>h_direct (operands[0], operands[1],
+                                                      tmp));
+      }
+    DONE;
+  })
+
+;; Emits the vucmprhh or vucmprlh instruction named by the unspec, passing
+;; the operands through in the order given (no endian adjustment here).
+(define_insn "altivec_vucmpr<vu_hl>h_direct"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+        (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+                      (match_operand:V16QI 2 "register_operand" "v")]
+                     VUCMPR_H))]
+  "TARGET_FUTURE"
+  "vucmpr<vu_hl>h %0, %1, %2"
+  [(set_attr "type" "vecperm")])
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 7e5a4fb96e7..7ade43098f9 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3924,3 +3924,41 @@
 
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
     STXVP nothing {mma,pair}
+
+
+; Vector uncompress and unpack builtins.  Each entry names the altivec.md
+; pattern that implements it; the uncompress entries go through the
+; endian-aware expanders rather than the _direct insns.
+[future]
+  const vus __builtin_altivec_uncompresshn (vuc, vui);
+    VUCMPRHN altivec_vucmprhn {}
+
+  const vui __builtin_altivec_uncompresshb (vus, vus);
+    VUCMPRHB altivec_vucmprhb {}
+
+  const vull __builtin_altivec_uncompresshh (vui, vuc);
+    VUCMPRHH altivec_vucmprhh {}
+
+  const vus __builtin_altivec_uncompressln (vuc, vui);
+    VUCMPRLN altivec_vucmprln {}
+
+  const vui __builtin_altivec_uncompresslb (vus, vus);
+    VUCMPRLB altivec_vucmprlb {}
+
+  const vull __builtin_altivec_uncompresslh (vui, vuc);
+    VUCMPRLH altivec_vucmprlh {}
+
+  const vsc __builtin_altivec_unpack_hsn_to_byte (vull);
+    VUPKHSNTOB altivec_vupkhsntob {}
+
+  const vsc __builtin_altivec_unpack_lsn_to_byte (vull);
+    VUPKLSNTOB altivec_vupklsntob {}
+
+; NOTE(review): altivec_vupkint4tobf16 and altivec_vupkint8tobf16 are defined
+; with a V8HI result while these prototypes return vuc -- confirm the
+; intended return type.
+  const vuc __builtin_altivec_unpack_int4_to_bf16 (vus, const int<2>);
+    VUPKINT4TOBF16 altivec_vupkint4tobf16 {}
+
+  const vuc __builtin_altivec_unpack_int8_to_bf16 (vus, const int<1>);
+    VUPKINT8TOBF16 altivec_vupkint8tobf16 {}
+
+; NOTE(review): altivec_vupkint4tofp32 and altivec_vupkint8tofp32 are defined
+; with a V4SI result while these prototypes return vf -- confirm the
+; pattern mode and the prototype agree as intended.
+  const vf __builtin_altivec_unpack_int4_to_fp32 (vui, const int<3>);
+    VUPKINT4TOFP32 altivec_vupkint4tofp32 {}
+
+  const vf __builtin_altivec_unpack_int8_to_fp32 (vui, const int<2>);
+    VUPKINT8TOFP32 altivec_vupkint8tofp32 {}
diff --git a/gcc/config/rs6000/rs6000-overload.def 
b/gcc/config/rs6000/rs6000-overload.def
index 5238c81b214..532e9c7a68a 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -5015,6 +5015,54 @@
   vd __builtin_vsx_xxsldwi (vd, vd, const int);
     XXSLDWI_2DF  XXSLDWI_VD2
 
+; Vector uncompress overloads.  The internal names are spelled
+; __builtin_vec_uncompress* throughout, matching the external vec_uncompress*
+; names.
+[VEC_UCMPRHN, vec_uncompresshn, __builtin_vec_uncompresshn]
+  vus __builtin_vec_uncompresshn (vuc, vui);
+    VUCMPRHN
+
+[VEC_UCMPRHB, vec_uncompresshb, __builtin_vec_uncompresshb]
+  vui __builtin_vec_uncompresshb (vus, vus);
+    VUCMPRHB
+
+[VEC_UCMPRHH, vec_uncompresshh, __builtin_vec_uncompresshh]
+  vull __builtin_vec_uncompresshh (vui, vuc);
+    VUCMPRHH
+
+[VEC_UCMPRLN, vec_uncompressln, __builtin_vec_uncompressln]
+  vus __builtin_vec_uncompressln (vuc, vui);
+    VUCMPRLN
+
+[VEC_UCMPRLB, vec_uncompresslb, __builtin_vec_uncompresslb]
+  vui __builtin_vec_uncompresslb (vus, vus);
+    VUCMPRLB
+
+[VEC_UCMPRLH, vec_uncompresslh, __builtin_vec_uncompresslh]
+  vull __builtin_vec_uncompresslh (vui, vuc);
+    VUCMPRLH
+
+; Vector unpack overloads.  Each stanza header is kept on a single line.
+[VEC_UNPACK_HSN_TO_BYTE, vec_unpack_hsn_to_byte, __builtin_vec_unpack_hsn_to_byte]
+  vsc __builtin_vec_unpack_hsn_to_byte (vull);
+    VUPKHSNTOB
+
+[VEC_UNPACK_LSN_TO_BYTE, vec_unpack_lsn_to_byte, __builtin_vec_unpack_lsn_to_byte]
+  vsc __builtin_vec_unpack_lsn_to_byte (vull);
+    VUPKLSNTOB
+
+[VEC_UNPACK_INT4_TO_BF16, vec_unpack_int4_to_bf16, __builtin_vec_unpack_int4_to_bf16]
+  vuc __builtin_vec_unpack_int4_to_bf16 (vus, const int<2>);
+    VUPKINT4TOBF16
+
+[VEC_UNPACK_INT8_TO_BF16, vec_unpack_int8_to_bf16, __builtin_vec_unpack_int8_to_bf16]
+  vuc __builtin_vec_unpack_int8_to_bf16 (vus, const int<1>);
+    VUPKINT8TOBF16
+
+[VEC_UNPACK_INT4_TO_FP32, vec_unpack_int4_to_fp32, __builtin_vec_unpack_int4_to_fp32]
+  vf __builtin_vec_unpack_int4_to_fp32 (vui, const int<3>);
+    VUPKINT4TOFP32
+
+[VEC_UNPACK_INT8_TO_FP32, vec_unpack_int8_to_fp32, __builtin_vec_unpack_int8_to_fp32]
+  vf __builtin_vec_unpack_int8_to_fp32 (vui, const int<2>);
+    VUPKINT8TOFP32
+
 
 ; **************************************************************************
 ; **************************************************************************
diff --git a/gcc/testsuite/gcc.target/powerpc/future-vucmpr.c 
b/gcc/testsuite/gcc.target/powerpc/future-vucmpr.c
new file mode 100644
index 00000000000..58ffa67ebb1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/future-vucmpr.c
@@ -0,0 +1,60 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=future" } */
+
+/* Check that each vec_uncompress* intrinsic expands to a vucmpr*
+   instruction, with an extra splat of the second argument on
+   little-endian targets.  */
+
+#include <altivec.h>
+
+vector unsigned short
+test_uncompresshn (vector unsigned char a, vector unsigned int b)
+{
+  return vec_uncompresshn (a, b);
+}
+
+vector unsigned int
+test_uncompresshb (vector unsigned short a, vector unsigned short b)
+{
+  return vec_uncompresshb (a, b);
+}
+
+vector unsigned long long
+test_uncompresshh (vector unsigned int a, vector unsigned char b)
+{
+  return vec_uncompresshh (a, b);
+}
+
+vector unsigned short
+test_uncompressln (vector unsigned char a, vector unsigned int b)
+{
+  return vec_uncompressln (a, b);
+}
+
+vector unsigned int
+test_uncompresslb (vector unsigned short a, vector unsigned short b)
+{
+  return vec_uncompresslb (a, b);
+}
+
+vector unsigned long long
+test_uncompresslh (vector unsigned int a, vector unsigned char b)
+{
+  return vec_uncompresslh (a, b);
+}
+
+/* BE: direct instructions, no splats.  */
+
+/* { dg-final { scan-assembler-not "vspltw" { target { be } } } } */
+/* { dg-final { scan-assembler-not "vsplth" { target { be } } } } */
+/* { dg-final { scan-assembler-not "vspltb" { target { be } } } } */
+
+/* LE: both the h and the l variant of each element size splat their
+   second operand first, so every splat appears twice.  */
+
+/* { dg-final { scan-assembler-times "vspltw" 2 { target { le } } } } */
+/* { dg-final { scan-assembler-times "vsplth" 2 { target { le } } } } */
+/* { dg-final { scan-assembler-times "vspltb" 2 { target { le } } } } */
+
+/* On LE the expanders swap the h and l mnemonics, so each instruction
+   still appears exactly once for either byte order.  */
+
+/* { dg-final { scan-assembler-times "vucmprln" 1 } } */
+/* { dg-final { scan-assembler-times "vucmprlb" 1 } } */
+/* { dg-final { scan-assembler-times "vucmprlh" 1 } } */
+
+/* { dg-final { scan-assembler-times "vucmprhn" 1 } } */
+/* { dg-final { scan-assembler-times "vucmprhb" 1 } } */
+/* { dg-final { scan-assembler-times "vucmprhh" 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/future-vupk.c 
b/gcc/testsuite/gcc.target/powerpc/future-vupk.c
new file mode 100644
index 00000000000..fa4876dd7eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/future-vupk.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=future" } */
+
+/* Check that each vec_unpack_* intrinsic expands to the matching vupk*
+   instruction.  */
+
+#include <altivec.h>
+
+vector signed char
+test_unpack_hsn_to_byte (vector unsigned long long a)
+{
+  return vec_unpack_hsn_to_byte (a);
+}
+
+vector signed char
+test_unpack_lsn_to_byte (vector unsigned long long a)
+{
+  return vec_unpack_lsn_to_byte (a);
+}
+
+vector unsigned char
+test_unpack_int4_to_bf16 (vector unsigned short a)
+{
+  return vec_unpack_int4_to_bf16 (a, 0);
+}
+
+vector unsigned char
+test_unpack_int8_to_bf16 (vector unsigned short a)
+{
+  return vec_unpack_int8_to_bf16 (a, 0);
+}
+
+vector float
+test_unpack_int4_to_fp32 (vector unsigned int a)
+{
+  return vec_unpack_int4_to_fp32 (a, 0);
+}
+
+vector float
+test_unpack_int8_to_fp32 (vector unsigned int a)
+{
+  return vec_unpack_int8_to_fp32 (a, 0);
+}
+
+/* The hsn/lsn patterns swap their mnemonic on little-endian targets, so
+   vupkhsntob and vupklsntob each appear once for either byte order.  */
+
+/* { dg-final { scan-assembler-times "vupkhsntob" 1 } } */
+/* { dg-final { scan-assembler-times "vupklsntob" 1 } } */
+/* { dg-final { scan-assembler-times "vupkint4tobf16" 1 } } */
+/* { dg-final { scan-assembler-times "vupkint8tobf16" 1 } } */
+/* { dg-final { scan-assembler-times "vupkint4tofp32" 1 } } */
+/* { dg-final { scan-assembler-times "vupkint8tofp32" 1 } } */
-- 
2.47.3

Reply via email to