[PATCH V2] rs6000: Add Future Vector Integer Arithmetic Instructions [RFC02680]

jeevitha Mon, 11 May 2026 05:19:36 -0700

Hi All,

This patch depends on the smul/umul pattern fix and will be committed only
after that fix has been upstreamed. The changes have been bootstrapped and
regression-tested on powerpc64le-linux. Is this okay for trunk?


Changes from V1:
  * Incorporated wording suggestions.
  * Renamed mode iterators to VIMulH and VIArith for better clarity.
  * Removed extra blank line in vsx_simd-*.c test case.
  * Replaced unspec with proper smul_highpart and umul_highpart RTL
    codes for multiply-high patterns.
  * Added * prefix to internal patterns for vsx_add, vsx_sub, altivec_add,
    altivec_sub, altivec_smul and altivec_umul.
  * Updated extend.texi.

This patch adds support for VSX vector integer arithmetic instructions that
may be added to future PowerPC processors. Note that the names of the new
built-ins (__builtin_vsx_xvmulhuh and __builtin_vsx_xvmulhsh) may change in
the future.

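New VSX patterns, guarded by TARGET_FUTURE, are added for the vector add,
subtract, multiply, and multiply-high instructions. The existing Altivec
add, subtract and multiply-high patterns are renamed to *altivec_* so that
the standard names (add<mode>3, sub<mode>3, smul<mode>3_highpart and
umul<mode>3_highpart) can be used by the new expanders in vector.md.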

2026-05-11  Jeevitha Palanisamy  <[email protected]>

gcc/
        * config/rs6000/altivec.md (*vsx_add<mode>3): New pattern for VSX
        vector add for halfword and word.
        (*altivec_add<mode>3): Renamed from add<mode>3.  Use VEC_I instead
        of VI2.
        (*vsx_sub<mode>3): New pattern for VSX vector subtract for halfword and
        word.
        (*altivec_sub<mode>3): Renamed from sub<mode>3.  Use VEC_I instead
        of VI2.
        * config/rs6000/vector.md (VIMulH): New mode iterator for V4SI and V2DI.
        (add<mode>3): New expand pattern for integer vector add.
        (sub<mode>3): New expand pattern for integer vector subtract.
        (smul<mode>3_highpart): New expand pattern for signed vector
        multiply-high part on VIMulH modes.
        (umul<mode>3_highpart): New expand pattern for unsigned vector
        multiply-high part on VIMulH modes.
        * config/rs6000/vsx.md (VIArith): New mode iterator for V8HI and V4SI.
        (vsx_mul<mode>3): New VSX vector multiply pattern for halfword and word.
        (vsx_smul<mode>3_highpart): New VSX signed multiply-high pattern for
        halfword and word.
        (vsx_umul<mode>3_highpart): New VSX unsigned multiply-high pattern for
        halfword and word.
        (*altivec_smul<mode>3_highpart): Renamed from smul<mode>3_highpart.
        Use VIMulH instead of VIlong.
        (*altivec_umul<mode>3_highpart): Renamed from umul<mode>3_highpart.
        Use VIMulH instead of VIlong.
        * config/rs6000/rs6000-builtins.def (__builtin_vsx_xvmulhuh): New
        builtin for VSX unsigned multiply-high halfword.
        (__builtin_vsx_xvmulhsh): New builtin for VSX signed multiply-high
        halfword.
        * config/rs6000/rs6000-overload.def (__builtin_vec_mulh): Add
        overloads for vector multiply-high signed/unsigned halfword.
        * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions Available
        on Future ISA): Document new functions.

gcc/testsuite/
        * gcc.target/powerpc/vsx_simd-1.c: New test.
        * gcc.target/powerpc/vsx_simd-2.c: New test.

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 129f56245cd..ed04aebdba9 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -503,11 +503,18 @@
 
 ;; Simple binary operations.
 
+(define_insn "*vsx_add<mode>3"
+  [(set (match_operand:VIArith 0 "vsx_register_operand" "=wa")
+        (plus:VIArith (match_operand:VIArith 1 "vsx_register_operand" "wa")
+                      (match_operand:VIArith 2 "vsx_register_operand" "wa")))]
+  "TARGET_FUTURE"
+  "xvaddu<VI_char>m %x0,%x1,%x2")
+
 ;; add
-(define_insn "add<mode>3"
-  [(set (match_operand:VI2 0 "register_operand" "=v")
-        (plus:VI2 (match_operand:VI2 1 "register_operand" "v")
-                 (match_operand:VI2 2 "register_operand" "v")))]
+(define_insn "*altivec_add<mode>3"
+  [(set (match_operand:VEC_I 0 "register_operand" "=v")
+        (plus:VEC_I (match_operand:VEC_I 1 "register_operand" "v")
+                    (match_operand:VEC_I 2 "register_operand" "v")))]
   "<VI_unit>"
   "vaddu<VI_char>m %0,%1,%2"
   [(set_attr "type" "vecsimple")])
@@ -547,11 +554,18 @@
   "vadds<VI_char>s %0,%1,%2"
   [(set_attr "type" "vecsimple")])
 
+(define_insn "*vsx_sub<mode>3"
+  [(set (match_operand:VIArith 0 "vsx_register_operand" "=wa")
+        (minus:VIArith (match_operand:VIArith 1 "vsx_register_operand" "wa")
+                       (match_operand:VIArith 2 "vsx_register_operand" "wa")))]
+  "TARGET_FUTURE"
+  "xvsubu<VI_char>m %x0,%x1,%x2")
+
 ;; sub
-(define_insn "sub<mode>3"
-  [(set (match_operand:VI2 0 "register_operand" "=v")
-        (minus:VI2 (match_operand:VI2 1 "register_operand" "v")
-                  (match_operand:VI2 2 "register_operand" "v")))]
+(define_insn "*altivec_sub<mode>3"
+  [(set (match_operand:VEC_I 0 "register_operand" "=v")
+        (minus:VEC_I (match_operand:VEC_I 1 "register_operand" "v")
+                     (match_operand:VEC_I 2 "register_operand" "v")))]
   "<VI_unit>"
   "vsubu<VI_char>m %0,%1,%2"
   [(set_attr "type" "vecsimple")])
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 0d1529b71d4..8bac1133a6b 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3970,3 +3970,9 @@
 
   const vuc __builtin_galois_field_mult_xts (vuc, vuc);
     XXGFMUL128XTS xxgfmul128xts {}
+
+  const vus __builtin_vsx_xvmulhuh (vus, vus);
+    XVMULHUH vsx_umulv8hi3_highpart {}
+
+  const vss __builtin_vsx_xvmulhsh (vss, vss);
+    XVMULHSH vsx_smulv8hi3_highpart {}
diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def
index ef7b59ed112..09be9e7de71 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -2530,6 +2530,10 @@
     VMULEUD
 
 [VEC_MULH, vec_mulh, __builtin_vec_mulh]
+  vss __builtin_vec_mulh (vss, vss);
+    XVMULHSH
+  vus __builtin_vec_mulh (vus, vus);
+    XVMULHUH
   vsi __builtin_vec_mulh (vsi, vsi);
     VMULHSW
   vui __builtin_vec_mulh (vui, vui);
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index e6adf91002e..6dbb95e8982 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -71,6 +71,9 @@
 ;; Vector integer modes
 (define_mode_iterator VI [V4SI V8HI V16QI])
 
+;; Vector integer multiply high modes
+(define_mode_iterator VIMulH [V4SI V2DI])
+
 ;; Base type from vector mode
 (define_mode_attr VEC_base [(V16QI "QI")
                            (V8HI  "HI")
@@ -188,6 +191,13 @@
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
   "")
 
+(define_expand "add<mode>3"
+  [(set (match_operand:VEC_I 0 "register_operand")
+        (plus:VEC_I (match_operand:VEC_I 1 "register_operand")
+                    (match_operand:VEC_I 2 "register_operand")))]
+  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+  "")
+
 (define_expand "sub<mode>3"
   [(set (match_operand:VEC_F 0 "vfloat_operand")
        (minus:VEC_F (match_operand:VEC_F 1 "vfloat_operand")
@@ -195,6 +205,13 @@
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
   "")
 
+(define_expand "sub<mode>3"
+  [(set (match_operand:VEC_I 0 "register_operand")
+        (minus:VEC_I (match_operand:VEC_I 1 "register_operand")
+                     (match_operand:VEC_I 2 "register_operand")))]
+  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+  "")
+
 (define_expand "mul<mode>3"
   [(set (match_operand:VEC_F 0 "vfloat_operand")
        (mult:VEC_F (match_operand:VEC_F 1 "vfloat_operand")
@@ -208,6 +225,22 @@
     }
 })
 
+(define_expand "smul<mode>3_highpart"
+  [(set (match_operand:VIMulH 0 "register_operand")
+        (smul_highpart:VIMulH
+          (match_operand:VIMulH 1 "register_operand")
+          (match_operand:VIMulH 2 "register_operand")))]
+  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && TARGET_POWER10"
+  "")
+
+(define_expand "umul<mode>3_highpart"
+  [(set (match_operand:VIMulH 0 "register_operand")
+        (umul_highpart:VIMulH
+          (match_operand:VIMulH 1 "register_operand")
+          (match_operand:VIMulH 2 "register_operand")))]
+  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && TARGET_POWER10"
+  "")
+
 (define_expand "div<mode>3"
   [(set (match_operand:VEC_F 0 "vfloat_operand")
        (div:VEC_F (match_operand:VEC_F 1 "vfloat_operand")
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index c99b6602135..9d1b5404095 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -418,6 +418,9 @@
 (define_int_attr vczlsbb_char [(UNSPEC_VCLZLSBB "l")
                               (UNSPEC_VCTZLSBB "t")])
 
+;; Vector integer arithmetic modes
+(define_mode_iterator VIArith [V8HI V4SI])
+
 ;; VSX moves
 
 ;; TImode memory to memory move optimization on LE with p8vector
@@ -1711,6 +1714,13 @@
   "xvsub<sd>p %x0,%x1,%x2"
   [(set_attr "type" "<VStype_simple>")])
 
+(define_insn "vsx_mul<mode>3"
+  [(set (match_operand:VIArith 0 "vsx_register_operand" "=wa")
+        (mult:VIArith (match_operand:VIArith 1 "vsx_register_operand" "wa")
+                      (match_operand:VIArith 2 "vsx_register_operand" "wa")))]
+  "TARGET_FUTURE"
+  "xvmulu<wd>m %x0,%x1,%x2")
+
 (define_insn "*vsx_mul<mode>3"
   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
         (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
@@ -6546,20 +6556,36 @@
   [(set_attr "type" "vecdiv")
    (set_attr "size" "<bits>")])
 
-(define_insn "smul<mode>3_highpart"
-  [(set (match_operand:VIlong 0 "altivec_register_operand" "=v")
-        (smul_highpart:VIlong
-          (match_operand:VIlong 1 "altivec_register_operand" "v")
-          (match_operand:VIlong 2 "altivec_register_operand" "v")))]
+(define_insn "vsx_smul<mode>3_highpart"
+  [(set (match_operand:VIArith 0 "vsx_register_operand" "=wa")
+        (smul_highpart:VIArith
+          (match_operand:VIArith 1 "vsx_register_operand" "wa")
+          (match_operand:VIArith 2 "vsx_register_operand" "wa")))]
+  "TARGET_FUTURE"
+  "xvmulhs<wd> %x0,%x1,%x2")
+
+(define_insn "vsx_umul<mode>3_highpart"
+  [(set (match_operand:VIArith 0 "vsx_register_operand" "=wa")
+        (umul_highpart:VIArith
+          (match_operand:VIArith 1 "vsx_register_operand" "wa")
+          (match_operand:VIArith 2 "vsx_register_operand" "wa")))]
+  "TARGET_FUTURE"
+  "xvmulhu<wd> %x0,%x1,%x2")
+
+(define_insn "*altivec_smul<mode>3_highpart"
+  [(set (match_operand:VIMulH 0 "altivec_register_operand" "=v")
+        (smul_highpart:VIMulH
+          (match_operand:VIMulH 1 "altivec_register_operand" "v")
+          (match_operand:VIMulH 2 "altivec_register_operand" "v")))]
   "TARGET_POWER10"
   "vmulhs<wd> %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
-(define_insn "umul<mode>3_highpart"
-  [(set (match_operand:VIlong 0 "altivec_register_operand" "=v")
-        (umul_highpart:VIlong
-          (match_operand:VIlong 1 "altivec_register_operand" "v")
-          (match_operand:VIlong 2 "altivec_register_operand" "v")))]
+(define_insn "*altivec_umul<mode>3_highpart"
+  [(set (match_operand:VIMulH 0 "altivec_register_operand" "=v")
+       (umul_highpart:VIMulH
+          (match_operand:VIMulH 1 "altivec_register_operand" "v")
+          (match_operand:VIMulH 2 "altivec_register_operand" "v")))]
   "TARGET_POWER10"
   "vmulhu<wd> %0,%1,%2"
   [(set_attr "type" "veccomplex")])
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 42f83b98a05..7495a9df31d 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -26737,6 +26737,22 @@ vec_t __builtin_galois_field_mult_gcm (vec_t, vec_t);
 vec_t __builtin_galois_field_mult_xts (vec_t, vec_t);
 @end smallexample
 
+Future PowerPC processors may add new vector integer multiply-high
+instructions for halfword elements.  GCC provides support for these
+instructions through the following built-in functions.
+
+@findex vec_mulh
+@smallexample
+@exdent vector signed short
+@exdent vec_mulh (vector signed short @var{a}, vector signed short @var{b});
+@exdent vector unsigned short
+@exdent vec_mulh (vector unsigned short @var{a}, vector unsigned short @var{b});
+@end smallexample
+
+For each integer value @code{i} from 0 to 7, do the following. The integer
+value in halfword element @code{i} of @var{a} is multiplied by the integer
+value in halfword element @code{i} of @var{b}. The high-order 16 bits of the
+32-bit product are placed into halfword element @code{i} of the vector returned.
 
 @node PowerPC Hardware Transactional Memory Built-in Functions
 @subsection PowerPC Hardware Transactional Memory Built-in Functions
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx_simd-1.c b/gcc/testsuite/gcc.target/powerpc/vsx_simd-1.c
new file mode 100644
index 00000000000..aca3ea67013
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vsx_simd-1.c
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include <altivec.h>
+
+typedef vector signed int   v4si_t;
+typedef vector signed short v8hi_t;
+
+__attribute__((noinline))
+v4si_t int_add (v4si_t x, v4si_t y)
+{
+  return vec_add (x, y);            /* xvadduwm */
+}
+
+__attribute__((noinline))
+v4si_t int_sub (v4si_t x, v4si_t y)
+{
+  return vec_sub (x, y);            /* xvsubuwm */
+}
+
+__attribute__((noinline))
+v4si_t int_mul (v4si_t x, v4si_t y)
+{
+  return vec_mul (x, y);            /* xvmuluwm */
+}
+
+__attribute__((noinline))
+v4si_t int_mulhi (v4si_t x, v4si_t y)
+{
+  return vec_mulh (x, y);           /* xvmulhsw */
+}
+
+__attribute__((noinline))
+v8hi_t short_add (v8hi_t x, v8hi_t y)
+{
+  return vec_add (x, y);            /* xvadduhm */
+}
+
+__attribute__((noinline))
+v8hi_t short_sub (v8hi_t x, v8hi_t y)
+{
+  return vec_sub (x, y);            /* xvsubuhm */
+}
+
+__attribute__((noinline))
+v8hi_t short_mul (v8hi_t x, v8hi_t y)
+{
+  return vec_mul (x, y);            /* xvmuluhm */
+}
+
+__attribute__((noinline))
+v8hi_t short_mulhi (v8hi_t x, v8hi_t y)
+{
+  return vec_mulh (x, y);            /* xvmulhsh */
+}
+
+/* { dg-final { scan-assembler-times "xvadduwm" 1 } } */
+/* { dg-final { scan-assembler-times "xvsubuwm" 1 } } */
+/* { dg-final { scan-assembler-times "xvmuluwm" 1 } } */
+/* { dg-final { scan-assembler-times "xvmulhsw" 1 } } */
+/* { dg-final { scan-assembler-times "xvadduhm" 1 } } */
+/* { dg-final { scan-assembler-times "xvsubuhm" 1 } } */
+/* { dg-final { scan-assembler-times "xvmuluhm" 1 } } */
+/* { dg-final { scan-assembler-times "xvmulhsh" 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx_simd-2.c b/gcc/testsuite/gcc.target/powerpc/vsx_simd-2.c
new file mode 100644
index 00000000000..31594255509
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vsx_simd-2.c
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include <altivec.h>
+
+typedef vector unsigned int   v4si_t;
+typedef vector unsigned short v8hi_t;
+
+__attribute__((noinline))
+v4si_t int_add (v4si_t x, v4si_t y)
+{
+  return vec_add (x, y);            /* xvadduwm */
+}
+
+__attribute__((noinline))
+v4si_t int_sub (v4si_t x, v4si_t y)
+{
+  return vec_sub (x, y);            /* xvsubuwm */
+}
+
+__attribute__((noinline))
+v4si_t int_mul (v4si_t x, v4si_t y)
+{
+  return vec_mul (x, y);            /* xvmuluwm */
+}
+
+__attribute__((noinline))
+v4si_t int_mulhi (v4si_t x, v4si_t y)
+{
+  return vec_mulh (x, y);           /* xvmulhuw */
+}
+
+__attribute__((noinline))
+v8hi_t short_add (v8hi_t x, v8hi_t y)
+{
+  return vec_add (x, y);            /* xvadduhm */
+}
+
+__attribute__((noinline))
+v8hi_t short_sub (v8hi_t x, v8hi_t y)
+{
+  return vec_sub (x, y);            /* xvsubuhm */
+}
+
+__attribute__((noinline))
+v8hi_t short_mul (v8hi_t x, v8hi_t y)
+{
+  return vec_mul (x, y);            /* xvmuluhm */
+}
+
+__attribute__((noinline))
+v8hi_t short_mulhi (v8hi_t x, v8hi_t y)
+{
+  return vec_mulh (x, y);            /* xvmulhuh */
+}
+
+/* { dg-final { scan-assembler-times "xvadduwm" 1 } } */
+/* { dg-final { scan-assembler-times "xvsubuwm" 1 } } */
+/* { dg-final { scan-assembler-times "xvmuluwm" 1 } } */
+/* { dg-final { scan-assembler-times "xvmulhuw" 1 } } */
+/* { dg-final { scan-assembler-times "xvadduhm" 1 } } */
+/* { dg-final { scan-assembler-times "xvsubuhm" 1 } } */
+/* { dg-final { scan-assembler-times "xvmuluhm" 1 } } */
+/* { dg-final { scan-assembler-times "xvmulhuh" 1 } } */

[PATCH V2] rs6000: Add Future Vector Integer Arithmetic Instructions [RFC02680]

Reply via email to