Hi, On 21/04/26 11:34 AM, jeevitha wrote: > This patch depends on the -mcpu=future infrastructure and the > smul/umul pattern fix patches. This will be upstreamed after those > patches are upstreamed. These changes have been bootstrapped and > regression tested on powerpc64le-linux. > > This patch adds support for VSX vector arithmetic instructions that may > be added to future PowerPC processors. Note that the names of these > builtins may change in the future. > > Add new VSX patterns for vector add, subtract, multiply, and > multiply-high instructions guarded by TARGET_FUTURE. Rename existing > Altivec patterns to altivec_* to avoid name conflicts and introduce > corresponding VSX patterns.
Please correct the above para: New VSX patterns are added...Existing Altivec patterns are renamed...New VSX patterns are introduced... > > 2026-04-21 Jeevitha Palanisamy <[email protected]> > > gcc/ > * config/rs6000/altivec.md (vsx_add<mode>3): New pattern for The changelog entry should be 80 chars (including the 8 chars tab at the beginning) in length. > VSX vector add for halfword and word. > (altivec_add<mode>3): Renamed from add<mode>3. > (vsx_sub<mode>3): New pattern for VSX vector subtract for > halfword and word. > (altivec_sub<mode>3): Renamed from sub<mode>3. > * config/rs6000/vector.md (VIlong1): New mode iterator for > V4SI and V2DI. > (VI_1): New mode iterator for V8HI and V4SI. Please see if a more appropriate name can be used here. For example, look at the mode iterators VP (for pack/unpack), VNEG (for negate), VParity (for parity) etc. > (add<mode>3): New expand pattern for integer vector add. > (sub<mode>3): New expand pattern for integer vector subtract. > (smul<mode>3_highpart): New expand pattern for signed vector > multiply-high part on VIlong1 modes. > (umul<mode>3_highpart): New expand pattern for unsigned vector > multiply-high part on VIlong1 modes. > * config/rs6000/vsx.md (vsx_mul<mode>3): New VSX vector multiply > pattern for halfword and word. > (vsx_smul<mode>3_highpart): New VSX signed multiply-high > pattern for halfword and word. > (vsx_umul<mode>3_highpart): New VSX unsigned multiply-high > pattern for halfword and word. > (altivec_smul<mode>3_highpart): Renamed from smul<mode>3_highpart. > (altivec_umul<mode>3_highpart): Renamed from umul<mode>3_highpart. > * config/rs6000/rs6000-builtins.def (__builtin_vsx_xvmulhuh): > New builtin for VSX unsigned multiply-high halfword. > (__builtin_vsx_xvmulhsh): New builtin for VSX signed multiply-high > halfword. > * config/rs6000/rs6000-overload.def (__builtin_vec_mulh): Add > overloads for vector multiply-high signed/unsigned halfword. 
> * doc/extend.texi (PowerPC AltiVec Built-in Functions that may be > available on future PowerPCs): Document new functions. > > gcc/testsuite/ > * gcc.target/powerpc/vsx_simd-1.c: New test. > * gcc.target/powerpc/vsx_simd-2.c: New test. > > > diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md > index 129f56245cd..dcae4547f84 100644 > --- a/gcc/config/rs6000/altivec.md > +++ b/gcc/config/rs6000/altivec.md > @@ -503,8 +503,15 @@ > > ;; Simple binary operations. > > +(define_insn "vsx_add<mode>3" To denote that the above pattern will not be used during expand, please add '*' as a prefix to the pattern name: *vsx_add<mode>3. > + [(set (match_operand:VI_1 0 "vsx_register_operand" "=wa") > + (plus:VI_1 (match_operand:VI_1 1 "vsx_register_operand" "wa") > + (match_operand:VI_1 2 "vsx_register_operand" "wa")))] > + "TARGET_FUTURE" > + "xvaddu<VI_char>m %x0,%x1,%x2") > + > ;; add > -(define_insn "add<mode>3" > +(define_insn "altivec_add<mode>3" Ditto. > [(set (match_operand:VI2 0 "register_operand" "=v") > (plus:VI2 (match_operand:VI2 1 "register_operand" "v") > (match_operand:VI2 2 "register_operand" "v")))] > @@ -547,8 +554,15 @@ > "vadds<VI_char>s %0,%1,%2" > [(set_attr "type" "vecsimple")]) > > +(define_insn "vsx_sub<mode>3" > + [(set (match_operand:VI_1 0 "vsx_register_operand" "=wa") > + (minus:VI_1 (match_operand:VI_1 1 "vsx_register_operand" "wa") > + (match_operand:VI_1 2 "vsx_register_operand" "wa")))] > + "TARGET_FUTURE" > + "xvsubu<VI_char>m %x0,%x1,%x2") > + > ;; sub > -(define_insn "sub<mode>3" > +(define_insn "altivec_sub<mode>3" > [(set (match_operand:VI2 0 "register_operand" "=v") > (minus:VI2 (match_operand:VI2 1 "register_operand" "v") > (match_operand:VI2 2 "register_operand" "v")))] > diff --git a/gcc/config/rs6000/rs6000-builtins.def > b/gcc/config/rs6000/rs6000-builtins.def > index 7e5a4fb96e7..3bcf310eac8 100644 > --- a/gcc/config/rs6000/rs6000-builtins.def > +++ b/gcc/config/rs6000/rs6000-builtins.def > @@ -3924,3 +3924,10 @@ > > 
void __builtin_vsx_stxvp (v256, unsigned long, const v256 *); > STXVP nothing {mma,pair} > + > +[future] > + const vus __builtin_vsx_xvmulhuh (vus, vus); > + XVMULHUH vsx_umulv8hi3_highpart {} > + > + const vss __builtin_vsx_xvmulhsh (vss, vss); > + XVMULHSH vsx_smulv8hi3_highpart {} > diff --git a/gcc/config/rs6000/rs6000-overload.def > b/gcc/config/rs6000/rs6000-overload.def > index 5238c81b214..3690a6dce7b 100644 > --- a/gcc/config/rs6000/rs6000-overload.def > +++ b/gcc/config/rs6000/rs6000-overload.def > @@ -2530,6 +2530,10 @@ > VMULEUD > > [VEC_MULH, vec_mulh, __builtin_vec_mulh] > + vss __builtin_vec_mulh (vss, vss); > + XVMULHSH > + vus __builtin_vec_mulh (vus, vus); > + XVMULHUH > vsi __builtin_vec_mulh (vsi, vsi); > VMULHSW > vui __builtin_vec_mulh (vui, vui); > diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md > index e6adf91002e..44cf811d0d3 100644 > --- a/gcc/config/rs6000/vector.md > +++ b/gcc/config/rs6000/vector.md > @@ -71,6 +71,10 @@ > ;; Vector integer modes > (define_mode_iterator VI [V4SI V8HI V16QI]) > > +(define_mode_iterator VIlong1 [V4SI V2DI]) > + > +(define_mode_iterator VI_1 [V8HI V4SI]) > + > ;; Base type from vector mode > (define_mode_attr VEC_base [(V16QI "QI") > (V8HI "HI") > @@ -188,6 +192,13 @@ > "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > "") > > +(define_expand "add<mode>3" > + [(set (match_operand:VEC_I 0 "register_operand") Instead of VEC_I, can you please use the same mode iterator as used in "altivec_add<mode>3" for better readability? 
> + (plus:VEC_I (match_operand:VEC_I 1 "register_operand") > + (match_operand:VEC_I 2 "register_operand")))] > + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > + "") > + > (define_expand "sub<mode>3" > [(set (match_operand:VEC_F 0 "vfloat_operand") > (minus:VEC_F (match_operand:VEC_F 1 "vfloat_operand") > @@ -195,6 +206,13 @@ > "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > "") > > +(define_expand "sub<mode>3" > + [(set (match_operand:VEC_I 0 "register_operand") > + (minus:VEC_I (match_operand:VEC_I 1 "register_operand") > + (match_operand:VEC_I 2 "register_operand")))] > + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > + "") > + > (define_expand "mul<mode>3" > [(set (match_operand:VEC_F 0 "vfloat_operand") > (mult:VEC_F (match_operand:VEC_F 1 "vfloat_operand") > @@ -208,6 +226,22 @@ > } > }) > > +(define_expand "smul<mode>3_highpart" > + [(set (match_operand:VIlong1 0 "register_operand") > + (unspec:VIlong1 [(match_operand:VIlong1 1 "register_operand") > + (match_operand:VIlong1 2 "register_operand")] > + UNSPEC_VMULHS))] > + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && TARGET_POWER10" > + "") > + > +(define_expand "umul<mode>3_highpart" > + [(set (match_operand:VIlong1 0 "register_operand") > + (unspec:VIlong1 [(match_operand:VIlong1 1 "register_operand") > + (match_operand:VIlong1 2 "register_operand")] > + UNSPEC_VMULHU))] > + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && TARGET_POWER10" > + "") > + > (define_expand "div<mode>3" > [(set (match_operand:VEC_F 0 "vfloat_operand") > (div:VEC_F (match_operand:VEC_F 1 "vfloat_operand") > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md > index f4979e447de..ae3c88a2b6e 100644 > --- a/gcc/config/rs6000/vsx.md > +++ b/gcc/config/rs6000/vsx.md > @@ -1711,6 +1711,13 @@ > "xvsub<sd>p %x0,%x1,%x2" > [(set_attr "type" "<VStype_simple>")]) > > +(define_insn "vsx_mul<mode>3" > + [(set (match_operand:VI_1 0 "vsx_register_operand" "=wa") > + (mult:VI_1 (match_operand:VI_1 1 "vsx_register_operand" "wa") > + 
(match_operand:VI_1 2 "vsx_register_operand" "wa")))] > + "TARGET_FUTURE" > + "xvmulu<wd>m %x0,%x1,%x2") > + > (define_insn "*vsx_mul<mode>3" > [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") > (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") > @@ -6546,7 +6553,23 @@ > [(set_attr "type" "vecdiv") > (set_attr "size" "<bits>")]) > > -(define_insn "smul<mode>3_highpart" > +(define_insn "vsx_smul<mode>3_highpart" > + [(set (match_operand:VI_1 0 "vsx_register_operand" "=wa") > + (unspec:VI_1 [(match_operand:VI_1 1 "vsx_register_operand" "wa") > + (match_operand:VI_1 2 "vsx_register_operand" "wa")] > + UNSPEC_VMULHS))] > + "TARGET_FUTURE" > + "xvmulhs<wd> %x0,%x1,%x2") > + > +(define_insn "vsx_umul<mode>3_highpart" > + [(set (match_operand:VI_1 0 "vsx_register_operand" "=wa") > + (unspec:VI_1 [(match_operand:VI_1 1 "vsx_register_operand" "wa") > + (match_operand:VI_1 2 "vsx_register_operand" "wa")] > + UNSPEC_VMULHU))] > + "TARGET_FUTURE" > + "xvmulhu<wd> %x0,%x1,%x2") > + > +(define_insn "altivec_smul<mode>3_highpart" > [(set (match_operand:VIlong 0 "altivec_register_operand" "=v") > (unspec:VIlong [(match_operand:VIlong 1 "altivec_register_operand" > "v") > (match_operand:VIlong 2 "altivec_register_operand" > "v")] > @@ -6555,7 +6578,7 @@ > "vmulhs<wd> %0,%1,%2" > [(set_attr "type" "veccomplex")]) > > -(define_insn "umul<mode>3_highpart" > +(define_insn "altivec_umul<mode>3_highpart" > [(set (match_operand:VIlong 0 "altivec_register_operand" "=v") > (unspec:VIlong [(match_operand:VIlong 1 "altivec_register_operand" > "v") > (match_operand:VIlong 2 "altivec_register_operand" > "v")] > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > index a22ef39226a..f8225fa2a24 100644 > --- a/gcc/doc/extend.texi > +++ b/gcc/doc/extend.texi > @@ -24608,6 +24608,7 @@ The PVIPR documents the following overloaded > functions: > * PowerPC AltiVec Built-in Functions Available on ISA 2.07:: > * PowerPC AltiVec Built-in Functions Available on ISA 3.0:: > * 
PowerPC AltiVec Built-in Functions Available on ISA 3.1:: > +* PowerPC AltiVec Built-in Functions that may be available on future > PowerPCs:: > @end menu > > @node PowerPC AltiVec Built-in Functions on ISA 2.05 > @@ -26641,6 +26642,27 @@ vector unsigned char); > vector unsigned char); > @end smallexample > > +@node PowerPC AltiVec Built-in Functions that may be available on future > PowerPCs > +@subsubsection PowerPC Future AltiVec Built-in Functions > +The built-in functions described in this section may be available on > +future PowerPC processors. At present, these built-ins exist to allow > +testing of new instructions. There is no guarantee that these > +instructions will actually be implemented. > + > +Vector Integer Multiply High > + > +@smallexample > +@exdent vector signed short > +@exdent vec_mulh (vector signed short @var{a}, vector signed short @var{b}); > +@exdent vector unsigned short > +@exdent vec_mulh (vector unsigned short @var{a}, vector unsigned short > @var{b}); > +@end smallexample > + > +For each integer value @code{i} from 0 to 7, do the following. The integer > +value in halfword element @code{i} of a is multiplied by the integer value in > +halfword element @code{i} of b. The high-order 16 bits of the 32-bit product > are > +placed into halfword element @code{i} of the vector returned. > + I think an @findex entry is missing above. Also, 'a' and 'b' need some formatting directive (such as @code or @var); otherwise they cannot be distinguished from the rest of the description. I can see that this is also missing in the existing documentation for vec_mulh, but let us at least correct it here. 
> @node PowerPC Hardware Transactional Memory Built-in Functions > @subsection PowerPC Hardware Transactional Memory Built-in Functions > GCC provides two interfaces for accessing the Hardware Transactional > diff --git a/gcc/testsuite/gcc.target/powerpc/vsx_simd-1.c > b/gcc/testsuite/gcc.target/powerpc/vsx_simd-1.c > new file mode 100644 > index 00000000000..8bff745aaba > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/vsx_simd-1.c > @@ -0,0 +1,65 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mdejagnu-cpu=future -O2" } */ > + > +#include <altivec.h> > + > +typedef vector signed int v4si_t; > +typedef vector signed short v8hi_t; > + > +__attribute__((noinline)) > +v4si_t int_add (v4si_t x, v4si_t y) > +{ > + return vec_add (x, y); /* xvadduwm */ > +} > + > +__attribute__((noinline)) > +v4si_t int_sub (v4si_t x, v4si_t y) > +{ > + return vec_sub (x, y); /* xvsubuwm */ > +} > + > +__attribute__((noinline)) > +v4si_t int_mul (v4si_t x, v4si_t y) > +{ > + return vec_mul (x, y); /* xvmuluwm */ > +} > + > +__attribute__((noinline)) > +v4si_t int_mulhi (v4si_t x, v4si_t y) > +{ > + return vec_mulh (x, y); /* xvmulhsw */ > +} > + > +__attribute__((noinline)) > +v8hi_t short_add (v8hi_t x, v8hi_t y) > +{ > + return vec_add (x, y); /* xvadduhm */ > +} > + > +__attribute__((noinline)) > +v8hi_t short_sub (v8hi_t x, v8hi_t y) > +{ > + return vec_sub (x, y); /* xvsubuhm */ > +} > + > +__attribute__((noinline)) > +v8hi_t short_mul (v8hi_t x, v8hi_t y) > +{ > + return vec_mul (x, y); /* xvmuluhm */ > +} > + > + > +__attribute__((noinline)) > +v8hi_t short_mulhi (v8hi_t x, v8hi_t y) > +{ > + return vec_mulh (x, y); /* xvmulhsh */ > +} > + > +/* { dg-final { scan-assembler-times "xvadduwm" 1 } } */ > +/* { dg-final { scan-assembler-times "xvsubuwm" 1 } } */ > +/* { dg-final { scan-assembler-times "xvmuluwm" 1 } } */ > +/* { dg-final { scan-assembler-times "xvmulhsw" 1 } } */ > +/* { dg-final { scan-assembler-times "xvadduhm" 1 } } */ > +/* { dg-final { 
scan-assembler-times "xvsubuhm" 1 } } */ > +/* { dg-final { scan-assembler-times "xvmuluhm" 1 } } */ > +/* { dg-final { scan-assembler-times "xvmulhsh" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/vsx_simd-2.c > b/gcc/testsuite/gcc.target/powerpc/vsx_simd-2.c > new file mode 100644 > index 00000000000..782efb67473 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/vsx_simd-2.c > @@ -0,0 +1,65 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mdejagnu-cpu=future -O2" } */ > + > +#include <altivec.h> > + > +typedef vector unsigned int v4si_t; > +typedef vector unsigned short v8hi_t; > + > +__attribute__((noinline)) > +v4si_t int_add (v4si_t x, v4si_t y) > +{ > + return vec_add (x, y); /* xvadduwm */ > +} > + > +__attribute__((noinline)) > +v4si_t int_sub (v4si_t x, v4si_t y) > +{ > + return vec_sub (x, y); /* xvsubuwm */ > +} > + > +__attribute__((noinline)) > +v4si_t int_mul (v4si_t x, v4si_t y) > +{ > + return vec_mul (x, y); /* xvmuluwm */ > +} > + > +__attribute__((noinline)) > +v4si_t int_mulhi (v4si_t x, v4si_t y) > +{ > + return vec_mulh (x, y); /* xvmulhuw */ > +} > + > +__attribute__((noinline)) > +v8hi_t short_add (v8hi_t x, v8hi_t y) > +{ > + return vec_add (x, y); /* xvadduhm */ > +} > + > +__attribute__((noinline)) > +v8hi_t short_sub (v8hi_t x, v8hi_t y) > +{ > + return vec_sub (x, y); /* xvsubuhm */ > +} > + > +__attribute__((noinline)) > +v8hi_t short_mul (v8hi_t x, v8hi_t y) > +{ > + return vec_mul (x, y); /* xvmuluhm */ > +} > + > + Nit: extra line. 
-Surya > +__attribute__((noinline)) > +v8hi_t short_mulhi (v8hi_t x, v8hi_t y) > +{ > + return vec_mulh (x, y); /* xvmulhuh */ > +} > + > +/* { dg-final { scan-assembler-times "xvadduwm" 1 } } */ > +/* { dg-final { scan-assembler-times "xvsubuwm" 1 } } */ > +/* { dg-final { scan-assembler-times "xvmuluwm" 1 } } */ > +/* { dg-final { scan-assembler-times "xvmulhuw" 1 } } */ > +/* { dg-final { scan-assembler-times "xvadduhm" 1 } } */ > +/* { dg-final { scan-assembler-times "xvsubuhm" 1 } } */ > +/* { dg-final { scan-assembler-times "xvmuluhm" 1 } } */ > +/* { dg-final { scan-assembler-times "xvmulhuh" 1 } } */ >
