This is patch #45 of 45 to generate the 'XXEVAL' instruction on power10 and power11 instead of using the Altivec 'VAND' instruction feeding into 'VNAND'. The 'XXEVAL' instruction can use all 64 vector registers, instead of the 32 registers that traditional Altivec vector instructions use. By allowing all of the vector registers to be used, it reduces the amount of spilling that a large benchmark generated.
This patch adds the tests for generating 'XXEVAL' to the testsuite. I have tested these patches on both big endian and little endian PowerPC servers, with no regressions. Can I check these patches into the trunk? 2025-06-11 Michael Meissner <meiss...@linux.ibm.com> gcc/testsuite/ PR target/117251 * gcc.target/powerpc/p10-vector-fused-1.c: New test. * gcc.target/powerpc/p10-vector-fused-2.c: Likewise. --- .../gcc.target/powerpc/p10-vector-fused-1.c | 409 ++++++++ .../gcc.target/powerpc/p10-vector-fused-2.c | 936 ++++++++++++++++++ 2 files changed, 1345 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-vector-fused-2.c diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c new file mode 100644 index 00000000000..28e0874b345 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c @@ -0,0 +1,409 @@ +/* { dg-do run } */ +/* { dg-require-effective-target power10_hw } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Generate and check most of the vector logical instruction combinations that + may or may not generate xxeval to do a fused operation on power10. */ + +#include <stddef.h> +#include <stdlib.h> +#include <altivec.h> + +#ifdef DEBUG +#include <stdio.h> + +static int errors = 0; +static int tests = 0; +#endif + +typedef vector unsigned int vector_t; +typedef unsigned int scalar_t; + +/* Vector logical functions. 
*/ +static inline vector_t +vector_and (vector_t x, vector_t y) +{ + return x & y; +} + +static inline vector_t +vector_or (vector_t x, vector_t y) +{ + return x | y; +} + +static inline vector_t +vector_xor (vector_t x, vector_t y) +{ + return x ^ y; +} + +static inline vector_t +vector_andc (vector_t x, vector_t y) +{ + return x & ~y; +} + +static inline vector_t +vector_orc (vector_t x, vector_t y) +{ + return x | ~y; +} + +static inline vector_t +vector_nand (vector_t x, vector_t y) +{ + return ~(x & y); +} + +static inline vector_t +vector_nor (vector_t x, vector_t y) +{ + return ~(x | y); +} + +static inline vector_t +vector_eqv (vector_t x, vector_t y) +{ + return ~(x ^ y); +} + +/* Scalar logical functions. */ +static inline scalar_t +scalar_and (scalar_t x, scalar_t y) +{ + return x & y; +} + +static inline scalar_t +scalar_or (scalar_t x, scalar_t y) +{ + return x | y; +} + +static inline scalar_t +scalar_xor (scalar_t x, scalar_t y) +{ + return x ^ y; +} + +static inline scalar_t +scalar_andc (scalar_t x, scalar_t y) +{ + return x & ~y; +} + +static inline scalar_t +scalar_orc (scalar_t x, scalar_t y) +{ + return x | ~y; +} + +static inline scalar_t +scalar_nand (scalar_t x, scalar_t y) +{ + return ~(x & y); +} + +static inline scalar_t +scalar_nor (scalar_t x, scalar_t y) +{ + return ~(x | y); +} + +static inline scalar_t +scalar_eqv (scalar_t x, scalar_t y) +{ + return ~(x ^ y); +} + + +/* + * Generate one function for each combination that we are checking. Do 4 + * operations: + * + * Use FPR regs that should generate either XXEVAL or XXL* insns; + * Use Altivec registers that may generate fused V* insns; + * Use VSX registers, ensure fusing is not done via asm; (and) + * Use GPR registers on scalar operations. 
+ */ + +#ifdef DEBUG +#define TRACE(INNER, OUTER) \ + do { \ + tests++; \ + printf ("%s_%s\n", INNER, OUTER); \ + fflush (stdout); \ + } while (0) \ + +#define FAILED(INNER, OUTER) \ + do { \ + errors++; \ + printf ("%s_%s failed\n", INNER, OUTER); \ + fflush (stdout); \ + } while (0) \ + +#else +#define TRACE(INNER, OUTER) +#define FAILED(INNER, OUTER) abort () +#endif + +#define FUSED_FUNC(INNER, OUTER) \ +static void \ +INNER ## _ ## OUTER (vector_t a, vector_t b, vector_t c) \ +{ \ + vector_t f_a, f_b, f_c, f_r, f_t; \ + vector_t v_a, v_b, v_c, v_r, v_t; \ + vector_t w_a, w_b, w_c, w_r, w_t; \ + scalar_t s_a, s_b, s_c, s_r, s_t; \ + \ + TRACE (#INNER, #OUTER); \ + \ + f_a = a; \ + f_b = b; \ + f_c = c; \ + \ + __asm__ (" # fpr regs: %x0,%x1,%x2 " #INNER "_" #OUTER \ + : "+d" (f_a), \ + "+d" (f_b), \ + "+d" (f_c)); \ + \ + f_t = vector_ ## INNER (f_b, f_c); \ + f_r = vector_ ## OUTER (f_a, f_t); \ + \ + __asm__ (" # fpr regs result: %x0 " #INNER "_" #OUTER \ + : "+d" (f_r)); \ + \ + v_a = a; \ + v_b = b; \ + v_c = c; \ + \ + __asm__ (" # altivec regs: %x0,%x1,%x2 " #INNER "_" #OUTER \ + : "+v" (v_a), \ + "+v" (v_b), \ + "+v" (v_c)); \ + \ + v_t = vector_ ## INNER (v_b, v_c); \ + v_r = vector_ ## OUTER (v_a, v_t); \ + \ + __asm__ (" # altivec regs result: %x0 " #INNER "_" #OUTER \ + : "+v" (v_r)); \ + \ + w_a = a; \ + w_b = b; \ + w_c = c; \ + \ + __asm__ (" # vsx regs: %x0,%x1,%x2 " #INNER "_" #OUTER \ + : "+wa" (w_a), \ + "+wa" (w_b), \ + "+wa" (w_c)); \ + \ + w_t = vector_ ## INNER (w_b, w_c); \ + __asm__ ("nop # break vsx fusion reg %x0" : "+wa" (w_t)); \ + w_r = vector_ ## OUTER (w_a, w_t); \ + \ + __asm__ (" # vsx regs result: %x0 " #INNER "_" #OUTER \ + : "+wa" (w_r)); \ + \ + s_a = a[0]; \ + s_b = b[0]; \ + s_c = c[0]; \ + \ + __asm__ (" # gpr regs: %0,%1,%2 " #INNER "_" #OUTER \ + : "+r" (s_a), \ + "+r" (s_b), \ + "+r" (s_c)); \ + \ + s_t = scalar_ ## INNER (s_b, s_c); \ + s_r = scalar_ ## OUTER (s_a, s_t); \ + \ + __asm__ (" # gpr regs result: %0 " 
#INNER "_" #OUTER \ + : "+r" (s_r)); \ + \ + if (!vec_all_eq (w_r, f_r) \ + || !vec_all_eq (w_r, v_r) \ + || s_r != w_r[0]) \ + FAILED (#INNER, #OUTER); \ + \ + return; \ +} + +FUSED_FUNC (and, and) +FUSED_FUNC (andc, and) +FUSED_FUNC (eqv, and) +FUSED_FUNC (nand, and) +FUSED_FUNC (nor, and) +FUSED_FUNC (or, and) +FUSED_FUNC (orc, and) +FUSED_FUNC (xor, and) + +FUSED_FUNC (and, andc) +FUSED_FUNC (andc, andc) +FUSED_FUNC (eqv, andc) +FUSED_FUNC (nand, andc) +FUSED_FUNC (nor, andc) +FUSED_FUNC (or, andc) +FUSED_FUNC (orc, andc) +FUSED_FUNC (xor, andc) + +FUSED_FUNC (and, eqv) +FUSED_FUNC (andc, eqv) +FUSED_FUNC (eqv, eqv) +FUSED_FUNC (nand, eqv) +FUSED_FUNC (nor, eqv) +FUSED_FUNC (or, eqv) +FUSED_FUNC (orc, eqv) +FUSED_FUNC (xor, eqv) + +FUSED_FUNC (and, nand) +FUSED_FUNC (andc, nand) +FUSED_FUNC (eqv, nand) +FUSED_FUNC (nand, nand) +FUSED_FUNC (nor, nand) +FUSED_FUNC (or, nand) +FUSED_FUNC (orc, nand) +FUSED_FUNC (xor, nand) + +FUSED_FUNC (and, nor) +FUSED_FUNC (andc, nor) +FUSED_FUNC (eqv, nor) +FUSED_FUNC (nand, nor) +FUSED_FUNC (nor, nor) +FUSED_FUNC (or, nor) +FUSED_FUNC (orc, nor) +FUSED_FUNC (xor, nor) + +FUSED_FUNC (and, or) +FUSED_FUNC (andc, or) +FUSED_FUNC (eqv, or) +FUSED_FUNC (nand, or) +FUSED_FUNC (nor, or) +FUSED_FUNC (or, or) +FUSED_FUNC (orc, or) +FUSED_FUNC (xor, or) + +FUSED_FUNC (and, orc) +FUSED_FUNC (andc, orc) +FUSED_FUNC (eqv, orc) +FUSED_FUNC (nand, orc) +FUSED_FUNC (nor, orc) +FUSED_FUNC (or, orc) +FUSED_FUNC (orc, orc) +FUSED_FUNC (xor, orc) + +FUSED_FUNC (and, xor) +FUSED_FUNC (andc, xor) +FUSED_FUNC (eqv, xor) +FUSED_FUNC (nand, xor) +FUSED_FUNC (nor, xor) +FUSED_FUNC (or, xor) +FUSED_FUNC (orc, xor) +FUSED_FUNC (xor, xor) + + +/* List of functions to check. 
*/ +typedef void func_t (vector_t, + vector_t, + vector_t); + +typedef func_t *ptr_func_t; + +static ptr_func_t functions[] = { + and_and, + andc_and, + eqv_and, + nand_and, + nor_and, + or_and, + orc_and, + xor_and, + + and_andc, + andc_andc, + eqv_andc, + nand_andc, + nor_andc, + or_andc, + orc_andc, + xor_andc, + + and_eqv, + andc_eqv, + eqv_eqv, + nand_eqv, + nor_eqv, + or_eqv, + orc_eqv, + xor_eqv, + + and_nand, + andc_nand, + eqv_nand, + nand_nand, + nor_nand, + or_nand, + orc_nand, + xor_nand, + + and_nor, + andc_nor, + eqv_nor, + nand_nor, + nor_nor, + or_nor, + orc_nor, + xor_nor, + + and_or, + andc_or, + eqv_or, + nand_or, + nor_or, + or_or, + orc_or, + xor_or, + + and_orc, + andc_orc, + eqv_orc, + nand_orc, + nor_orc, + or_orc, + orc_orc, + xor_orc, + + and_xor, + andc_xor, + eqv_xor, + nand_xor, + nor_xor, + or_xor, + orc_xor, + xor_xor, +}; + + +int +main (void) +{ + scalar_t s_a = 0x0fu; + scalar_t s_b = 0xaau; + scalar_t s_c = 0xccu; + + vector_t a = (vector_t) { s_a, s_a, ~s_a, ~s_a }; + vector_t b = (vector_t) { s_b, ~s_b, s_b, ~s_b }; + vector_t c = (vector_t) { s_c, ~s_c, ~s_c, s_c }; + + size_t i; + + for (i = 0; i < sizeof (functions) / sizeof (functions[0]); i++) + functions[i] (a, b, c); + +#ifdef DEBUG + printf ("Done, %d tests, %d failures\n", tests, errors); + return errors; + +#else + return 0; +#endif +} diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-2.c b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-2.c new file mode 100644 index 00000000000..f074622c9f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-2.c @@ -0,0 +1,936 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Make sure all of the fusion cases that generate the xxeval instruction + actually generate it. 
*/ +typedef vector unsigned int vector_t; + +static inline vector_t +vector_and (vector_t x, vector_t y) +{ + return x & y; +} + +static inline vector_t +vector_or (vector_t x, vector_t y) +{ + return x | y; +} + +static inline vector_t +vector_xor (vector_t x, vector_t y) +{ + return x ^ y; +} + +static inline vector_t +vector_andc (vector_t x, vector_t y) +{ + return x & ~y; +} + +static inline vector_t +vector_orc (vector_t x, vector_t y) +{ + return x | ~y; +} + +static inline vector_t +vector_nand (vector_t x, vector_t y) +{ + return ~(x & y); +} + +static inline vector_t +vector_nor (vector_t x, vector_t y) +{ + return ~(x | y); +} + +static inline vector_t +vector_eqv (vector_t x, vector_t y) +{ + return ~(x ^ y); +} + +void +and_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,1. */ + r = vector_and (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +and_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,14. */ + r = vector_andc (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +and_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,31. 
*/ + r = vector_or (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +and_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,239. */ + r = vector_orc (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +and_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,30. */ + r = vector_xor (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,14. */ + r = vector_andc (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,11. */ + r = vector_andc (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,210. 
*/ + r = vector_eqv (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_nand (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,239. */ + r = vector_nand (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,47. */ + r = vector_or (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,191. */ + r = vector_orc (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,45. */ + r = vector_xor (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +eqv_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,9. 
*/ + r = vector_and (a, vector_eqv (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +eqv_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,210. */ + r = vector_eqv (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +eqv_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,105. */ + r = vector_eqv (a, vector_eqv (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +eqv_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,159. */ + r = vector_or (a, vector_eqv (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +eqv_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,111. */ + r = vector_orc (a, vector_eqv (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nand_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,14. 
*/ + r = vector_and (a, vector_nand (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nand_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,1. */ + r = vector_andc (a, vector_nand (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nand_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,30. */ + r = vector_eqv (a, vector_nand (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nand_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,2. */ + r = vector_nor (a, vector_nand (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nand_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,31. */ + r = vector_orc (a, vector_nand (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,8. 
*/ + r = vector_and (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,7. */ + r = vector_andc (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,120. */ + r = vector_eqv (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_nand (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,191. */ + r = vector_nand (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,143. */ + r = vector_or (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,127. 
*/ + r = vector_orc (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +or_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,7. */ + r = vector_and (a, vector_or (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +or_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,8. */ + r = vector_andc (a, vector_or (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +or_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,127. */ + r = vector_or (a, vector_or (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +or_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,143. */ + r = vector_orc (a, vector_or (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +or_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,120. 
*/ + r = vector_xor (a, vector_or (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,11. */ + r = vector_and (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,2. */ + r = vector_andc (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,75. */ + r = vector_eqv (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_nor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,8. */ + r = vector_nor (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,191. 
*/ + r = vector_or (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,47. */ + r = vector_orc (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,180. */ + r = vector_xor (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,6. */ + r = vector_and (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,9. */ + r = vector_andc (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_nand (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,249. 
*/ + r = vector_nand (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,111. */ + r = vector_or (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,159. */ + r = vector_orc (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,105. */ + r = vector_xor (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +/* Make sure none of traditional logical instructions are generated. Skip + checking for xxlor in case the register allocator decides to add some vector + moves. */ +/* { dg-final { scan-assembler-not {\mv(and|or|xor|andc|orc|nand|nor|eqv)\M} } } */ +/* { dg-final { scan-assembler-not {\mxxl(and|xor|andc|orc|nand|nor|eqv)\M} } } */ +/* { dg-final { scan-assembler-times {\mxxeval\M} 46 } } */ -- 2.49.0 -- Michael Meissner, IBM PO Box 98, Ayer, Massachusetts, USA, 01432 email: meiss...@linux.ibm.com