https://gcc.gnu.org/g:2f541cae69f67d9e61b6bafa6f4847fa27fdaf05
commit 2f541cae69f67d9e61b6bafa6f4847fa27fdaf05 Author: Michael Meissner <meiss...@linux.ibm.com> Date: Tue Jun 10 18:24:33 2025 -0400 PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations 2025-06-10 Michael Meissner <meiss...@linux.ibm.com> gcc/testsuite/ PR target/117251 * gcc.target/powerpc/p10-vector-fused-1.c: New test. * gcc.target/powerpc/p10-vector-fused-2.c: Likewise. Diff: --- .../gcc.target/powerpc/p10-vector-fused-1.c | 409 +++++++++ .../gcc.target/powerpc/p10-vector-fused-2.c | 936 +++++++++++++++++++++ 2 files changed, 1345 insertions(+) diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c new file mode 100644 index 000000000000..28e0874b3454 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c @@ -0,0 +1,409 @@ +/* { dg-do run } */ +/* { dg-require-effective-target power10_hw } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Generate and check most of the vector logical instruction combinations that + may or may not generate xxeval to do a fused operation on power10. */ + +#include <stddef.h> +#include <stdlib.h> +#include <altivec.h> + +#ifdef DEBUG +#include <stdio.h> + +static int errors = 0; +static int tests = 0; +#endif + +typedef vector unsigned int vector_t; +typedef unsigned int scalar_t; + +/* Vector logical functions. 
*/ +static inline vector_t +vector_and (vector_t x, vector_t y) +{ + return x & y; +} + +static inline vector_t +vector_or (vector_t x, vector_t y) +{ + return x | y; +} + +static inline vector_t +vector_xor (vector_t x, vector_t y) +{ + return x ^ y; +} + +static inline vector_t +vector_andc (vector_t x, vector_t y) +{ + return x & ~y; +} + +static inline vector_t +vector_orc (vector_t x, vector_t y) +{ + return x | ~y; +} + +static inline vector_t +vector_nand (vector_t x, vector_t y) +{ + return ~(x & y); +} + +static inline vector_t +vector_nor (vector_t x, vector_t y) +{ + return ~(x | y); +} + +static inline vector_t +vector_eqv (vector_t x, vector_t y) +{ + return ~(x ^ y); +} + +/* Scalar logical functions. */ +static inline scalar_t +scalar_and (scalar_t x, scalar_t y) +{ + return x & y; +} + +static inline scalar_t +scalar_or (scalar_t x, scalar_t y) +{ + return x | y; +} + +static inline scalar_t +scalar_xor (scalar_t x, scalar_t y) +{ + return x ^ y; +} + +static inline scalar_t +scalar_andc (scalar_t x, scalar_t y) +{ + return x & ~y; +} + +static inline scalar_t +scalar_orc (scalar_t x, scalar_t y) +{ + return x | ~y; +} + +static inline scalar_t +scalar_nand (scalar_t x, scalar_t y) +{ + return ~(x & y); +} + +static inline scalar_t +scalar_nor (scalar_t x, scalar_t y) +{ + return ~(x | y); +} + +static inline scalar_t +scalar_eqv (scalar_t x, scalar_t y) +{ + return ~(x ^ y); +} + + +/* + * Generate one function for each combination that we are checking. Do 4 + * operations: + * + * Use FPR regs that should generate either XXEVAL or XXL* insns; + * Use Altivec registers that may generate fused V* insns; + * Use VSX registers, ensure fusing is not done via asm; (and) + * Use GPR registers on scalar operations. 
+ */ + +#ifdef DEBUG +#define TRACE(INNER, OUTER) \ + do { \ + tests++; \ + printf ("%s_%s\n", INNER, OUTER); \ + fflush (stdout); \ + } while (0) \ + +#define FAILED(INNER, OUTER) \ + do { \ + errors++; \ + printf ("%s_%s failed\n", INNER, OUTER); \ + fflush (stdout); \ + } while (0) \ + +#else +#define TRACE(INNER, OUTER) +#define FAILED(INNER, OUTER) abort () +#endif + +#define FUSED_FUNC(INNER, OUTER) \ +static void \ +INNER ## _ ## OUTER (vector_t a, vector_t b, vector_t c) \ +{ \ + vector_t f_a, f_b, f_c, f_r, f_t; \ + vector_t v_a, v_b, v_c, v_r, v_t; \ + vector_t w_a, w_b, w_c, w_r, w_t; \ + scalar_t s_a, s_b, s_c, s_r, s_t; \ + \ + TRACE (#INNER, #OUTER); \ + \ + f_a = a; \ + f_b = b; \ + f_c = c; \ + \ + __asm__ (" # fpr regs: %x0,%x1,%x2 " #INNER "_" #OUTER \ + : "+d" (f_a), \ + "+d" (f_b), \ + "+d" (f_c)); \ + \ + f_t = vector_ ## INNER (f_b, f_c); \ + f_r = vector_ ## OUTER (f_a, f_t); \ + \ + __asm__ (" # fpr regs result: %x0 " #INNER "_" #OUTER \ + : "+d" (f_r)); \ + \ + v_a = a; \ + v_b = b; \ + v_c = c; \ + \ + __asm__ (" # altivec regs: %x0,%x1,%x2 " #INNER "_" #OUTER \ + : "+v" (v_a), \ + "+v" (v_b), \ + "+v" (v_c)); \ + \ + v_t = vector_ ## INNER (v_b, v_c); \ + v_r = vector_ ## OUTER (v_a, v_t); \ + \ + __asm__ (" # altivec regs result: %x0 " #INNER "_" #OUTER \ + : "+v" (v_r)); \ + \ + w_a = a; \ + w_b = b; \ + w_c = c; \ + \ + __asm__ (" # vsx regs: %x0,%x1,%x2 " #INNER "_" #OUTER \ + : "+wa" (w_a), \ + "+wa" (w_b), \ + "+wa" (w_c)); \ + \ + w_t = vector_ ## INNER (w_b, w_c); \ + __asm__ ("nop # break vsx fusion reg %x0" : "+wa" (w_t)); \ + w_r = vector_ ## OUTER (w_a, w_t); \ + \ + __asm__ (" # vsx regs result: %x0 " #INNER "_" #OUTER \ + : "+wa" (w_r)); \ + \ + s_a = a[0]; \ + s_b = b[0]; \ + s_c = c[0]; \ + \ + __asm__ (" # gpr regs: %0,%1,%2 " #INNER "_" #OUTER \ + : "+r" (s_a), \ + "+r" (s_b), \ + "+r" (s_c)); \ + \ + s_t = scalar_ ## INNER (s_b, s_c); \ + s_r = scalar_ ## OUTER (s_a, s_t); \ + \ + __asm__ (" # gpr regs result: %0 " 
#INNER "_" #OUTER \ + : "+r" (s_r)); \ + \ + if (!vec_all_eq (w_r, f_r) \ + || !vec_all_eq (w_r, v_r) \ + || s_r != w_r[0]) \ + FAILED (#INNER, #OUTER); \ + \ + return; \ +} + +FUSED_FUNC (and, and) +FUSED_FUNC (andc, and) +FUSED_FUNC (eqv, and) +FUSED_FUNC (nand, and) +FUSED_FUNC (nor, and) +FUSED_FUNC (or, and) +FUSED_FUNC (orc, and) +FUSED_FUNC (xor, and) + +FUSED_FUNC (and, andc) +FUSED_FUNC (andc, andc) +FUSED_FUNC (eqv, andc) +FUSED_FUNC (nand, andc) +FUSED_FUNC (nor, andc) +FUSED_FUNC (or, andc) +FUSED_FUNC (orc, andc) +FUSED_FUNC (xor, andc) + +FUSED_FUNC (and, eqv) +FUSED_FUNC (andc, eqv) +FUSED_FUNC (eqv, eqv) +FUSED_FUNC (nand, eqv) +FUSED_FUNC (nor, eqv) +FUSED_FUNC (or, eqv) +FUSED_FUNC (orc, eqv) +FUSED_FUNC (xor, eqv) + +FUSED_FUNC (and, nand) +FUSED_FUNC (andc, nand) +FUSED_FUNC (eqv, nand) +FUSED_FUNC (nand, nand) +FUSED_FUNC (nor, nand) +FUSED_FUNC (or, nand) +FUSED_FUNC (orc, nand) +FUSED_FUNC (xor, nand) + +FUSED_FUNC (and, nor) +FUSED_FUNC (andc, nor) +FUSED_FUNC (eqv, nor) +FUSED_FUNC (nand, nor) +FUSED_FUNC (nor, nor) +FUSED_FUNC (or, nor) +FUSED_FUNC (orc, nor) +FUSED_FUNC (xor, nor) + +FUSED_FUNC (and, or) +FUSED_FUNC (andc, or) +FUSED_FUNC (eqv, or) +FUSED_FUNC (nand, or) +FUSED_FUNC (nor, or) +FUSED_FUNC (or, or) +FUSED_FUNC (orc, or) +FUSED_FUNC (xor, or) + +FUSED_FUNC (and, orc) +FUSED_FUNC (andc, orc) +FUSED_FUNC (eqv, orc) +FUSED_FUNC (nand, orc) +FUSED_FUNC (nor, orc) +FUSED_FUNC (or, orc) +FUSED_FUNC (orc, orc) +FUSED_FUNC (xor, orc) + +FUSED_FUNC (and, xor) +FUSED_FUNC (andc, xor) +FUSED_FUNC (eqv, xor) +FUSED_FUNC (nand, xor) +FUSED_FUNC (nor, xor) +FUSED_FUNC (or, xor) +FUSED_FUNC (orc, xor) +FUSED_FUNC (xor, xor) + + +/* List of functions to check. 
*/ +typedef void func_t (vector_t, + vector_t, + vector_t); + +typedef func_t *ptr_func_t; + +static ptr_func_t functions[] = { + and_and, + andc_and, + eqv_and, + nand_and, + nor_and, + or_and, + orc_and, + xor_and, + + and_andc, + andc_andc, + eqv_andc, + nand_andc, + nor_andc, + or_andc, + orc_andc, + xor_andc, + + and_eqv, + andc_eqv, + eqv_eqv, + nand_eqv, + nor_eqv, + or_eqv, + orc_eqv, + xor_eqv, + + and_nand, + andc_nand, + eqv_nand, + nand_nand, + nor_nand, + or_nand, + orc_nand, + xor_nand, + + and_nor, + andc_nor, + eqv_nor, + nand_nor, + nor_nor, + or_nor, + orc_nor, + xor_nor, + + and_or, + andc_or, + eqv_or, + nand_or, + nor_or, + or_or, + orc_or, + xor_or, + + and_orc, + andc_orc, + eqv_orc, + nand_orc, + nor_orc, + or_orc, + orc_orc, + xor_orc, + + and_xor, + andc_xor, + eqv_xor, + nand_xor, + nor_xor, + or_xor, + orc_xor, + xor_xor, +}; + + +int +main (void) +{ + scalar_t s_a = 0x0fu; + scalar_t s_b = 0xaau; + scalar_t s_c = 0xccu; + + vector_t a = (vector_t) { s_a, s_a, ~s_a, ~s_a }; + vector_t b = (vector_t) { s_b, ~s_b, s_b, ~s_b }; + vector_t c = (vector_t) { s_c, ~s_c, ~s_c, s_c }; + + size_t i; + + for (i = 0; i < sizeof (functions) / sizeof (functions[0]); i++) + functions[i] (a, b, c); + +#ifdef DEBUG + printf ("Done, %d tests, %d failures\n", tests, errors); + return errors; + +#else + return 0; +#endif +} diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-2.c b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-2.c new file mode 100644 index 000000000000..f074622c9f67 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-2.c @@ -0,0 +1,936 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Make sure all of the fusion cases that generate the xxeval instruction + actually generate it. 
*/ +typedef vector unsigned int vector_t; + +static inline vector_t +vector_and (vector_t x, vector_t y) +{ + return x & y; +} + +static inline vector_t +vector_or (vector_t x, vector_t y) +{ + return x | y; +} + +static inline vector_t +vector_xor (vector_t x, vector_t y) +{ + return x ^ y; +} + +static inline vector_t +vector_andc (vector_t x, vector_t y) +{ + return x & ~y; +} + +static inline vector_t +vector_orc (vector_t x, vector_t y) +{ + return x | ~y; +} + +static inline vector_t +vector_nand (vector_t x, vector_t y) +{ + return ~(x & y); +} + +static inline vector_t +vector_nor (vector_t x, vector_t y) +{ + return ~(x | y); +} + +static inline vector_t +vector_eqv (vector_t x, vector_t y) +{ + return ~(x ^ y); +} + +void +and_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,1. */ + r = vector_and (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +and_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,14. */ + r = vector_andc (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +and_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,31. 
*/ + r = vector_or (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +and_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,239. */ + r = vector_orc (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +and_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,30. */ + r = vector_xor (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,14. NOTE(review): this is the same expression as and_andc (inner and, outer andc); the name andc_and suggests vector_and (a, vector_andc (b, c)) -- confirm which is intended. */ + r = vector_andc (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,11. */ + r = vector_andc (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,210. 
*/ + r = vector_eqv (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_nand (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,239. */ + r = vector_nand (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,47. */ + r = vector_or (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,191. */ + r = vector_orc (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,45. */ + r = vector_xor (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +eqv_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,9. 
*/ + r = vector_and (a, vector_eqv (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +eqv_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,210. NOTE(review): this is the same expression as andc_eqv (inner andc, outer eqv); the name eqv_andc suggests vector_andc (a, vector_eqv (b, c)) -- confirm which is intended. */ + r = vector_eqv (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +eqv_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,105. */ + r = vector_eqv (a, vector_eqv (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +eqv_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,159. */ + r = vector_or (a, vector_eqv (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +eqv_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,111. */ + r = vector_orc (a, vector_eqv (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nand_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,14. 
*/ + r = vector_and (a, vector_nand (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nand_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,1. */ + r = vector_andc (a, vector_nand (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nand_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,30. */ + r = vector_eqv (a, vector_nand (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nand_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,2. NOTE(review): the outer operation here is vector_nor although the function is named nand_or; either the name nand_nor or a call to vector_or may have been intended -- confirm. */ + r = vector_nor (a, vector_nand (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nand_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,31. */ + r = vector_orc (a, vector_nand (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,8. 
*/ + r = vector_and (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,7. */ + r = vector_andc (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,120. */ + r = vector_eqv (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_nand (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,191. */ + r = vector_nand (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,143. */ + r = vector_or (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,127. 
*/ + r = vector_orc (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +or_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,7. */ + r = vector_and (a, vector_or (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +or_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,8. */ + r = vector_andc (a, vector_or (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +or_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,127. */ + r = vector_or (a, vector_or (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +or_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,143. */ + r = vector_orc (a, vector_or (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +or_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,120. 
*/ + r = vector_xor (a, vector_or (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,11. */ + r = vector_and (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,2. */ + r = vector_andc (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,75. */ + r = vector_eqv (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_nor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,8. */ + r = vector_nor (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,191. 
*/ + r = vector_or (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,47. */ + r = vector_orc (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,180. */ + r = vector_xor (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,6. */ + r = vector_and (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,9. */ + r = vector_andc (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_nand (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,249. 
*/ + r = vector_nand (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,111. */ + r = vector_or (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,159. */ + r = vector_orc (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,105. */ + r = vector_xor (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +/* Make sure none of the traditional logical instructions are generated. Skip + checking for xxlor in case the register allocator decides to add some vector + moves. */ +/* { dg-final { scan-assembler-not {\mv(and|or|xor|andc|orc|nand|nor|eqv)\M} } } */ +/* { dg-final { scan-assembler-not {\mxxl(and|xor|andc|orc|nand|nor|eqv)\M} } } */ +/* { dg-final { scan-assembler-times {\mxxeval\M} 46 } } */