https://gcc.gnu.org/g:2f541cae69f67d9e61b6bafa6f4847fa27fdaf05

commit 2f541cae69f67d9e61b6bafa6f4847fa27fdaf05
Author: Michael Meissner <meiss...@linux.ibm.com>
Date:   Tue Jun 10 18:24:33 2025 -0400

    PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations
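
    XXEVAL evaluates an arbitrary bitwise function of three vector inputs,
    selected by an 8-bit truth-table immediate, so a chain of two vector
    logical operations can be fused into a single instruction.  As a rough
    sketch (illustration only, not code taken from the patch), the SHA3
    "chi" step is the kind of source pattern the new tests cover:

        #include <altivec.h>

        /* a ^ (~b & c) is two chained vector logical operations that the
           xxeval fusion can combine into one instruction on power10.  */
        vector unsigned long long
        chi (vector unsigned long long a, vector unsigned long long b,
             vector unsigned long long c)
        {
          return a ^ (~b & c);
        }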
    
    2025-06-10  Michael Meissner  <meiss...@linux.ibm.com>
    
    gcc/testsuite/
    
            PR target/117251
            * gcc.target/powerpc/p10-vector-fused-1.c: New test.
            * gcc.target/powerpc/p10-vector-fused-2.c: Likewise.

Diff:
---
 .../gcc.target/powerpc/p10-vector-fused-1.c        | 409 +++++++++
 .../gcc.target/powerpc/p10-vector-fused-2.c        | 936 +++++++++++++++++++++
 2 files changed, 1345 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c
new file mode 100644
index 000000000000..28e0874b3454
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c
@@ -0,0 +1,409 @@
+/* { dg-do run } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Generate and check most of the two-instruction vector logical combinations
+   that may or may not be fused into a single xxeval operation on power10.  */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+
+static int errors = 0;
+static int tests  = 0;
+#endif
+
+typedef vector unsigned int    vector_t;
+typedef unsigned int           scalar_t;
+
+/* Vector logical functions.  */
+static inline vector_t
+vector_and (vector_t x, vector_t y)
+{
+  return x & y;
+}
+
+static inline vector_t
+vector_or (vector_t x, vector_t y)
+{
+  return x | y;
+}
+
+static inline vector_t
+vector_xor (vector_t x, vector_t y)
+{
+  return x ^ y;
+}
+
+static inline vector_t
+vector_andc (vector_t x, vector_t y)
+{
+  return x & ~y;
+}
+
+static inline vector_t
+vector_orc (vector_t x, vector_t y)
+{
+  return x | ~y;
+}
+
+static inline vector_t
+vector_nand (vector_t x, vector_t y)
+{
+  return ~(x & y);
+}
+
+static inline vector_t
+vector_nor (vector_t x, vector_t y)
+{
+  return ~(x | y);
+}
+
+static inline vector_t
+vector_eqv (vector_t x, vector_t y)
+{
+  return ~(x ^ y);
+}
+
+/* Scalar logical functions.  */
+static inline scalar_t
+scalar_and (scalar_t x, scalar_t y)
+{
+  return x & y;
+}
+
+static inline scalar_t
+scalar_or (scalar_t x, scalar_t y)
+{
+  return x | y;
+}
+
+static inline scalar_t
+scalar_xor (scalar_t x, scalar_t y)
+{
+  return x ^ y;
+}
+
+static inline scalar_t
+scalar_andc (scalar_t x, scalar_t y)
+{
+  return x & ~y;
+}
+
+static inline scalar_t
+scalar_orc (scalar_t x, scalar_t y)
+{
+  return x | ~y;
+}
+
+static inline scalar_t
+scalar_nand (scalar_t x, scalar_t y)
+{
+  return ~(x & y);
+}
+
+static inline scalar_t
+scalar_nor (scalar_t x, scalar_t y)
+{
+  return ~(x | y);
+}
+
+static inline scalar_t
+scalar_eqv (scalar_t x, scalar_t y)
+{
+  return ~(x ^ y);
+}
+
+
+/*
+ * Generate one function for each combination that we are checking.  Do 4
+ * operations:
+ *
+ * Use FPR regs that should generate either XXEVAL or XXL* insns;
+ * Use Altivec registers, which may generate fused V* insns;
+ * Use VSX registers, using an asm to ensure fusion is not done; and
+ * Use GPR registers for the scalar operations.
+ */
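+
+/* For example, FUSED_FUNC (and, xor) below expands to a function and_xor
+   that computes a ^ (b & c) four ways (in FPR, Altivec, and VSX registers,
+   plus a GPR copy on element 0).  An asm nop keeps the VSX copy unfused so
+   it serves as a reference value, and a failure is reported if any of the
+   results disagree.  */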
+
+#ifdef DEBUG
+#define TRACE(INNER, OUTER)                                            \
+  do {                                                                 \
+    tests++;                                                           \
+    printf ("%s_%s\n", INNER, OUTER);                                  \
+    fflush (stdout);                                                   \
+  } while (0)                                                          \
+
+#define FAILED(INNER, OUTER)                                           \
+  do {                                                                 \
+    errors++;                                                          \
+    printf ("%s_%s failed\n", INNER, OUTER);                           \
+    fflush (stdout);                                                   \
+  } while (0)                                                          \
+
+#else
+#define TRACE(INNER, OUTER)
+#define FAILED(INNER, OUTER)   abort ()
+#endif
+
+#define FUSED_FUNC(INNER, OUTER)                                       \
+static void                                                            \
+INNER ## _ ## OUTER (vector_t a, vector_t b, vector_t c)               \
+{                                                                      \
+  vector_t f_a, f_b, f_c, f_r, f_t;                                    \
+  vector_t v_a, v_b, v_c, v_r, v_t;                                    \
+  vector_t w_a, w_b, w_c, w_r, w_t;                                    \
+  scalar_t s_a, s_b, s_c, s_r, s_t;                                    \
+                                                                       \
+  TRACE (#INNER, #OUTER);                                              \
+                                                                       \
+  f_a = a;                                                             \
+  f_b = b;                                                             \
+  f_c = c;                                                             \
+                                                                       \
+  __asm__ (" # fpr regs: %x0,%x1,%x2 " #INNER "_" #OUTER               \
+          : "+d" (f_a),                                                \
+            "+d" (f_b),                                                \
+            "+d" (f_c));                                               \
+                                                                       \
+  f_t = vector_ ## INNER (f_b, f_c);                                   \
+  f_r = vector_ ## OUTER (f_a, f_t);                                   \
+                                                                       \
+  __asm__ (" # fpr regs result: %x0 " #INNER "_" #OUTER                        
\
+          : "+d" (f_r));                                               \
+                                                                       \
+  v_a = a;                                                             \
+  v_b = b;                                                             \
+  v_c = c;                                                             \
+                                                                       \
+  __asm__ (" # altivec regs: %x0,%x1,%x2 " #INNER "_" #OUTER           \
+          : "+v" (v_a),                                                \
+            "+v" (v_b),                                                \
+            "+v" (v_c));                                               \
+                                                                       \
+  v_t = vector_ ## INNER (v_b, v_c);                                   \
+  v_r = vector_ ## OUTER (v_a, v_t);                                   \
+                                                                       \
+  __asm__ (" # altivec regs result: %x0 " #INNER "_" #OUTER            \
+          : "+v" (v_r));                                               \
+                                                                       \
+  w_a = a;                                                             \
+  w_b = b;                                                             \
+  w_c = c;                                                             \
+                                                                       \
+  __asm__ (" # vsx regs: %x0,%x1,%x2 " #INNER "_" #OUTER               \
+          : "+wa" (w_a),                                               \
+            "+wa" (w_b),                                               \
+            "+wa" (w_c));                                              \
+                                                                       \
+  w_t = vector_ ## INNER (w_b, w_c);                                   \
+  __asm__ ("nop # break vsx fusion reg %x0" : "+wa" (w_t));            \
+  w_r = vector_ ## OUTER (w_a, w_t);                                   \
+                                                                       \
+  __asm__ (" # vsx regs result: %x0 " #INNER "_" #OUTER                        
\
+          : "+wa" (w_r));                                              \
+                                                                       \
+  s_a = a[0];                                                          \
+  s_b = b[0];                                                          \
+  s_c = c[0];                                                          \
+                                                                       \
+  __asm__ (" # gpr regs: %0,%1,%2 " #INNER "_" #OUTER                  \
+          : "+r" (s_a),                                                \
+            "+r" (s_b),                                                \
+            "+r" (s_c));                                               \
+                                                                       \
+  s_t = scalar_ ## INNER (s_b, s_c);                                   \
+  s_r = scalar_ ## OUTER (s_a, s_t);                                   \
+                                                                       \
+  __asm__ (" # gpr regs result: %0 " #INNER "_" #OUTER                 \
+          : "+r" (s_r));                                               \
+                                                                       \
+  if (!vec_all_eq (w_r, f_r)                                           \
+      || !vec_all_eq (w_r, v_r)                                        \
+      || s_r != w_r[0])                                                \
+    FAILED (#INNER, #OUTER);                                           \
+                                                                       \
+  return;                                                              \
+}
+
+FUSED_FUNC (and,  and)
+FUSED_FUNC (andc, and)
+FUSED_FUNC (eqv,  and)
+FUSED_FUNC (nand, and)
+FUSED_FUNC (nor,  and)
+FUSED_FUNC (or,   and)
+FUSED_FUNC (orc,  and)
+FUSED_FUNC (xor,  and)
+
+FUSED_FUNC (and,  andc)
+FUSED_FUNC (andc, andc)
+FUSED_FUNC (eqv,  andc)
+FUSED_FUNC (nand, andc)
+FUSED_FUNC (nor,  andc)
+FUSED_FUNC (or,   andc)
+FUSED_FUNC (orc,  andc)
+FUSED_FUNC (xor,  andc)
+
+FUSED_FUNC (and,  eqv)
+FUSED_FUNC (andc, eqv)
+FUSED_FUNC (eqv,  eqv)
+FUSED_FUNC (nand, eqv)
+FUSED_FUNC (nor,  eqv)
+FUSED_FUNC (or,   eqv)
+FUSED_FUNC (orc,  eqv)
+FUSED_FUNC (xor,  eqv)
+
+FUSED_FUNC (and,  nand)
+FUSED_FUNC (andc, nand)
+FUSED_FUNC (eqv,  nand)
+FUSED_FUNC (nand, nand)
+FUSED_FUNC (nor,  nand)
+FUSED_FUNC (or,   nand)
+FUSED_FUNC (orc,  nand)
+FUSED_FUNC (xor,  nand)
+
+FUSED_FUNC (and,  nor)
+FUSED_FUNC (andc, nor)
+FUSED_FUNC (eqv,  nor)
+FUSED_FUNC (nand, nor)
+FUSED_FUNC (nor,  nor)
+FUSED_FUNC (or,   nor)
+FUSED_FUNC (orc,  nor)
+FUSED_FUNC (xor,  nor)
+
+FUSED_FUNC (and,  or)
+FUSED_FUNC (andc, or)
+FUSED_FUNC (eqv,  or)
+FUSED_FUNC (nand, or)
+FUSED_FUNC (nor,  or)
+FUSED_FUNC (or,   or)
+FUSED_FUNC (orc,  or)
+FUSED_FUNC (xor,  or)
+
+FUSED_FUNC (and,  orc)
+FUSED_FUNC (andc, orc)
+FUSED_FUNC (eqv,  orc)
+FUSED_FUNC (nand, orc)
+FUSED_FUNC (nor,  orc)
+FUSED_FUNC (or,   orc)
+FUSED_FUNC (orc,  orc)
+FUSED_FUNC (xor,  orc)
+
+FUSED_FUNC (and,  xor)
+FUSED_FUNC (andc, xor)
+FUSED_FUNC (eqv,  xor)
+FUSED_FUNC (nand, xor)
+FUSED_FUNC (nor,  xor)
+FUSED_FUNC (or,   xor)
+FUSED_FUNC (orc,  xor)
+FUSED_FUNC (xor,  xor)
+
+
+/* List of functions to check.  */
+typedef void func_t (vector_t,
+                    vector_t,
+                    vector_t);
+
+typedef func_t *ptr_func_t;
+
+static ptr_func_t functions[] = {
+  and_and,
+  andc_and,
+  eqv_and,
+  nand_and,
+  nor_and,
+  or_and,
+  orc_and,
+  xor_and,
+
+  and_andc,
+  andc_andc,
+  eqv_andc,
+  nand_andc,
+  nor_andc,
+  or_andc,
+  orc_andc,
+  xor_andc,
+
+  and_eqv,
+  andc_eqv,
+  eqv_eqv,
+  nand_eqv,
+  nor_eqv,
+  or_eqv,
+  orc_eqv,
+  xor_eqv,
+
+  and_nand,
+  andc_nand,
+  eqv_nand,
+  nand_nand,
+  nor_nand,
+  or_nand,
+  orc_nand,
+  xor_nand,
+
+  and_nor,
+  andc_nor,
+  eqv_nor,
+  nand_nor,
+  nor_nor,
+  or_nor,
+  orc_nor,
+  xor_nor,
+
+  and_or,
+  andc_or,
+  eqv_or,
+  nand_or,
+  nor_or,
+  or_or,
+  orc_or,
+  xor_or,
+
+  and_orc,
+  andc_orc,
+  eqv_orc,
+  nand_orc,
+  nor_orc,
+  or_orc,
+  orc_orc,
+  xor_orc,
+
+  and_xor,
+  andc_xor,
+  eqv_xor,
+  nand_xor,
+  nor_xor,
+  or_xor,
+  orc_xor,
+  xor_xor,
+};
+
+
+int
+main (void)
+{
+  scalar_t s_a = 0x0fu;
+  scalar_t s_b = 0xaau;
+  scalar_t s_c = 0xccu;
+
+  vector_t a = (vector_t) { s_a,  s_a, ~s_a, ~s_a };
+  vector_t b = (vector_t) { s_b, ~s_b,  s_b, ~s_b };
+  vector_t c = (vector_t) { s_c, ~s_c, ~s_c,  s_c };
+
+  size_t i;
+
+  for (i = 0; i < sizeof (functions) / sizeof (functions[0]); i++)
+    functions[i] (a, b, c);
+
+#ifdef DEBUG
+  printf ("Done, %d tests, %d failures\n", tests, errors);
+  return errors;
+
+#else
+  return 0;
+#endif
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-2.c b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-2.c
new file mode 100644
index 000000000000..f074622c9f67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-2.c
@@ -0,0 +1,936 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Make sure all of the fusion cases that are expected to generate the xxeval
+   instruction actually do so.  */
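+
+/* The immediate noted in each "xxeval r,a,b,c,N" comment below is the 8-bit
+   truth table of the fused function: the three xxeval inputs form a 3-bit
+   index (first input as the high bit), and that index selects a bit of the
+   immediate counting from its most-significant bit.  Note the compiler may
+   emit the inputs in a different order than a, b, c appear in the source.
+   For example, and_and computes a & b & c, which is true only for (1,1,1),
+   so N = 1; and_andc computes a & ~(b & c), which is true for (1,0,0),
+   (1,0,1) and (1,1,0), so N = 8 + 4 + 2 = 14.  */
+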
+typedef vector unsigned int vector_t;
+
+static inline vector_t
+vector_and (vector_t x, vector_t y)
+{
+  return x & y;
+}
+
+static inline vector_t
+vector_or (vector_t x, vector_t y)
+{
+  return x | y;
+}
+
+static inline vector_t
+vector_xor (vector_t x, vector_t y)
+{
+  return x ^ y;
+}
+
+static inline vector_t
+vector_andc (vector_t x, vector_t y)
+{
+  return x & ~y;
+}
+
+static inline vector_t
+vector_orc (vector_t x, vector_t y)
+{
+  return x | ~y;
+}
+
+static inline vector_t
+vector_nand (vector_t x, vector_t y)
+{
+  return ~(x & y);
+}
+
+static inline vector_t
+vector_nor (vector_t x, vector_t y)
+{
+  return ~(x | y);
+}
+
+static inline vector_t
+vector_eqv (vector_t x, vector_t y)
+{
+  return ~(x ^ y);
+}
+
+void
+and_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,1.  */
+  r = vector_and (a, vector_and (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+and_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,14.  */
+  r = vector_andc (a, vector_and (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+and_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,31.  */
+  r = vector_or (a, vector_and (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+and_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,239.  */
+  r = vector_orc (a, vector_and (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+and_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,30.  */
+  r = vector_xor (a, vector_and (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+andc_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,14.  */
+  r = vector_andc (a, vector_and (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+andc_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,11.  */
+  r = vector_andc (a, vector_andc (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+andc_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,210.  */
+  r = vector_eqv (a, vector_andc (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+andc_nand (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,239.  */
+  r = vector_nand (a, vector_andc (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+andc_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,47.  */
+  r = vector_or (a, vector_andc (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+andc_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,191.  */
+  r = vector_orc (a, vector_andc (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+andc_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,45.  */
+  r = vector_xor (a, vector_andc (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+eqv_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,9.  */
+  r = vector_and (a, vector_eqv (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+eqv_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,210.  */
+  r = vector_eqv (a, vector_andc (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+eqv_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,105.  */
+  r = vector_eqv (a, vector_eqv (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+eqv_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,159.  */
+  r = vector_or (a, vector_eqv (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+eqv_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,111.  */
+  r = vector_orc (a, vector_eqv (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+nand_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,14.  */
+  r = vector_and (a, vector_nand (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+nand_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,1.  */
+  r = vector_andc (a, vector_nand (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+nand_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,30.  */
+  r = vector_eqv (a, vector_nand (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+nand_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,2.  */
+  r = vector_nor (a, vector_nand (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+nand_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,31.  */
+  r = vector_orc (a, vector_nand (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+nor_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,8.  */
+  r = vector_and (a, vector_nor (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+nor_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,7.  */
+  r = vector_andc (a, vector_nor (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+nor_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,120.  */
+  r = vector_eqv (a, vector_nor (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+nor_nand (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,191.  */
+  r = vector_nand (a, vector_nor (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+nor_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,143.  */
+  r = vector_or (a, vector_nor (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+nor_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,127.  */
+  r = vector_orc (a, vector_nor (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+or_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,7.  */
+  r = vector_and (a, vector_or (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+or_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,8.  */
+  r = vector_andc (a, vector_or (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+or_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,127.  */
+  r = vector_or (a, vector_or (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+or_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,143.  */
+  r = vector_orc (a, vector_or (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+or_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,120.  */
+  r = vector_xor (a, vector_or (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+orc_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,11.  */
+  r = vector_and (a, vector_orc (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+orc_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,2.  */
+  r = vector_andc (a, vector_orc (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+orc_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,75.  */
+  r = vector_eqv (a, vector_orc (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+orc_nor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,8.  */
+  r = vector_nor (a, vector_orc (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+orc_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,191.  */
+  r = vector_or (a, vector_orc (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+orc_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,47.  */
+  r = vector_orc (a, vector_orc (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+orc_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,180.  */
+  r = vector_xor (a, vector_orc (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+xor_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,6.  */
+  r = vector_and (a, vector_xor (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+xor_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,9.  */
+  r = vector_andc (a, vector_xor (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+xor_nand (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,249.  */
+  r = vector_nand (a, vector_xor (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+xor_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,111.  */
+  r = vector_or (a, vector_xor (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+xor_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,159.  */
+  r = vector_orc (a, vector_xor (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+void
+xor_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r)
+{
+  vector_t a = *p_a;
+  vector_t b = *p_b;
+  vector_t c = *p_c;
+  vector_t r;
+
+  __asm__ (" # force fpr registers, %x0,%x1,%x2"
+          : "+d" (a), "+d" (b), "+d" (c));
+
+  /* xxeval r,a,b,c,105.  */
+  r = vector_xor (a, vector_xor (b, c));
+
+  __asm__ (" # force fpr result, %x0" : "+d" (r));
+  *p_r = r;
+  return;
+}
+
+/* Make sure none of the traditional logical instructions are generated.  Skip
+   checking for xxlor in case the register allocator decides to add some vector
+   moves.  Each of the 46 functions above should generate exactly one xxeval
+   instruction.  */
+/* { dg-final { scan-assembler-not   {\mv(and|or|xor|andc|orc|nand|nor|eqv)\M} } } */
+/* { dg-final { scan-assembler-not   {\mxxl(and|xor|andc|orc|nand|nor|eqv)\M} } } */
+/* { dg-final { scan-assembler-times {\mxxeval\M} 46 } } */
