Sometimes GCC generates a three-operand ternlog whose result does not
depend on some of those operands (i.e. they are invariant).
For example:

vpternlogq      $252, %zmm2, %zmm1, %zmm0

In this case the zmm2 register isn't used by ternlog,
so it should be replaced with zmm0 or zmm1:

vpternlogq      $252, %zmm0, %zmm1, %zmm0

When the third operand of ternlog is in memory and the other two are
invariant, a load from that memory into a register should be added, and
the first and second operands (as well as the memory operand itself)
should be replaced with that register.
So instead of

vpternlogq      $85, (%rdi), %zmm1, %zmm0

Should emit

vmovdqa64       (%rdi), %zmm0
vpternlogq      $85, %zmm0, %zmm0, %zmm0

gcc/ChangeLog:

        * config/i386/i386.cc (ternlog_invariant_operand_mask): New helper
        function for replacing invariant operands.
        (reduce_ternlog_operands): Likewise.
        * config/i386/i386-protos.h (ternlog_invariant_operand_mask): Prototype
        here.
        (reduce_ternlog_operands): Likewise.
        * config/i386/sse.md: New define_splits that reduce invariant
        ternlog operands.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/reduce-ternlog-operands-1.c: New test.
        * gcc.target/i386/reduce-ternlog-operands-2.c: New test.
---
 gcc/config/i386/i386-protos.h                 |  2 +
 gcc/config/i386/i386.cc                       | 45 +++++++++++++++++++
 gcc/config/i386/sse.md                        | 43 ++++++++++++++++++
 .../i386/reduce-ternlog-operands-1.c          | 20 +++++++++
 .../i386/reduce-ternlog-operands-2.c          | 11 +++++
 5 files changed, 121 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-2.c

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 27fe73ca65c..49398ef9936 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -57,6 +57,8 @@ extern int standard_80387_constant_p (rtx);
 extern const char *standard_80387_constant_opcode (rtx);
 extern rtx standard_80387_constant_rtx (int);
 extern int standard_sse_constant_p (rtx, machine_mode);
+extern int ternlog_invariant_operand_mask (rtx *operands);
+extern void reduce_ternlog_operands (rtx *operands);
 extern const char *standard_sse_constant_opcode (rtx_insn *, rtx *);
 extern bool ix86_standard_x87sse_constant_load_p (const rtx_insn *, rtx);
 extern bool ix86_pre_reload_split (void);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index f0d6167e667..140de478571 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -5070,6 +5070,51 @@ ix86_check_no_addr_space (rtx insn)
     }
   return true;
 }
+
+/* Return a mask of the ternlog inputs that cannot affect the result for the
+   truth table in operands[4]: bit 0 is set when operand 1 is invariant,
+   bit 1 for operand 2 and bit 2 for operand 3.  */
+
+int
+ternlog_invariant_operand_mask (rtx *operands)
+{
+  int mask = 0;
+  int imm8 = INTVAL (operands[4]);
+
+  if (((imm8 >> 4) & 0xF) == (imm8 & 0xF))
+    mask |= 1;
+  if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
+    mask |= (1 << 1);
+  if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
+    mask |= (1 << 2);
+
+  return mask;
+}
+
+/* Replace each invariant ternlog operand with a copy of another operand.  */
+
+void
+reduce_ternlog_operands (rtx *operands)
+{
+  int mask = ternlog_invariant_operand_mask (operands);
+
+  if (mask & 1) /* The first operand is invariant.  */
+    operands[1] = operands[2];
+
+  if (mask & 2) /* The second operand is invariant.  */
+    operands[2] = operands[1];
+
+  if (mask & 4)        /* The third operand is invariant.  */
+   operands[3] = operands[1];
+  else if (!MEM_P (operands[3])) /* Operand 3 is used and not in memory.  */
+    {
+      if (mask & 1) /* The first operand is invariant.  */
+       operands[1] = operands[3];
+      if (mask & 2) /* The second operand is invariant.  */
+       operands[2] = operands[3];
+    }
+}
+
 
 /* Initialize the table of extra 80387 mathematical constants.  */
 
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a2099373123..f88d82b315c 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -12625,6 +12625,49 @@
                      (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
                      (const_string "*")))])
 
+;; If the first and second operands of ternlog are invariant and the
+;; third operand is in memory, load the third operand into the
+;; destination register and use that register for all three ternlog
+;; inputs.
+(define_split
+  [(set (match_operand:V 0 "register_operand")
+       (unspec:V
+         [(match_operand:V 1 "register_operand")
+          (match_operand:V 2 "register_operand")
+          (match_operand:V 3 "memory_operand")
+          (match_operand:SI 4 "const_0_to_255_operand")]
+         UNSPEC_VTERNLOG))]
+  "ternlog_invariant_operand_mask (operands) == 3 && !reload_completed"
+  [(set (match_dup 0)
+       (match_dup 3))
+   (set (match_dup 0)
+       (unspec:V
+         [(match_dup 0)
+          (match_dup 0)
+          (match_dup 0)
+          (match_dup 4)]
+         UNSPEC_VTERNLOG))])
+
+;; Replace invariant ternlog operands with operands that are used
+;; (the memory case with mask 3 is handled by the previous define_split).
+(define_split
+  [(set (match_operand:V 0 "register_operand")
+       (unspec:V
+         [(match_operand:V 1 "register_operand")
+          (match_operand:V 2 "register_operand")
+          (match_operand:V 3 "nonimmediate_operand")
+          (match_operand:SI 4 "const_0_to_255_operand")]
+         UNSPEC_VTERNLOG))]
+  "ternlog_invariant_operand_mask (operands) != 0 && !reload_completed"
+  [(set (match_dup 0)
+       (unspec:V
+         [(match_dup 1)
+          (match_dup 2)
+          (match_dup 3)
+          (match_dup 4)]
+         UNSPEC_VTERNLOG))]
+  "reduce_ternlog_operands (operands);")
+
 ;; There must be lots of other combinations like
 ;;
 ;; (any_logic:V
diff --git a/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-1.c b/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-1.c
new file mode 100644
index 00000000000..a7063df9dcb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times {vmovdqa*} "4" } } */
+
+#include <immintrin.h>
+
+__m512i f(__m512i* a, __m512i* b, __m512i* c)
+{
+       return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], 119); /* Operand 1 invariant.  */
+}
+
+__m512i g(__m512i* a, __m512i* b, __m512i* c)
+{
+       return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], 250); /* Operand 2 invariant.  */
+}
+
+__m512i h(__m512i* a, __m512i* b, __m512i* c)
+{
+       return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], 252); /* Operand 3 invariant.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-2.c b/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-2.c
new file mode 100644
index 00000000000..b44986cc259
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler "vpternlog.*0.*0.*0" } } */
+
+#include <immintrin.h>
+
+__m512i f(__m512i a, __m512i b, __m512i* c)
+{
+       return _mm512_ternarylogic_epi64 (a, b, c[0], 0x55); /* Operands 1 and 2 invariant.  */
+}
+
-- 
2.34.1

Reply via email to