From: MITSUNARI Shigeo <[email protected]>

Add a new instruction pattern that uses MULX to compute only the high
part of an unsigned multiplication on BMI2 targets.  Previously, when
only the high part was needed, GCC would emit MULQ followed by a MOV
to retrieve the result from RDX.  With this pattern, MULX writes the
high part directly to the destination register, saving one instruction.

This benefits unsigned 32-bit integer division by constants that require
33-bit magic multipliers (e.g., division by 7), reducing the sequence
from 4 instructions to 3 on BMI2 targets.

Before:
    movabsq $2635249153617166336, %rcx
    movl    %edi, %eax
    mulq    %rcx
    movl    %edx, %eax

After:
    movabsq $2635249153617166336, %rax
    movl    %edi, %edx
    mulx    %rax, %rdx, %rax

gcc/ChangeLog:

        * config/i386/i386.md (*bmi2_umul<mode>3_highpart): New pattern.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/bmi2-mulx-highpart-1.c: New test.

Signed-off-by: MITSUNARI Shigeo <[email protected]>
---
 gcc/config/i386/i386.md                        | 15 +++++++++++++++
 .../gcc.target/i386/bmi2-mulx-highpart-1.c     | 18 ++++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-mulx-highpart-1.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e514809453d..472f9d41332 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11507,6 +11507,21 @@
              (set (match_dup 5)
                   (umul_highpart:DWIH (match_dup 2) (match_dup 3)))])])
 
+;; BMI2 MULX highpart-only pattern.  MULX writes the high part straight to
+;; operand 0 and dumps the unwanted low part in a scratch, avoiding the mov
+;; from rdx after mulq.  Operand 1 is MULX's implicit d-register source;
+(define_insn "*bmi2_umul<mode>3_highpart"
+  [(set (match_operand:DWIH 0 "register_operand" "=r")
+       (umul_highpart:DWIH
+         (match_operand:DWIH 1 "register_operand" "%d")
+         (match_operand:DWIH 2 "nonimmediate_operand" "rm")))
+   (clobber (match_scratch:DWIH 3 "=r"))]
+  "TARGET_BMI2"
+  "mulx\t{%2, %3, %0|%0, %3, %2}"
+  [(set_attr "type" "imulx")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
+
 ;; Highpart multiplication patterns
 (define_insn "<s>mul<mode>3_highpart"
   [(set (match_operand:DWIH 0 "register_operand" "=d")
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx-highpart-1.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx-highpart-1.c
new file mode 100644
index 00000000000..dd7b32aa0b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx-highpart-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mbmi2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target *-*-linux* *-*-gnu* } } } */
+
+/*
+**div7:
+**     movabsq \$2635249153617166336, %rax
+**     movl    %edi, %edx
+**     mulx    %rax, %rdx, %rax
+**     ret
+**...
+*/
+
+unsigned int
+div7 (unsigned int x)
+{
+  return x / 7;
+}
-- 
2.43.0

Reply via email to