From: MITSUNARI Shigeo <[email protected]>
When the register allocator selects the MUL-based highpart pattern
(umuldi3_highpart) with the source value already in %rdx, it inserts
a redundant mov to %rax before the mul instruction. Add a peephole2
that detects this mov + mul sequence and converts it to a single mulx,
eliminating the extra mov.
This improves inlined loops that perform multiple unsigned divisions
by constants. For example, a loop with three div-by-constant
operations now generates 15 instructions (matching LLVM) instead
of 18.
Before (loop body excerpt):
mov rax, rdx
mul r9
After:
mulx rdx, rax, r9
gcc/ChangeLog:
* config/i386/i386.md: Add peephole2 to convert
mov + umul_highpart to mulx on BMI2 targets.
gcc/testsuite/ChangeLog:
* gcc.target/i386/bmi2-mulx-highpart-2.c: New test.
Signed-off-by: MITSUNARI Shigeo <[email protected]>
---
gcc/config/i386/i386.md | 17 +++++++++++++++++
.../gcc.target/i386/bmi2-mulx-highpart-2.c | 19 +++++++++++++++++++
2 files changed, 36 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-mulx-highpart-2.c
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 472f9d41332..1c394690b04 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11522,6 +11522,23 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<MODE>")])
+;; Convert "mov %rdx, %rax; mul src" to "mulx src, %rax, out" (BMI2).
+;; Operand 3 must not mention operand 0, whose load from %rdx is deleted.
+(define_peephole2
+  [(set (match_operand:DWIH 0 "register_operand")
+	(match_operand:DWIH 1 "register_operand"))
+   (parallel [(set (match_operand:DWIH 2 "register_operand")
+		   (umul_highpart:DWIH (match_dup 0)
+		     (match_operand:DWIH 3 "nonimmediate_operand")))
+	      (clobber (match_dup 0))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_BMI2 && REGNO (operands[1]) == DX_REG
+   && REGNO (operands[0]) != REGNO (operands[2])
+   && !reg_overlap_mentioned_p (operands[0], operands[3])"
+  [(parallel [(set (match_dup 2)
+		   (umul_highpart:DWIH (match_dup 1) (match_dup 3)))
+	      (clobber (match_dup 0))])])
+
;; Highpart multiplication patterns
(define_insn "<s>mul<mode>3_highpart"
[(set (match_operand:DWIH 0 "register_operand" "=d")
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx-highpart-2.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx-highpart-2.c
new file mode 100644
index 00000000000..be56cf15d07
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx-highpart-2.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mbmi2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target *-*-linux* *-*-gnu* } } } */
+
+/*
+**div7loop:
+**...
+** mulx %rsi, %rax, %rdx
+**...
+*/
+
+unsigned int
+div7loop (unsigned int x)
+{
+ for (int i = 0; i < 10000; i++) { /* x is carried across iterations.  */
+ x ^= (i ^ x) / 7; /* Unsigned division by the constant 7.  */
+ }
+ return x;
+}
--
2.43.0