Hi!

The following patch adds patterns and splitters for {,u}divmodsi4 followed
by zero-extension.  Like other instructions with 32-bit operands, divl and
idivl zero-extend both results to 64 bits, so there is no need to extend them
again.  The REE pass ignores instructions that have more than one SET, but
at least the combiner doesn't.  The patch adds both patterns/splitters that
zero-extend the quotient and patterns/splitters that zero-extend the modulo
(in that case the combiner wants the modulo to be the first operation).
I have a patch which I'll attach to the PR, which also has patterns for
both results zero extended, but as neither combiner nor anything else is
able to match them right now, I'm not including it here.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-09-29  Jakub Jelinek  <ja...@redhat.com>

        PR target/82361
        * config/i386/i386.md
        (TARGET_USE_8BIT_IDIV zext divmodsi4 splitter): New define_split.
        (divmodsi4_zext_1, divmodsi4_zext_2, *divmodsi4_zext_1,
        *divmodsi4_zext_2): New define_insn_and_split.
        (*divmodsi4_noext_zext_1, *divmodsi4_noext_zext_2): New define_insn.
        (TARGET_USE_8BIT_IDIV zext udivmodsi4 splitter): New define_split.
        (udivmodsi4_zext_1, udivmodsi4_zext_2, *udivmodsi4_zext_1,
        *udivmodsi4_zext_2, *udivmodsi4_pow2_zext_1, *udivmodsi4_pow2_zext_2):
        New define_insn_and_split.
        (*udivmodsi4_noext_zext_1, *udivmodsi4_noext_zext_2): New define_insn.
        * config/i386/i386.c (ix86_split_idivmod): Handle operands[0] or
        operands[1] having DImode when mode is SImode.

        * gcc.target/i386/pr82361-1.c: New test.
        * gcc.target/i386/pr82361-2.c: New test.

--- gcc/config/i386/i386.md.jj  2017-09-29 09:19:42.000000000 +0200
+++ gcc/config/i386/i386.md     2017-09-29 19:19:34.795293575 +0200
@@ -7635,6 +7635,36 @@ (define_split
   [(const_int 0)]
   "ix86_split_idivmod (<MODE>mode, operands, true); DONE;")
 
+(define_split  ;; 8-bit idiv split, signed, quotient zero-extended to DImode
+  [(set (match_operand:DI 0 "register_operand")
+       (zero_extend:DI
+         (div:SI (match_operand:SI 2 "register_operand")
+                 (match_operand:SI 3 "nonimmediate_operand"))))
+   (set (match_operand:SI 1 "register_operand")
+       (mod:SI (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_USE_8BIT_IDIV
+   && TARGET_QIMODE_MATH
+   && can_create_pseudo_p ()
+   && !optimize_insn_for_size_p ()"
+  [(const_int 0)]
+  "ix86_split_idivmod (SImode, operands, true); DONE;")
+
+(define_split  ;; 8-bit idiv split, signed, remainder zero-extended to DImode
+  [(set (match_operand:DI 1 "register_operand")
+       (zero_extend:DI
+         (mod:SI (match_operand:SI 2 "register_operand")
+                 (match_operand:SI 3 "nonimmediate_operand"))))
+   (set (match_operand:SI 0 "register_operand")
+       (div:SI (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_USE_8BIT_IDIV
+   && TARGET_QIMODE_MATH
+   && can_create_pseudo_p ()
+   && !optimize_insn_for_size_p ()"
+  [(const_int 0)]
+  "ix86_split_idivmod (SImode, operands, true); DONE;")
+
 (define_insn_and_split "divmod<mode>4_1"
   [(set (match_operand:SWI48 0 "register_operand" "=a")
        (div:SWI48 (match_operand:SWI48 2 "register_operand" "0")
@@ -7670,6 +7700,79 @@ (define_insn_and_split "divmod<mode>4_1"
   [(set_attr "type" "multi")
    (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "divmodsi4_zext_1"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+       (zero_extend:DI
+         (div:SI (match_operand:SI 2 "register_operand" "0")
+                 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+   (set (match_operand:SI 1 "register_operand" "=&d")
+       (mod:SI (match_dup 2) (match_dup 3)))
+   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  "reload_completed"
+  [(parallel [(set (match_dup 1)
+                  (ashiftrt:SI (match_dup 4) (match_dup 5)))
+             (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 0)
+                  (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
+             (set (match_dup 1)
+                  (mod:SI (match_dup 2) (match_dup 3)))
+             (use (match_dup 1))
+             (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
+
+  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+    operands[4] = operands[2];
+  else
+    {
+      /* Avoid use of cltd in favor of a mov+shift.  */
+      emit_move_insn (operands[1], operands[2]);
+      operands[4] = operands[1];
+    }
+}
+  [(set_attr "type" "multi")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "divmodsi4_zext_2"
+  [(set (match_operand:DI 1 "register_operand" "=&d")
+       (zero_extend:DI
+         (mod:SI (match_operand:SI 2 "register_operand" "0")
+                 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+   (set (match_operand:SI 0 "register_operand" "=a")
+       (div:SI (match_dup 2) (match_dup 3)))
+   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  "reload_completed"
+  [(parallel [(set (match_dup 6)
+                  (ashiftrt:SI (match_dup 4) (match_dup 5)))
+             (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 1)
+                  (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
+             (set (match_dup 0)
+                  (div:SI (match_dup 2) (match_dup 3)))
+             (use (match_dup 6))
+             (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
+  operands[6] = gen_lowpart (SImode, operands[1]);
+
+  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+    operands[4] = operands[2];
+  else
+    {
+      /* Avoid use of cltd in favor of a mov+shift.  */
+      emit_move_insn (operands[6], operands[2]);
+      operands[4] = operands[6];
+    }
+}
+  [(set_attr "type" "multi")
+   (set_attr "mode" "SI")])
+
 (define_insn_and_split "*divmod<mode>4"
   [(set (match_operand:SWIM248 0 "register_operand" "=a")
        (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
@@ -7705,6 +7808,77 @@ (define_insn_and_split "*divmod<mode>4"
   [(set_attr "type" "multi")
    (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "*divmodsi4_zext_1"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+       (zero_extend:DI
+         (div:SI (match_operand:SI 2 "register_operand" "0")
+                 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+   (set (match_operand:SI 1 "register_operand" "=&d")
+       (mod:SI (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  "reload_completed"
+  [(parallel [(set (match_dup 1)
+                  (ashiftrt:SI (match_dup 4) (match_dup 5)))
+             (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 0)
+                  (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
+             (set (match_dup 1)
+                  (mod:SI (match_dup 2) (match_dup 3)))
+             (use (match_dup 1))
+             (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
+
+  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+    operands[4] = operands[2];
+  else
+    {
+      /* Avoid use of cltd in favor of a mov+shift.  */
+      emit_move_insn (operands[1], operands[2]);
+      operands[4] = operands[1];
+    }
+}
+  [(set_attr "type" "multi")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "*divmodsi4_zext_2"
+  [(set (match_operand:DI 1 "register_operand" "=&d")
+       (zero_extend:DI
+         (mod:SI (match_operand:SI 2 "register_operand" "0")
+                 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+   (set (match_operand:SI 0 "register_operand" "=a")
+       (div:SI (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  "reload_completed"
+  [(parallel [(set (match_dup 6)
+                  (ashiftrt:SI (match_dup 4) (match_dup 5)))
+             (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 1)
+                  (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
+             (set (match_dup 0)
+                  (div:SI (match_dup 2) (match_dup 3)))
+             (use (match_dup 6))
+             (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
+  operands[6] = gen_lowpart (SImode, operands[1]);
+
+  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+    operands[4] = operands[2];
+  else
+    {
+      /* Avoid use of cltd in favor of a mov+shift.  */
+      emit_move_insn (operands[6], operands[2]);
+      operands[4] = operands[6];
+    }
+}
+  [(set_attr "type" "multi")
+   (set_attr "mode" "SI")])
+
 (define_insn "*divmod<mode>4_noext"
   [(set (match_operand:SWIM248 0 "register_operand" "=a")
        (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
@@ -7718,6 +7892,34 @@ (define_insn "*divmod<mode>4_noext"
   [(set_attr "type" "idiv")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*divmodsi4_noext_zext_1"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+       (zero_extend:DI
+         (div:SI (match_operand:SI 2 "register_operand" "0")
+                 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+   (set (match_operand:SI 1 "register_operand" "=d")
+       (mod:SI (match_dup 2) (match_dup 3)))
+   (use (match_operand:SI 4 "register_operand" "1"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "idiv{l}\t%3"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "SI")])
+
+(define_insn "*divmodsi4_noext_zext_2"
+  [(set (match_operand:DI 1 "register_operand" "=d")
+       (zero_extend:DI
+         (mod:SI (match_operand:SI 2 "register_operand" "0")
+                 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+   (set (match_operand:SI 0 "register_operand" "=a")
+       (div:SI (match_dup 2) (match_dup 3)))
+   (use (match_operand:SI 4 "register_operand" "1"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "idiv{l}\t%3"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "SI")])
+
 (define_expand "divmodqi4"
   [(parallel [(set (match_operand:QI 0 "register_operand")
                   (div:QI
@@ -7808,6 +8010,38 @@ (define_split
   [(const_int 0)]
   "ix86_split_idivmod (<MODE>mode, operands, false); DONE;")
 
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+       (zero_extend:DI
+         (udiv:SI (match_operand:SI 2 "register_operand")
+                  (match_operand:SI 3 "nonimmediate_operand"))))
+   (set (match_operand:SI 1 "register_operand")
+       (umod:SI (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && TARGET_USE_8BIT_IDIV
+   && TARGET_QIMODE_MATH
+   && can_create_pseudo_p ()
+   && !optimize_insn_for_size_p ()"
+  [(const_int 0)]
+  "ix86_split_idivmod (SImode, operands, false); DONE;")
+
+(define_split
+  [(set (match_operand:DI 1 "register_operand")
+       (zero_extend:DI
+         (umod:SI (match_operand:SI 2 "register_operand")
+                  (match_operand:SI 3 "nonimmediate_operand"))))
+   (set (match_operand:SI 0 "register_operand")
+       (udiv:SI (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && TARGET_USE_8BIT_IDIV
+   && TARGET_QIMODE_MATH
+   && can_create_pseudo_p ()
+   && !optimize_insn_for_size_p ()"
+  [(const_int 0)]
+  "ix86_split_idivmod (SImode, operands, false); DONE;")
+
 (define_insn_and_split "udivmod<mode>4_1"
   [(set (match_operand:SWI48 0 "register_operand" "=a")
        (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
@@ -7830,6 +8064,52 @@ (define_insn_and_split "udivmod<mode>4_1
   [(set_attr "type" "multi")
    (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "udivmodsi4_zext_1"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+       (zero_extend:DI
+         (udiv:SI (match_operand:SI 2 "register_operand" "0")
+                  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+   (set (match_operand:SI 1 "register_operand" "=&d")
+       (umod:SI (match_dup 2) (match_dup 3)))
+   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  "reload_completed"
+  [(set (match_dup 1) (const_int 0))
+   (parallel [(set (match_dup 0)
+                  (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
+             (set (match_dup 1)
+                  (umod:SI (match_dup 2) (match_dup 3)))
+             (use (match_dup 1))
+             (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "udivmodsi4_zext_2"
+  [(set (match_operand:DI 1 "register_operand" "=&d")
+       (zero_extend:DI
+         (umod:SI (match_operand:SI 2 "register_operand" "0")
+                (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+   (set (match_operand:SI 0 "register_operand" "=a")
+       (udiv:SI (match_dup 2) (match_dup 3)))
+   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  "reload_completed"
+  [(set (match_dup 4) (const_int 0))
+   (parallel [(set (match_dup 1)
+                  (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
+             (set (match_dup 0)
+                  (udiv:SI (match_dup 2) (match_dup 3)))
+             (use (match_dup 4))
+             (clobber (reg:CC FLAGS_REG))])]
+  "operands[4] = gen_lowpart (SImode, operands[1]);"
+  [(set_attr "type" "multi")
+   (set_attr "mode" "SI")])
+
 (define_insn_and_split "*udivmod<mode>4"
   [(set (match_operand:SWIM248 0 "register_operand" "=a")
        (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
@@ -7851,6 +8131,50 @@ (define_insn_and_split "*udivmod<mode>4"
   [(set_attr "type" "multi")
    (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "*udivmodsi4_zext_1"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+       (zero_extend:DI
+         (udiv:SI (match_operand:SI 2 "register_operand" "0")
+                  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+   (set (match_operand:SI 1 "register_operand" "=&d")
+       (umod:SI (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  "reload_completed"
+  [(set (match_dup 1) (const_int 0))
+   (parallel [(set (match_dup 0)
+                  (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
+             (set (match_dup 1)
+                  (umod:SI (match_dup 2) (match_dup 3)))
+             (use (match_dup 1))
+             (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "*udivmodsi4_zext_2"
+  [(set (match_operand:DI 1 "register_operand" "=&d")
+       (zero_extend:DI
+         (umod:SI (match_operand:SI 2 "register_operand" "0")
+                  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+   (set (match_operand:SI 0 "register_operand" "=a")
+       (udiv:SI (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  "reload_completed"
+  [(set (match_dup 4) (const_int 0))
+   (parallel [(set (match_dup 1)
+                  (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
+             (set (match_dup 0)
+                  (udiv:SI (match_dup 2) (match_dup 3)))
+             (use (match_dup 4))
+             (clobber (reg:CC FLAGS_REG))])]
+  "operands[4] = gen_lowpart (SImode, operands[1]);"
+  [(set_attr "type" "multi")
+   (set_attr "mode" "SI")])
+
 ;; Optimize division or modulo by constant power of 2, if the constant
 ;; materializes only after expansion.
 (define_insn_and_split "*udivmod<mode>4_pow2"
@@ -7877,6 +8201,60 @@ (define_insn_and_split "*udivmod<mode>4_
   [(set_attr "type" "multi")
    (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "*udivmodsi4_pow2_zext_1"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (zero_extend:DI
+         (udiv:SI (match_operand:SI 2 "register_operand" "0")
+                  (match_operand:SI 3 "const_int_operand" "n"))))
+   (set (match_operand:SI 1 "register_operand" "=r")
+       (umod:SI (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && IN_RANGE (INTVAL (operands[3]), 2, HOST_WIDE_INT_UC (0x80000000))
+   && (UINTVAL (operands[3]) & (UINTVAL (operands[3]) - 1)) == 0"
+  "#"
+  "&& 1"
+  [(set (match_dup 1) (match_dup 2))
+   (parallel [(set (match_dup 0)
+                  (zero_extend:DI (lshiftrt:SI (match_dup 2) (match_dup 4))))
+             (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 1) (and:SI (match_dup 1) (match_dup 5)))
+             (clobber (reg:CC FLAGS_REG))])]
+{
+  int v = exact_log2 (UINTVAL (operands[3]));
+  operands[4] = GEN_INT (v);
+  operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
+}
+  [(set_attr "type" "multi")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "*udivmodsi4_pow2_zext_2"
+  [(set (match_operand:DI 1 "register_operand" "=r")  ; zext remainder
+       (zero_extend:DI
+         (umod:SI (match_operand:SI 2 "register_operand" "0")
+                  (match_operand:SI 3 "const_int_operand" "n"))))
+   (set (match_operand:SI 0 "register_operand" "=r")  ; quotient
+       (udiv:SI (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && IN_RANGE (INTVAL (operands[3]), 2, HOST_WIDE_INT_UC (0x80000000))
+   && (UINTVAL (operands[3]) & (UINTVAL (operands[3]) - 1)) == 0"
+  "#"
+  "&& 1"
+  [(set (match_dup 6) (match_dup 2))  ; SImode copy of the dividend
+   (parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 2) (match_dup 4)))
+             (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 1)
+                  (zero_extend:DI (and:SI (match_dup 6) (match_dup 5))))
+             (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[4] = GEN_INT (exact_log2 (UINTVAL (operands[3])));  /* shift */
+  operands[5] = GEN_INT (UINTVAL (operands[3]) - 1);  /* pow2 - 1 mask */
+  operands[6] = gen_lowpart (SImode, operands[1]);  /* SImode view of op 1 */
+}
+  [(set_attr "type" "multi")
+   (set_attr "mode" "SI")])
+
 (define_insn "*udivmod<mode>4_noext"
   [(set (match_operand:SWIM248 0 "register_operand" "=a")
        (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
@@ -7890,6 +8268,34 @@ (define_insn "*udivmod<mode>4_noext"
   [(set_attr "type" "idiv")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*udivmodsi4_noext_zext_1"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+       (zero_extend:DI
+         (udiv:SI (match_operand:SI 2 "register_operand" "0")
+                  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+   (set (match_operand:SI 1 "register_operand" "=d")
+       (umod:SI (match_dup 2) (match_dup 3)))
+   (use (match_operand:SI 4 "register_operand" "1"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "div{l}\t%3"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "SI")])
+
+(define_insn "*udivmodsi4_noext_zext_2"
+  [(set (match_operand:DI 1 "register_operand" "=d")
+       (zero_extend:DI
+         (umod:SI (match_operand:SI 2 "register_operand" "0")
+                  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+   (set (match_operand:SI 0 "register_operand" "=a")
+       (udiv:SI (match_dup 2) (match_dup 3)))
+   (use (match_operand:SI 4 "register_operand" "1"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "div{l}\t%3"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "SI")])
+
 (define_expand "udivmodqi4"
   [(parallel [(set (match_operand:QI 0 "register_operand")
                   (udiv:QI
--- gcc/config/i386/i386.c.jj   2017-09-29 19:15:27.822267844 +0200
+++ gcc/config/i386/i386.c      2017-09-29 19:29:00.276483787 +0200
@@ -21927,9 +21927,22 @@ ix86_split_idivmod (machine_mode mode, r
   switch (mode)
     {
     case E_SImode:
-      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
+      if (GET_MODE (operands[0]) == SImode)
+       {
+         if (GET_MODE (operands[1]) == SImode)
+           gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
+         else
+           gen_divmod4_1
+             = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2;
+         gen_zero_extend = gen_zero_extendqisi2;
+       }
+      else
+       {
+         gen_divmod4_1
+           = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1;
+         gen_zero_extend = gen_zero_extendqidi2;
+       }
       gen_test_ccno_1 = gen_testsi_ccno_1;
-      gen_zero_extend = gen_zero_extendqisi2;
       break;
     case E_DImode:
       gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
@@ -21988,16 +22001,24 @@ ix86_split_idivmod (machine_mode mode, r
       div = gen_rtx_UDIV (mode, operands[2], operands[3]);
       mod = gen_rtx_UMOD (mode, operands[2], operands[3]);
     }
+  if (mode == SImode)
+    {
+      if (GET_MODE (operands[0]) != SImode)
+       div = gen_rtx_ZERO_EXTEND (DImode, div);
+      if (GET_MODE (operands[1]) != SImode)
+       mod = gen_rtx_ZERO_EXTEND (DImode, mod);
+    }
 
   /* Extract remainder from AH.  */
-  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
+  tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]),
+                              tmp0, GEN_INT (8), GEN_INT (8));
   if (REG_P (operands[1]))
     insn = emit_move_insn (operands[1], tmp1);
   else
     {
       /* Need a new scratch register since the old one has result
         of 8bit divide.  */
-      scratch = gen_reg_rtx (mode);
+      scratch = gen_reg_rtx (GET_MODE (operands[1]));
       emit_move_insn (scratch, tmp1);
       insn = emit_move_insn (operands[1], scratch);
     }
--- gcc/testsuite/gcc.target/i386/pr82361-1.c.jj        2017-09-29 19:21:12.744113987 +0200
+++ gcc/testsuite/gcc.target/i386/pr82361-1.c   2017-09-29 19:25:27.465046411 +0200
@@ -0,0 +1,53 @@
+/* PR target/82361 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mtune=generic -masm=att -mno-8bit-idiv" } */
+/* We should be able to optimize all %eax to %rax zero extensions, because
+   div and idiv instructions with 32-bit operands zero-extend both results.  */
+/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
+/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
+   one.  */
+/* { dg-final { scan-assembler-times "movl\t%edx, %edx" 2 } } */
+
+void
+f1 (unsigned int a, unsigned int b)
+{
+  unsigned long long c = a / b;
+  unsigned long long d = a % b;
+  asm volatile ("" : : "r" (c), "r" (d));
+}
+
+void
+f2 (int a, int b)
+{
+  unsigned long long c = (unsigned int) (a / b);
+  unsigned long long d = (unsigned int) (a % b);
+  asm volatile ("" : : "r" (c), "r" (d));
+}
+
+void
+f3 (unsigned int a, unsigned int b)
+{
+  unsigned long long c = a / b;
+  asm volatile ("" : : "r" (c));
+}
+
+void
+f4 (int a, int b)
+{
+  unsigned long long c = (unsigned int) (a / b);
+  asm volatile ("" : : "r" (c));
+}
+
+void
+f5 (unsigned int a, unsigned int b)
+{
+  unsigned long long d = a % b;
+  asm volatile ("" : : "r" (d));
+}
+
+void
+f6 (int a, int b)
+{
+  unsigned long long d = (unsigned int) (a % b);
+  asm volatile ("" : : "r" (d));
+}
--- gcc/testsuite/gcc.target/i386/pr82361-2.c.jj        2017-09-29 19:25:40.344891300 +0200
+++ gcc/testsuite/gcc.target/i386/pr82361-2.c   2017-09-29 19:31:56.725359101 +0200
@@ -0,0 +1,10 @@
+/* PR target/82361 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mtune=generic -masm=att -m8bit-idiv" } */
+/* We should be able to optimize all %eax to %rax zero extensions, because
+   div and idiv instructions with 32-bit operands zero-extend both results.  */
+/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
+/* Ditto %edx to %rdx zero extensions.  */
+/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */
+
+#include "pr82361-1.c"

        Jakub

Reply via email to