On Fri, Sep 29, 2017 at 11:05 PM, Jakub Jelinek <ja...@redhat.com> wrote: > Hi! > > The following patch adds patterns and splitters for {,u}divmodsi4 followed > by zero-extension; similarly to other 32-bit operand instructions, divl and > idivl zero extend both results to 64-bit, so there is no need to extend them > again. The REE pass ignores instructions that have more than one SET, but > at least the combiner doesn't. The patch adds both patterns/splitters that > zero extend the quotient and patterns/splitters that zero extend the modulo > (the combiner wants in that case the modulo to be the first operation). > I have a patch which I'll attach to the PR, which also has patterns for > both results zero extended, but as neither combiner nor anything else is > able to match them right now, I'm not including it here. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2017-09-29 Jakub Jelinek <ja...@redhat.com> > > PR target/82361 > * config/i386/i386.md > (TARGET_USE_8BIT_IDIV zext divmodsi4 splitter): New define_split. > (divmodsi4_zext_1, divmodsi4_zext_2, *divmodsi4_zext_1, > *divmodsi4_zext_2): New define_insn_and_split. > (*divmodsi4_noext_zext_1, *divmodsi4_noext_zext_2): New define_insn. > (TARGET_USE_8BIT_IDIV zext udivmodsi4 splitter): New define_split. > (udivmodsi4_zext_1, udivmodsi4_zext_2, *udivmodsi4_zext_1, > *udivmodsi4_zext_2, *udivmodsi4_pow2_zext_1, *udivmodsi4_pow2_zext_2): > New define_insn_and_split. > (*udivmodsi4_noext_zext_1, *udivmodsi4_noext_zext_2): New define_insn. > * config/i386/i386.c (ix86_split_idivmod): Handle operands[0] or > operands[1] having DImode when mode is SImode. > > * gcc.target/i386/pr82361-1.c: New test. > * gcc.target/i386/pr82361-2.c: New test.
OK, although this is quite some work for a relatively small gain. The reason that zext for divisions was not implemented was that a zext was relatively cheap compared to the idiv insn, so it was not a pressing issue. Thanks, Uros. > --- gcc/config/i386/i386.md.jj 2017-09-29 09:19:42.000000000 +0200 > +++ gcc/config/i386/i386.md 2017-09-29 19:19:34.795293575 +0200 > @@ -7635,6 +7635,36 @@ (define_split > [(const_int 0)] > "ix86_split_idivmod (<MODE>mode, operands, true); DONE;") > > +(define_split > + [(set (match_operand:DI 0 "register_operand") > + (zero_extend:DI > + (div:SI (match_operand:SI 2 "register_operand") > + (match_operand:SI 3 "nonimmediate_operand")))) > + (set (match_operand:SI 1 "register_operand") > + (mod:SI (match_dup 2) (match_dup 3))) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_USE_8BIT_IDIV > + && TARGET_QIMODE_MATH > + && can_create_pseudo_p () > + && !optimize_insn_for_size_p ()" > + [(const_int 0)] > + "ix86_split_idivmod (SImode, operands, true); DONE;") > + > +(define_split > + [(set (match_operand:DI 1 "register_operand") > + (zero_extend:DI > + (mod:SI (match_operand:SI 2 "register_operand") > + (match_operand:SI 3 "nonimmediate_operand")))) > + (set (match_operand:SI 0 "register_operand") > + (div:SI (match_dup 2) (match_dup 3))) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_USE_8BIT_IDIV > + && TARGET_QIMODE_MATH > + && can_create_pseudo_p () > + && !optimize_insn_for_size_p ()" > + [(const_int 0)] > + "ix86_split_idivmod (SImode, operands, true); DONE;") > + > (define_insn_and_split "divmod<mode>4_1" > [(set (match_operand:SWI48 0 "register_operand" "=a") > (div:SWI48 (match_operand:SWI48 2 "register_operand" "0") > @@ -7670,6 +7700,79 @@ (define_insn_and_split "divmod<mode>4_1" > [(set_attr "type" "multi") > (set_attr "mode" "<MODE>")]) > > +(define_insn_and_split "divmodsi4_zext_1" > + [(set (match_operand:DI 0 "register_operand" "=a") > + (zero_extend:DI > + (div:SI (match_operand:SI 2 "register_operand" "0") > + (match_operand:SI 3 
"nonimmediate_operand" "rm")))) > + (set (match_operand:SI 1 "register_operand" "=&d") > + (mod:SI (match_dup 2) (match_dup 3))) > + (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_64BIT" > + "#" > + "reload_completed" > + [(parallel [(set (match_dup 1) > + (ashiftrt:SI (match_dup 4) (match_dup 5))) > + (clobber (reg:CC FLAGS_REG))]) > + (parallel [(set (match_dup 0) > + (zero_extend:DI (div:SI (match_dup 2) (match_dup 3)))) > + (set (match_dup 1) > + (mod:SI (match_dup 2) (match_dup 3))) > + (use (match_dup 1)) > + (clobber (reg:CC FLAGS_REG))])] > +{ > + operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1); > + > + if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) > + operands[4] = operands[2]; > + else > + { > + /* Avoid use of cltd in favor of a mov+shift. */ > + emit_move_insn (operands[1], operands[2]); > + operands[4] = operands[1]; > + } > +} > + [(set_attr "type" "multi") > + (set_attr "mode" "SI")]) > + > +(define_insn_and_split "divmodsi4_zext_2" > + [(set (match_operand:DI 1 "register_operand" "=&d") > + (zero_extend:DI > + (mod:SI (match_operand:SI 2 "register_operand" "0") > + (match_operand:SI 3 "nonimmediate_operand" "rm")))) > + (set (match_operand:SI 0 "register_operand" "=a") > + (div:SI (match_dup 2) (match_dup 3))) > + (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_64BIT" > + "#" > + "reload_completed" > + [(parallel [(set (match_dup 6) > + (ashiftrt:SI (match_dup 4) (match_dup 5))) > + (clobber (reg:CC FLAGS_REG))]) > + (parallel [(set (match_dup 1) > + (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3)))) > + (set (match_dup 0) > + (div:SI (match_dup 2) (match_dup 3))) > + (use (match_dup 6)) > + (clobber (reg:CC FLAGS_REG))])] > +{ > + operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1); > + operands[6] = gen_lowpart (SImode, operands[1]); > + > + if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) > + operands[4] = operands[2]; 
> + else > + { > + /* Avoid use of cltd in favor of a mov+shift. */ > + emit_move_insn (operands[6], operands[2]); > + operands[4] = operands[6]; > + } > +} > + [(set_attr "type" "multi") > + (set_attr "mode" "SI")]) > + > (define_insn_and_split "*divmod<mode>4" > [(set (match_operand:SWIM248 0 "register_operand" "=a") > (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") > @@ -7705,6 +7808,77 @@ (define_insn_and_split "*divmod<mode>4" > [(set_attr "type" "multi") > (set_attr "mode" "<MODE>")]) > > +(define_insn_and_split "*divmodsi4_zext_1" > + [(set (match_operand:DI 0 "register_operand" "=a") > + (zero_extend:DI > + (div:SI (match_operand:SI 2 "register_operand" "0") > + (match_operand:SI 3 "nonimmediate_operand" "rm")))) > + (set (match_operand:SI 1 "register_operand" "=&d") > + (mod:SI (match_dup 2) (match_dup 3))) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_64BIT" > + "#" > + "reload_completed" > + [(parallel [(set (match_dup 1) > + (ashiftrt:SI (match_dup 4) (match_dup 5))) > + (clobber (reg:CC FLAGS_REG))]) > + (parallel [(set (match_dup 0) > + (zero_extend:DI (div:SI (match_dup 2) (match_dup 3)))) > + (set (match_dup 1) > + (mod:SI (match_dup 2) (match_dup 3))) > + (use (match_dup 1)) > + (clobber (reg:CC FLAGS_REG))])] > +{ > + operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1); > + > + if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) > + operands[4] = operands[2]; > + else > + { > + /* Avoid use of cltd in favor of a mov+shift. 
*/ > + emit_move_insn (operands[1], operands[2]); > + operands[4] = operands[1]; > + } > +} > + [(set_attr "type" "multi") > + (set_attr "mode" "SI")]) > + > +(define_insn_and_split "*divmodsi4_zext_2" > + [(set (match_operand:DI 1 "register_operand" "=&d") > + (zero_extend:DI > + (mod:SI (match_operand:SI 2 "register_operand" "0") > + (match_operand:SI 3 "nonimmediate_operand" "rm")))) > + (set (match_operand:SI 0 "register_operand" "=a") > + (div:SI (match_dup 2) (match_dup 3))) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_64BIT" > + "#" > + "reload_completed" > + [(parallel [(set (match_dup 6) > + (ashiftrt:SI (match_dup 4) (match_dup 5))) > + (clobber (reg:CC FLAGS_REG))]) > + (parallel [(set (match_dup 1) > + (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3)))) > + (set (match_dup 0) > + (div:SI (match_dup 2) (match_dup 3))) > + (use (match_dup 6)) > + (clobber (reg:CC FLAGS_REG))])] > +{ > + operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1); > + operands[6] = gen_lowpart (SImode, operands[1]); > + > + if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) > + operands[4] = operands[2]; > + else > + { > + /* Avoid use of cltd in favor of a mov+shift. 
*/ > + emit_move_insn (operands[6], operands[2]); > + operands[4] = operands[6]; > + } > +} > + [(set_attr "type" "multi") > + (set_attr "mode" "SI")]) > + > (define_insn "*divmod<mode>4_noext" > [(set (match_operand:SWIM248 0 "register_operand" "=a") > (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") > @@ -7718,6 +7892,34 @@ (define_insn "*divmod<mode>4_noext" > [(set_attr "type" "idiv") > (set_attr "mode" "<MODE>")]) > > +(define_insn "*divmodsi4_noext_zext_1" > + [(set (match_operand:DI 0 "register_operand" "=a") > + (zero_extend:DI > + (div:SI (match_operand:SI 2 "register_operand" "0") > + (match_operand:SI 3 "nonimmediate_operand" "rm")))) > + (set (match_operand:SI 1 "register_operand" "=d") > + (mod:SI (match_dup 2) (match_dup 3))) > + (use (match_operand:SI 4 "register_operand" "1")) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_64BIT" > + "idiv{l}\t%3" > + [(set_attr "type" "idiv") > + (set_attr "mode" "SI")]) > + > +(define_insn "*divmodsi4_noext_zext_2" > + [(set (match_operand:DI 1 "register_operand" "=d") > + (zero_extend:DI > + (mod:SI (match_operand:SI 2 "register_operand" "0") > + (match_operand:SI 3 "nonimmediate_operand" "rm")))) > + (set (match_operand:SI 0 "register_operand" "=a") > + (div:SI (match_dup 2) (match_dup 3))) > + (use (match_operand:SI 4 "register_operand" "1")) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_64BIT" > + "idiv{l}\t%3" > + [(set_attr "type" "idiv") > + (set_attr "mode" "SI")]) > + > (define_expand "divmodqi4" > [(parallel [(set (match_operand:QI 0 "register_operand") > (div:QI > @@ -7808,6 +8010,38 @@ (define_split > [(const_int 0)] > "ix86_split_idivmod (<MODE>mode, operands, false); DONE;") > > +(define_split > + [(set (match_operand:DI 0 "register_operand") > + (zero_extend:DI > + (udiv:SI (match_operand:SI 2 "register_operand") > + (match_operand:SI 3 "nonimmediate_operand")))) > + (set (match_operand:SI 1 "register_operand") > + (umod:SI (match_dup 2) (match_dup 3))) > + (clobber (reg:CC FLAGS_REG))] 
> + "TARGET_64BIT > + && TARGET_USE_8BIT_IDIV > + && TARGET_QIMODE_MATH > + && can_create_pseudo_p () > + && !optimize_insn_for_size_p ()" > + [(const_int 0)] > + "ix86_split_idivmod (SImode, operands, false); DONE;") > + > +(define_split > + [(set (match_operand:DI 1 "register_operand") > + (zero_extend:DI > + (umod:SI (match_operand:SI 2 "register_operand") > + (match_operand:SI 3 "nonimmediate_operand")))) > + (set (match_operand:SI 0 "register_operand") > + (udiv:SI (match_dup 2) (match_dup 3))) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_64BIT > + && TARGET_USE_8BIT_IDIV > + && TARGET_QIMODE_MATH > + && can_create_pseudo_p () > + && !optimize_insn_for_size_p ()" > + [(const_int 0)] > + "ix86_split_idivmod (SImode, operands, false); DONE;") > + > (define_insn_and_split "udivmod<mode>4_1" > [(set (match_operand:SWI48 0 "register_operand" "=a") > (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0") > @@ -7830,6 +8064,52 @@ (define_insn_and_split "udivmod<mode>4_1 > [(set_attr "type" "multi") > (set_attr "mode" "<MODE>")]) > > +(define_insn_and_split "udivmodsi4_zext_1" > + [(set (match_operand:DI 0 "register_operand" "=a") > + (zero_extend:DI > + (udiv:SI (match_operand:SI 2 "register_operand" "0") > + (match_operand:SI 3 "nonimmediate_operand" "rm")))) > + (set (match_operand:SI 1 "register_operand" "=&d") > + (umod:SI (match_dup 2) (match_dup 3))) > + (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_64BIT" > + "#" > + "reload_completed" > + [(set (match_dup 1) (const_int 0)) > + (parallel [(set (match_dup 0) > + (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3)))) > + (set (match_dup 1) > + (umod:SI (match_dup 2) (match_dup 3))) > + (use (match_dup 1)) > + (clobber (reg:CC FLAGS_REG))])] > + "" > + [(set_attr "type" "multi") > + (set_attr "mode" "SI")]) > + > +(define_insn_and_split "udivmodsi4_zext_2" > + [(set (match_operand:DI 1 "register_operand" "=&d") > + (zero_extend:DI > + (umod:SI 
(match_operand:SI 2 "register_operand" "0") > + (match_operand:SI 3 "nonimmediate_operand" "rm")))) > + (set (match_operand:SI 0 "register_operand" "=a") > + (udiv:SI (match_dup 2) (match_dup 3))) > + (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_64BIT" > + "#" > + "reload_completed" > + [(set (match_dup 4) (const_int 0)) > + (parallel [(set (match_dup 1) > + (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3)))) > + (set (match_dup 0) > + (udiv:SI (match_dup 2) (match_dup 3))) > + (use (match_dup 4)) > + (clobber (reg:CC FLAGS_REG))])] > + "operands[4] = gen_lowpart (SImode, operands[1]);" > + [(set_attr "type" "multi") > + (set_attr "mode" "SI")]) > + > (define_insn_and_split "*udivmod<mode>4" > [(set (match_operand:SWIM248 0 "register_operand" "=a") > (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") > @@ -7851,6 +8131,50 @@ (define_insn_and_split "*udivmod<mode>4" > [(set_attr "type" "multi") > (set_attr "mode" "<MODE>")]) > > +(define_insn_and_split "*udivmodsi4_zext_1" > + [(set (match_operand:DI 0 "register_operand" "=a") > + (zero_extend:DI > + (udiv:SI (match_operand:SI 2 "register_operand" "0") > + (match_operand:SI 3 "nonimmediate_operand" "rm")))) > + (set (match_operand:SI 1 "register_operand" "=&d") > + (umod:SI (match_dup 2) (match_dup 3))) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_64BIT" > + "#" > + "reload_completed" > + [(set (match_dup 1) (const_int 0)) > + (parallel [(set (match_dup 0) > + (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3)))) > + (set (match_dup 1) > + (umod:SI (match_dup 2) (match_dup 3))) > + (use (match_dup 1)) > + (clobber (reg:CC FLAGS_REG))])] > + "" > + [(set_attr "type" "multi") > + (set_attr "mode" "SI")]) > + > +(define_insn_and_split "*udivmodsi4_zext_2" > + [(set (match_operand:DI 1 "register_operand" "=&d") > + (zero_extend:DI > + (umod:SI (match_operand:SI 2 "register_operand" "0") > + (match_operand:SI 3 "nonimmediate_operand" "rm")))) > + (set 
(match_operand:SI 0 "register_operand" "=a") > + (udiv:SI (match_dup 2) (match_dup 3))) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_64BIT" > + "#" > + "reload_completed" > + [(set (match_dup 4) (const_int 0)) > + (parallel [(set (match_dup 1) > + (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3)))) > + (set (match_dup 0) > + (udiv:SI (match_dup 2) (match_dup 3))) > + (use (match_dup 4)) > + (clobber (reg:CC FLAGS_REG))])] > + "operands[4] = gen_lowpart (SImode, operands[1]);" > + [(set_attr "type" "multi") > + (set_attr "mode" "SI")]) > + > ;; Optimize division or modulo by constant power of 2, if the constant > ;; materializes only after expansion. > (define_insn_and_split "*udivmod<mode>4_pow2" > @@ -7877,6 +8201,60 @@ (define_insn_and_split "*udivmod<mode>4_ > [(set_attr "type" "multi") > (set_attr "mode" "<MODE>")]) > > +(define_insn_and_split "*udivmodsi4_pow2_zext_1" > + [(set (match_operand:DI 0 "register_operand" "=r") > + (zero_extend:DI > + (udiv:SI (match_operand:SI 2 "register_operand" "0") > + (match_operand:SI 3 "const_int_operand" "n")))) > + (set (match_operand:SI 1 "register_operand" "=r") > + (umod:SI (match_dup 2) (match_dup 3))) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_64BIT > + && IN_RANGE (INTVAL (operands[3]), 2, HOST_WIDE_INT_UC (0x80000000)) > + && (UINTVAL (operands[3]) & (UINTVAL (operands[3]) - 1)) == 0" > + "#" > + "&& 1" > + [(set (match_dup 1) (match_dup 2)) > + (parallel [(set (match_dup 0) > + (zero_extend:DI (lshiftrt:SI (match_dup 2) (match_dup 4)))) > + (clobber (reg:CC FLAGS_REG))]) > + (parallel [(set (match_dup 1) (and:SI (match_dup 1) (match_dup 5))) > + (clobber (reg:CC FLAGS_REG))])] > +{ > + int v = exact_log2 (UINTVAL (operands[3])); > + operands[4] = GEN_INT (v); > + operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1); > +} > + [(set_attr "type" "multi") > + (set_attr "mode" "SI")]) > + > +(define_insn_and_split "*udivmodsi4_pow2_zext_2" > + [(set (match_operand:DI 1 "register_operand" "=r") > + (zero_extend:DI 
> + (umod:SI (match_operand:SI 2 "register_operand" "0") > + (match_operand:SI 3 "const_int_operand" "n")))) > + (set (match_operand:SI 0 "register_operand" "=r") > + (umod:SI (match_dup 2) (match_dup 3))) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_64BIT > + && IN_RANGE (INTVAL (operands[3]), 2, HOST_WIDE_INT_UC (0x80000000)) > + && (UINTVAL (operands[3]) & (UINTVAL (operands[3]) - 1)) == 0" > + "#" > + "&& 1" > + [(set (match_dup 1) (match_dup 2)) > + (parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 2) (match_dup 4))) > + (clobber (reg:CC FLAGS_REG))]) > + (parallel [(set (match_dup 1) > + (zero_extend:DI (and:SI (match_dup 1) (match_dup 5)))) > + (clobber (reg:CC FLAGS_REG))])] > +{ > + int v = exact_log2 (UINTVAL (operands[3])); > + operands[4] = GEN_INT (v); > + operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1); > +} > + [(set_attr "type" "multi") > + (set_attr "mode" "SI")]) > + > (define_insn "*udivmod<mode>4_noext" > [(set (match_operand:SWIM248 0 "register_operand" "=a") > (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") > @@ -7890,6 +8268,34 @@ (define_insn "*udivmod<mode>4_noext" > [(set_attr "type" "idiv") > (set_attr "mode" "<MODE>")]) > > +(define_insn "*udivmodsi4_noext_zext_1" > + [(set (match_operand:DI 0 "register_operand" "=a") > + (zero_extend:DI > + (udiv:SI (match_operand:SI 2 "register_operand" "0") > + (match_operand:SI 3 "nonimmediate_operand" "rm")))) > + (set (match_operand:SI 1 "register_operand" "=d") > + (umod:SI (match_dup 2) (match_dup 3))) > + (use (match_operand:SI 4 "register_operand" "1")) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_64BIT" > + "div{l}\t%3" > + [(set_attr "type" "idiv") > + (set_attr "mode" "SI")]) > + > +(define_insn "*udivmodsi4_noext_zext_2" > + [(set (match_operand:DI 1 "register_operand" "=d") > + (zero_extend:DI > + (umod:SI (match_operand:SI 2 "register_operand" "0") > + (match_operand:SI 3 "nonimmediate_operand" "rm")))) > + (set (match_operand:SI 0 "register_operand" "=a") > + 
(udiv:SI (match_dup 2) (match_dup 3))) > + (use (match_operand:SI 4 "register_operand" "1")) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_64BIT" > + "div{l}\t%3" > + [(set_attr "type" "idiv") > + (set_attr "mode" "SI")]) > + > (define_expand "udivmodqi4" > [(parallel [(set (match_operand:QI 0 "register_operand") > (udiv:QI > --- gcc/config/i386/i386.c.jj 2017-09-29 19:15:27.822267844 +0200 > +++ gcc/config/i386/i386.c 2017-09-29 19:29:00.276483787 +0200 > @@ -21927,9 +21927,22 @@ ix86_split_idivmod (machine_mode mode, r > switch (mode) > { > case E_SImode: > - gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1; > + if (GET_MODE (operands[0]) == SImode) > + { > + if (GET_MODE (operands[1]) == SImode) > + gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1; > + else > + gen_divmod4_1 > + = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2; > + gen_zero_extend = gen_zero_extendqisi2; > + } > + else > + { > + gen_divmod4_1 > + = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1; > + gen_zero_extend = gen_zero_extendqidi2; > + } > gen_test_ccno_1 = gen_testsi_ccno_1; > - gen_zero_extend = gen_zero_extendqisi2; > break; > case E_DImode: > gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1; > @@ -21988,16 +22001,24 @@ ix86_split_idivmod (machine_mode mode, r > div = gen_rtx_UDIV (mode, operands[2], operands[3]); > mod = gen_rtx_UMOD (mode, operands[2], operands[3]); > } > + if (mode == SImode) > + { > + if (GET_MODE (operands[0]) != SImode) > + div = gen_rtx_ZERO_EXTEND (DImode, div); > + if (GET_MODE (operands[1]) != SImode) > + mod = gen_rtx_ZERO_EXTEND (DImode, mod); > + } > > /* Extract remainder from AH. 
*/ > - tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8)); > + tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]), > + tmp0, GEN_INT (8), GEN_INT (8)); > if (REG_P (operands[1])) > insn = emit_move_insn (operands[1], tmp1); > else > { > /* Need a new scratch register since the old one has result > of 8bit divide. */ > - scratch = gen_reg_rtx (mode); > + scratch = gen_reg_rtx (GET_MODE (operands[1])); > emit_move_insn (scratch, tmp1); > insn = emit_move_insn (operands[1], scratch); > } > --- gcc/testsuite/gcc.target/i386/pr82361-1.c.jj 2017-09-29 > 19:21:12.744113987 +0200 > +++ gcc/testsuite/gcc.target/i386/pr82361-1.c 2017-09-29 19:25:27.465046411 > +0200 > @@ -0,0 +1,53 @@ > +/* PR target/82361 */ > +/* { dg-do compile { target lp64 } } */ > +/* { dg-options "-O2 -mtune=generic -masm=att -mno-8bit-idiv" } */ > +/* We should be able to optimize all %eax to %rax zero extensions, because > + div and idiv instructions with 32-bit operands zero-extend both results. > */ > +/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */ > +/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage > + one. 
*/ > +/* { dg-final { scan-assembler-times "movl\t%edx, %edx" 2 } } */ > + > +void > +f1 (unsigned int a, unsigned int b) > +{ > + unsigned long long c = a / b; > + unsigned long long d = a % b; > + asm volatile ("" : : "r" (c), "r" (d)); > +} > + > +void > +f2 (int a, int b) > +{ > + unsigned long long c = (unsigned int) (a / b); > + unsigned long long d = (unsigned int) (a % b); > + asm volatile ("" : : "r" (c), "r" (d)); > +} > + > +void > +f3 (unsigned int a, unsigned int b) > +{ > + unsigned long long c = a / b; > + asm volatile ("" : : "r" (c)); > +} > + > +void > +f4 (int a, int b) > +{ > + unsigned long long c = (unsigned int) (a / b); > + asm volatile ("" : : "r" (c)); > +} > + > +void > +f5 (unsigned int a, unsigned int b) > +{ > + unsigned long long d = a % b; > + asm volatile ("" : : "r" (d)); > +} > + > +void > +f6 (int a, int b) > +{ > + unsigned long long d = (unsigned int) (a % b); > + asm volatile ("" : : "r" (d)); > +} > --- gcc/testsuite/gcc.target/i386/pr82361-2.c.jj 2017-09-29 > 19:25:40.344891300 +0200 > +++ gcc/testsuite/gcc.target/i386/pr82361-2.c 2017-09-29 19:31:56.725359101 > +0200 > @@ -0,0 +1,10 @@ > +/* PR target/82361 */ > +/* { dg-do compile { target lp64 } } */ > +/* { dg-options "-O2 -mtune=generic -masm=att -m8bit-idiv" } */ > +/* We should be able to optimize all %eax to %rax zero extensions, because > + div and idiv instructions with 32-bit operands zero-extend both results. > */ > +/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */ > +/* Ditto %edx to %rdx zero extensions. */ > +/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */ > + > +#include "pr82361-1.c" > > Jakub