Hi! The following patch adds patterns and splitters for {,u}divmodsi4 followed by zero-extension; like other 32-bit operand instructions, divl and idivl zero extend both results to 64-bit, so there is no need to extend them again. The REE pass ignores instructions that have more than one SET, but at least the combiner doesn't. The patch adds both patterns/splitters that zero extend the quotient and patterns/splitters that zero extend the modulo (the combiner wants in that case the modulo to be the first operation). I have a patch which I'll attach to the PR, which also has patterns for both results zero extended, but as neither combiner nor anything else is able to match them right now, I'm not including it here.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2017-09-29 Jakub Jelinek <ja...@redhat.com> PR target/82361 * config/i386/i386.md (TARGET_USE_8BIT_IDIV zext divmodsi4 splitter): New define_split. (divmodsi4_zext_1, divmodsi4_zext_2, *divmodsi4_zext_1, *divmodsi4_zext_2): New define_insn_and_split. (*divmodsi4_noext_zext_1, *divmodsi4_noext_zext_2): New define_insn. (TARGET_USE_8BIT_IDIV zext udivmodsi4 splitter): New define_split. (udivmodsi4_zext_1, udivmodsi4_zext_2, *udivmodsi4_zext_1, *udivmodsi4_zext_2, *udivmodsi4_pow2_zext_1, *udivmodsi4_pow2_zext_2): New define_insn_and_split. (*udivmodsi4_noext_zext_1, *udivmodsi4_noext_zext_2): New define_insn. * config/i386/i386.c (ix86_split_idivmod): Handle operands[0] or operands[1] having DImode when mode is SImode. * gcc.target/i386/pr82361-1.c: New test. * gcc.target/i386/pr82361-2.c: New test. --- gcc/config/i386/i386.md.jj 2017-09-29 09:19:42.000000000 +0200 +++ gcc/config/i386/i386.md 2017-09-29 19:19:34.795293575 +0200 @@ -7635,6 +7635,36 @@ (define_split [(const_int 0)] "ix86_split_idivmod (<MODE>mode, operands, true); DONE;") +(define_split + [(set (match_operand:DI 0 "register_operand") + (zero_extend:DI + (div:SI (match_operand:SI 2 "register_operand") + (match_operand:SI 3 "nonimmediate_operand")))) + (set (match_operand:SI 1 "register_operand") + (mod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_8BIT_IDIV + && TARGET_QIMODE_MATH + && can_create_pseudo_p () + && !optimize_insn_for_size_p ()" + [(const_int 0)] + "ix86_split_idivmod (SImode, operands, true); DONE;") + +(define_split + [(set (match_operand:DI 1 "register_operand") + (zero_extend:DI + (mod:SI (match_operand:SI 2 "register_operand") + (match_operand:SI 3 "nonimmediate_operand")))) + (set (match_operand:SI 0 "register_operand") + (div:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_8BIT_IDIV + && TARGET_QIMODE_MATH + && can_create_pseudo_p () + && 
!optimize_insn_for_size_p ()" + [(const_int 0)] + "ix86_split_idivmod (SImode, operands, true); DONE;") + (define_insn_and_split "divmod<mode>4_1" [(set (match_operand:SWI48 0 "register_operand" "=a") (div:SWI48 (match_operand:SWI48 2 "register_operand" "0") @@ -7670,6 +7700,79 @@ (define_insn_and_split "divmod<mode>4_1" [(set_attr "type" "multi") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "divmodsi4_zext_1" + [(set (match_operand:DI 0 "register_operand" "=a") + (zero_extend:DI + (div:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 1 "register_operand" "=&d") + (mod:SI (match_dup 2) (match_dup 3))) + (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "reload_completed" + [(parallel [(set (match_dup 1) + (ashiftrt:SI (match_dup 4) (match_dup 5))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) + (zero_extend:DI (div:SI (match_dup 2) (match_dup 3)))) + (set (match_dup 1) + (mod:SI (match_dup 2) (match_dup 3))) + (use (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1); + + if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) + operands[4] = operands[2]; + else + { + /* Avoid use of cltd in favor of a mov+shift. 
*/ + emit_move_insn (operands[1], operands[2]); + operands[4] = operands[1]; + } +} + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + +(define_insn_and_split "divmodsi4_zext_2" + [(set (match_operand:DI 1 "register_operand" "=&d") + (zero_extend:DI + (mod:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 0 "register_operand" "=a") + (div:SI (match_dup 2) (match_dup 3))) + (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "reload_completed" + [(parallel [(set (match_dup 6) + (ashiftrt:SI (match_dup 4) (match_dup 5))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 1) + (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3)))) + (set (match_dup 0) + (div:SI (match_dup 2) (match_dup 3))) + (use (match_dup 6)) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1); + operands[6] = gen_lowpart (SImode, operands[1]); + + if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) + operands[4] = operands[2]; + else + { + /* Avoid use of cltd in favor of a mov+shift. 
*/ + emit_move_insn (operands[6], operands[2]); + operands[4] = operands[6]; + } +} + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + (define_insn_and_split "*divmod<mode>4" [(set (match_operand:SWIM248 0 "register_operand" "=a") (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") @@ -7705,6 +7808,77 @@ (define_insn_and_split "*divmod<mode>4" [(set_attr "type" "multi") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "*divmodsi4_zext_1" + [(set (match_operand:DI 0 "register_operand" "=a") + (zero_extend:DI + (div:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 1 "register_operand" "=&d") + (mod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "reload_completed" + [(parallel [(set (match_dup 1) + (ashiftrt:SI (match_dup 4) (match_dup 5))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) + (zero_extend:DI (div:SI (match_dup 2) (match_dup 3)))) + (set (match_dup 1) + (mod:SI (match_dup 2) (match_dup 3))) + (use (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1); + + if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) + operands[4] = operands[2]; + else + { + /* Avoid use of cltd in favor of a mov+shift. 
*/ + emit_move_insn (operands[1], operands[2]); + operands[4] = operands[1]; + } +} + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*divmodsi4_zext_2" + [(set (match_operand:DI 1 "register_operand" "=&d") + (zero_extend:DI + (mod:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 0 "register_operand" "=a") + (div:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "reload_completed" + [(parallel [(set (match_dup 6) + (ashiftrt:SI (match_dup 4) (match_dup 5))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 1) + (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3)))) + (set (match_dup 0) + (div:SI (match_dup 2) (match_dup 3))) + (use (match_dup 6)) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1); + operands[6] = gen_lowpart (SImode, operands[1]); + + if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) + operands[4] = operands[2]; + else + { + /* Avoid use of cltd in favor of a mov+shift. 
*/ + emit_move_insn (operands[6], operands[2]); + operands[4] = operands[6]; + } +} + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + (define_insn "*divmod<mode>4_noext" [(set (match_operand:SWIM248 0 "register_operand" "=a") (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") @@ -7718,6 +7892,34 @@ (define_insn "*divmod<mode>4_noext" [(set_attr "type" "idiv") (set_attr "mode" "<MODE>")]) +(define_insn "*divmodsi4_noext_zext_1" + [(set (match_operand:DI 0 "register_operand" "=a") + (zero_extend:DI + (div:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 1 "register_operand" "=d") + (mod:SI (match_dup 2) (match_dup 3))) + (use (match_operand:SI 4 "register_operand" "1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "idiv{l}\t%3" + [(set_attr "type" "idiv") + (set_attr "mode" "SI")]) + +(define_insn "*divmodsi4_noext_zext_2" + [(set (match_operand:DI 1 "register_operand" "=d") + (zero_extend:DI + (mod:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 0 "register_operand" "=a") + (div:SI (match_dup 2) (match_dup 3))) + (use (match_operand:SI 4 "register_operand" "1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "idiv{l}\t%3" + [(set_attr "type" "idiv") + (set_attr "mode" "SI")]) + (define_expand "divmodqi4" [(parallel [(set (match_operand:QI 0 "register_operand") (div:QI @@ -7808,6 +8010,38 @@ (define_split [(const_int 0)] "ix86_split_idivmod (<MODE>mode, operands, false); DONE;") +(define_split + [(set (match_operand:DI 0 "register_operand") + (zero_extend:DI + (udiv:SI (match_operand:SI 2 "register_operand") + (match_operand:SI 3 "nonimmediate_operand")))) + (set (match_operand:SI 1 "register_operand") + (umod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && TARGET_USE_8BIT_IDIV + && TARGET_QIMODE_MATH + && can_create_pseudo_p () + && 
!optimize_insn_for_size_p ()" + [(const_int 0)] + "ix86_split_idivmod (SImode, operands, false); DONE;") + +(define_split + [(set (match_operand:DI 1 "register_operand") + (zero_extend:DI + (umod:SI (match_operand:SI 2 "register_operand") + (match_operand:SI 3 "nonimmediate_operand")))) + (set (match_operand:SI 0 "register_operand") + (udiv:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && TARGET_USE_8BIT_IDIV + && TARGET_QIMODE_MATH + && can_create_pseudo_p () + && !optimize_insn_for_size_p ()" + [(const_int 0)] + "ix86_split_idivmod (SImode, operands, false); DONE;") + (define_insn_and_split "udivmod<mode>4_1" [(set (match_operand:SWI48 0 "register_operand" "=a") (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0") @@ -7830,6 +8064,52 @@ (define_insn_and_split "udivmod<mode>4_1 [(set_attr "type" "multi") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "udivmodsi4_zext_1" + [(set (match_operand:DI 0 "register_operand" "=a") + (zero_extend:DI + (udiv:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 1 "register_operand" "=&d") + (umod:SI (match_dup 2) (match_dup 3))) + (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "reload_completed" + [(set (match_dup 1) (const_int 0)) + (parallel [(set (match_dup 0) + (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3)))) + (set (match_dup 1) + (umod:SI (match_dup 2) (match_dup 3))) + (use (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] + "" + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + +(define_insn_and_split "udivmodsi4_zext_2" + [(set (match_operand:DI 1 "register_operand" "=&d") + (zero_extend:DI + (umod:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 0 "register_operand" "=a") + (udiv:SI (match_dup 2) (match_dup 3))) + (unspec [(const_int 0)] 
UNSPEC_DIV_ALREADY_SPLIT) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "reload_completed" + [(set (match_dup 4) (const_int 0)) + (parallel [(set (match_dup 1) + (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3)))) + (set (match_dup 0) + (udiv:SI (match_dup 2) (match_dup 3))) + (use (match_dup 4)) + (clobber (reg:CC FLAGS_REG))])] + "operands[4] = gen_lowpart (SImode, operands[1]);" + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + (define_insn_and_split "*udivmod<mode>4" [(set (match_operand:SWIM248 0 "register_operand" "=a") (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") @@ -7851,6 +8131,50 @@ (define_insn_and_split "*udivmod<mode>4" [(set_attr "type" "multi") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "*udivmodsi4_zext_1" + [(set (match_operand:DI 0 "register_operand" "=a") + (zero_extend:DI + (udiv:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 1 "register_operand" "=&d") + (umod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "reload_completed" + [(set (match_dup 1) (const_int 0)) + (parallel [(set (match_dup 0) + (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3)))) + (set (match_dup 1) + (umod:SI (match_dup 2) (match_dup 3))) + (use (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] + "" + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*udivmodsi4_zext_2" + [(set (match_operand:DI 1 "register_operand" "=&d") + (zero_extend:DI + (umod:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 0 "register_operand" "=a") + (udiv:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "reload_completed" + [(set (match_dup 4) (const_int 0)) + (parallel [(set (match_dup 1) + (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3)))) + (set (match_dup 0) + (udiv:SI 
(match_dup 2) (match_dup 3))) + (use (match_dup 4)) + (clobber (reg:CC FLAGS_REG))])] + "operands[4] = gen_lowpart (SImode, operands[1]);" + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + ;; Optimize division or modulo by constant power of 2, if the constant ;; materializes only after expansion. (define_insn_and_split "*udivmod<mode>4_pow2" @@ -7877,6 +8201,60 @@ (define_insn_and_split "*udivmod<mode>4_ [(set_attr "type" "multi") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "*udivmodsi4_pow2_zext_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (udiv:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "const_int_operand" "n")))) + (set (match_operand:SI 1 "register_operand" "=r") + (umod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && IN_RANGE (INTVAL (operands[3]), 2, HOST_WIDE_INT_UC (0x80000000)) + && (UINTVAL (operands[3]) & (UINTVAL (operands[3]) - 1)) == 0" + "#" + "&& 1" + [(set (match_dup 1) (match_dup 2)) + (parallel [(set (match_dup 0) + (zero_extend:DI (lshiftrt:SI (match_dup 2) (match_dup 4)))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 1) (and:SI (match_dup 1) (match_dup 5))) + (clobber (reg:CC FLAGS_REG))])] +{ + int v = exact_log2 (UINTVAL (operands[3])); + operands[4] = GEN_INT (v); + operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1); +} + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*udivmodsi4_pow2_zext_2" + [(set (match_operand:DI 1 "register_operand" "=r") + (zero_extend:DI + (umod:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "const_int_operand" "n")))) + (set (match_operand:SI 0 "register_operand" "=r") + (umod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && IN_RANGE (INTVAL (operands[3]), 2, HOST_WIDE_INT_UC (0x80000000)) + && (UINTVAL (operands[3]) & (UINTVAL (operands[3]) - 1)) == 0" + "#" + "&& 1" + [(set (match_dup 1) 
(match_dup 2)) + (parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 2) (match_dup 4))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 1) + (zero_extend:DI (and:SI (match_dup 1) (match_dup 5)))) + (clobber (reg:CC FLAGS_REG))])] +{ + int v = exact_log2 (UINTVAL (operands[3])); + operands[4] = GEN_INT (v); + operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1); +} + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + (define_insn "*udivmod<mode>4_noext" [(set (match_operand:SWIM248 0 "register_operand" "=a") (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") @@ -7890,6 +8268,34 @@ (define_insn "*udivmod<mode>4_noext" [(set_attr "type" "idiv") (set_attr "mode" "<MODE>")]) +(define_insn "*udivmodsi4_noext_zext_1" + [(set (match_operand:DI 0 "register_operand" "=a") + (zero_extend:DI + (udiv:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 1 "register_operand" "=d") + (umod:SI (match_dup 2) (match_dup 3))) + (use (match_operand:SI 4 "register_operand" "1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "div{l}\t%3" + [(set_attr "type" "idiv") + (set_attr "mode" "SI")]) + +(define_insn "*udivmodsi4_noext_zext_2" + [(set (match_operand:DI 1 "register_operand" "=d") + (zero_extend:DI + (umod:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 0 "register_operand" "=a") + (udiv:SI (match_dup 2) (match_dup 3))) + (use (match_operand:SI 4 "register_operand" "1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "div{l}\t%3" + [(set_attr "type" "idiv") + (set_attr "mode" "SI")]) + (define_expand "udivmodqi4" [(parallel [(set (match_operand:QI 0 "register_operand") (udiv:QI --- gcc/config/i386/i386.c.jj 2017-09-29 19:15:27.822267844 +0200 +++ gcc/config/i386/i386.c 2017-09-29 19:29:00.276483787 +0200 @@ -21927,9 +21927,22 @@ ix86_split_idivmod (machine_mode mode, r switch (mode) { case 
E_SImode: - gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1; + if (GET_MODE (operands[0]) == SImode) + { + if (GET_MODE (operands[1]) == SImode) + gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1; + else + gen_divmod4_1 + = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2; + gen_zero_extend = gen_zero_extendqisi2; + } + else + { + gen_divmod4_1 + = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1; + gen_zero_extend = gen_zero_extendqidi2; + } gen_test_ccno_1 = gen_testsi_ccno_1; - gen_zero_extend = gen_zero_extendqisi2; break; case E_DImode: gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1; @@ -21988,16 +22001,24 @@ ix86_split_idivmod (machine_mode mode, r div = gen_rtx_UDIV (mode, operands[2], operands[3]); mod = gen_rtx_UMOD (mode, operands[2], operands[3]); } + if (mode == SImode) + { + if (GET_MODE (operands[0]) != SImode) + div = gen_rtx_ZERO_EXTEND (DImode, div); + if (GET_MODE (operands[1]) != SImode) + mod = gen_rtx_ZERO_EXTEND (DImode, mod); + } /* Extract remainder from AH. */ - tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8)); + tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]), + tmp0, GEN_INT (8), GEN_INT (8)); if (REG_P (operands[1])) insn = emit_move_insn (operands[1], tmp1); else { /* Need a new scratch register since the old one has result of 8bit divide. */ - scratch = gen_reg_rtx (mode); + scratch = gen_reg_rtx (GET_MODE (operands[1])); emit_move_insn (scratch, tmp1); insn = emit_move_insn (operands[1], scratch); } --- gcc/testsuite/gcc.target/i386/pr82361-1.c.jj 2017-09-29 19:21:12.744113987 +0200 +++ gcc/testsuite/gcc.target/i386/pr82361-1.c 2017-09-29 19:25:27.465046411 +0200 @@ -0,0 +1,53 @@ +/* PR target/82361 */ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mtune=generic -masm=att -mno-8bit-idiv" } */ +/* We should be able to optimize all %eax to %rax zero extensions, because + div and idiv instructions with 32-bit operands zero-extend both results. 
*/ +/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */ +/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage + one. */ +/* { dg-final { scan-assembler-times "movl\t%edx, %edx" 2 } } */ + +void +f1 (unsigned int a, unsigned int b) +{ + unsigned long long c = a / b; + unsigned long long d = a % b; + asm volatile ("" : : "r" (c), "r" (d)); +} + +void +f2 (int a, int b) +{ + unsigned long long c = (unsigned int) (a / b); + unsigned long long d = (unsigned int) (a % b); + asm volatile ("" : : "r" (c), "r" (d)); +} + +void +f3 (unsigned int a, unsigned int b) +{ + unsigned long long c = a / b; + asm volatile ("" : : "r" (c)); +} + +void +f4 (int a, int b) +{ + unsigned long long c = (unsigned int) (a / b); + asm volatile ("" : : "r" (c)); +} + +void +f5 (unsigned int a, unsigned int b) +{ + unsigned long long d = a % b; + asm volatile ("" : : "r" (d)); +} + +void +f6 (int a, int b) +{ + unsigned long long d = (unsigned int) (a % b); + asm volatile ("" : : "r" (d)); +} --- gcc/testsuite/gcc.target/i386/pr82361-2.c.jj 2017-09-29 19:25:40.344891300 +0200 +++ gcc/testsuite/gcc.target/i386/pr82361-2.c 2017-09-29 19:31:56.725359101 +0200 @@ -0,0 +1,10 @@ +/* PR target/82361 */ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mtune=generic -masm=att -m8bit-idiv" } */ +/* We should be able to optimize all %eax to %rax zero extensions, because + div and idiv instructions with 32-bit operands zero-extend both results. */ +/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */ +/* Ditto %edx to %rdx zero extensions. */ +/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */ + +#include "pr82361-1.c" Jakub