> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index 2cecf45..9d6983b 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -7131,6 +7131,8 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* > total, bool speed) > *total = COSTS_N_INSNS (2); > else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE) > *total = COSTS_N_INSNS (4); > + else if (mode == DImode) > + *total = COSTS_N_INSNS (50); > else > *total = COSTS_N_INSNS (20); > return false; > diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c > index d48a465..b5627c2 100644 > --- a/gcc/config/mips/mips.c > +++ b/gcc/config/mips/mips.c > @@ -3846,7 +3846,13 @@ mips_rtx_costs (rtx x, int code, int outer_code, int > opno ATTRIBUTE_UNUSED, > *total = COSTS_N_INSNS (mips_idiv_insns ()); > } > else if (mode == DImode) > - *total = mips_cost->int_div_di; > + { > + if (!TARGET_64BIT) > + /* Divide double integer library call is expensive. */ > + *total = COSTS_N_INSNS (200); > + else > + *total = mips_cost->int_div_di; > + } > else > *total = mips_cost->int_div_si; > return false; > diff --git a/gcc/expmed.c b/gcc/expmed.c > index aa24fbf..5f4c921 100644 > --- a/gcc/expmed.c > +++ b/gcc/expmed.c > @@ -3523,6 +3523,105 @@ expand_mult_highpart_optab (enum machine_mode mode, > rtx op0, rtx op1, > } > } > > + if (unsignedp && (!optimize_size && (optimize>1)) > + && (size - 1 > BITS_PER_WORD > + && BITS_PER_WORD == 32 && GET_MODE_BITSIZE (mode) == > 2*BITS_PER_WORD)
These references to 32-bits are still wrong (and unnecessary, just remove them). > + && (4 * mul_cost[speed][mode] + 4 * add_cost[speed][mode] > + + shift_cost[speed][mode][31] < max_cost)) > + { > + unsigned HOST_WIDE_INT d; > + rtx x1, x0, y1, y0, z2, z0, tmp, u0, u0tmp, u1, c, c1, ccst, cres, > result; > + > + d = (INTVAL (op1) & GET_MODE_MASK (mode)); This could be a CONST_DOUBLE. But you don't need "d", because you can... > + /* Extracting the higher part of the 64-bit multiplier. */ > + x1 = gen_highpart (word_mode, op0); > + x1 = force_reg (word_mode, x1); > + > + /* Extracting the lower part of the 64-bit multiplier. */ > + x0 = gen_lowpart (word_mode, op0); > + x0 = force_reg (word_mode, x0); > + > + /* Splitting the 64-bit constant for the higher and the lower parts. > */ > + y0 = gen_int_mode(d & UINT32_MAX, word_mode); > + y1 = gen_int_mode(d >> 32, word_mode); ... use gen_lowpart and gen_highpart directly on op1. > + > + z2 = gen_reg_rtx (mode); > + u0 = gen_reg_rtx (mode); > + > + /* Unsigned multiplication of the higher multiplier part > + and the higher constant part. */ > + z2 = expand_widening_mult (mode, x1, y1, z2, 1, umul_widen_optab); > + /* Unsigned multiplication of the lower multiplier part > + and the higher constant part. */ > + u0 = expand_widening_mult (mode, x0, y1, u0, 1, umul_widen_optab); > + > + z0 = gen_reg_rtx (mode); > + u1 = gen_reg_rtx (mode); > + > + /* Unsigned multiplication of the lower multiplier part > + and the lower constant part. */ > + z0 = expand_widening_mult (mode, x0, y0, z0, 1, umul_widen_optab); > + > + /* Unsigned multiplication of the higher multiplier part > + the lower constant part. */ > + u1 = expand_widening_mult (mode, x1, y0, u1, 1, umul_widen_optab); Up to here the comments are not necessary. > + /* Getting the higher part of multiplication between the lower > multiplier > + part and the lower constant part, the lower part is not interesting > + for the final result. 
*/ > + u0tmp = gen_highpart (word_mode, z0); > + u0tmp = force_reg (word_mode, u0tmp); > + u0tmp = convert_to_mode (mode, u0tmp, 1); > + > + /* Adding the higher part of multiplication between the lower > multiplier > + part and the lower constant part to the result of multiplication > between > + the lower multiplier part and the higher constant part. Please note, > + that we couldn't get overflow here since in the worst case > + (0xffffffff*0xffffffff)+0xffffffff we get 0xffffffff00000000L. */ The comment can simply be "compute the middle word of the three-word intermediate result." Also it's not overflow, it's carry. > + expand_inc (u0, u0tmp); > + tmp = gen_reg_rtx (mode); > + > + /* Adding multiplication between the lower multiplier part and the > higher > + constant part with the higher part of multiplication between the lower > + multiplier part and the lower constant part to the result of > multiplication > + between the higher multiplier part and the lower constant part. */ Here you have to explain: /* We have to return z2 + ((u0 + u1) >> GET_MODE_BITSIZE (word_mode)). u0 + u1 are the upper two words of the three-word intermediate result and they could have up to 2 * GET_MODE_BITSIZE (word_mode) + 1 bits of precision. We compute the extra bit by checking for carry, and add 1 << GET_MODE_BITSIZE (word_mode) to z2 if there is carry. */ > + tmp = expand_binop (mode, add_optab, u0, u1, tmp, 1, OPTAB_LIB_WIDEN); > + if (!tmp) > + return 0; /* We have to return z2 + (tmp >> 32). We need > + /* Checking for overflow. */ This is not overflow, it's carry (see above). 
> + c = gen_reg_rtx (mode); > + c1 = gen_reg_rtx (mode); > + cres = gen_reg_rtx (mode); > + > + emit_store_flag_force (c, GT, u0, tmp, mode, 1, 1); > + emit_store_flag_force (c1, GT, u1, tmp, mode, 1, 1); > + result = expand_binop (mode, ior_optab, c, c1, cres, 1, > OPTAB_LIB_WIDEN); > + if (!result) > + return 0; > + > + ccst = gen_reg_rtx (mode); > + ccst = expand_shift (LSHIFT_EXPR, mode, cres, 32, ccst, 1); This 32 should be GET_MODE_BITSIZE (word_mode). > + > + /* Adding 0x10000000 in case of overflow to the result of > multiplication One 0 missing in the constant. > + between the higher multiplier part and the higher constant part. > Please note, > + that we don't have to check for overflow here because in the worst case > + (0xffffffff*0xffffffff) + 0x100000000 equals to 0xffffffff00000001L. > */ Again, s/overflow/carry/. > + expand_inc (z2, ccst); > + /* Extracting the higher part of the sum. */ > + tmp = gen_highpart (word_mode, tmp); > + tmp = force_reg (word_mode, tmp); > + tmp = convert_to_mode (mode, tmp, 1); > + > + /* The final result, again we don't have to check for overflow here. > */ > + expand_inc (z2, tmp); > + > + return z2; > + > + } > + > /* Try widening multiplication of opposite signedness, and adjust. */ > moptab = unsignedp ? smul_widen_optab : umul_widen_optab; > if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing >