diff -rup gcc-20120418-orig/gcc/config/arm/arm.c gcc-20120418/gcc/config/arm/arm.c
--- gcc-20120418-orig/gcc/config/arm/arm.c	2012-04-20 13:59:17.521258861 +0400
+++ gcc-20120418/gcc/config/arm/arm.c	2012-05-14 15:38:44.980815823 +0400
@@ -7131,6 +7131,8 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
 	*total = COSTS_N_INSNS (2);
       else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
 	*total = COSTS_N_INSNS (4);
+      else if (mode == DImode) 
+        *total = COSTS_N_INSNS (50);
       else
 	*total = COSTS_N_INSNS (20);
       return false;
diff -rup gcc-20120418-orig/gcc/config/mips/mips.c gcc-20120418/gcc/config/mips/mips.c
--- gcc-20120418-orig/gcc/config/mips/mips.c	2012-04-20 13:59:16.417258891 +0400
+++ gcc-20120418/gcc/config/mips/mips.c	2012-05-14 15:41:05.132812098 +0400
@@ -3845,8 +3845,13 @@ mips_rtx_costs (rtx x, int code, int out
 	    }
 	  *total = COSTS_N_INSNS (mips_idiv_insns ());
 	}
-      else if (mode == DImode)
+      else if (mode == DImode) {
+	if (!TARGET_64BIT)
+	   /* A double-word integer divide is an expensive libcall.  */
+	   *total = COSTS_N_INSNS (200);
+	  else
         *total = mips_cost->int_div_di;
+	}
       else
 	*total = mips_cost->int_div_si;
       return false;
diff -rup gcc-20120418-orig/gcc/expmed.c gcc-20120418/gcc/expmed.c
--- gcc-20120418-orig/gcc/expmed.c	2012-04-20 14:00:49.125256428 +0400
+++ gcc-20120418/gcc/expmed.c	2012-05-22 17:17:16.618291346 +0400
@@ -3523,6 +3523,110 @@ expand_mult_highpart_optab (enum machine
 	}
     }
 
+  if ((size - 1 > BITS_PER_WORD
+       && BITS_PER_WORD == 32 && mode == DImode)
+      && unsignedp
+      && (!optimize_size && (optimize>1))
+      && (4 * mul_cost[speed][mode] + 4 * add_cost[speed][mode]
+          + shift_cost[speed][mode][31] < max_cost))
+    {
+      unsigned HOST_WIDE_INT d;
+      rtx x1, x0, y1, y0, z2, z0, tmp, u0, u0tmp, u1, c, c1, ccst, cres, result;
+
+      d = (INTVAL (op1) & GET_MODE_MASK (DImode));
+
+      /* Extracting the higher part of the 64-bit multiplier.  */
+      x1 = gen_highpart (SImode, op0);
+      x1 = force_reg (SImode, x1);
+
+      /* Extracting the lower part of the 64-bit multiplier.  */
+      x0 = gen_lowpart (SImode, op0);
+      x0 = force_reg (SImode, x0);
+
+      x1 = convert_to_mode (DImode, x1, 1);
+      x0 = convert_to_mode (DImode, x0, 1);
+
+      /* Splitting the 64-bit constant into its higher and lower parts.  */
+      y0 = gen_rtx_CONST_INT (DImode, d&UINT32_MAX);
+      y1 = gen_rtx_CONST_INT (DImode, d>>32);
+
+      z2 = gen_reg_rtx (DImode);
+      u0 = gen_reg_rtx (DImode);
+
+      /* Unsigned multiplication of the higher multiplier part
+	 and the higher constant part.  */
+      z2 = expand_mult(DImode, x1, y1, z2, 1);
+      /* Unsigned multiplication of the lower multiplier part
+         and the higher constant part.  */
+      u0 = expand_mult(DImode, x0, y1, u0, 1);
+
+      z0 = gen_reg_rtx (DImode);
+      u1 = gen_reg_rtx (DImode);
+
+      /* Unsigned multiplication of the lower multiplier part
+         and the lower constant part.  */
+      z0 = expand_mult (DImode, x0, y0, z0, 1);
+
+      /* Unsigned multiplication of the higher multiplier part
+         and the lower constant part.  */
+      u1 = expand_mult (DImode, x1, y0, u1, 1);
+
+      /* Get the higher part of the product of the lower multiplier part
+         and the lower constant part; the lower part of that product does
+         not contribute to the final result.  */
+      u0tmp = gen_highpart (SImode, z0);
+      u0tmp = force_reg (SImode, u0tmp);
+      u0tmp = convert_to_mode (DImode, u0tmp, 1);
+
+      /* Add the higher part of the product of the lower multiplier part and
+         the lower constant part to the product of the lower multiplier part
+         and the higher constant part.  Note that this addition cannot
+         overflow: in the worst case (0xffffffff*0xffffffff)+0xffffffff
+         equals 0xffffffff00000000.  */
+      expand_inc (u0, u0tmp);
+      tmp = gen_reg_rtx (DImode);
+
+      /* Adding multiplication between the lower multiplier part and the higher
+         constant part with the higher part of multiplication between the lower
+         multiplier part and the lower constant part to the result of multiplication
+         between the higher multiplier part and the lower constant part.  */
+      tmp = expand_binop (DImode, add_optab, u0, u1, tmp, 1, OPTAB_LIB_WIDEN);
+      if (!tmp)
+             return 0;
+
+      /* Checking for overflow.  */
+      c = gen_reg_rtx (DImode);
+      c1 = gen_reg_rtx (DImode);
+      cres = gen_reg_rtx (DImode);
+
+      emit_store_flag_force (c, GT, u0, tmp, DImode, 1, 1);
+      emit_store_flag_force (c1, GT, u1, tmp, DImode, 1, 1);
+      result = expand_binop (DImode, ior_optab, c, c1, cres, 1, OPTAB_LIB_WIDEN);
+      if (!result)
+           return 0;
+
+      ccst = gen_reg_rtx (DImode);
+      ccst = expand_shift (LSHIFT_EXPR, DImode, cres, 32, ccst, 1);
+
+      /* Add the carry (0x100000000 if the sum above overflowed, otherwise 0)
+         to the product of the higher multiplier part and the higher constant
+         part.  We don't have to check for overflow here because in the worst
+         case (0xffffffff*0xffffffff) + 0x100000000 equals 0xffffffff00000001.  */
+      expand_inc (z2, ccst);
+
+
+      /* Extracting the higher part of the sum.  */
+      tmp = gen_highpart (SImode, tmp);
+      tmp = force_reg (SImode, tmp);
+      tmp = convert_to_mode (DImode, tmp, 1);
+
+      /* The final result, again we don't have to check for overflow here.  */
+      expand_inc (z2, tmp);
+
+      return z2;
+
+    }
+
   /* Try widening multiplication of opposite signedness, and adjust.  */
   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing