diff --git a/gcc/match.pd b/gcc/match.pd
index 19f4a782ae9..106895aa568 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -599,6 +599,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
   (convert (trunc_mod @0 @1))))
 
+/* X * (Y / X) is the same as Y - (Y % X) when all operands share one type.  */
+(simplify
+ (mult:c @0 (trunc_div @1 @0))
+ (if (INTEGRAL_TYPE_P (type))
+  (minus @1 (trunc_mod @1 @0))))
+
 /* Optimize TRUNC_MOD_EXPR by a power of two into a BIT_AND_EXPR,
    i.e. "X % C" into "X & (C - 1)", if X and C are positive.
    Also optimize A % (C << N)  where C is a power of 2,
@@ -2388,9 +2394,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	 || TYPE_OVERFLOW_WRAPS (type))
      (negate (view_convert @1))
      (view_convert (negate @1))))
+
   (simplify
    (minus @0 (nop_convert1? (minus (nop_convert2? @0) @1)))
    (view_convert @1))
+
+  /* X - (X - Y) --> Y.  Conversions may only widen (a truncated inner X is
+     a different value) and all types need undefined, unsanitized overflow.  */
+  (simplify
+   (minus (convert1? @0) (convert2? (minus@2 (convert3? @@0) @1)))
+   (if (ANY_INTEGRAL_TYPE_P (type)
+        && TYPE_OVERFLOW_UNDEFINED (type) && !TYPE_OVERFLOW_SANITIZED (type)
+        && ANY_INTEGRAL_TYPE_P (TREE_TYPE (@2))
+        && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@2))
+        && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@2))
+        && ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+        && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
+        && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
+        && TYPE_PRECISION (TREE_TYPE (@0)) <= TYPE_PRECISION (TREE_TYPE (@2))
+        && TYPE_PRECISION (TREE_TYPE (@2)) <= TYPE_PRECISION (type))
+    (convert @1)))
+
   /* (A +- B) + (C - A)   -> C +- B */
   /* (A +  B) - (A - C)   -> B + C */
   /* More cases are handled with comparisons.  */
diff --git a/gcc/testsuite/gcc.dg/pr95176-2.c b/gcc/testsuite/gcc.dg/pr95176-2.c
new file mode 100644
index 00000000000..03fed413240
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr95176-2.c
@@ -0,0 +1,91 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-tree-optimized-raw" } */
+
+/* Test the X - (X - Y) --> Y transformation */
+
+extern int printf (__const char *__restrict __format, ...);
+
+int __attribute__ ((noinline))
+f(int a, int b)
+{
+    return a - (a-b);  /* X - (X - Y) with X and Y both int.  */
+}
+
+int __attribute__((optimize("O0"))) __attribute__ ((noinline))
+fNoOpt(volatile int a, volatile int b)
+{
+    return a - (a-b);  /* Same expression as f; main compares the results.  */
+}
+
+long __attribute__ ((noinline))
+f2(short a, long b)
+{
+	return a - (((int) a)-b);  /* Inner X has an explicit short-to-int cast.  */
+}
+
+long __attribute__((optimize("O0"))) __attribute__ ((noinline))
+f2NoOpt(volatile short a, volatile long b)
+{
+    return a - (((int) a)-b);  /* Same expression as f2; main compares them.  */
+}
+
+long __attribute__ ((noinline))
+f3(short a, long b)
+{
+    return a - (a-b);  /* short X and long Y with no explicit casts.  */
+}
+
+long __attribute__((optimize("O0"))) __attribute__ ((noinline))
+f3NoOpt(volatile short a, volatile long b)
+{
+    return a - (a-b);  /* Same expression as f3; main compares them.  */
+}
+
+long long __attribute__ ((noinline))
+f4(long b)
+{
+    return ((short)40L) - (((int)40)-b);  /* X is the constant 40 under two different casts.  */
+}
+
+long __attribute__((optimize("O0"))) __attribute__ ((noinline))
+f4NoOpt(volatile long b)  /* NOTE(review): returns long, while f4 returns long long.  */
+{
+    return ((short)40L) - (((int)40)-b);  /* Same expression as f4; main compares them.  */
+}
+
+long __attribute__ ((noinline))
+fNeg(long a, long b)  /* Not called by main; only its dump output is scanned.  */
+{
+	return a - (((short) a)-b);  /* Inner X is cast from long down to short.  */
+}
+
+long long __attribute__ ((noinline))
+fNeg2(long long v, long b)  /* Not called by main; only its dump output is scanned.  */
+{
+    return ((int)v) - (((short)v)-b);  /* X is cast to int outside but to short inside.  */
+}
+
+long __attribute__ ((noinline))
+fNeg3(short a, long b)  /* Not called by main; only its dump output is scanned.  */
+{
+	return a - (((unsigned int) a)-b);  /* Inner X is cast from short to unsigned int.  */
+}
+
+int main()
+{
+    if (f(2, 15) != fNoOpt(2, 15))  /* Each check: optimized function vs. its O0 twin.  */
+        __builtin_abort();
+    if (f2(2, 15) != f2NoOpt(2, 15))
+        __builtin_abort();
+    if (f3(2, 15) != f3NoOpt(2, 15))
+        __builtin_abort();
+    if (f4(15) != f4NoOpt(15))
+        __builtin_abort();
+    printf("pass");  /* Text expected by the dg-output directive.  */
+}
+
+// The scan below expects 12 instances of minus_expr in the output:
+// Two for each fNeg* function and two for each NoOpt* function except
+// f4NoOpt, whose constant operands presumably fold away even at -O0.
+/* { dg-final { scan-tree-dump-times "minus_expr" 12 "optimized" } } */
+/* { dg-output "pass" } */
diff --git a/gcc/testsuite/gcc.dg/pr95176.c b/gcc/testsuite/gcc.dg/pr95176.c
new file mode 100644
index 00000000000..ef087317187
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr95176.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-tree-optimized-raw" } */
+
+extern int printf (__const char *__restrict __format, ...);
+
+int __attribute__ ((noinline))
+f(int a, int b)
+{
+    return a * (b / a);  /* X * (Y / X), multiplier written first.  */
+}
+
+int __attribute__((optimize("O0"))) __attribute__ ((noinline))
+fNoOpt(int a, int b)
+{
+    return a * (b / a);  /* Same expression as f; main compares the results.  */
+}
+
+int __attribute__ ((noinline))
+f2(int a, int b)
+{
+    return (b / a) * a;  /* (Y / X) * X, division written first.  */
+}
+
+int __attribute__((optimize("O0"))) __attribute__ ((noinline))
+f2NoOpt(int a, int b)
+{
+    return (b / a) * a;  /* Same expression as f2; main compares the results.  */
+}
+
+int main()
+{
+    if (f(2, 15) != fNoOpt(2, 15))  /* Each check: optimized function vs. its O0 twin.  */
+        __builtin_abort();
+    if (f2(2, 15) != f2NoOpt(2, 15))
+        __builtin_abort();
+    printf("pass");  /* Text expected by the dg-output directive.  */
+}
+
+// fNoOpt and f2NoOpt should each keep one trunc_div_expr and one mult_expr;
+// f and f2 should each become one trunc_mod_expr and one minus_expr.
+/* { dg-final { scan-tree-dump-times "trunc_div_expr" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "mult_expr" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "trunc_mod_expr" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "minus_expr" 2 "optimized" } } */
+/* { dg-output "pass" } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20030807-10.c b/gcc/testsuite/gcc.dg/tree-ssa/20030807-10.c
index 0e01e511b78..4cd35738057 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/20030807-10.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/20030807-10.c
@@ -20,6 +20,9 @@ subreg_highpart_offset (outermode, innermode)
 /* There should be one mask with the value 3.  */
 /* { dg-final { scan-tree-dump-times " \& 3" 1 "vrp1"} } */
   
-/* There should be one right shift by 2 places.  */
-/* { dg-final { scan-tree-dump-times " >> 2" 1 "vrp1"} } */
+/* There should be no right shift by 2 places.  */
+/* { dg-final { scan-tree-dump-times " >> 2" 0 "vrp1"} } */
+
+/* The "difference / 4 * 4" should become a subtraction of the remainder.  */
+/* { dg-final { scan-tree-dump-times " - " 2 "vrp1"} } */
 
