Re: [PATCH V6] Optimize '(X - N * M) / N' to 'X / N - M' if valid

2023-09-03 Thread Jiufu Guo via Gcc-patches


Hi,

Richard Biener  writes:

> On Fri, 1 Sep 2023, Jiufu Guo wrote:
>
>> Hi,
>> 
>> Integer expression "(X - N * M) / N" can be optimized to "X / N - M" with
>> the below conditions:
>> 1. There is no wrap/overflow/underflow.
>>wrap/overflow/underflow breaks the arithmetic operation.
>> 2. "X - N * M" and "X" are not of opposite sign.
>>Here, the operation "/" would be "trunc_div", the fractional part is
>>discarded towards zero. If "X - N * M" and "X" are in different signs,
>>then trunc_div discards the fractional parts (of /N) in different
>>directions.
>> 
>> Compare the previous version:
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624801.html
>> This patch adds comments and update the pattern on "(t + C)" to be more
>> tight.
>> 
>> Bootstrap & regtest pass on ppc64{,le} and x86_64.
>> Is this patch ok for trunk?
>> 
>> BR,
>> Jeff (Jiufu Guo)
>> 
>>  PR tree-optimization/108757
>> 
>> gcc/ChangeLog:
>> 
>>  * match.pd ((X - N * M) / N): New pattern.
>>  ((X + N * M) / N): New pattern.
>>  ((X + C) div_rshift N): New pattern.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>>  * gcc.dg/pr108757-1.c: New test.
>>  * gcc.dg/pr108757-2.c: New test.
>>  * gcc.dg/pr108757.h: New test.
>> 
>> ---
>>  gcc/match.pd  |  78 ++
>>  gcc/testsuite/gcc.dg/pr108757-1.c |  18 +++
>>  gcc/testsuite/gcc.dg/pr108757-2.c |  19 +++
>>  gcc/testsuite/gcc.dg/pr108757.h   | 233 ++
>>  4 files changed, 348 insertions(+)
>>  create mode 100644 gcc/testsuite/gcc.dg/pr108757-1.c
>>  create mode 100644 gcc/testsuite/gcc.dg/pr108757-2.c
>>  create mode 100644 gcc/testsuite/gcc.dg/pr108757.h
>> 
>> diff --git a/gcc/match.pd b/gcc/match.pd
>> index 
>> fa598d5ca2e470f9cc3b82469e77d743b12f107e..863bc7299cdefc622a7806a4d32e37268c50d453
>>  100644
>> --- a/gcc/match.pd
>> +++ b/gcc/match.pd
>> @@ -959,6 +959,84 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>>  #endif
>> 
>>  
>> +#if GIMPLE
>> +(for div (trunc_div exact_div)
>> + /* Simplify (X + M*N) / N -> X / N + M.  */
>> + (simplify
>> +  (div (plus:c@4 @0 (mult:c@3 @1 @2)) @2)
>> +  (with {value_range vr0, vr1, vr2, vr3, vr4;}
>> +  (if (INTEGRAL_TYPE_P (type)
>> +   && get_range_query (cfun)->range_of_expr (vr1, @1)
>> +   && get_range_query (cfun)->range_of_expr (vr2, @2)
>> +   /* "N*M" doesn't overflow.  */
>> +   && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
>> +   && get_range_query (cfun)->range_of_expr (vr0, @0)
>> +   && get_range_query (cfun)->range_of_expr (vr3, @3)
>> +   /* "X+(N*M)" doesn't overflow.  */
>> +   && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr3)
>> +   && get_range_query (cfun)->range_of_expr (vr4, @4)
>> +   /* "X+N*M" is not with opposite sign as "X".  */
>> +   && (TYPE_UNSIGNED (type)
>> +   || (vr0.nonnegative_p () && vr4.nonnegative_p ())
>> +   || (vr0.nonpositive_p () && vr4.nonpositive_p (
>> +  (plus (div @0 @2) @1
>> +
>> + /* Simplify (X - M*N) / N -> X / N - M.  */
>> + (simplify
>> +  (div (minus@4 @0 (mult:c@3 @1 @2)) @2)
>> +  (with {value_range vr0, vr1, vr2, vr3, vr4;}
>> +  (if (INTEGRAL_TYPE_P (type)
>> +   && get_range_query (cfun)->range_of_expr (vr1, @1)
>> +   && get_range_query (cfun)->range_of_expr (vr2, @2)
>> +   /* "N * M" doesn't overflow.  */
>> +   && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
>> +   && get_range_query (cfun)->range_of_expr (vr0, @0)
>> +   && get_range_query (cfun)->range_of_expr (vr3, @3)
>> +   /* "X - (N*M)" doesn't overflow.  */
>> +   && range_op_handler (MINUS_EXPR).overflow_free_p (vr0, vr3)
>> +   && get_range_query (cfun)->range_of_expr (vr4, @4)
>> +   /* "X-N*M" is not with opposite sign as "X".  */
>> +   && (TYPE_UNSIGNED (type)
>> +   || (vr0.nonnegative_p () && vr4.nonnegative_p ())
>> +   || (vr0.nonpositive_p () && vr4.nonpositive_p (
>> +  (minus (div @0 @2) @1)
>> +
>> +/* Simplify
>> +   (X + C) / N -> X / N + C / N where C is multiple of N.
>> +   (X + C) >> N -> X >> N + C>>N if low N bits of C is 0.  */
>> +(for op (trunc_div exact_div rshift)
>> + (simplify
>> +  (op (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)
>> +   (with
>> +{
>> +  wide_int c = wi::to_wide (@1);
>> +  wide_int n = wi::to_wide (@2);
>> +  bool shift = op == RSHIFT_EXPR;
>> +  #define plus_op1(v) (shift ? wi::rshift (v, n, TYPE_SIGN (type)) \
>> + : wi::div_trunc (v, n, TYPE_SIGN (type)))
>> +  #define exact_mod(v) (shift ? wi::ctz (v) >= n.to_shwi () \
>> +  : wi::multiple_of_p (v, n, TYPE_SIGN (type)))
>
> please indent these full left
>
>> +  value_range vr0, vr1, vr3;
>> +}
>> +(if (INTEGRAL_TYPE_P (type)
>> + && get_range_query (cfun)->range_of_expr (vr0, @0))
>> + (if (exact_mod (c)
>> +  && get_range_query (cfun)->range_of_expr (vr1, @1)
>> + 

Re: [PATCH V6] Optimize '(X - N * M) / N' to 'X / N - M' if valid

2023-09-01 Thread Richard Biener via Gcc-patches
On Fri, 1 Sep 2023, Jiufu Guo wrote:

> Hi,
> 
> Integer expression "(X - N * M) / N" can be optimized to "X / N - M" with
> the below conditions:
> 1. There is no wrap/overflow/underflow.
>wrap/overflow/underflow breaks the arithmetic operation.
> 2. "X - N * M" and "X" are not of opposite sign.
>Here, the operation "/" would be "trunc_div", the fractional part is
>discarded towards zero. If "X - N * M" and "X" are in different signs,
>then trunc_div discards the fractional parts (of /N) in different
>directions.
> 
> Compare the previous version:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624801.html
> This patch adds comments and update the pattern on "(t + C)" to be more
> tight.
> 
> Bootstrap & regtest pass on ppc64{,le} and x86_64.
> Is this patch ok for trunk?
> 
> BR,
> Jeff (Jiufu Guo)
> 
>   PR tree-optimization/108757
> 
> gcc/ChangeLog:
> 
>   * match.pd ((X - N * M) / N): New pattern.
>   ((X + N * M) / N): New pattern.
>   ((X + C) div_rshift N): New pattern.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/pr108757-1.c: New test.
>   * gcc.dg/pr108757-2.c: New test.
>   * gcc.dg/pr108757.h: New test.
> 
> ---
>  gcc/match.pd  |  78 ++
>  gcc/testsuite/gcc.dg/pr108757-1.c |  18 +++
>  gcc/testsuite/gcc.dg/pr108757-2.c |  19 +++
>  gcc/testsuite/gcc.dg/pr108757.h   | 233 ++
>  4 files changed, 348 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/pr108757-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr108757-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr108757.h
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 
> fa598d5ca2e470f9cc3b82469e77d743b12f107e..863bc7299cdefc622a7806a4d32e37268c50d453
>  100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -959,6 +959,84 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  #endif
> 
>  
> +#if GIMPLE
> +(for div (trunc_div exact_div)
> + /* Simplify (X + M*N) / N -> X / N + M.  */
> + (simplify
> +  (div (plus:c@4 @0 (mult:c@3 @1 @2)) @2)
> +  (with {value_range vr0, vr1, vr2, vr3, vr4;}
> +  (if (INTEGRAL_TYPE_P (type)
> +   && get_range_query (cfun)->range_of_expr (vr1, @1)
> +   && get_range_query (cfun)->range_of_expr (vr2, @2)
> +   /* "N*M" doesn't overflow.  */
> +   && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
> +   && get_range_query (cfun)->range_of_expr (vr0, @0)
> +   && get_range_query (cfun)->range_of_expr (vr3, @3)
> +   /* "X+(N*M)" doesn't overflow.  */
> +   && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr3)
> +   && get_range_query (cfun)->range_of_expr (vr4, @4)
> +   /* "X+N*M" is not with opposite sign as "X".  */
> +   && (TYPE_UNSIGNED (type)
> +|| (vr0.nonnegative_p () && vr4.nonnegative_p ())
> +|| (vr0.nonpositive_p () && vr4.nonpositive_p (
> +  (plus (div @0 @2) @1
> +
> + /* Simplify (X - M*N) / N -> X / N - M.  */
> + (simplify
> +  (div (minus@4 @0 (mult:c@3 @1 @2)) @2)
> +  (with {value_range vr0, vr1, vr2, vr3, vr4;}
> +  (if (INTEGRAL_TYPE_P (type)
> +   && get_range_query (cfun)->range_of_expr (vr1, @1)
> +   && get_range_query (cfun)->range_of_expr (vr2, @2)
> +   /* "N * M" doesn't overflow.  */
> +   && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
> +   && get_range_query (cfun)->range_of_expr (vr0, @0)
> +   && get_range_query (cfun)->range_of_expr (vr3, @3)
> +   /* "X - (N*M)" doesn't overflow.  */
> +   && range_op_handler (MINUS_EXPR).overflow_free_p (vr0, vr3)
> +   && get_range_query (cfun)->range_of_expr (vr4, @4)
> +   /* "X-N*M" is not with opposite sign as "X".  */
> +   && (TYPE_UNSIGNED (type)
> +|| (vr0.nonnegative_p () && vr4.nonnegative_p ())
> +|| (vr0.nonpositive_p () && vr4.nonpositive_p (
> +  (minus (div @0 @2) @1)
> +
> +/* Simplify
> +   (X + C) / N -> X / N + C / N where C is multiple of N.
> +   (X + C) >> N -> X >> N + C>>N if low N bits of C is 0.  */
> +(for op (trunc_div exact_div rshift)
> + (simplify
> +  (op (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)
> +   (with
> +{
> +  wide_int c = wi::to_wide (@1);
> +  wide_int n = wi::to_wide (@2);
> +  bool shift = op == RSHIFT_EXPR;
> +  #define plus_op1(v) (shift ? wi::rshift (v, n, TYPE_SIGN (type)) \
> +  : wi::div_trunc (v, n, TYPE_SIGN (type)))
> +  #define exact_mod(v) (shift ? wi::ctz (v) >= n.to_shwi () \
> +   : wi::multiple_of_p (v, n, TYPE_SIGN (type)))

please indent these full left

> +  value_range vr0, vr1, vr3;
> +}
> +(if (INTEGRAL_TYPE_P (type)
> +  && get_range_query (cfun)->range_of_expr (vr0, @0))
> + (if (exact_mod (c)
> +   && get_range_query (cfun)->range_of_expr (vr1, @1)
> +   /* "X+C" doesn't overflow.  */
> +   && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr1)
> +   && get_range_query 

[PATCH V6] Optimize '(X - N * M) / N' to 'X / N - M' if valid

2023-09-01 Thread Jiufu Guo via Gcc-patches
Hi,

Integer expression "(X - N * M) / N" can be optimized to "X / N - M" with
the below conditions:
1. There is no wrap/overflow/underflow.
   wrap/overflow/underflow breaks the arithmetic operation.
2. "X - N * M" and "X" are not of opposite sign.
   Here, the operation "/" would be "trunc_div", the fractional part is
   discarded towards zero. If "X - N * M" and "X" are in different signs,
   then trunc_div discards the fractional parts (of /N) in different
   directions.

Compare the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624801.html
This patch adds comments and update the pattern on "(t + C)" to be more
tight.

Bootstrap & regtest pass on ppc64{,le} and x86_64.
Is this patch ok for trunk?

BR,
Jeff (Jiufu Guo)

PR tree-optimization/108757

gcc/ChangeLog:

* match.pd ((X - N * M) / N): New pattern.
((X + N * M) / N): New pattern.
((X + C) div_rshift N): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/pr108757-1.c: New test.
* gcc.dg/pr108757-2.c: New test.
* gcc.dg/pr108757.h: New test.

---
 gcc/match.pd  |  78 ++
 gcc/testsuite/gcc.dg/pr108757-1.c |  18 +++
 gcc/testsuite/gcc.dg/pr108757-2.c |  19 +++
 gcc/testsuite/gcc.dg/pr108757.h   | 233 ++
 4 files changed, 348 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr108757-1.c
 create mode 100644 gcc/testsuite/gcc.dg/pr108757-2.c
 create mode 100644 gcc/testsuite/gcc.dg/pr108757.h

diff --git a/gcc/match.pd b/gcc/match.pd
index 
fa598d5ca2e470f9cc3b82469e77d743b12f107e..863bc7299cdefc622a7806a4d32e37268c50d453
 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -959,6 +959,84 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 #endif

 
+#if GIMPLE
+(for div (trunc_div exact_div)
+ /* Simplify (X + M*N) / N -> X / N + M.  */
+ (simplify
+  (div (plus:c@4 @0 (mult:c@3 @1 @2)) @2)
+  (with {value_range vr0, vr1, vr2, vr3, vr4;}
+  (if (INTEGRAL_TYPE_P (type)
+   && get_range_query (cfun)->range_of_expr (vr1, @1)
+   && get_range_query (cfun)->range_of_expr (vr2, @2)
+   /* "N*M" doesn't overflow.  */
+   && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
+   && get_range_query (cfun)->range_of_expr (vr0, @0)
+   && get_range_query (cfun)->range_of_expr (vr3, @3)
+   /* "X+(N*M)" doesn't overflow.  */
+   && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr3)
+   && get_range_query (cfun)->range_of_expr (vr4, @4)
+   /* "X+N*M" is not with opposite sign as "X".  */
+   && (TYPE_UNSIGNED (type)
+  || (vr0.nonnegative_p () && vr4.nonnegative_p ())
+  || (vr0.nonpositive_p () && vr4.nonpositive_p (
+  (plus (div @0 @2) @1
+
+ /* Simplify (X - M*N) / N -> X / N - M.  */
+ (simplify
+  (div (minus@4 @0 (mult:c@3 @1 @2)) @2)
+  (with {value_range vr0, vr1, vr2, vr3, vr4;}
+  (if (INTEGRAL_TYPE_P (type)
+   && get_range_query (cfun)->range_of_expr (vr1, @1)
+   && get_range_query (cfun)->range_of_expr (vr2, @2)
+   /* "N * M" doesn't overflow.  */
+   && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
+   && get_range_query (cfun)->range_of_expr (vr0, @0)
+   && get_range_query (cfun)->range_of_expr (vr3, @3)
+   /* "X - (N*M)" doesn't overflow.  */
+   && range_op_handler (MINUS_EXPR).overflow_free_p (vr0, vr3)
+   && get_range_query (cfun)->range_of_expr (vr4, @4)
+   /* "X-N*M" is not with opposite sign as "X".  */
+   && (TYPE_UNSIGNED (type)
+  || (vr0.nonnegative_p () && vr4.nonnegative_p ())
+  || (vr0.nonpositive_p () && vr4.nonpositive_p (
+  (minus (div @0 @2) @1)
+
+/* Simplify
+   (X + C) / N -> X / N + C / N where C is multiple of N.
+   (X + C) >> N -> X >> N + C>>N if low N bits of C is 0.  */
+(for op (trunc_div exact_div rshift)
+ (simplify
+  (op (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)
+   (with
+{
+  wide_int c = wi::to_wide (@1);
+  wide_int n = wi::to_wide (@2);
+  bool shift = op == RSHIFT_EXPR;
+  #define plus_op1(v) (shift ? wi::rshift (v, n, TYPE_SIGN (type)) \
+: wi::div_trunc (v, n, TYPE_SIGN (type)))
+  #define exact_mod(v) (shift ? wi::ctz (v) >= n.to_shwi () \
+ : wi::multiple_of_p (v, n, TYPE_SIGN (type)))
+  value_range vr0, vr1, vr3;
+}
+(if (INTEGRAL_TYPE_P (type)
+&& get_range_query (cfun)->range_of_expr (vr0, @0))
+ (if (exact_mod (c)
+ && get_range_query (cfun)->range_of_expr (vr1, @1)
+ /* "X+C" doesn't overflow.  */
+ && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr1)
+ && get_range_query (cfun)->range_of_expr (vr3, @3)
+ /* "X+C" and "X" are not of opposite sign.  */
+ && (TYPE_UNSIGNED (type)
+ || (vr0.nonnegative_p () && vr3.nonnegative_p ())
+ || (vr0.nonpositive_p () && vr3.nonpositive_p (
+   (plus (op @0 @2) {