Re: Implement more rtx vector folds on variable-length vectors

2019-07-22 Thread Jeff Law
On 7/15/19 9:30 AM, Richard Sandiford wrote:
> Richard Sandiford  writes:
>> This patch extends the tree-level folding of variable-length vectors
>> so that it can also be used on rtxes.  The first step is to move
>> the tree_vector_builder new_unary/binary_operator routines to the
>> parent vector_builder class (which in turn means adding a new
>> template parameter).  The second step is to make simplify-rtx.c
>> use a direct rtx analogue of the VECTOR_CST handling in fold-const.c.
>>
>> Tested on aarch64-linux-gnu, aarch64_be-elf and x86_64-linux-gnu.
>> OK to install?
>>
>> Richard
> 
> Here's a version updated for the earlier patch, so that we take
> both HONOR_NANS and HONOR_SNANS into account.  Tested on
> aarch64-linux-gnu so far.
> 
> Thanks,
> Richard
> 
> 
> 2019-07-15  Richard Sandiford  
> 
> gcc/
>   * rtl.h (bit_and_conditions, bit_ior_conditions): Declare.
>   * jump.c (flags_to_condition): Add an optional mode argument.
>   (bit_ior_conditions, bit_and_conditions): New functions.
>   * simplify-rtx.c (simplify_binary_operation_1): Try to fold an
>   AND or IOR of two comparisons into a single comparison.
>   (simplify_ternary_operation): Try to fold an if_then_else involving
>   two conditions into an AND of two conditions.
>   (test_merged_comparisons): New function.
>   (simplify_rtx_c_tests): Call it.
This is fine once the prereqs are approved.

jeff


Re: Implement more rtx vector folds on variable-length vectors

2019-07-15 Thread Richard Sandiford
Richard Sandiford  writes:
> This patch extends the tree-level folding of variable-length vectors
> so that it can also be used on rtxes.  The first step is to move
> the tree_vector_builder new_unary/binary_operator routines to the
> parent vector_builder class (which in turn means adding a new
> template parameter).  The second step is to make simplify-rtx.c
> use a direct rtx analogue of the VECTOR_CST handling in fold-const.c.
>
> Tested on aarch64-linux-gnu, aarch64_be-elf and x86_64-linux-gnu.
> OK to install?
>
> Richard

Here's a version updated for the earlier patch, so that we take
both HONOR_NANS and HONOR_SNANS into account.  Tested on
aarch64-linux-gnu so far.

Thanks,
Richard


2019-07-15  Richard Sandiford  

gcc/
	* rtl.h (bit_and_conditions, bit_ior_conditions): Declare.
	* jump.c (flags_to_condition): Add an optional mode argument.
	(bit_ior_conditions, bit_and_conditions): New functions.
	* simplify-rtx.c (simplify_binary_operation_1): Try to fold an
	AND or IOR of two comparisons into a single comparison.
	(simplify_ternary_operation): Try to fold an if_then_else involving
	two conditions into an AND of two conditions.
	(test_merged_comparisons): New function.
	(simplify_rtx_c_tests): Call it.
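
To make the intended use concrete, a caller could merge two comparisons
of the same operands roughly like this (a hypothetical sketch, not code
from the patch; MODE, X and Y are placeholders):

  /* Merge (ior (lt x y) (gt x y)) into a single comparison.  */
  rtx_code merged = bit_ior_conditions (LT, GT, GET_MODE (x));
  /* In an integer mode LT|GT covers exactly the "less" and "greater"
     orderings, so MERGED should be NE.  In a float mode that honors
     NaNs, the merged code must stay false on unordered operands, so
     LTGT would be expected instead; UNKNOWN means no single code
     expresses the combination.  */
  if (merged != UNKNOWN)
    return simplify_gen_relational (merged, mode, GET_MODE (x), x, y);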

Index: gcc/rtl.h
===================================================================
--- gcc/rtl.h   2019-07-12 09:14:06.000000000 +0100
+++ gcc/rtl.h   2019-07-15 16:24:30.685937855 +0100
@@ -3315,6 +3315,8 @@ extern enum rtx_code reverse_condition_m
 extern enum rtx_code swap_condition (enum rtx_code);
 extern enum rtx_code unsigned_condition (enum rtx_code);
 extern enum rtx_code signed_condition (enum rtx_code);
+extern rtx_code bit_and_conditions (rtx_code, rtx_code, machine_mode);
+extern rtx_code bit_ior_conditions (rtx_code, rtx_code, machine_mode);
 extern void mark_jump_label (rtx, rtx_insn *, int);
 
 /* Return true if integer comparison operator CODE interprets its operands
Index: gcc/jump.c
===================================================================
--- gcc/jump.c  2019-07-15 16:22:55.342699887 +0100
+++ gcc/jump.c  2019-07-15 16:24:30.685937855 +0100
@@ -138,13 +138,28 @@ #define CASE(CODE, ORDER, SIGNEDNESS, TR
 }
 
 /* Return the comparison code that implements FLAGS_* bitmask FLAGS.
+   If MODE is not VOIDmode, it gives the mode of the values being compared.
+
   Assert on failure if FORCE, otherwise return UNKNOWN.  */
 
 static rtx_code
-flags_to_condition (unsigned int flags, bool force)
+flags_to_condition (unsigned int flags, bool force,
+                    machine_mode mode = VOIDmode)
 {
+  unsigned int order_mask = FLAGS_ORDER;
+  if (mode != VOIDmode)
+    {
+      if (!HONOR_NANS (mode))
+        {
+          flags |= FLAGS_TRAP_NANS;
+          order_mask &= ~FLAGS_UNORDERED;
+        }
+      else if (!HONOR_SNANS (mode))
+        flags |= FLAGS_TRAP_SNANS;
+    }
+
 #define TEST(CODE, ORDER, SIGNEDNESS, TRAPS)                          \
-  if (((flags ^ (ORDER)) & FLAGS_ORDER) == 0                          \
+  if (((flags ^ (ORDER)) & order_mask) == 0                           \
       && (FLAGS_##SIGNEDNESS == 0                                     \
           || ((FLAGS_##SIGNEDNESS ^ flags) & FLAGS_SIGNEDNESS) == 0)  \
       && (FLAGS_##TRAPS & ~flags & FLAGS_TRAPS) == 0)                 \
@@ -722,6 +737,33 @@ comparison_dominates_p (enum rtx_code co
   return (((flags1 | flags2) & FLAGS_SIGNEDNESS) != FLAGS_SIGNEDNESS
           && (flags1 & ~flags2 & FLAGS_ORDER) == 0);
 }
+
+/* Return the comparison code that tests whether CODE1 | CODE2 is
+   true for mode MODE.  Return UNKNOWN if no such comparison exists.
+   The result can trap whenever either CODE1 or CODE2 traps.  */
+
+rtx_code
+bit_ior_conditions (rtx_code code1, rtx_code code2, machine_mode mode)
+{
+  unsigned int flags1 = condition_to_flags (code1);
+  unsigned int flags2 = condition_to_flags (code2);
+  unsigned int flags = flags1 | flags2;
+  return flags_to_condition (flags, false, mode);
+}
+
+/* Return the comparison code that tests whether CODE1 & CODE2 is
+   true for mode MODE.  Return UNKNOWN if no such comparison exists.
+   The result can trap whenever either CODE1 or CODE2 traps.  */
+
+rtx_code
+bit_and_conditions (rtx_code code1, rtx_code code2, machine_mode mode)
+{
+  unsigned int flags1 = condition_to_flags (code1);
+  unsigned int flags2 = condition_to_flags (code2);
+  unsigned int flags = ((flags1 & flags2 & FLAGS_ORDER)
+                        | ((flags1 | flags2) & ~FLAGS_ORDER));
+  return flags_to_condition (flags, false, mode);
+}
 
 /* Return 1 if INSN is an unconditional jump and nothing else.  */
 
Index: gcc/simplify-rtx.c
===================================================================
--- gcc/simplify-rtx.c  2019-07-12 09:14:06.000000000 +0100
+++ gcc/simplify-rtx.c  2019-07-15 16:24:30.689937823 +0100
@@ -2889,6 +2889,20 @@ simplify_binary_operation_1 (enum rtx_co
}
}
 
+  

Implement more rtx vector folds on variable-length vectors

2019-07-11 Thread Richard Sandiford
This patch extends the tree-level folding of variable-length vectors
so that it can also be used on rtxes.  The first step is to move
the tree_vector_builder new_unary/binary_operator routines to the
parent vector_builder class (which in turn means adding a new
template parameter).  The second step is to make simplify-rtx.c
use a direct rtx analogue of the VECTOR_CST handling in fold-const.c.
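
To sketch where this leads on the rtx side, a folding loop in the
spirit of the fold-const.c VECTOR_CST handling would look something
like the following (hypothetical code with a made-up function name,
not the patch text itself):

static rtx
fold_const_vector_unary (rtx_code code, machine_mode mode, rtx op)
{
  /* Ask the builder for an encoding that can represent the result of
     a unary operation on OP; the last argument says whether stepped
     encodings are acceptable.  */
  rtx_vector_builder builder;
  if (!builder.new_unary_operation (mode, op, false))
    return NULL_RTX;

  /* Fold each encoded element individually; the encoding takes care
     of the variable-length tail.  */
  unsigned int count = builder.encoded_nelts ();
  for (unsigned int i = 0; i < count; ++i)
    {
      rtx elt = simplify_unary_operation (code, GET_MODE_INNER (mode),
                                          CONST_VECTOR_ELT (op, i),
                                          GET_MODE_INNER (GET_MODE (op)));
      if (!elt)
        return NULL_RTX;
      builder.quick_push (elt);
    }
  return builder.build ();
}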

Tested on aarch64-linux-gnu, aarch64_be-elf and x86_64-linux-gnu.
OK to install?

Richard


2019-07-11  Richard Sandiford  

gcc/
	* vector-builder.h (vector_builder): Add a shape template parameter.
	(vector_builder::new_unary_operation): New function, generalizing
	the old tree_vector_builder function.
	(vector_builder::new_binary_operation): Likewise.
	(vector_builder::binary_encoded_nelts): Likewise.
	* int-vector-builder.h (int_vector_builder): Update template
	parameters to vector_builder.
	(int_vector_builder::shape_nelts): New function.
	* rtx-vector-builder.h (rtx_vector_builder): Update template
	parameters to vector_builder.
	(rtx_vector_builder::shape_nelts): New function.
	(rtx_vector_builder::nelts_of): Likewise.
	(rtx_vector_builder::npatterns_of): Likewise.
	(rtx_vector_builder::nelts_per_pattern_of): Likewise.
	* tree-vector-builder.h (tree_vector_builder): Update template
	parameters to vector_builder.
	(tree_vector_builder::shape_nelts): New function.
	(tree_vector_builder::nelts_of): Likewise.
	(tree_vector_builder::npatterns_of): Likewise.
	(tree_vector_builder::nelts_per_pattern_of): Likewise.
	* tree-vector-builder.c (tree_vector_builder::new_unary_operation)
	(tree_vector_builder::new_binary_operation): Delete.
	(tree_vector_builder::binary_encoded_nelts): Likewise.
	* simplify-rtx.c (distributes_over_addition_p): New function.
	(simplify_const_unary_operation)
	(simplify_const_binary_operation): Generalize handling of vector
	constants to include variable-length vectors.
	(test_vector_ops_series): Add more tests.
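
The new Shape hooks are deliberately tiny.  For the rtx builder, where
Shape is machine_mode, plausible definitions look like this (a sketch
only; the authoritative versions are in the full patch):

/* Static hooks required by vector_builder, rtx flavour.  */
static poly_uint64
shape_nelts (machine_mode mode)
{
  return GET_MODE_NUNITS (mode);
}

static poly_uint64
nelts_of (rtx x)
{
  return CONST_VECTOR_NUNITS (x);
}

static unsigned int
npatterns_of (rtx x)
{
  return CONST_VECTOR_NPATTERNS (x);
}

static unsigned int
nelts_per_pattern_of (rtx x)
{
  return CONST_VECTOR_NELTS_PER_PATTERN (x);
}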

Index: gcc/vector-builder.h
===================================================================
--- gcc/vector-builder.h        2019-06-07 08:39:43.126344672 +0100
+++ gcc/vector-builder.h        2019-07-11 08:55:03.187049079 +0100
@@ -45,8 +45,11 @@ #define GCC_VECTOR_BUILDER_H
   variable-length vectors.  finalize () then canonicalizes the encoding
   to a simpler form if possible.
 
-   The derived class Derived provides this functionality for specific Ts.
-   Derived needs to provide the following interface:
+   Shape is the type that specifies the number of elements in the vector
+   and (where relevant) the type of each element.
+
+   The derived class Derived provides the functionality of this class
+   for specific Ts.  Derived needs to provide the following interface:
 
   bool equal_p (T elt1, T elt2) const;
 
@@ -82,9 +85,30 @@ #define GCC_VECTOR_BUILDER_H
 
       Record that ELT2 is being elided, given that ELT1_PTR points to
       the last encoded element for the containing pattern.  This is
-      again provided for TREE_OVERFLOW handling.  */
+      again provided for TREE_OVERFLOW handling.
+
+     static poly_uint64 shape_nelts (Shape shape);
+
+        Return the number of elements in SHAPE.
+
+   The class provides additional functionality for the case in which
+   T can describe a vector constant as well as an individual element.
+   This functionality requires:
+
+     static poly_uint64 nelts_of (T x);
+
+        Return the number of elements in vector constant X.
+
+     static unsigned int npatterns_of (T x);
+
+        Return the number of patterns used to encode vector constant X.
+
+     static unsigned int nelts_per_pattern_of (T x);
 
-template<typename T, typename Derived>
+        Return the number of elements used to encode each pattern
+        in vector constant X.  */
+
+template<typename T, typename Shape, typename Derived>
 class vector_builder : public auto_vec<T>
 {
 public:
@@ -101,8 +125,13 @@ #define GCC_VECTOR_BUILDER_H
   bool operator == (const Derived &) const;
   bool operator != (const Derived &x) const { return !operator == (x); }
 
+  bool new_unary_operation (Shape, T, bool);
+  bool new_binary_operation (Shape, T, T, bool);
+
   void finalize ();
 
+  static unsigned int binary_encoded_nelts (T, T);
+
 protected:
   void new_vector (poly_uint64, unsigned int, unsigned int);
   void reshape (unsigned int, unsigned int);
@@ -121,16 +150,16 @@ #define GCC_VECTOR_BUILDER_H
   unsigned int m_nelts_per_pattern;
 };
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 inline const Derived *
-vector_builder<T, Derived>::derived () const
+vector_builder<T, Shape, Derived>::derived () const
 {
   return static_cast<const Derived *> (this);
 }
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 inline
-vector_builder<T, Derived>::vector_builder ()
+vector_builder<T, Shape, Derived>::vector_builder ()
   : m_full_nelts (0),
     m_npatterns (0),
     m_nelts_per_pattern (0)
@@ -140,18 +169,18 @@ vector_builder<T, Derived>::vector_build