Re: [PATCH] Improve gimplification of constructors with RANGE_EXPRs (PR c++/82294, PR c++/87436)

2018-12-14 Thread Jakub Jelinek
On Fri, Dec 14, 2018 at 12:22:28PM +0100, Richard Biener wrote:
> > Still need to wait for the FE patch if I want to commit the testcases, those
> > depend on both patches.
> > I've added size32plus effective target to the larger test, as 384MB is too
> > much for 16 or 20 bit address targets.
> > And, I'll gather statistics on how often this makes a difference during
> > gimplification during my next bootstraps/regtests.

Besides those 2 new testcases it only made a difference on:
g++.dg/torture/pr60746.C
which has
{[0 ... 4]={[0 ... 1]={._vptr.One=&_ZTV3One + 16}}}
ctor (80 bytes), but only at -O0, so comparing the assembly in that case
is kind of pointless.  If I modify the testcase to pass list_arry address
to external function and compile with -O2, the resulting assembly is
identical before/after this commit, as we inline the copy from the constant
ctor and unroll the loops.
If I modify it to:
class One
{
public:
  virtual unsigned long getSize () const;
};

class Two
{
  virtual int run ();
};

void bar (One (*)[2]);

int
Two::run ()
{
  One list_arry[1000][2];
  bar (list_arry);
  return 0;
}
it is similar difference to the other testcases, either 16000 bytes
long .rodata initializer with memcpy from it, or a small loop.

Jakub


Re: [PATCH] Improve gimplification of constructors with RANGE_EXPRs (PR c++/82294, PR c++/87436)

2018-12-14 Thread Richard Biener
On Fri, 14 Dec 2018, Jakub Jelinek wrote:

> On Fri, Dec 14, 2018 at 10:40:19AM +0100, Richard Biener wrote:
> > This looks OK to me - the only comment I have is on the two magic
> > constants (64 and 8) which are used twice in the patch.  Can you
> > either see to hoist the common condition into sth like
> > 
> >  bool prefer_loop_initializer_p = ...
> > 
> > or add #defines for those constants?  I suppose the hoisting
> > might be tricky as int_size_in_bytes can return -1 and the
> > workarounds are different in both places right now (maybe that's
> > a bug as well...).  OTOH using (unsigned)int_size_in_bytes
> > looks reasonable in general.
> 
> So like this?

Yes.

Thanks,
Richard.

> Still need to wait for the FE patch if I want to commit the testcases, those
> depend on both patches.
> I've added size32plus effective target to the larger test, as 384MB is too
> much for 16 or 20 bit address targets.
> And, I'll gather statistics on how often this makes a difference during
> gimplification during my next bootstraps/regtests.
> 
> 2018-12-14  Jakub Jelinek  
> 
>   PR c++/82294
>   PR c++/87436
>   * expr.h (categorize_ctor_elements): Add p_unique_nz_elts argument.
>   * expr.c (categorize_ctor_elements_1): Likewise.  Compute it like
>   p_nz_elts, except don't multiply it by mult.  Adjust recursive call.
>   Fix up COMPLEX_CST handling.
>   (categorize_ctor_elements): Add p_unique_nz_elts argument, initialize
>   it and pass it through to categorize_ctor_elements_1.
>   (mostly_zeros_p, all_zeros_p): Adjust categorize_ctor_elements callers.
>   * gimplify.c (gimplify_init_constructor): Likewise.  Don't force
>   ctor into readonly data section if num_unique_nonzero_elements is
>   smaller or equal to 1/8 of num_nonzero_elements and size is >= 64
>   bytes.
> 
>   * g++.dg/tree-ssa/pr82294.C: New test.
>   * g++.dg/tree-ssa/pr87436.C: New test.
> 
> --- gcc/expr.h.jj 2018-12-13 18:00:10.527301479 +0100
> +++ gcc/expr.h2018-12-14 11:52:31.941071185 +0100
> @@ -309,7 +309,8 @@ extern bool can_move_by_pieces (unsigned
>  extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree);
>  
>  extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *,
> -   HOST_WIDE_INT *, bool *);
> +   HOST_WIDE_INT *, HOST_WIDE_INT *,
> +   bool *);
>  
>  extern void expand_operands (tree, tree, rtx, rtx*, rtx*,
>enum expand_modifier);
> --- gcc/expr.c.jj 2018-12-13 18:00:10.426303121 +0100
> +++ gcc/expr.c2018-12-14 11:52:31.945071118 +0100
> @@ -5945,10 +5945,11 @@ count_type_elements (const_tree type, bo
>  
>  static bool
>  categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
> + HOST_WIDE_INT *p_unique_nz_elts,
>   HOST_WIDE_INT *p_init_elts, bool *p_complete)
>  {
>unsigned HOST_WIDE_INT idx;
> -  HOST_WIDE_INT nz_elts, init_elts, num_fields;
> +  HOST_WIDE_INT nz_elts, unique_nz_elts, init_elts, num_fields;
>tree value, purpose, elt_type;
>  
>/* Whether CTOR is a valid constant initializer, in accordance with what
> @@ -5958,6 +5959,7 @@ categorize_ctor_elements_1 (const_tree c
>bool const_p = const_from_elts_p ? true : TREE_STATIC (ctor);
>  
>nz_elts = 0;
> +  unique_nz_elts = 0;
>init_elts = 0;
>num_fields = 0;
>elt_type = NULL_TREE;
> @@ -5982,12 +5984,13 @@ categorize_ctor_elements_1 (const_tree c
>   {
>   case CONSTRUCTOR:
> {
> - HOST_WIDE_INT nz = 0, ic = 0;
> + HOST_WIDE_INT nz = 0, unz = 0, ic = 0;
>  
> - bool const_elt_p = categorize_ctor_elements_1 (value, , ,
> -p_complete);
> + bool const_elt_p = categorize_ctor_elements_1 (value, , ,
> +, p_complete);
>  
>   nz_elts += mult * nz;
> + unique_nz_elts += unz;
>   init_elts += mult * ic;
>  
>   if (const_from_elts_p && const_p)
> @@ -5999,21 +6002,31 @@ categorize_ctor_elements_1 (const_tree c
>   case REAL_CST:
>   case FIXED_CST:
> if (!initializer_zerop (value))
> - nz_elts += mult;
> + {
> +   nz_elts += mult;
> +   unique_nz_elts++;
> + }
> init_elts += mult;
> break;
>  
>   case STRING_CST:
> nz_elts += mult * TREE_STRING_LENGTH (value);
> +   unique_nz_elts += TREE_STRING_LENGTH (value);
> init_elts += mult * TREE_STRING_LENGTH (value);
> break;
>  
>   case COMPLEX_CST:
> if (!initializer_zerop (TREE_REALPART (value)))
> - nz_elts += mult;
> + {
> +   nz_elts += mult;
> +   unique_nz_elts++;
> + }
> if (!initializer_zerop (TREE_IMAGPART (value)))
> - nz_elts += mult;
> -  

Re: [PATCH] Improve gimplification of constructors with RANGE_EXPRs (PR c++/82294, PR c++/87436)

2018-12-14 Thread Jakub Jelinek
On Fri, Dec 14, 2018 at 10:40:19AM +0100, Richard Biener wrote:
> This looks OK to me - the only comment I have is on the two magic
> constants (64 and 8) which are used twice in the patch.  Can you
> either see to hoist the common condition into sth like
> 
>  bool prefer_loop_initializer_p = ...
> 
> or add #defines for those constants?  I suppose the hoisting
> might be tricky as int_size_in_bytes can return -1 and the
> workarounds are different in both places right now (maybe that's
> a bug as well...).  OTOH using (unsigned)int_size_in_bytes
> looks reasonable in general.

So like this?
Still need to wait for the FE patch if I want to commit the testcases, those
depend on both patches.
I've added size32plus effective target to the larger test, as 384MB is too
much for 16 or 20 bit address targets.
And, I'll gather statistics on how often this makes a difference during
gimplification during my next bootstraps/regtests.

2018-12-14  Jakub Jelinek  

PR c++/82294
PR c++/87436
* expr.h (categorize_ctor_elements): Add p_unique_nz_elts argument.
* expr.c (categorize_ctor_elements_1): Likewise.  Compute it like
p_nz_elts, except don't multiply it by mult.  Adjust recursive call.
Fix up COMPLEX_CST handling.
(categorize_ctor_elements): Add p_unique_nz_elts argument, initialize
it and pass it through to categorize_ctor_elements_1.
(mostly_zeros_p, all_zeros_p): Adjust categorize_ctor_elements callers.
* gimplify.c (gimplify_init_constructor): Likewise.  Don't force
ctor into readonly data section if num_unique_nonzero_elements is
smaller or equal to 1/8 of num_nonzero_elements and size is >= 64
bytes.

* g++.dg/tree-ssa/pr82294.C: New test.
* g++.dg/tree-ssa/pr87436.C: New test.

--- gcc/expr.h.jj   2018-12-13 18:00:10.527301479 +0100
+++ gcc/expr.h  2018-12-14 11:52:31.941071185 +0100
@@ -309,7 +309,8 @@ extern bool can_move_by_pieces (unsigned
 extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree);
 
 extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *,
- HOST_WIDE_INT *, bool *);
+ HOST_WIDE_INT *, HOST_WIDE_INT *,
+ bool *);
 
 extern void expand_operands (tree, tree, rtx, rtx*, rtx*,
 enum expand_modifier);
--- gcc/expr.c.jj   2018-12-13 18:00:10.426303121 +0100
+++ gcc/expr.c  2018-12-14 11:52:31.945071118 +0100
@@ -5945,10 +5945,11 @@ count_type_elements (const_tree type, bo
 
 static bool
 categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
+   HOST_WIDE_INT *p_unique_nz_elts,
HOST_WIDE_INT *p_init_elts, bool *p_complete)
 {
   unsigned HOST_WIDE_INT idx;
-  HOST_WIDE_INT nz_elts, init_elts, num_fields;
+  HOST_WIDE_INT nz_elts, unique_nz_elts, init_elts, num_fields;
   tree value, purpose, elt_type;
 
   /* Whether CTOR is a valid constant initializer, in accordance with what
@@ -5958,6 +5959,7 @@ categorize_ctor_elements_1 (const_tree c
   bool const_p = const_from_elts_p ? true : TREE_STATIC (ctor);
 
   nz_elts = 0;
+  unique_nz_elts = 0;
   init_elts = 0;
   num_fields = 0;
   elt_type = NULL_TREE;
@@ -5982,12 +5984,13 @@ categorize_ctor_elements_1 (const_tree c
{
case CONSTRUCTOR:
  {
-   HOST_WIDE_INT nz = 0, ic = 0;
+   HOST_WIDE_INT nz = 0, unz = 0, ic = 0;
 
-   bool const_elt_p = categorize_ctor_elements_1 (value, , ,
-  p_complete);
+   bool const_elt_p = categorize_ctor_elements_1 (value, , ,
+  , p_complete);
 
nz_elts += mult * nz;
+   unique_nz_elts += unz;
init_elts += mult * ic;
 
if (const_from_elts_p && const_p)
@@ -5999,21 +6002,31 @@ categorize_ctor_elements_1 (const_tree c
case REAL_CST:
case FIXED_CST:
  if (!initializer_zerop (value))
-   nz_elts += mult;
+   {
+ nz_elts += mult;
+ unique_nz_elts++;
+   }
  init_elts += mult;
  break;
 
case STRING_CST:
  nz_elts += mult * TREE_STRING_LENGTH (value);
+ unique_nz_elts += TREE_STRING_LENGTH (value);
  init_elts += mult * TREE_STRING_LENGTH (value);
  break;
 
case COMPLEX_CST:
  if (!initializer_zerop (TREE_REALPART (value)))
-   nz_elts += mult;
+   {
+ nz_elts += mult;
+ unique_nz_elts++;
+   }
  if (!initializer_zerop (TREE_IMAGPART (value)))
-   nz_elts += mult;
- init_elts += mult;
+   {
+ nz_elts += mult;
+ unique_nz_elts++;
+   }
+ init_elts += 2 * mult;
  break;
 
case VECTOR_CST:
@@ 

Re: [PATCH] Improve gimplification of constructors with RANGE_EXPRs (PR c++/82294, PR c++/87436)

2018-12-14 Thread Richard Biener
On Fri, 14 Dec 2018, Jakub Jelinek wrote:

> Hi!
> 
> With the previously posted patch to use RANGE_EXPRs in build_vec_init,
> the following patch attempts to do what the reporters were asking for
> - determine if it wouldn't be more efficient to use (perhaps nested) loops
> to initialize at runtime some automatic variable over having huge .rodata
> constants that are copied over into the automatic variables or used as the
> variable itself.
> 
> E.g. on the PR87436 testcase, we have RANGE_EXPR x4096 nested in a
> RANGE_EXPR x4096 with 24 byte innermost constructor.  Without this patch
> we emit 384MB long .rodata constant initializer that is then copied over
> in the constructor, with this patch we just have 2 nested loops.
> 
> The patch extends categorize_ctor_elements so that it gives next to the
> number of all scalars and number of non-zero scalars also something that
> allows us to guess how many RANGE_EXPRs are in there, i.e. how large the
> runtime code initializing that ctor would be.  It keeps doing the old thing
> for all initializers < 64 bytes, I think for those generally using runtime
> loops wouldn't be beneficial.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

This looks OK to me - the only comment I have is on the two magic
constants (64 and 8) which are used twice in the patch.  Can you
either see to hoist the common condition into sth like

 bool prefer_loop_initializer_p = ...

or add #defines for those constants?  I suppose the hoisting
might be tricky as int_size_in_bytes can return -1 and the
workarounds are different in both places right now (maybe that's
a bug as well...).  OTOH using (unsigned)int_size_in_bytes
looks reasonable in general.

Thanks for tacking this long-standing issue,
Richard.

> If the C FE starts using RANGE_EXPRs at some point e.g. for:
> struct S { int a, b, c, d; };
> void foo (const struct S *);
> 
> void
> bar (void)
> {
>   const struct S s[] = { [0 ... 9] = { 0, 1, 2, 3 } };
>   foo ([0]);
> }
> it could benefit from this patch too.
> 
> 2018-12-13  Jakub Jelinek  
> 
>   PR c++/82294
>   PR c++/87436
>   * expr.h (categorize_ctor_elements): Add p_unique_nz_elts argument.
>   * expr.c (categorize_ctor_elements_1): Likewise.  Compute it like
>   p_nz_elts, except don't multiply it by mult.  Adjust recursive call.
>   Fix up COMPLEX_CST handling.
>   (categorize_ctor_elements): Add p_unique_nz_elts argument, initialize
>   it and pass it through to categorize_ctor_elements_1.
>   (mostly_zeros_p, all_zeros_p): Adjust categorize_ctor_elements callers.
>   * gimplify.c (gimplify_init_constructor): Likewise.  Don't force
>   ctor into readonly data section if num_unique_nonzero_elements is
>   smaller or equal to 1/8 of num_nonzero_elements and size is >= 64
>   bytes.
> 
>   * g++.dg/tree-ssa/pr82294.C: New test.
>   * g++.dg/tree-ssa/pr87436.C: New test.
> 
> --- gcc/expr.h.jj 2018-11-27 16:37:40.249419631 +0100
> +++ gcc/expr.h2018-12-13 16:39:24.839050215 +0100
> @@ -309,7 +309,8 @@ extern bool can_move_by_pieces (unsigned
>  extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree);
>  
>  extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *,
> -   HOST_WIDE_INT *, bool *);
> +   HOST_WIDE_INT *, HOST_WIDE_INT *,
> +   bool *);
>  
>  extern void expand_operands (tree, tree, rtx, rtx*, rtx*,
>enum expand_modifier);
> --- gcc/expr.c.jj 2018-11-27 16:37:40.249419631 +0100
> +++ gcc/expr.c2018-12-13 17:13:47.083545126 +0100
> @@ -5945,10 +5945,11 @@ count_type_elements (const_tree type, bo
>  
>  static bool
>  categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
> + HOST_WIDE_INT *p_unique_nz_elts,
>   HOST_WIDE_INT *p_init_elts, bool *p_complete)
>  {
>unsigned HOST_WIDE_INT idx;
> -  HOST_WIDE_INT nz_elts, init_elts, num_fields;
> +  HOST_WIDE_INT nz_elts, unique_nz_elts, init_elts, num_fields;
>tree value, purpose, elt_type;
>  
>/* Whether CTOR is a valid constant initializer, in accordance with what
> @@ -5958,6 +5959,7 @@ categorize_ctor_elements_1 (const_tree c
>bool const_p = const_from_elts_p ? true : TREE_STATIC (ctor);
>  
>nz_elts = 0;
> +  unique_nz_elts = 0;
>init_elts = 0;
>num_fields = 0;
>elt_type = NULL_TREE;
> @@ -5982,12 +5984,13 @@ categorize_ctor_elements_1 (const_tree c
>   {
>   case CONSTRUCTOR:
> {
> - HOST_WIDE_INT nz = 0, ic = 0;
> + HOST_WIDE_INT nz = 0, unz = 0, ic = 0;
>  
> - bool const_elt_p = categorize_ctor_elements_1 (value, , ,
> -p_complete);
> + bool const_elt_p = categorize_ctor_elements_1 (value, , ,
> +, 

[PATCH] Improve gimplification of constructors with RANGE_EXPRs (PR c++/82294, PR c++/87436)

2018-12-13 Thread Jakub Jelinek
Hi!

With the previously posted patch to use RANGE_EXPRs in build_vec_init,
the following patch attempts to do what the reporters were asking for
- determine if it wouldn't be more efficient to use (perhaps nested) loops
to initialize at runtime some automatic variable over having huge .rodata
constants that are copied over into the automatic variables or used as the
variable itself.

E.g. on the PR87436 testcase, we have RANGE_EXPR x4096 nested in a
RANGE_EXPR x4096 with 24 byte innermost constructor.  Without this patch
we emit 384MB long .rodata constant initializer that is then copied over
in the constructor, with this patch we just have 2 nested loops.

The patch extends categorize_ctor_elements so that it gives next to the
number of all scalars and number of non-zero scalars also something that
allows us to guess how many RANGE_EXPRs are in there, i.e. how large the
runtime code initializing that ctor would be.  It keeps doing the old thing
for all initializers < 64 bytes, I think for those generally using runtime
loops wouldn't be beneficial.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

If the C FE starts using RANGE_EXPRs at some point e.g. for:
struct S { int a, b, c, d; };
void foo (const struct S *);

void
bar (void)
{
  const struct S s[] = { [0 ... 9] = { 0, 1, 2, 3 } };
  foo ([0]);
}
it could benefit from this patch too.

2018-12-13  Jakub Jelinek  

PR c++/82294
PR c++/87436
* expr.h (categorize_ctor_elements): Add p_unique_nz_elts argument.
* expr.c (categorize_ctor_elements_1): Likewise.  Compute it like
p_nz_elts, except don't multiply it by mult.  Adjust recursive call.
Fix up COMPLEX_CST handling.
(categorize_ctor_elements): Add p_unique_nz_elts argument, initialize
it and pass it through to categorize_ctor_elements_1.
(mostly_zeros_p, all_zeros_p): Adjust categorize_ctor_elements callers.
* gimplify.c (gimplify_init_constructor): Likewise.  Don't force
ctor into readonly data section if num_unique_nonzero_elements is
smaller or equal to 1/8 of num_nonzero_elements and size is >= 64
bytes.

* g++.dg/tree-ssa/pr82294.C: New test.
* g++.dg/tree-ssa/pr87436.C: New test.

--- gcc/expr.h.jj   2018-11-27 16:37:40.249419631 +0100
+++ gcc/expr.h  2018-12-13 16:39:24.839050215 +0100
@@ -309,7 +309,8 @@ extern bool can_move_by_pieces (unsigned
 extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree);
 
 extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *,
- HOST_WIDE_INT *, bool *);
+ HOST_WIDE_INT *, HOST_WIDE_INT *,
+ bool *);
 
 extern void expand_operands (tree, tree, rtx, rtx*, rtx*,
 enum expand_modifier);
--- gcc/expr.c.jj   2018-11-27 16:37:40.249419631 +0100
+++ gcc/expr.c  2018-12-13 17:13:47.083545126 +0100
@@ -5945,10 +5945,11 @@ count_type_elements (const_tree type, bo
 
 static bool
 categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
+   HOST_WIDE_INT *p_unique_nz_elts,
HOST_WIDE_INT *p_init_elts, bool *p_complete)
 {
   unsigned HOST_WIDE_INT idx;
-  HOST_WIDE_INT nz_elts, init_elts, num_fields;
+  HOST_WIDE_INT nz_elts, unique_nz_elts, init_elts, num_fields;
   tree value, purpose, elt_type;
 
   /* Whether CTOR is a valid constant initializer, in accordance with what
@@ -5958,6 +5959,7 @@ categorize_ctor_elements_1 (const_tree c
   bool const_p = const_from_elts_p ? true : TREE_STATIC (ctor);
 
   nz_elts = 0;
+  unique_nz_elts = 0;
   init_elts = 0;
   num_fields = 0;
   elt_type = NULL_TREE;
@@ -5982,12 +5984,13 @@ categorize_ctor_elements_1 (const_tree c
{
case CONSTRUCTOR:
  {
-   HOST_WIDE_INT nz = 0, ic = 0;
+   HOST_WIDE_INT nz = 0, unz = 0, ic = 0;
 
-   bool const_elt_p = categorize_ctor_elements_1 (value, , ,
-  p_complete);
+   bool const_elt_p = categorize_ctor_elements_1 (value, , ,
+  , p_complete);
 
nz_elts += mult * nz;
+   unique_nz_elts += unz;
init_elts += mult * ic;
 
if (const_from_elts_p && const_p)
@@ -5999,21 +6002,31 @@ categorize_ctor_elements_1 (const_tree c
case REAL_CST:
case FIXED_CST:
  if (!initializer_zerop (value))
-   nz_elts += mult;
+   {
+ nz_elts += mult;
+ unique_nz_elts++;
+   }
  init_elts += mult;
  break;
 
case STRING_CST:
  nz_elts += mult * TREE_STRING_LENGTH (value);
+ unique_nz_elts += TREE_STRING_LENGTH (value);
  init_elts += mult * TREE_STRING_LENGTH (value);
  break;
 
case COMPLEX_CST:
  if