Re: [PATCH] PR105647 Update pr105169* so it does not fail on powerpc64le

2022-05-19 Thread Giuliano Belinassi via Gcc-patches
On Thu, 2022-05-19 at 06:34 +, Richard Biener wrote:
> On Wed, 18 May 2022, Giuliano Belinassi wrote:
> 
> > On powerpc64le, the tests related to pr105169 failed because the
> > .localentry was not on a power of two address due to the extra nop
> > instruction taking one byte and thus moving its position one byte
> > further. Generating two nops instead moves .localentry to a valid
> > position.
> 
> OK

Pushed to trunk.

> 
> > gcc/testsuite/ChangeLog
> > 2022-05-18  Giuliano Belinassi  
> > 
> > PR target/105647
> > * g++.dg/modules/pr105169_a.C: Change -fpatchable-function-
> > entry to 2.
> > * g++.dg/modules/pr105169_b.C: Likewise.
> > ---
> >  gcc/testsuite/g++.dg/modules/pr105169_a.C | 4 ++--
> >  gcc/testsuite/g++.dg/modules/pr105169_b.C | 4 ++--
> >  2 files changed, 4 insertions(+), 4 deletions(-)
> > 
> > diff --git a/gcc/testsuite/g++.dg/modules/pr105169_a.C
> > b/gcc/testsuite/g++.dg/modules/pr105169_a.C
> > index 66dc4b7901f..02660b3a0e4 100644
> > --- a/gcc/testsuite/g++.dg/modules/pr105169_a.C
> > +++ b/gcc/testsuite/g++.dg/modules/pr105169_a.C
> > @@ -1,6 +1,6 @@
> >  /* { dg-module-do link } */
> > -/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */
> > -/* { dg-additional-options "-std=c++11 -fpatchable-function-
> > entry=1 -O2" } */
> > +/* { dg-options "-std=c++11 -fpatchable-function-entry=2 -O2" } */
> > +/* { dg-additional-options "-std=c++11 -fpatchable-function-
> > entry=2 -O2" } */
> >  
> >  /* This test is in the "modules" package because it supports
> > multiple files
> > linkage.  */
> > diff --git a/gcc/testsuite/g++.dg/modules/pr105169_b.C
> > b/gcc/testsuite/g++.dg/modules/pr105169_b.C
> > index 5f8b00dfe51..7a9c5863a6a 100644
> > --- a/gcc/testsuite/g++.dg/modules/pr105169_b.C
> > +++ b/gcc/testsuite/g++.dg/modules/pr105169_b.C
> > @@ -1,6 +1,6 @@
> >  /* { dg-module-do link } */
> > -/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */
> > -/* { dg-additional-options "-std=c++11 -fpatchable-function-
> > entry=1 -O2" } */
> > +/* { dg-options "-std=c++11 -fpatchable-function-entry=2 -O2" } */
> > +/* { dg-additional-options "-std=c++11 -fpatchable-function-
> > entry=2 -O2" } */
> >  
> >  /* This test is in the "modules" package because it supports
> > multiple files
> > linkage.  */
> > 


Re: [PATCH v2] PR105169 Fix references to discarded sections

2022-05-17 Thread Giuliano Belinassi via Gcc-patches
On Mon, 2022-05-09 at 13:39 +0200, Richard Biener wrote:
> On Sat, 7 May 2022, Giuliano Belinassi wrote:
> 
> > When -fpatchable-function-entry= is enabled, certain C++ codes
> > fails to
> > link because of generated references to discarded sections in
> > __patchable_function_entry section. This commit fixes this problem
> > by
> > puting those references in a COMDAT section.
> > 
> > On the previous patch, GCC fails to compile with --enable-vtable-
> > verify
> > enabled. This version compiles fine with it.
> 
> OK for trunk.
> 

Pushed to trunk. Is it okay if I backport it to older versions as well?

Giuliano.

> Thanks,
> Richard.
> 
> > 2022-05-06  Giuliano Belinassi  
> > 
> > PR c++/105169
> > * targhooks.cc (default_print_patchable_function_entry_1):
> > Handle COMDAT case.
> > * varasm.cc (switch_to_comdat_section): New
> > (handle_vtv_comdat_section): Call switch_to_comdat_section.
> > * varasm.h: Declare switch_to_comdat_section.
> > 
> > gcc/testsuite/ChangeLog
> > 2022-05-06  Giuliano Belinassi  
> > 
> > PR c++/105169
> > * g++.dg/modules/pr105169.h: New file.
> > * g++.dg/modules/pr105169_a.C: New test.
> > * g++.dg/modules/pr105169_b.C: New file.
> > ---
> >  gcc/targhooks.cc  |  8 --
> >  gcc/testsuite/g++.dg/modules/pr105169.h   | 22 +++
> >  gcc/testsuite/g++.dg/modules/pr105169_a.C | 25 +
> >  gcc/testsuite/g++.dg/modules/pr105169_b.C | 12 +
> >  gcc/varasm.cc | 33 ++-
> > 
> >  gcc/varasm.h  |  2 ++
> >  6 files changed, 87 insertions(+), 15 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.dg/modules/pr105169.h
> >  create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_a.C
> >  create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_b.C
> > 
> > diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
> > index 399d6f874dc..b15ae19bcb6 100644
> > --- a/gcc/targhooks.cc
> > +++ b/gcc/targhooks.cc
> > @@ -2009,8 +2009,12 @@ default_print_patchable_function_entry_1
> > (FILE *file,
> >patch_area_number++;
> >ASM_GENERATE_INTERNAL_LABEL (buf, "LPFE",
> > patch_area_number);
> >  
> > -  switch_to_section (get_section
> > ("__patchable_function_entries",
> > - flags, current_function_decl));
> > +  section *sect = get_section ("__patchable_function_entries",
> > + flags, current_function_decl);
> > +  if (HAVE_COMDAT_GROUP && DECL_COMDAT_GROUP
> > (current_function_decl))
> > +   switch_to_comdat_section (sect, current_function_decl);
> > +  else
> > +   switch_to_section (sect);
> >assemble_align (POINTER_SIZE);
> >fputs (asm_op, file);
> >assemble_name_raw (file, buf);
> > diff --git a/gcc/testsuite/g++.dg/modules/pr105169.h
> > b/gcc/testsuite/g++.dg/modules/pr105169.h
> > new file mode 100644
> > index 000..a7e76270531
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/modules/pr105169.h
> > @@ -0,0 +1,22 @@
> > +class IPXAddressClass
> > +{
> > +public:
> > +IPXAddressClass(void);
> > +};
> > +
> > +class WinsockInterfaceClass
> > +{
> > +
> > +public:
> > +WinsockInterfaceClass(void);
> > +
> > +virtual void Set_Broadcast_Address(void*){};
> > +
> > +virtual int Get_Protocol(void)
> > +{
> > +return 0;
> > +};
> > +
> > +protected:
> > +};
> > +
> > diff --git a/gcc/testsuite/g++.dg/modules/pr105169_a.C
> > b/gcc/testsuite/g++.dg/modules/pr105169_a.C
> > new file mode 100644
> > index 000..66dc4b7901f
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/modules/pr105169_a.C
> > @@ -0,0 +1,25 @@
> > +/* { dg-module-do link } */
> > +/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */
> > +/* { dg-additional-options "-std=c++11 -fpatchable-function-
> > entry=1 -O2" } */
> > +
> > +/* This test is in the "modules" package because it supports
> > multiple files
> > +   linkage.  */
> > +
> > +#include "pr105169.h"
> > +
> > +WinsockInterfaceClass* PacketTransport;
> > +
> > +IPXAddressClass::IPXAddressClass(void)
> > +{
> > +}
> > +
> > +int function()
> > +{
> > +  return PacketTransport->Get_Protocol();
> > +}
> > +
> > +int main()
> > +{
> > +  IPXAddressClass ipxaddr;
> > +  return 0;
> > +}
> > diff --git a/gcc/testsuite/g++.dg/modules/pr105169_b.C
> > b/gcc/testsuite/g++.dg/modules/pr105169_b.C
> > new file mode 100644
> > index 000..5f8b00dfe51
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/modules/pr105169_b.C
> > @@ -0,0 +1,12 @@
> > +/* { dg-module-do link } */
> > +/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */
> > +/* { dg-additional-options "-std=c++11 -fpatchable-function-
> > entry=1 -O2" } */
> > +
> > +/* This test is in the "modules" package because it supports
> > multiple files
> > +   linkage.  */
> > +
> > +#include "pr105169.h"
> > +
> > +WinsockInterfaceClass::WinsockInterfaceClass(void)

[PATCH v2] PR105169 Fix references to discarded sections

2022-05-09 Thread Giuliano Belinassi via Gcc-patches
When -fpatchable-function-entry= is enabled, certain C++ codes fails to
link because of generated references to discarded sections in
__patchable_function_entry section. This commit fixes this problem by
puting those references in a COMDAT section.

On the previous patch, GCC fails to compile with --enable-vtable-verify
enabled. This version compiles fine with it.

2022-05-06  Giuliano Belinassi  

PR c++/105169
* targhooks.cc (default_print_patchable_function_entry_1): Handle 
COMDAT case.
* varasm.cc (switch_to_comdat_section): New
(handle_vtv_comdat_section): Call switch_to_comdat_section.
* varasm.h: Declare switch_to_comdat_section.

gcc/testsuite/ChangeLog
2022-05-06  Giuliano Belinassi  

PR c++/105169
* g++.dg/modules/pr105169.h: New file.
* g++.dg/modules/pr105169_a.C: New test.
* g++.dg/modules/pr105169_b.C: New file.
---
 gcc/targhooks.cc  |  8 --
 gcc/testsuite/g++.dg/modules/pr105169.h   | 22 +++
 gcc/testsuite/g++.dg/modules/pr105169_a.C | 25 +
 gcc/testsuite/g++.dg/modules/pr105169_b.C | 12 +
 gcc/varasm.cc | 33 ++-
 gcc/varasm.h  |  2 ++
 6 files changed, 87 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/modules/pr105169.h
 create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_a.C
 create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_b.C

diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
index 399d6f874dc..b15ae19bcb6 100644
--- a/gcc/targhooks.cc
+++ b/gcc/targhooks.cc
@@ -2009,8 +2009,12 @@ default_print_patchable_function_entry_1 (FILE *file,
   patch_area_number++;
   ASM_GENERATE_INTERNAL_LABEL (buf, "LPFE", patch_area_number);
 
-  switch_to_section (get_section ("__patchable_function_entries",
- flags, current_function_decl));
+  section *sect = get_section ("__patchable_function_entries",
+ flags, current_function_decl);
+  if (HAVE_COMDAT_GROUP && DECL_COMDAT_GROUP (current_function_decl))
+   switch_to_comdat_section (sect, current_function_decl);
+  else
+   switch_to_section (sect);
   assemble_align (POINTER_SIZE);
   fputs (asm_op, file);
   assemble_name_raw (file, buf);
diff --git a/gcc/testsuite/g++.dg/modules/pr105169.h 
b/gcc/testsuite/g++.dg/modules/pr105169.h
new file mode 100644
index 000..a7e76270531
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr105169.h
@@ -0,0 +1,22 @@
+class IPXAddressClass
+{
+public:
+IPXAddressClass(void);
+};
+
+class WinsockInterfaceClass
+{
+
+public:
+WinsockInterfaceClass(void);
+
+virtual void Set_Broadcast_Address(void*){};
+
+virtual int Get_Protocol(void)
+{
+return 0;
+};
+
+protected:
+};
+
diff --git a/gcc/testsuite/g++.dg/modules/pr105169_a.C 
b/gcc/testsuite/g++.dg/modules/pr105169_a.C
new file mode 100644
index 000..66dc4b7901f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr105169_a.C
@@ -0,0 +1,25 @@
+/* { dg-module-do link } */
+/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */
+/* { dg-additional-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */
+
+/* This test is in the "modules" package because it supports multiple files
+   linkage.  */
+
+#include "pr105169.h"
+
+WinsockInterfaceClass* PacketTransport;
+
+IPXAddressClass::IPXAddressClass(void)
+{
+}
+
+int function()
+{
+  return PacketTransport->Get_Protocol();
+}
+
+int main()
+{
+  IPXAddressClass ipxaddr;
+  return 0;
+}
diff --git a/gcc/testsuite/g++.dg/modules/pr105169_b.C 
b/gcc/testsuite/g++.dg/modules/pr105169_b.C
new file mode 100644
index 000..5f8b00dfe51
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr105169_b.C
@@ -0,0 +1,12 @@
+/* { dg-module-do link } */
+/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */
+/* { dg-additional-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */
+
+/* This test is in the "modules" package because it supports multiple files
+   linkage.  */
+
+#include "pr105169.h"
+
+WinsockInterfaceClass::WinsockInterfaceClass(void)
+{
+}
diff --git a/gcc/varasm.cc b/gcc/varasm.cc
index c41f17d64f7..6454f1ce519 100644
--- a/gcc/varasm.cc
+++ b/gcc/varasm.cc
@@ -8457,25 +8457,21 @@ default_asm_output_ident_directive (const char 
*ident_str)
 fprintf (asm_out_file, "%s\"%s\"\n", ident_asm_op, ident_str);
 }
 
-
-/* This function ensures that vtable_map variables are not only
-   in the comdat section, but that each variable has its own unique
-   comdat name.  Without this the variables end up in the same section
-   with a single comdat name.
-
+/* Switch to a COMDAT section with COMDAT name of decl.
+   
FIXME:  resolve_unique_section needs to deal better with
decls with both DECL_SECTION_NAME and DECL_ONE_ONLY.  Once
that is fixed, this if-else statement can be 

Re: [PATCH] PR105169 Fix references to discarded sections

2022-05-06 Thread Giuliano Belinassi via Gcc-patches
Hi,

On Tue, 2022-04-19 at 10:11 +0200, Richard Biener wrote:
> On Thu, 14 Apr 2022, Giuliano Belinassi wrote:
> 
> > When -fpatchable-function-entry= is enabled, certain C++ codes
> > fails to
> > link because of generated references to discarded sections in
> > __patchable_function_entry section. This commit fixes this problem
> > by
> > puting those references in a COMDAT section.
> > 
> > Boostrapped and regtested on x86_64 linux.
> > 
> > OK for Stage4?
> > 
> > 2022-04-13  Giuliano Belinassi  
> > 
> > PR c++/105169
> > * targhooks.cc (default_print_patchable_function_entry_1):
> > Handle COMDAT case.
> > * varasm.cc (handle_vtv_comdat_section): Rename to...
> > (switch_to_comdat_section): Generalize to also cover
> > __patchable_function_entry case.
> > (assemble_variable): Rename call from handle_vtv_comdat_section
> > to
> > switch_to_comdat_section.
> > (output_object_block): Same as above.
> > * varasm.h: Declare switch_to_comdat_section.
> > 
> > 2022-04-13  Giuliano Belinassi  
> > 
> > PR c++/105169
> > * g++.dg/modules/pr105169.h: New file.
> > * g++.dg/modules/pr105169_a.C: New test.
> > * g++.dg/modules/pr105169_b.C: New file.
> > 
> > Signed-off-by: Giuliano Belinassi 
> > ---
> >  gcc/targhooks.cc  |  8 ++--
> >  gcc/testsuite/ChangeLog   |  7 +++
> >  gcc/testsuite/g++.dg/modules/pr105169.h   | 22
> > 
> >  gcc/testsuite/g++.dg/modules/pr105169_a.C | 25
> > +++
> >  gcc/testsuite/g++.dg/modules/pr105169_b.C | 12 +++
> >  gcc/varasm.cc | 25 +
> > --
> >  gcc/varasm.h  |  1 +
> >  7 files changed, 87 insertions(+), 13 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.dg/modules/pr105169.h
> >  create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_a.C
> >  create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_b.C
> > 
> > diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
> > index e22bc66a6c8..540460e7db9 100644
> > --- a/gcc/targhooks.cc
> > +++ b/gcc/targhooks.cc
> > @@ -1995,8 +1995,12 @@ default_print_patchable_function_entry_1
> > (FILE *file,
> >patch_area_number++;
> >ASM_GENERATE_INTERNAL_LABEL (buf, "LPFE",
> > patch_area_number);
> >  
> > -  switch_to_section (get_section
> > ("__patchable_function_entries",
> > - flags, current_function_decl));
> > +  section *sect = get_section ("__patchable_function_entries",
> > + flags, current_function_decl);
> > +  if (HAVE_COMDAT_GROUP && DECL_COMDAT_GROUP
> > (current_function_decl))
> > +   switch_to_comdat_section (sect, current_function_decl);
> 
> You are passing a decl here, but ...
> 
> > +  else
> > +   switch_to_section (sect);
> >assemble_align (POINTER_SIZE);
> >fputs (asm_op, file);
> >assemble_name_raw (file, buf);
> > diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
> > index 9ab7a178bf8..524a546a832 100644
> > --- a/gcc/testsuite/ChangeLog
> > +++ b/gcc/testsuite/ChangeLog
> > @@ -1,3 +1,10 @@
> > +2022-04-13  Giuliano Belinassi  
> > +
> > +   PR c++/105169
> > +   * g++.dg/modules/pr105169.h: New file.
> > +   * g++.dg/modules/pr105169_a.C: New test.
> > +   * g++.dg/modules/pr105169_b.C: New file.
> > +
> >  2022-04-12  Antoni Boucher  
> >  
> > PR jit/104293
> > diff --git a/gcc/testsuite/g++.dg/modules/pr105169.h
> > b/gcc/testsuite/g++.dg/modules/pr105169.h
> > new file mode 100644
> > index 000..a7e76270531
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/modules/pr105169.h
> > @@ -0,0 +1,22 @@
> > +class IPXAddressClass
> > +{
> > +public:
> > +IPXAddressClass(void);
> > +};
> > +
> > +class WinsockInterfaceClass
> > +{
> > +
> > +public:
> > +WinsockInterfaceClass(void);
> > +
> > +virtual void Set_Broadcast_Address(void*){};
> > +
> > +virtual int Get_Protocol(void)
> > +{
> > +return 0;
> > +};
> > +
> > +protected:
> > +};
> > +
> > diff --git a/gcc/testsuite/g++.dg/modules/pr105169_a.C
> > b/gcc/testsuite/g++.dg/modules/pr105169_a.C
> > new file mode 100644
> > index 000..66dc4b7901f
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/modules/pr105169_a.C
> > @@ -0,0 +1,25 @@
> > +/* { dg-module-do link } */
> > +/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */
> > +/* { dg-additional-options "-std=c++11 -fpatchable-function-
> > entry=1 -O2" } */
> > +
> > +/* This test is in the "modules" package because it supports
> > multiple files
> > +   linkage.  */
> > +
> > +#include "pr105169.h"
> > +
> > +WinsockInterfaceClass* PacketTransport;
> > +
> > +IPXAddressClass::IPXAddressClass(void)
> > +{
> > +}
> > +
> > +int function()
> > +{
> > +  return PacketTransport->Get_Protocol();
> > +}
> > +
> > +int main()
> > +{
> > +  IPXAddressClass ipxaddr;
> > +  return 0;
> > +}
> > diff --git 

[PATCH] PR105169 Fix references to discarded sections

2022-04-14 Thread Giuliano Belinassi via Gcc-patches
When -fpatchable-function-entry= is enabled, certain C++ codes fails to
link because of generated references to discarded sections in
__patchable_function_entry section. This commit fixes this problem by
puting those references in a COMDAT section.

Boostrapped and regtested on x86_64 linux.

OK for Stage4?

2022-04-13  Giuliano Belinassi  

PR c++/105169
* targhooks.cc (default_print_patchable_function_entry_1): Handle 
COMDAT case.
* varasm.cc (handle_vtv_comdat_section): Rename to...
(switch_to_comdat_section): Generalize to also cover
__patchable_function_entry case.
(assemble_variable): Rename call from handle_vtv_comdat_section to
switch_to_comdat_section.
(output_object_block): Same as above.
* varasm.h: Declare switch_to_comdat_section.

2022-04-13  Giuliano Belinassi  

PR c++/105169
* g++.dg/modules/pr105169.h: New file.
* g++.dg/modules/pr105169_a.C: New test.
* g++.dg/modules/pr105169_b.C: New file.

Signed-off-by: Giuliano Belinassi 
---
 gcc/targhooks.cc  |  8 ++--
 gcc/testsuite/ChangeLog   |  7 +++
 gcc/testsuite/g++.dg/modules/pr105169.h   | 22 
 gcc/testsuite/g++.dg/modules/pr105169_a.C | 25 +++
 gcc/testsuite/g++.dg/modules/pr105169_b.C | 12 +++
 gcc/varasm.cc | 25 +--
 gcc/varasm.h  |  1 +
 7 files changed, 87 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/modules/pr105169.h
 create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_a.C
 create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_b.C

diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
index e22bc66a6c8..540460e7db9 100644
--- a/gcc/targhooks.cc
+++ b/gcc/targhooks.cc
@@ -1995,8 +1995,12 @@ default_print_patchable_function_entry_1 (FILE *file,
   patch_area_number++;
   ASM_GENERATE_INTERNAL_LABEL (buf, "LPFE", patch_area_number);
 
-  switch_to_section (get_section ("__patchable_function_entries",
- flags, current_function_decl));
+  section *sect = get_section ("__patchable_function_entries",
+ flags, current_function_decl);
+  if (HAVE_COMDAT_GROUP && DECL_COMDAT_GROUP (current_function_decl))
+   switch_to_comdat_section (sect, current_function_decl);
+  else
+   switch_to_section (sect);
   assemble_align (POINTER_SIZE);
   fputs (asm_op, file);
   assemble_name_raw (file, buf);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9ab7a178bf8..524a546a832 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2022-04-13  Giuliano Belinassi  
+
+   PR c++/105169
+   * g++.dg/modules/pr105169.h: New file.
+   * g++.dg/modules/pr105169_a.C: New test.
+   * g++.dg/modules/pr105169_b.C: New file.
+
 2022-04-12  Antoni Boucher  
 
PR jit/104293
diff --git a/gcc/testsuite/g++.dg/modules/pr105169.h 
b/gcc/testsuite/g++.dg/modules/pr105169.h
new file mode 100644
index 000..a7e76270531
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr105169.h
@@ -0,0 +1,22 @@
+class IPXAddressClass
+{
+public:
+IPXAddressClass(void);
+};
+
+class WinsockInterfaceClass
+{
+
+public:
+WinsockInterfaceClass(void);
+
+virtual void Set_Broadcast_Address(void*){};
+
+virtual int Get_Protocol(void)
+{
+return 0;
+};
+
+protected:
+};
+
diff --git a/gcc/testsuite/g++.dg/modules/pr105169_a.C 
b/gcc/testsuite/g++.dg/modules/pr105169_a.C
new file mode 100644
index 000..66dc4b7901f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr105169_a.C
@@ -0,0 +1,25 @@
+/* { dg-module-do link } */
+/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */
+/* { dg-additional-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */
+
+/* This test is in the "modules" package because it supports multiple files
+   linkage.  */
+
+#include "pr105169.h"
+
+WinsockInterfaceClass* PacketTransport;
+
+IPXAddressClass::IPXAddressClass(void)
+{
+}
+
+int function()
+{
+  return PacketTransport->Get_Protocol();
+}
+
+int main()
+{
+  IPXAddressClass ipxaddr;
+  return 0;
+}
diff --git a/gcc/testsuite/g++.dg/modules/pr105169_b.C 
b/gcc/testsuite/g++.dg/modules/pr105169_b.C
new file mode 100644
index 000..5f8b00dfe51
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr105169_b.C
@@ -0,0 +1,12 @@
+/* { dg-module-do link } */
+/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */
+/* { dg-additional-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */
+
+/* This test is in the "modules" package because it supports multiple files
+   linkage.  */
+
+#include "pr105169.h"
+
+WinsockInterfaceClass::WinsockInterfaceClass(void)
+{
+}
diff --git a/gcc/varasm.cc b/gcc/varasm.cc
index c41f17d64f7..7cd91e2bb56 100644
--- 

Re: [PATCH v3] Do not abort compilation when dump file is /dev/*

2021-11-22 Thread Giuliano Belinassi via Gcc-patches
On Sun, 2021-11-21 at 00:47 -0300, Alexandre Oliva wrote:
> Hello, Giuliano, thanks for turning my suggestion into a patch!
> 
> On Nov 19, 2021, Richard Biener  wrote:
> 
> > > +/* { dg-options "-fdump-ipa-clones -o /dev/null" } */
> 
> May I suggest actually checking that the ipa-clones dump file is
> created
> with the same name and the location it would without -o /dev/null?  I
> think just using any of the normal dump file scanners would do.
> 
> There's a larger set of tests in gcc.misc/outputs.exp, where this
> test
> would be a slightly better fit.  Copying some of the tests under
> "Driver-chosen aux outputs", and some of the link tests that use
> '$oaout', changing them to use -o /dev/null without any changes to
> the
> expected output file names, would give us more coverage of expected
> behavior than just checking that we just don't get an error.
> 

Hi, Oliva.

The patch was already applied in trunk and backported to gcc-11. The
rationale behind it was that exiting with an error because it couldn't
write dumps into /dev/* confused configure. Simply not erroring
(through not dumping in /dev/ on -o /dev/null) was enough to fix
configure, and the testcase reflects that.

As for moving that test to gcc.misc, I am not sure if that deserves
another patch just for that, and then applying it on gcc-11. I could do
that if it is really important, OTOH.

Thank you,
Giuliano.

> Thanks again,
> 



[PATCH v3] Do not abort compilation when dump file is /dev/*

2021-11-19 Thread Giuliano Belinassi via Gcc-patches
The `configure` scripts generated with autoconf often tests compiler
features by setting output to `/dev/null`, which then sets the dump
folder as being /dev/* and the compilation halts with an error because
GCC cannot create files in /dev/. This is a problem when configure is
testing for compiler features because it cannot tell if the failure was
due to unsupported features or any other problem, and disable it even
if it is working.

As an example, running configure overriding CFLAGS="-fdump-ipa-clones"
will result in several compiler-features as being disabled because of
gcc halting with an error creating files in /dev/*.

This commit fixes this issue by checking if the output file is
/dev/null or /dev/zero. In this case we use the current working
directory for dump output instead of the directory of the output
file because we cannot write to /dev/*.

gcc/ChangeLog
2021-11-16  Giuliano Belinassi  

* gcc.c (process_command): Skip dumpdir override if file is a
not_actual_file_p.
(not_actual_file_p): Return true if file is /dev/zero as well.
* doc/invoke.texi: Update -dumpdir documentation.

gcc/testsuite/ChangeLog
2021-11-16  Giuliano Belinassi  

* gcc.dg/devnull-dump.c: New.

Signed-off-by: Giuliano Belinassi 
---
 gcc/doc/invoke.texi | 3 ++-
 gcc/gcc.c   | 6 --
 gcc/testsuite/gcc.dg/devnull-dump.c | 7 +++
 3 files changed, 13 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/devnull-dump.c

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6070288856c..4a17cd4d317 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1877,7 +1877,8 @@ named @file{dir/bar.*}, combining the given @var{dumppfx} 
with the
 default @var{dumpbase} derived from the primary output name.  Dump
 outputs also take the input name suffix: @file{dir/bar.c.*}.
 
-It defaults to the location of the output file; options
+It defaults to the location of the output file, unless the output
+file is a special file like @code{/dev/null}. Options
 @option{-save-temps=cwd} and @option{-save-temps=obj} override this
 default, just like an explicit @option{-dumpdir} option.  In case
 multiple such options are given, the last one prevails:
diff --git a/gcc/gcc.c b/gcc/gcc.c
index 506c2acc282..43d7cde1be9 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -5098,7 +5098,8 @@ process_command (unsigned int decoded_options_count,
 
   bool explicit_dumpdir = dumpdir;
 
-  if (!save_temps_overrides_dumpdir && explicit_dumpdir)
+  if ((!save_temps_overrides_dumpdir && explicit_dumpdir)
+  || (output_file && not_actual_file_p (output_file)))
 {
   /* Do nothing.  */
 }
@@ -10716,7 +10717,8 @@ static bool
 not_actual_file_p (const char *name)
 {
   return (strcmp (name, "-") == 0
- || strcmp (name, HOST_BIT_BUCKET) == 0);
+ || strcmp (name, HOST_BIT_BUCKET) == 0
+ || strcmp (name, "/dev/zero") == 0);
 }
 
 /* %:dumps spec function.  Take an optional argument that overrides
diff --git a/gcc/testsuite/gcc.dg/devnull-dump.c 
b/gcc/testsuite/gcc.dg/devnull-dump.c
new file mode 100644
index 000..378e0901c28
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/devnull-dump.c
@@ -0,0 +1,7 @@
+/* { dg-do assemble } */
+/* { dg-options "-fdump-ipa-clones -o /dev/null" } */
+
+int main()
+{
+  return 0;
+}
-- 
2.33.1



[PATCH v2] Do not abort compilation when dump file is /dev/*

2021-11-18 Thread Giuliano Belinassi via Gcc-patches
The `configure` scripts generated with autoconf often tests compiler
features by setting output to `/dev/null`, which then sets the dump
folder as being /dev/* and the compilation halts with an error because
GCC cannot create files in /dev/. This is a problem when configure is
testing for compiler features because it cannot tell if the failure was
due to unsupported features or any other problem, and disable it even
if it is working.

As an example, running configure overriding CFLAGS="-fdump-ipa-clones"
will result in several compiler-features as being disabled because of
gcc halting with an error creating files in /dev/*.

This commit fixes this issue by checking if the output file is
/dev/null or /dev/zero. In this case we use the current working
directory for dump output instead of the directory of the output
file because we cannot write to /dev/*.

gcc/ChangeLog
2021-11-16  Giuliano Belinassi  

* gcc.c (process_command): Skip dumpdir override on -o /dev/null
or -o /dev/zero.

gcc/testsuite/ChangeLog
2021-11-16  Giuliano Belinassi  

* gcc.dg/devnull-dump.c: New.

Signed-off-by: Giuliano Belinassi 
---
 gcc/doc/invoke.texi |  4 +++-
 gcc/gcc.c   | 19 +++
 gcc/testsuite/gcc.dg/devnull-dump.c |  7 +++
 3 files changed, 25 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/devnull-dump.c

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6070288856c..4c056606e0c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1877,7 +1877,9 @@ named @file{dir/bar.*}, combining the given @var{dumppfx} 
with the
 default @var{dumpbase} derived from the primary output name.  Dump
 outputs also take the input name suffix: @file{dir/bar.c.*}.
 
-It defaults to the location of the output file; options
+It defaults to the location of the output file, unless @option{-o /dev/null}
+or @option{-o /dev/zero} is passed to the compiler: on those cases the 
@option{cwd}
+will be used instead. Options
 @option{-save-temps=cwd} and @option{-save-temps=obj} override this
 default, just like an explicit @option{-dumpdir} option.  In case
 multiple such options are given, the last one prevails:
diff --git a/gcc/gcc.c b/gcc/gcc.c
index 506c2acc282..0173018ac04 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -5109,10 +5109,21 @@ process_command (unsigned int decoded_options_count,
 {
   free (dumpdir);
   dumpdir = NULL;
-  temp = lbasename (output_file);
-  if (temp != output_file)
-   dumpdir = xstrndup (output_file,
-   strlen (output_file) - strlen (temp));
+  if (strcmp(output_file, "/dev/null") == 0
+ || strcmp(output_file, "/dev/zero") == 0)
+   {
+ /* If output is /dev/null or /dev/zero, we cannot set the dumpdir to
+/dev/ because no files can be created on that directory.  In this
+case, we do nothing.  */
+   }
+  else
+   {
+ /* Set dumpdir as being the same directory where the output file is.  
*/
+ temp = lbasename (output_file);
+ if (temp != output_file)
+   dumpdir = xstrndup (output_file,
+   strlen (output_file) - strlen (temp));
+   }
 }
   else if (dumpdir)
 {
diff --git a/gcc/testsuite/gcc.dg/devnull-dump.c 
b/gcc/testsuite/gcc.dg/devnull-dump.c
new file mode 100644
index 000..378e0901c28
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/devnull-dump.c
@@ -0,0 +1,7 @@
+/* { dg-do assemble } */
+/* { dg-options "-fdump-ipa-clones -o /dev/null" } */
+
+int main()
+{
+  return 0;
+}
-- 
2.33.1



Re: [PATCH] Do not abort compilation when dump file is /dev/*

2021-11-18 Thread Giuliano Belinassi via Gcc-patches
On Thu, 2021-11-18 at 10:43 +0100, Richard Biener wrote:
> On Tue, 16 Nov 2021, Giuliano Belinassi wrote:
> 
> > The `configure` scripts generated with autoconf often tests
> > compiler
> > features by setting output to `/dev/null`, which then sets the dump
> > folder as being /dev/* and the compilation halts with an error
> > because
> > GCC cannot create files in /dev/. This is a problem when configure
> > is
> > testing for compiler features because it cannot tell if the failure
> > was
> > due to unsupported features or any other problem, and disable it
> > even
> > if it is working.
> > 
> > As an example, running configure overriding CFLAGS="-fdump-ipa-
> > clones"
> > will result in several compiler-features as being disabled because
> > of
> > gcc halting with an error creating files in /dev/*.
> > 
> > This commit fixes this issue by checking if the dump folder is
> > /dev/.
> > If yes, then it just informs the user and disables dumping, but
> > does
> > not halt the compilation and the compiler retuns 0 to the shell.
> 
> I think matching '/dev/*' is broken.  Either failure to open a dump
> file should be an error or not.  Btw, some configure tests check
> for compiler output as well which still breaks in this case.

Hi,

IMHO gcc shouldn't fail when it cannot write the dump files because
compilation could still continue compiling even if the dumps cannot be
created. However, I chose to do not because the code may be issuing an
error for other reasons, and matching for /dev/ fixes the test case. 

> 
> IMHO a more reasonable thing to do would be to not treat
> -o /dev/null as a source for -dumpdir and friends.  Alex?
> You did the last re-org, where'd we put such special casing?
> 
> Of course configure checks using -fdump-... with -o /dev/null are
> a bit fragile.  They could use -fdump-...=FILENAME
> 

But that would also set all clones output as being equal to FILENAME
and don't create one dump for every compiled file, no?

Thanks,
Giuliano.

> Thnaks,
> Richard.
> 
> > gcc/ChangeLog
> > 2021-11-16  Giuliano Belinassi  
> > 
> > * dumpfile.c (dump_open): Do not halt compilation when file
> > matches /dev/*.
> > 
> > gcc/testsuite/ChangeLog
> > 2021-11-16  Giuliano Belinassi  
> > 
> > * gcc.dg/devnull-dump.c: New.
> > 
> > Signed-off-by: Giuliano Belinassi 
> > ---
> >  gcc/dumpfile.c  | 17 -
> >  gcc/testsuite/gcc.dg/devnull-dump.c |  7 +++
> >  2 files changed, 23 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/devnull-dump.c
> > 
> > diff --git a/gcc/dumpfile.c b/gcc/dumpfile.c
> > index 8169daf7f59..b1dbfb371af 100644
> > --- a/gcc/dumpfile.c
> > +++ b/gcc/dumpfile.c
> > @@ -378,7 +378,22 @@ dump_open (const char *filename, bool trunc)
> >    FILE *stream = fopen (filename, trunc ? "w" : "a");
> >  
> >    if (!stream)
> > -    error ("could not open dump file %qs: %m", filename);
> > +    {
> > +  /* Autoconf tests compiler functionalities by setting output
> > to /dev/null.
> > +    In this case, if dumps are enabled, it will try to set the
> > output
> > +    folder to /dev/*, which is of course invalid and the
> > compiler will exit
> > +    with an error, resulting in configure script reporting the
> > tested
> > +    feature as being unavailable. Here we test this case by
> > checking if the
> > +    output file prefix has /dev/ and only inform the user in
> > this case
> > +    rather than refusing to compile.  */
> > +
> > +  const char *const slash_dev = "/dev/";
> > +  if (strncmp(slash_dev, filename, strlen(slash_dev)) == 0)
> > +   inform (UNKNOWN_LOCATION,
> > +   "could not open dump file %qs: %m. Dumps are
> > disabled.", filename);
> > +  else
> > +   error ("could not open dump file %qs: %m", filename);
> > +    }
> >    return stream;
> >  }
> >  
> > diff --git a/gcc/testsuite/gcc.dg/devnull-dump.c
> > b/gcc/testsuite/gcc.dg/devnull-dump.c
> > new file mode 100644
> > index 000..378e0901c28
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/devnull-dump.c
> > @@ -0,0 +1,7 @@
> > +/* { dg-do assemble } */
> > +/* { dg-options "-fdump-ipa-clones -o /dev/null" } */
> > +
> > +int main()
> > +{
> > +  return 0;
> > +}
> > 
> 



[PATCH] Do not abort compilation when dump file is /dev/*

2021-11-16 Thread Giuliano Belinassi via Gcc-patches
The `configure` scripts generated with autoconf often tests compiler
features by setting output to `/dev/null`, which then sets the dump
folder as being /dev/* and the compilation halts with an error because
GCC cannot create files in /dev/. This is a problem when configure is
testing for compiler features because it cannot tell if the failure was
due to unsupported features or any other problem, and disable it even
if it is working.

As an example, running configure overriding CFLAGS="-fdump-ipa-clones"
will result in several compiler-features as being disabled because of
gcc halting with an error creating files in /dev/*.

This commit fixes this issue by checking if the dump folder is /dev/.
If yes, then it just informs the user and disables dumping, but does
not halt the compilation and the compiler retuns 0 to the shell.

gcc/ChangeLog
2021-11-16  Giuliano Belinassi  

* dumpfile.c (dump_open): Do not halt compilation when file
matches /dev/*.

gcc/testsuite/ChangeLog
2021-11-16  Giuliano Belinassi  

* gcc.dg/devnull-dump.c: New.

Signed-off-by: Giuliano Belinassi 
---
 gcc/dumpfile.c  | 17 -
 gcc/testsuite/gcc.dg/devnull-dump.c |  7 +++
 2 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/devnull-dump.c

diff --git a/gcc/dumpfile.c b/gcc/dumpfile.c
index 8169daf7f59..b1dbfb371af 100644
--- a/gcc/dumpfile.c
+++ b/gcc/dumpfile.c
@@ -378,7 +378,22 @@ dump_open (const char *filename, bool trunc)
   FILE *stream = fopen (filename, trunc ? "w" : "a");
 
   if (!stream)
-error ("could not open dump file %qs: %m", filename);
+{
+  /* Autoconf tests compiler functionalities by setting output to 
/dev/null.
+In this case, if dumps are enabled, it will try to set the output
+folder to /dev/*, which is of course invalid and the compiler will exit
+with an error, resulting in configure script reporting the tested
+feature as being unavailable. Here we test this case by checking if the
+output file prefix has /dev/ and only inform the user in this case
+rather than refusing to compile.  */
+
+  const char *const slash_dev = "/dev/";
+  if (strncmp(slash_dev, filename, strlen(slash_dev)) == 0)
+   inform (UNKNOWN_LOCATION,
+   "could not open dump file %qs: %m. Dumps are disabled.", 
filename);
+  else
+   error ("could not open dump file %qs: %m", filename);
+}
   return stream;
 }
 
diff --git a/gcc/testsuite/gcc.dg/devnull-dump.c 
b/gcc/testsuite/gcc.dg/devnull-dump.c
new file mode 100644
index 000..378e0901c28
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/devnull-dump.c
@@ -0,0 +1,7 @@
+/* { dg-do assemble } */
+/* { dg-options "-fdump-ipa-clones -o /dev/null" } */
+
+int main()
+{
+  return 0;
+}
-- 
2.33.1



Re: [PATCH] Fix PR 100944 - Array boundary misscalculation

2021-06-14 Thread Giuliano Belinassi via Gcc-patches
Hi,

I will give an quick answer to this mail. I will analyze carefully what
richi said when I have more time available.

On Mon, 2021-06-14 at 15:55 -0600, Martin Sebor wrote:
> On 6/13/21 11:00 AM, Giuliano Belinassi wrote:
> > This patch proposes a fix to PR 100944 by improving the array
> > boundary
> 
> Thanks for the patch!
> 
> > computation when the flexible array has no clear constructor: if no
> > constructor were found in the input code, we compute the size of
> > the
> > array as:
> > 
> >    offset(array begin) - offset(next element in RECORD_TYPE)
> > 
> > If no next element is found in the RECORD, simply fall back to:
> > 
> >    offset(array begin) - offset(RECORD_TYPE ends)
> 
> I'm not sure I understand the distinction between these expressions.
> I'd expect them to give the same result: either zero when there's no
> tail padding or negative when there is some.  Is there a case when
> they don't?

You can't compute the offset of a next field if this next field does
not exists. Therefore, if the array has no next member in the base
struct, it is an trailing array and the second logic should apply.

> 
> I would think the fix to need to change just component_ref_size()
> without having to touch other utility functions.  It seems to me
> the problem is in the logic that deals with accesses to backing
> buffers, as in:
> 
>    char a[sizeof (struct Bx)];
>    struct Bx *p = (struct Bx*) a;
>    p->a.a[i] = 1;
> 
> This should also be diagnosed.
> 
> The offset of the next member shouldn't matter, just the number of
> elements in the array's initializer if it has one or the size of
> the struct whose member is being accessed (in addition to the offset
> of the member).

I don't really understeand how this detects the testcase on bugzilla,
as the access in falling into `long x`. I will need to know where the
next member offset is to find such cases.

Giuliano. 

> 
> > 
> > We avoid doing this calculation if the RECORD_TYPE is actually an
> > UNION_TYPE, once things get very complicated in this case.
> > 
> > gcc/ChangeLog:
> > 2021-13-08  Giuliano Belinassi  
> > 
> > PR middle-end/100944
> > * tree-dfa.c (get_ref_base_and_unit_offset): Add option to
> > compute size
> > of next field.
> > * (get_ref_base_and_unit_offset_1): Same as above.
> > * tree-dfa.h (get_ref_base_and_unit_offset): Same as above.
> > (get_ref_base_and_unit_offset_1): Same as above.
> > * tree.c (least_common_record_1): New.
> > (least_common_record): New.
> > (component_ref_size): Improve array size calculation.
> > 
> > gcc/testsuite/ChangeLog:
> > 2021-13-08  Giuliano Belinassi  
> > 
> > PR middle-end/100944
> > * gcc.dg/Wzero-length-array-bounds.c: Update diagnostic.
> > * gcc.dg/Warray-bounds-71.c: New test.
> > ---
> >   gcc/testsuite/gcc.dg/Warray-bounds-71.c   |  42 +++
> >   .../gcc.dg/Wzero-length-array-bounds.c    |  18 +--
> >   gcc/tree-dfa.c    |  31 -
> >   gcc/tree-dfa.h    |   6 +-
> >   gcc/tree.c    | 115
> > ++
> >   5 files changed, 172 insertions(+), 40 deletions(-)
> >   create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-71.c
> > 
> > diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-71.c
> > b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
> > new file mode 100644
> > index 000..cc5b083bc77
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
> > @@ -0,0 +1,42 @@
> > +/* PR middle-end/100944 - missing -Warray-bounds accessing a
> > flexible array
> > +   member of a nested struct
> > +   { dg-do compile }
> > +   { dg-options "-O2 -Wall" } */
> > +
> > +struct A0
> > +{
> > +  int i, a[0];
> > +};
> > +
> > +struct B0
> > +{
> > +  struct A0 a;
> > +  long x;
> > +} b0;
> > +
> > +void f0 (int i)
> > +{
> > +  long t = b0.x;
> > +  b0.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
> > +  if (t != b0.x)    // folded to false
> > +    __builtin_abort ();
> > +}
> > +
> > +struct Ax
> > +{
> > +  int i, a[];
> > +};
> > +
> > +struct Bx
> > +{
> > +  struct Ax a;
> > +  long x;
> > +} bx;
> > +
> > +void fx (int i)
> > +{
> > +  long t = bx.x;
> > +  bx.a.a[i] = 0;    // { dg-warning "\\\[-Warray-bounds" }
> > +  if (t != bx.x)    // folded to false
> > +    __builtin_abort ();
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> > b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> > index 8e880d92dea..117b30ff294 100644
> > --- a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> > +++ b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
> > @@ -68,21 +68,21 @@ extern struct Y y;
> >   
> >   void access_to_member (int i)
> >   {
> > -  y.a[0].a[0] = 0;  // { dg-warning "\\\[-Wzero-length-bounds"
> > }
> > -  y.a[0].a[1] = 0;  // { dg-warning "\\\[-Wzero-length-bounds"
> > }
> > -  y.a[0].a[2] = 0;  // { dg-warning 

[PATCH] Fix PR 100944 - Array boundary misscalculation

2021-06-13 Thread Giuliano Belinassi via Gcc-patches
This patch proposes a fix to PR 100944 by improving the array boundary
computation when the flexible array has no clear constructor: if no
constructor were found in the input code, we compute the size of the
array as:

  offset(array begin) - offset(next element in RECORD_TYPE)

If no next element is found in the RECORD, simply fall back to:

  offset(array begin) - offset(RECORD_TYPE ends)

We avoid doing this calculation if the RECORD_TYPE is actually an
UNION_TYPE, once things get very complicated in this case.

gcc/ChangeLog:
2021-13-08  Giuliano Belinassi  

PR middle-end/100944
* tree-dfa.c (get_ref_base_and_unit_offset): Add option to compute size
of next field.
* (get_ref_base_and_unit_offset_1): Same as above.
* tree-dfa.h (get_ref_base_and_unit_offset): Same as above.
(get_ref_base_and_unit_offset_1): Same as above.
* tree.c (least_common_record_1): New.
(least_common_record): New.
(component_ref_size): Improve array size calculation.

gcc/testsuite/ChangeLog:
2021-13-08  Giuliano Belinassi  

PR middle-end/100944
* gcc.dg/Wzero-length-array-bounds.c: Update diagnostic.
* gcc.dg/Warray-bounds-71.c: New test.
---
 gcc/testsuite/gcc.dg/Warray-bounds-71.c   |  42 +++
 .../gcc.dg/Wzero-length-array-bounds.c|  18 +--
 gcc/tree-dfa.c|  31 -
 gcc/tree-dfa.h|   6 +-
 gcc/tree.c| 115 ++
 5 files changed, 172 insertions(+), 40 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-71.c

diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-71.c 
b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
new file mode 100644
index 000..cc5b083bc77
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Warray-bounds-71.c
@@ -0,0 +1,42 @@
+/* PR middle-end/100944 - missing -Warray-bounds accessing a flexible array
+   member of a nested struct
+   { dg-do compile }
+   { dg-options "-O2 -Wall" } */
+
+struct A0
+{
+  int i, a[0];
+};
+
+struct B0
+{
+  struct A0 a;
+  long x;
+} b0;
+
+void f0 (int i)
+{
+  long t = b0.x;
+  b0.a.a[i] = 0;// { dg-warning "\\\[-Warray-bounds" }
+  if (t != b0.x)// folded to false
+__builtin_abort ();
+}
+
+struct Ax
+{
+  int i, a[];
+};
+
+struct Bx
+{
+  struct Ax a;
+  long x;
+} bx;
+
+void fx (int i)
+{
+  long t = bx.x;
+  bx.a.a[i] = 0;// { dg-warning "\\\[-Warray-bounds" }
+  if (t != bx.x)// folded to false
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c 
b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
index 8e880d92dea..117b30ff294 100644
--- a/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
+++ b/gcc/testsuite/gcc.dg/Wzero-length-array-bounds.c
@@ -68,21 +68,21 @@ extern struct Y y;
 
 void access_to_member (int i)
 {
-  y.a[0].a[0] = 0;  // { dg-warning "\\\[-Wzero-length-bounds" }
-  y.a[0].a[1] = 0;  // { dg-warning "\\\[-Wzero-length-bounds" }
-  y.a[0].a[2] = 0;  // { dg-warning "\\\[-Wzero-length-bounds" }
+  y.a[0].a[0] = 0;  // { dg-warning "\\\[-Warray-bounds" }
+  y.a[0].a[1] = 0;  // { dg-warning "\\\[-Warray-bounds" }
+  y.a[0].a[2] = 0;  // { dg-warning "\\\[-Warray-bounds" }
   sink (a);
 
-  y.a[1].a[0] = 0;  // { dg-warning "\\\[-Wzero-length-bounds" }
-  y.a[1].a[1] = 0;  // { dg-warning "\\\[-Wzero-length-bounds" }
+  y.a[1].a[0] = 0;  // { dg-warning "\\\[-Warray-bounds" }
+  y.a[1].a[1] = 0;  // { dg-warning "\\\[-Warray-bounds" }
   /* Similar to the array case above, accesses to a subsequent member
  of the "parent" struct seem like a more severe problem than those
  to the next member of the same struct.  */
-  y.a[1].a[2] = 0;  // { dg-warning "\\\[-Wzero-length-bounds" }
+  y.a[1].a[2] = 0;  // { dg-warning "\\\[-Warray-bounds" }
   sink (a);
 
-  y.b.a[0] = 0; // { dg-warning "\\\[-Wzero-length-bounds" }
-  y.b.a[1] = 0; // { dg-warning "\\\[-Wzero-length-bounds" }
-  y.b.a[2] = 0; // { dg-warning "\\\[-Wzero-length-bounds" }
+  y.b.a[0] = 0; // { dg-warning "\\\[-Warray-bounds" }
+  y.b.a[1] = 0; // { dg-warning "\\\[-Warray-bounds" }
+  y.b.a[2] = 0; // { dg-warning "\\\[-Warray-bounds" }
   y.b.a[3] = 0; // { dg-warning "\\\[-Warray-bounds" }
 }
diff --git a/gcc/tree-dfa.c b/gcc/tree-dfa.c
index 1d20de0c400..dc3c15f11d5 100644
--- a/gcc/tree-dfa.c
+++ b/gcc/tree-dfa.c
@@ -772,9 +772,11 @@ get_ref_base_and_extent_hwi (tree exp, HOST_WIDE_INT 
*poffset,
 
 tree
 get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
-tree (*valueize) (tree))
+tree (*valueize) (tree),
+bool of_next_component /* = false.  */)
 {
   poly_int64 byte_offset = 0;
+  tree next_field = NULL_TREE;
 
   /* Compute cumulative byte-offset for nested component-refs and 

Re: [PATCH] Try LTO partial linking. (Was: Speed of compiling gimple-match.c)

2021-06-12 Thread Giuliano Belinassi via Gcc-patches
Hi, all.

Please CC me when I am mentioned into a mail.

On Thu, 2021-05-20 at 15:16 +0200, Richard Biener via Gcc wrote:
> On Thu, May 20, 2021 at 3:06 PM Martin Liška  wrote:
> > 
> > On 5/20/21 2:54 PM, Richard Biener wrote:
> > > So why did you go from applying this per-file to multiple files?
> > 
> > When I did per-file for {gimple,generic}-match.c I hit the
> > following issue with lto.priv symbols:
> > 
> > /usr/lib64/gcc/x86_64-suse-linux/10/../../../../x86_64-suse-
> > linux/bin/ld: error: libbackend.a(generic-match.o): multiple
> > definition of 'wi::to_wide(tree_node const*) [clone .part.0] [clone
> > .lto_priv.0]'
> > /usr/lib64/gcc/x86_64-suse-linux/10/../../../../x86_64-suse-
> > linux/bin/ld: libbackend.a(gimple-match.o): previous definition
> > here
> > /usr/lib64/gcc/x86_64-suse-linux/10/../../../../x86_64-suse-
> > linux/bin/ld: error: libbackend.a(generic-match.o): multiple
> > definition of 'TYPE_VECTOR_SUBPARTS(tree_node const*) [clone
> > .part.0] [clone .lto_priv.0]'
> > /usr/lib64/gcc/x86_64-suse-linux/10/../../../../x86_64-suse-
> > linux/bin/ld: libbackend.a(gimple-match.o): previous definition
> > here
> > /usr/lib64/gcc/x86_64-suse-linux/10/../../../../x86_64-suse-
> > linux/bin/ld: error: libbackend.a(generic-match.o): multiple
> > definition of 'vec > vl_embed>::operator[](unsigned int) [clone .part.0] [clone
> > .lto_priv.0]'
> > 
> > Any idea what was I doing wrong?
> 
> Nothing in particular I think - you're just hitting the issue that
> LTO
> produces new symbols and that those
> can obviously clash.  Giuliano hit the very same issue.  When not
> doing partial links those internal
> symbols pose no problem, but with -r -flinker-output=nolto-rel and
> re-linking the produced objects
> they obviously do.  ELF has no solution for this though, but I think
> we could strip those from the
> partially linked object - if WPA would give us a list of objects the
> link step could postprocess
> the object with objcopy or maybe a custom linker script could do the
> trick as well.

I've "fixed" this issue in my branch by mangling any promoted to public
symbol. I've also disabled the "ipa-split" pass in the paper branch
because of some created symbols which I was not able to fix in time.
Perhaps this goes away if you disable it.

Perhaps we should work on getting the autopar branch merged into trunk.
There are several issues which must be fixed and I don't think it will
be ready for this next release. The main ones that I remember from the
top of my head:

1- Fix the driver to use SPEC language for the multiple required calls
to `as`, instead of injecting code for that directly on the `void
execute()` function.

2- Merge my custom partitioner for using the default LTO partitioner.
The default LTO partitioner were hitting the assertions about COMDAT
being split into multiple partitions.

3- Fix the issue with the ipa-split pass.

Perhaps we should further explore avoiding partial linking altogether
and concat the assembler files.

Thank you,
Giuliano.

> 
> So your workaround is to only ever have a single LTO produced object
> file participating in the
> final links ;)
> 
> Richard.
> 
> > 
> > Martin




Re: [PATCH 2/6] Implement a new partitioner for parallel compilation

2020-08-27 Thread Giuliano Belinassi via Gcc-patches
Hi, Honza.

Again, thank you for your detailed review!

On 08/27, Jan Hubicka wrote:
> > When using the LTO infrastructure to compile files in parallel, we
> > can't simply use any of the LTO partitioner, once extra dependency
> > analysis is required to ensure that some nodes are correctly
> > partitioned together.
> > 
> > Therefore, here we implement a new partitioner called
> > "lto_merge_comdat_map" that does all these required analysis.
> > The partitioner works as follows:
> > 
> > 1. We create a number of disjoint sets and inserts each node into a
> >separate set, which may be merged together in the future.
> > 
> > 2. Find COMDAT groups, and mark them to be partitioned together.
> > 
> > 3. Check all nodes that would require any COMDAT group to be
> >copied to its partition (which we name "COMDAT frontier"),
> >and mark them to be partitioned together.
> >This avoids duplication of COMDAT groups and crashes on the LTO
> >partitioning infrastructure.
> 
> What kind of crash you get here?

This assertion.

  bool added = add_symbol_to_partition_1 (part, node1);
  gcc_assert (added);

It checks if the COMDAT node was not already inserted into somewhere
else partition.

> > 
> > 4. Check if the user allows the partitioner to promote non-public
> >functions or variables to global to improve parallelization
> >opportunity with a cost of modifying the output code layout.
> > 
> > 5. Balance generated partitions for performance unless not told to.
> > 
> > The choice of 1. was by design, so we could use a union-find
> > data structure, which are know for being very fast on set unite
> > operations.
> 
> In LTO partitioner the groups of objects that "must go toghether"
> are discovered when first object is placed into the partition (via
> add_to_partition) because with the LTO rules it is always possible to
> discover all members from the group starting from any random element via
> graph walking.
> 
> I guess it is same with your partitioner?  I basically wonder how much
> code can be shared and what needs to be duplicated.
> It is not very nice to have partitioning implemented twice - it is bit
> subtle problem when it comes to details so I would be happier if we
> brought in the lto/lto-partition.c to middle end and updaed/cleaned it
> up as needed.

They are almost the same thing, they group together nodes that
require to be in the same partition before deciding how to partition
them.

Things are a little different in the way that this partitioner starts
with n partitions and merge nodes together as we decide that these
nodes needs to be in the same partition.  The advantage of this is that
merging partitions is quite cheap, but the drawback is that I can't
undo partitions easily. You can also see that I only use the
add_node_to_partition after it decides what nodes should be in the
partition.

I think that if there is a way to avoid failing that assertion that I
mentioned above, we could even get rid of this step and use the
balanced_map partitioner.

> > 
> > For 3. to work properly, we also had to modify
> > lto_promote_cross_file_statics to handle this case.
> > 
> > The parameters --param=promote-statics and --param=balance-partitions
> > control 4. and 5., respectively
> > 
> > gcc/ChangeLog:
> > 2020-08-20  Giuliano Belinassi  
> > 
> > * Makefile.in: Add lto-partition.o
> > * cgraph.h (struct symtab_node::aux2): New variable.
> > * lto-partition.c: Move from gcc/lto/lto-partition.c
> > (add_symbol_to_partition_1): Only compute insn size
> > if information is available.
> > (node_cmp): Same as above.
> > (class union_find): New.
> > (ds_print_roots): New function.
> > (balance_partitions): New function.
> > (build_ltrans_partitions): New function.
> > (merge_comdat_nodes): New function.
> > (merge_static_calls): New function.
> > (merge_contained_symbols): New function.
> > (lto_merge_comdat_map): New function.
> > (privatize_symbol_name_1): Handle when WPA is not enabled.
> > (privatize_symbol_name): Same as above.
> > (lto_promote_cross_file_statics): New parameter to select when
> > to promote to global.
> > (lto_check_usage_from_other_partitions): New function.
> > * lto-partition.h: Move from gcc/lto/lto-partition.h
> > (lto_promote_cross_file_statics): Update prototype.
> > (lto_check_usage_from_other_partitions): Declare.
> > (lto_merge_comdat_map): Declare.
> > 
> > gcc/lto/ChangeLog:
> > 2020-08-20  Giuliano Belinassi  
> > 
> > * lto-partition.c: Move to gcc/lto-partition.c.
> > * lto-partition.h: Move to gcc/lto-partition.h.
> > * lto.c: Update call to lto_promote_cross_file_statics.
> > * Makefile.in: Remove lto-partition.o.
> > ---
> >  gcc/Makefile.in   |   1 +
> >  gcc/cgraph.h  |   1 +
> >  gcc/{lto => }/lto-partition.c | 463 +-
> >  gcc/{lto => }/lto-partition.h |   4 

Re: [PATCH 3/6] Implement fork-based parallelism engine

2020-08-27 Thread Giuliano Belinassi via Gcc-patches
Hi, Honza.

Thank you for your detailed review!

On 08/27, Jan Hubicka wrote:
> > diff --git a/gcc/cgraph.c b/gcc/cgraph.c
> > index c0b45795059..22405098dc5 100644
> > --- a/gcc/cgraph.c
> > +++ b/gcc/cgraph.c
> > @@ -226,6 +226,22 @@ cgraph_node::delete_function_version_by_decl (tree 
> > decl)
> >decl_node->remove ();
> >  }
> >  
> > +/* Release function dominator info if present.  */
> > +
> > +void
> > +cgraph_node::maybe_release_dominators (void)
> > +{
> > +  struct function *fun = DECL_STRUCT_FUNCTION (decl);
> > +
> > +  if (fun && fun->cfg)
> > +{
> > +  if (dom_info_available_p (fun, CDI_DOMINATORS))
> > +   free_dominance_info (fun, CDI_DOMINATORS);
> > +  if (dom_info_available_p (fun, CDI_POST_DOMINATORS))
> > +   free_dominance_info (fun, CDI_POST_DOMINATORS);
> > +}
> > +}
> 
> I am not sure if that needs to be member function, but if so we want to
> merge it with other places in cgraph.c and cgraphunit.c where dominators
> are freed.  I do not think you need to check avalability.

This is necessary to remove some nodes from the callgraph.  For some
reason, if I node->remove () and it still have the dominance info
available, it will fail some assertions on the compiler.

However, with regard to code layout, this can be moved to lto-cgraph.c,
as it is only used there.

> > +
> >  /* Record that DECL1 and DECL2 are semantically identical function
> > versions.  */
> >  void
> > diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> > index b4a7871bd3d..72ac19f9672 100644
> > --- a/gcc/cgraph.h
> > +++ b/gcc/cgraph.h
> > @@ -463,6 +463,15 @@ public:
> >   Return NULL if there's no such node.  */
> >static symtab_node *get_for_asmname (const_tree asmname);
> >  
> > +  /* Get symtab node by order.  */
> > +  static symtab_node *find_by_order (int order);
> 
> This is quadratic and moreover seems unused. Why do you add it?

I added this for debugging, since I used this a lot inside GDB.
Sure, I can remove this without any problems, or print a warning
for the developer to avoid calling this in production code.

> > +
> > +  /* Get symtab_node by its name.  */
> > +  static symtab_node *find_by_name (const char *);
> 
> Similarly here, note that names are not really very meaningful as lookup
> things, since they get duplicated.
> > +
> > +  /* Get symtab_node by its ASM name.  */
> > +  static symtab_node *find_by_asm_name (const char *);
> 
> For this we have get_for_asmname (which also populates asm name hash as
> needed and is not quadratic)

Cool. I will surely remove this then :)

> > diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
> > index d10d635e942..73e4bed3b61 100644
> > --- a/gcc/cgraphunit.c
> > +++ b/gcc/cgraphunit.c
> > @@ -2258,6 +2258,11 @@ cgraph_node::expand (void)
> >  {
> >location_t saved_loc;
> >  
> > +  /* FIXME: Find out why body-removed nodes are marked for output.  */
> > +  if (body_removed)
> > +return;
> 
> Indeed, we should know :)

Looks like this was due an early problem. I removed this and bootstrap
is working OK.

> > +
> > +
> >/* We ought to not compile any inline clones.  */
> >gcc_assert (!inlined_to);
> >  
> > @@ -2658,6 +2663,7 @@ ipa_passes (void)
> >  
> >execute_ipa_summary_passes
> > ((ipa_opt_pass_d *) passes->all_regular_ipa_passes);
> > +
> This seems accidental.

Yes.

> >  }
> >  
> >/* Some targets need to handle LTO assembler output specially.  */
> > @@ -2687,10 +2693,17 @@ ipa_passes (void)
> >if (flag_generate_lto || flag_generate_offload)
> >  targetm.asm_out.lto_end ();
> >  
> > -  if (!flag_ltrans
> > +  if (split_outputs)
> > +flag_ltrans = true;
> > +
> > +  if ((!flag_ltrans || split_outputs)
> >&& ((in_lto_p && flag_incremental_link != INCREMENTAL_LINK_LTO)
> >   || !flag_lto || flag_fat_lto_objects))
> >  execute_ipa_pass_list (passes->all_regular_ipa_passes);
> > +
> > +  if (split_outputs)
> > +flag_ltrans = false;
> > +
> >invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_END, NULL);
> >  
> >bitmap_obstack_release (NULL);
> > @@ -2742,6 +2755,185 @@ symbol_table::output_weakrefs (void)
> >}
> >  }
> >  
> > +static bool is_number (const char *str)
> > +{
> > +  while (*str != '\0')
> > +switch (*str++)
> > +  {
> > +   case '0':
> > +   case '1':
> > +   case '2':
> > +   case '3':
> > +   case '4':
> > +   case '5':
> > +   case '6':
> > +   case '7':
> > +   case '8':
> > +   case '9':
> > + continue;
> > +   default:
> > + return false;
> > +  }
> > +
> > +  return true;
> > +}
> 
> This looks odd, we have other places where we parse number from command
> line :)

isdigit () is poisoned in GCC. But I guess I should look how -flto=
does this.

> > +
> > +/* If forked, which child am I?  */
> > +
> > +static int childno = -1;
> > +
> > +static bool
> > +maybe_compile_in_parallel (void)
> > +{
> > +  struct symtab_node *node;
> > +  int partitions, i, j;
> > +  int *pids;
> > +
> > +  bool promote_statics = 

Re: [PATCH 0/6] Parallelize Intra-Procedural Optimizations using the LTO Engine.

2020-08-24 Thread Giuliano Belinassi via Gcc-patches
Ho, Josh.

On 08/24, Josh Triplett wrote:
> On Sat, Aug 22, 2020 at 06:04:48PM -0300, Giuliano Belinassi wrote:
> > Hi, Josh
> > 
> > On 08/21, Josh Triplett wrote:
> > > On Thu, Aug 20, 2020 at 07:00:13PM -0300, Giuliano Belinassi wrote:
> > > > This patch series add a new flag "-fparallel-jobs=" to control if the
> > > > compiler should try to compile the current file in parallel.
> > > [...]
> > > > Bootstrapped and Regtested on Linux x86_64.
> > > > 
> > > > Giuliano Belinassi (6):
> > > >   Modify gcc driver for parallel compilation
> > > >   Implement a new partitioner for parallel compilation
> > > >   Implement fork-based parallelism engine
> > > >   Add `+' for Jobserver Integration
> > > >   Add invoke documentation
> > > >   New tests for parallel compilation feature
> > > 
> > > Very nice!
> > 
> > Thank you for your interest in this :)
> > 
> > > 
> > > I'm interested in testing this on a highly parallel system. What
> > > baseline do these patches apply to?  They don't seem to apply to GCC
> > > trunk.
> > 
> > Hummm, this was supposed to work on trunk out of the box. However,
> > there is a high probability that I messed up something while rebasing.
> > I will post a version 2 of it when I get more comments and when I fix
> > the Makefile issue that Joseph pointed out in other e-mail.
> > 
> > If you want to test it on a high parallel system, I think it will be
> > cool to see how it behaves also when --param=promote-statics=1, as it
> > increases parallelism opportunity. :)
> 
> I plan to try several variations, including that.
> 
> I'd like to see how it affects the performance of Linux kernel builds.

Well, I expect little to no impact on that.  I ran an experiment back
on 2018 looking for parallelism bottleneck in Kernel, and what I found
was that the developers did a good job on balancing the file sizes.

This was run on a machine with 4x AMD Opteron CPUs, (64 cores in total)
https://www.ime.usp.br/~belinass/64cores-kernel-experiment.svg

As you can see from this image, the jobs ends almost at the same time.

> 
> > > Also, I tried to bootstrap the current tip of the devel/autopar_devel
> > > branch, but ended up with compiler segfaults that all look like this:
> > > ../../gcc/zlib/compress.c:86:1: internal compiler error: Segmentation 
> > > fault
> > >86 | }
> > >   | ^
> > 
> > Well, there was once a bug in this branch when compiling with -flto that
> > caused the assembler output file not to be properly initialized early
> > enough, resulting in LTO LGEN stage writing into a invalid FILE pointer.
> > I fixed this during rebasing but I forgot to push to the autopar_devel
> > branch. In any case, I just pushed the recent changes to autopar_devel
> > which fix this issue.
> 
> That might explain the problem; I had tried to build gcc with the
> bootstrap-lto configuration.
> 
> > In any case, -fparallel-jobs= should NOT be used together with -flto.
> > Although I used part of the LTO engine for development of this feature,
> > they are meant for distinct things. I guess I should give a warning
> > about that in next version :)
> 
> Interesting. Is that something that could change in the future? I'd like
> to be able to get some parallelism when creating the object files, and
> then more parallelism when doing the final LTO link.

Well, if by "final LTO link" you mean LTO's Whole Program Analysis,
that is a quite challenging task to parallelize :)

As for the "creating object files", you mean the LTO LGEN, I think
it is not possible for now because -- as far as I understeand --, LTO
object files are just containers for a intermediate language and
does not support partial linking.

However, I would not expect LGEN bottlenecking compilation of any
project. Most compilation time is spent in optimization, that is
IPA and Intra-Procedural.

> 
> > Also, I just tested bootstrap with
> > 
> > ../gcc/configure --disable-multilib --enable-languages=c,c++
> >
> > on x86_64 linux and it is working.
> 
> I'd used --enable-multilib, and --enable-languages=c,c++,lto . Would
> that be expected to work?

Yes. If it doesn't, that is a bug :)

> 
> Thanks,
> Josh


Re: [PATCH 1/6] Modify gcc driver for parallel compilation

2020-08-24 Thread Giuliano Belinassi via Gcc-patches
Hi, Richi.

On 08/24, Richard Biener wrote:
> On Fri, Aug 21, 2020 at 12:00 AM Giuliano Belinassi
>  wrote:
> >
> > Update the driver for parallel compilation. This process work as
> > follows:
> >
> > When calling gcc, the driver will check if the flag
> > "-fparallel-jobs" was provided by the user. If yes, then we will
> > check what is the desired output, and if it can be parallelized.
> > There are the following cases, which is described:
> >
> > 1. -S or -E was provided: We can't run in parallel, as the output
> >can not be easily merged together into one file.
> >
> > 2. -c was provided: When cc1* forks into multiple processes, it
> >must tell the driver where it stored its generated assembler files.
> >Therefore we pass a hidden "-fsplit-outputs=filename" to the compiler,
> >and we check if "filename" was created by it. If yes, we open it,
> >call assembler for each generated asm file
> >(this file must not be empty), and link them together with
> >partial linking to a single .o file. This process is done for each
> >object file in the argument list.
> >
> > 3. -c was not provided, and the final product will be an binary: Here
> >we proceed exactly as 2., but we avoid doing the partial
> >linking, feeding the generated object files directly into the final link.
> >
> > For that to work, we had to heavily modify how the "execute" function
> > works, extracting common code which is used multiple times, and
> > also detecting when the command is a call to a compiler or an
> > assembler, as can be seen in append_split_outputs.
> >
> > Finally, we added some tests which reflects all cases found when
> > bootstrapping the compiler, so development of further features to the
> > driver get faster for now on.
> 
> Few comments inline, Joseph may want to comment on the overall
> structure as driver maintainer (CCed).
> 
> I know I asked for the changes on the branch to be squashed but
> the diff below is quite unreadable with the ChangeLog not helping
> the overall refactoring much.  Is it possible to do some of the
> factoring/refactoring without any functionality change to make the
> actual diff easier to follow?

Well, the refactoring is necessary, otherwise I would need to copy and
paste a really huge amount of code.

What I can do (and sounds reasonable to me) is to break this patch into
two parts; one with just refactoring changes, and the other adding the
parallelism engine.

> 
> Thanks,
> Richard.
> 
> > gcc/ChangeLog
> > 2020-08-20  Giuliano Belinassi  
> >
> > * common.opt (fsplit-outputs): New flag.
> > (fparallel-jobs): New flag.
> > * gcc.c (extra_arg_storer): New class.
> > (have_S): New variable.
> > (struct command): Move from execute.
> > (is_compiler): New function.
> > (is_assembler): New function.
> > (get_number_of_args): New function.
> > (get_file_by_lines): New function.
> > (identify_asm_file): New function.
> > (struct infile): New attribute temp_additional_asm.
> > (current_infile): New variable.
> > (get_path_to_ld): New function.
> > (has_hidden_E): New function.
> > (sort_asm_files): New function.
> > (append_split_outputs): New function.
> > (print_command): New function.
> > (print_commands): New function.
> > (print_argbuf): New function.
> > (handle_verbose): Extracted from execute.
> > (append_valgrind): Same as above.
> > (async_launch_commands): Same as above.
> > (await_commands_to_finish): Same as above.
> > (split_commands): Same as above.
> > (parse_argbuf): Same as above.
> > (execute): Refator.
> > (fsplit_arg): New function.
> > (alloc_infile): Initialize infiles with 0.
> > (process_command): Remember when -S was passed.
> > (do_spec_on_infiles): Remember current infile being processed.
> > (maybe_run_linker): Replace object files when -o is a executable.
> > (finalize): Deinitialize temp_object_files.
> >
> > gcc/testsuite/ChangeLog:
> > 20-08-2020  Giuliano Belinassi  
> >
> > * driver/driver.exp: New test.
> > * driver/a.c: New file.
> > * driver/b.c: New file.
> > * driver/empty.c: New file.
> > * driver/foo.c: New file.
> > ---
> >  gcc/common.opt  |4 +
> >  gcc/gcc.c   | 1219 ---
> >  gcc/testsuite/driver/a.c|6 +
> >  gcc/testsuite/driver/b.c|6 +
> >  gcc/testsuite/driver/driver.exp |   80 ++
> >  gcc/testsuite/driver/empty.c|0
> >  gcc/testsuite/driver/foo.c  |7 +
> >  7 files changed, 1049 insertions(+), 273 deletions(-)
> >  create mode 100644 gcc/testsuite/driver/a.c
> >  create mode 100644 gcc/testsuite/driver/b.c
> >  create mode 100644 gcc/testsuite/driver/driver.exp
> >  create mode 100644 

Re: [PATCH 0/6] Parallelize Intra-Procedural Optimizations using the LTO Engine.

2020-08-24 Thread Giuliano Belinassi via Gcc-patches
Hi, Richi.

On 08/24, Richard Biener wrote:
> On Fri, Aug 21, 2020 at 12:00 AM Giuliano Belinassi
>  wrote:
> >
> > This patch series add a new flag "-fparallel-jobs=" to control if the
> > compiler should try to compile the current file in parallel.
> >
> > There are three modes which is supported by now:
> >
> > 1. -fparallel-jobs=: Try to compile the file using a maximum of N
> > jobs.
> >
> > 2. -fparallel-jobs=jobserver: Check if there is a running GNU Make
> > Jobserver. If positive, communicate with it in order to launch jobs,
> > but alert the user if the jobserver was not found, since it requires
> > modifications in the project Makefile.
> >
> > 3. -fparallel-jobs=auto: Same as 2., but quietly fall back to a maximum
> > of 2 jobs if the jobserver was not found.
> >
> > The parallelization works by using a modified LTO engine, as no IR is
> > dumped into the disk, and a new partitioner is employed to find
> > symbols which must be partitioned together.
> >
> > In order to implement the parallelism feature, we:
> >
> > 1. The driver will pass a hidden -fsplit-outputs= to cc1*.
> >
> > 2. After IPA, cc1* will search for symbols in which must be partitioned
> > together.  If the user allows GCC to automatically promote symbols to
> > globals through "--param=promote-statics=1" for a better parallel
> > compilation performance, it will also be done.  However, if it decides
> > that partitioning is a bad idea, it will continue with a default serial
> > compilation, and the additional  will not be created.  It will
> > avoid compiling in parallel if and only if:
> >
> >   * File size exceeds the minimum file size specified by LTO default
> >   --param=lto-min-partition.
> 
> less than the minimum size I suppose.

True. :)

> 
> >   * The partitioner is unable to find any point of partitioning in the
> >   file.
> 
> It might make sense to increase the minimum partition size and also
> check the partitioning result against unreasonable bias (one very
> large and one very small partition).

I am working on this right now :)

> 
> > 3. cc1* will fork itself; one fork for each partition. Each child
> > process will apply its partition mask generated by the partitioner
> > and write a new assembler name file to  pointed by the driver.
> 
> For the first partition there's no fork (but the main process is used) and
> the main output file will be used, correct?

No. Forking is only disabled only in fallback mode in this version, but
this is certainly possible to implement.

> 
> > 4. The driver will open each file and partially link them together into
> > a single .o file, if -c was requested, else into a binary.  -S and -E
> > is unsupported for now and probably will remain so.
> 
> That also applies to -save-temps mode I assume which makes
> debugging issues a bit tricky and involves manual invocation
> of the cc1 command to have the file with the output filenames preserved.

This is currently unsupported, but it seems to be a interesting feature
to have.  I could use the input file name as a baseline for the
temporary files and dump them in the current working directory instead
of asking for a temporary file to libiberty.

> 
> >
> > Speedups ranged from 0.95x to 1.9x on a Quad-Core Intel Core-i7 8565U
> > when testing with two files in GCC, as stated in the following table.
> > The test was the result of a single execution with a previous warm up
> > execution. The compiled GCC had checking enabled, and therefore release
> > version might have better timings in both sequential and parallel, but the
> > speedup may remain the same.
> >
> > ||| Without Static | With Static |   Max   |
> > | File   | Sequential |Promotion   |  Promotion  | Speedup |
> > ||||---|
> > | gimple-match.c | 60s|   63s  | 34s |   1.7x  |
> > | insn-emit.c| 37s|   19s  | 20s |   1.9x  |
> >
> > Notice that we have a slowdown in some cases when it is enabled, that
> > is why the parallelism feature is enabled with a flag for now.
> 
> One reason why promote-statics is not enabled by default is that
> it creates new hidden symbols (LTO does so as well) which might
> be undesirable.  If deemed OK in general we could enable it by
> default.  Note that originally I wanted to have -fparallel-jobs=auto
> be enabled by default which should not end up with visible changes
> like this(?)

It not only creates hidden symbols, but it also changes the original
symbol name to avoid clashses with other object files. It could be
very nice to avoid doing this at all.

There was once an idea (I don't remember if from Richi or Honza) to
avoid using partial linking, but instead concatenate the generated
assembler files into a single assembler file and assemble it.  This
would remove the requirement of symbol promotion, as they would be
in the same assembler file, but I am not sure if this would work 

Re: [PATCH 0/6] Parallelize Intra-Procedural Optimizations using the LTO Engine.

2020-08-22 Thread Giuliano Belinassi via Gcc-patches
Hi, Josh

On 08/21, Josh Triplett wrote:
> On Thu, Aug 20, 2020 at 07:00:13PM -0300, Giuliano Belinassi wrote:
> > This patch series add a new flag "-fparallel-jobs=" to control if the
> > compiler should try to compile the current file in parallel.
> [...]
> > Bootstrapped and Regtested on Linux x86_64.
> > 
> > Giuliano Belinassi (6):
> >   Modify gcc driver for parallel compilation
> >   Implement a new partitioner for parallel compilation
> >   Implement fork-based parallelism engine
> >   Add `+' for Jobserver Integration
> >   Add invoke documentation
> >   New tests for parallel compilation feature
> 
> Very nice!

Thank you for your interest in this :)

> 
> I'm interested in testing this on a highly parallel system. What
> baseline do these patches apply to?  They don't seem to apply to GCC
> trunk.

Hummm, this was supposed to work on trunk out of the box. However,
there is a high probability that I messed up something while rebasing.
I will post a version 2 of it when I get more comments and when I fix
the Makefile issue that Joseph pointed out in other e-mail.

If you want to test it on a high parallel system, I think it will be
cool to see how it behaves also when --param=promote-statics=1, as it
increases parallelism opportunity. :)

> 
> Also, I tried to bootstrap the current tip of the devel/autopar_devel
> branch, but ended up with compiler segfaults that all look like this:
> ../../gcc/zlib/compress.c:86:1: internal compiler error: Segmentation fault
>86 | }
>   | ^

Well, there was once a bug in this branch when compiling with -flto that
caused the assembler output file not to be properly initialized early
enough, resulting in LTO LGEN stage writing into a invalid FILE pointer.
I fixed this during rebasing but I forgot to push to the autopar_devel
branch. In any case, I just pushed the recent changes to autopar_devel
which fix this issue.

In any case, -fparallel-jobs= should NOT be used together with -flto.
Although I used part of the LTO engine for development of this feature,
they are meant for distinct things. I guess I should give a warning
about that in next version :)

Also, I just tested bootstrap with

../gcc/configure --disable-multilib --enable-languages=c,c++

on x86_64 linux and it is working.

Thank you,
Giuliano.


[PATCH 4/6] Add `+' for Jobserver Integration

2020-08-20 Thread Giuliano Belinassi via Gcc-patches
GNU Make expects that a `+' token is present on the beggining of the
rule command if it wants to interact with the Jobserver [1]. This commit
add such token for the Makefiles in GCC.

[1] 
https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver

gcc/ChangeLog:
intl/ChageLog:
libbacktrace/ChangeLog:
libcpp/ChangeLog:
libdecnumber/ChangeLog:
libiberty/ChangeLog:
zlib/ChangeLog:

2020-08-20  Giuliano Belinassi  

* Makefile.in: Use `+' on rule calling GCC.
---
 gcc/Makefile.in  |   4 +-
 intl/Makefile.in |   2 +-
 libbacktrace/Makefile.in |   2 +-
 libcpp/Makefile.in   |   2 +-
 libdecnumber/Makefile.in |   2 +-
 libiberty/Makefile.in| 212 +++
 zlib/Makefile.in |  64 ++--
 7 files changed, 144 insertions(+), 144 deletions(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index c00617cfc1a..2e7aa4b6d30 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2703,14 +2703,14 @@ generated_files = config.h tm.h $(TM_P_H) $(TM_D_H) 
$(TM_H) multilib.h \
 # How to compile object files to run on the build machine.
 
 build/%.o :  # dependencies provided by explicit rule later
-   $(COMPILER_FOR_BUILD) -c $(BUILD_COMPILERFLAGS) $(BUILD_CPPFLAGS) \
+   +$(COMPILER_FOR_BUILD) -c $(BUILD_COMPILERFLAGS) $(BUILD_CPPFLAGS) \
-o $@ $<
 
 ## build/version.o is compiled by the $(COMPILER_FOR_BUILD) but needs
 ## several C macro definitions, just like version.o
 build/version.o:  version.c version.h \
   $(REVISION) $(DATESTAMP) $(BASEVER) $(DEVPHASE)
-   $(COMPILER_FOR_BUILD) -c $(BUILD_COMPILERFLAGS) $(BUILD_CPPFLAGS) \
+   +$(COMPILER_FOR_BUILD) -c $(BUILD_COMPILERFLAGS) $(BUILD_CPPFLAGS) \
-DBASEVER=$(BASEVER_s) -DDATESTAMP=$(DATESTAMP_s) \
-DREVISION=$(REVISION_s) \
-DDEVPHASE=$(DEVPHASE_s) -DPKGVERSION=$(PKGVERSION_s) \
diff --git a/intl/Makefile.in b/intl/Makefile.in
index 356c8ab9b65..de95846cc1d 100644
--- a/intl/Makefile.in
+++ b/intl/Makefile.in
@@ -131,7 +131,7 @@ libintl.h: $(srcdir)/libgnuintl.h
 .SUFFIXES: .c .y .o
 
 .c.o:
-   $(COMPILE) $<
+   +$(COMPILE) $<
 
 .y.c:
 @BISON3_YES@   echo '#define USE_BISON3' > $(patsubst %.c,%-config.h,$@)
diff --git a/libbacktrace/Makefile.in b/libbacktrace/Makefile.in
index b244ca10a4a..08212bb8ac0 100644
--- a/libbacktrace/Makefile.in
+++ b/libbacktrace/Makefile.in
@@ -1326,7 +1326,7 @@ distclean-compile:
-rm -f *.tab.c
 
 .c.o:
-   $(AM_V_CC)$(COMPILE) -c -o $@ $<
+   +$(AM_V_CC)$(COMPILE) -c -o $@ $<
 
 .c.obj:
$(AM_V_CC)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
diff --git a/libcpp/Makefile.in b/libcpp/Makefile.in
index 5fbba9b9c76..eaffeedf31a 100644
--- a/libcpp/Makefile.in
+++ b/libcpp/Makefile.in
@@ -223,7 +223,7 @@ endif
 # Implicit rules and I18N
 
 .c.o:
-   $(COMPILE) $<
+   +$(COMPILE) $<
$(POSTCOMPILE)
 
 # N.B. We do not attempt to copy these into $(srcdir).
diff --git a/libdecnumber/Makefile.in b/libdecnumber/Makefile.in
index 9da028d7f2f..2192e7434ad 100644
--- a/libdecnumber/Makefile.in
+++ b/libdecnumber/Makefile.in
@@ -191,7 +191,7 @@ COMPILE = source='$<' object='$@' libtool=no $(CC) $(DEFS) 
$(INCLUDES) $(CPPFLAG
 # Implicit rules
 
 .c.$(objext):
-   $(COMPILE) $<
+   +$(COMPILE) $<
 
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.
diff --git a/libiberty/Makefile.in b/libiberty/Makefile.in
index 895f701bcd0..fe85363ba2a 100644
--- a/libiberty/Makefile.in
+++ b/libiberty/Makefile.in
@@ -419,7 +419,7 @@ etags tags TAGS: etags-subdir
 demangle: $(ALL) $(srcdir)/cp-demangle.c
@echo "The standalone demangler, now named c++filt, is now"
@echo "a part of binutils."
-   $(CC) @DEFS@ $(CFLAGS) $(CPPFLAGS) -I. -I$(INCDIR) $(HDEFINES) \
+   +$(CC) @DEFS@ $(CFLAGS) $(CPPFLAGS) -I. -I$(INCDIR) $(HDEFINES) \
  $(srcdir)/cp-demangle.c -DSTANDALONE_DEMANGLER $(TARGETLIB) -o $@
 
 ls:
@@ -739,7 +739,7 @@ $(CONFIGURED_OFILES): stamp-picdir stamp-noasandir
if [ x"$(NOASANFLAG)" != x ]; then \
  $(COMPILE.c) $(PICFLAG) $(NOASANFLAG) $(srcdir)/dyn-string.c -o 
noasan/$@; \
else true; fi
-   $(COMPILE.c) $(srcdir)/dyn-string.c $(OUTPUT_OPTION)
+   +$(COMPILE.c) $(srcdir)/dyn-string.c $(OUTPUT_OPTION)
 
 ./fdmatch.$(objext): $(srcdir)/fdmatch.c config.h $(INCDIR)/ansidecl.h \
$(INCDIR)/libiberty.h
@@ -749,7 +749,7 @@ $(CONFIGURED_OFILES): stamp-picdir stamp-noasandir
if [ x"$(NOASANFLAG)" != x ]; then \
  $(COMPILE.c) $(PICFLAG) $(NOASANFLAG) $(srcdir)/fdmatch.c -o 
noasan/$@; \
else true; fi
-   $(COMPILE.c) $(srcdir)/fdmatch.c $(OUTPUT_OPTION)
+   +$(COMPILE.c) $(srcdir)/fdmatch.c $(OUTPUT_OPTION)
 
 ./ffs.$(objext): $(srcdir)/ffs.c
if [ x"$(PICFLAG)" != x ]; then \
@@ -758,7 +758,7 @@ $(CONFIGURED_OFILES): stamp-picdir 

[PATCH 2/6] Implement a new partitioner for parallel compilation

2020-08-20 Thread Giuliano Belinassi via Gcc-patches
When using the LTO infrastructure to compile files in parallel, we
can't simply use any of the LTO partitioner, once extra dependency
analysis is required to ensure that some nodes are correctly
partitioned together.

Therefore, here we implement a new partitioner called
"lto_merge_comdat_map" that does all these required analysis.
The partitioner works as follows:

1. We create a number of disjoint sets and inserts each node into a
   separate set, which may be merged together in the future.

2. Find COMDAT groups, and mark them to be partitioned together.

3. Check all nodes that would require any COMDAT group to be
   copied to its partition (which we name "COMDAT frontier"),
   and mark them to be partitioned together.
   This avoids duplication of COMDAT groups and crashes on the LTO
   partitioning infrastructure.

4. Check if the user allows the partitioner to promote non-public
   functions or variables to global to improve parallelization
   opportunity with a cost of modifying the output code layout.

5. Balance generated partitions for performance unless not told to.

The choice of 1. was by design, so we could use a union-find
data structure, which are know for being very fast on set unite
operations.

For 3. to work properly, we also had to modify
lto_promote_cross_file_statics to handle this case.

The parameters --param=promote-statics and --param=balance-partitions
control 4. and 5., respectively

gcc/ChangeLog:
2020-08-20  Giuliano Belinassi  

* Makefile.in: Add lto-partition.o
* cgraph.h (struct symtab_node::aux2): New variable.
* lto-partition.c: Move from gcc/lto/lto-partition.c
(add_symbol_to_partition_1): Only compute insn size
if information is available.
(node_cmp): Same as above.
(class union_find): New.
(ds_print_roots): New function.
(balance_partitions): New function.
(build_ltrans_partitions): New function.
(merge_comdat_nodes): New function.
(merge_static_calls): New function.
(merge_contained_symbols): New function.
(lto_merge_comdat_map): New function.
(privatize_symbol_name_1): Handle when WPA is not enabled.
(privatize_symbol_name): Same as above.
(lto_promote_cross_file_statics): New parameter to select when
to promote to global.
(lto_check_usage_from_other_partitions): New function.
* lto-partition.h: Move from gcc/lto/lto-partition.h
(lto_promote_cross_file_statics): Update prototype.
(lto_check_usage_from_other_partitions): Declare.
(lto_merge_comdat_map): Declare.

gcc/lto/ChangeLog:
2020-08-20  Giuliano Belinassi  

* lto-partition.c: Move to gcc/lto-partition.c.
* lto-partition.h: Move to gcc/lto-partition.h.
* lto.c: Update call to lto_promote_cross_file_statics.
* Makefile.in: Remove lto-partition.o.
---
 gcc/Makefile.in   |   1 +
 gcc/cgraph.h  |   1 +
 gcc/{lto => }/lto-partition.c | 463 +-
 gcc/{lto => }/lto-partition.h |   4 +-
 gcc/lto/Make-lang.in  |   4 +-
 gcc/lto/lto.c |   2 +-
 gcc/params.opt|   8 +
 gcc/tree.c|  23 +-
 8 files changed, 489 insertions(+), 17 deletions(-)
 rename gcc/{lto => }/lto-partition.c (78%)
 rename gcc/{lto => }/lto-partition.h (89%)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 79e854aa938..be42b15f4ff 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1457,6 +1457,7 @@ OBJS = \
lra-spills.o \
lto-cgraph.o \
lto-streamer.o \
+   lto-partition.o \
lto-streamer-in.o \
lto-streamer-out.o \
lto-section-in.o \
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 0211f08964f..b4a7871bd3d 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -615,6 +615,7 @@ public:
   struct lto_file_decl_data * lto_file_data;
 
   PTR GTY ((skip)) aux;
+  int aux2;
 
   /* Comdat group the symbol is in.  Can be private if GGC allowed that.  */
   tree x_comdat_group;
diff --git a/gcc/lto/lto-partition.c b/gcc/lto-partition.c
similarity index 78%
rename from gcc/lto/lto-partition.c
rename to gcc/lto-partition.c
index 8e0488ab13e..ca962e69b5d 100644
--- a/gcc/lto/lto-partition.c
+++ b/gcc/lto-partition.c
@@ -170,7 +170,11 @@ add_symbol_to_partition_1 (ltrans_partition part, 
symtab_node *node)
 {
   struct cgraph_edge *e;
   if (!node->alias && c == SYMBOL_PARTITION)
-   part->insns += ipa_size_summaries->get (cnode)->size;
+   {
+ /* FIXME: Find out why this is being returned NULL in some cases.  */
+ if (ipa_size_summaries->get (cnode))
+   part->insns += ipa_size_summaries->get (cnode)->size;
+   }
 
   /* Add all inline clones and callees that are duplicated.  */
   for (e = cnode->callees; e; e = e->next_callee)
@@ -372,6 +376,402 @@ lto_max_map (void)
 new_partition ("empty");
 }
 
+/* Class implementing a 

[PATCH 3/6] Implement fork-based parallelism engine

2020-08-20 Thread Giuliano Belinassi via Gcc-patches
This patch belongs to the "Parallelize GCC with Processes" series.

Here, we implement the parallelism by forking the compiler into
multiple processes after what would be the LTO LTRANS stage,
partitioning the callgraph into several partitions, as implemented in
"maybe_compile_in_parallel". From a high level, what happens is:

1. If the partitioner manages to generate multiple partitions, the
compiler will then call lto_promote_cross_file_statics to compute
the partition boundary, and symbols are promoted to global only if
promote_statics is set to true. This option is controlled by the user
through --param=promote-statics, which is disabled by default.

2. The compiler will initialize the file passed by the driver trough
the hidden "-fsplit-outputs=", creating such file.

3. The compiler will fork into multiple processes and apply the
allocated partition to the symbol table, removing every node which
is unnecessary for the partition.

4. The parent process wait for all child processes to finish, and then
call exit (0).

For implementing 3., however, we had to do some more detailed analysis
and figure a way to correctly remove reachable nodes from the callgraph
without corrupting any other node. LTO does this by simple trowing
everything into files and reloading it, but we had to avoid this
because that would result in a huge overhead. We implemented this in
"lto_apply_partition_mask" by classifying each node according to
a dependency analysis:

* Start by trusting what lto_promote_cross_file_statics
gave to us.

* Look for nodes in which may need additional nodes to be
carried with it. For example, inline clones requires that their body
keep present, so we have to expand the boundary a little by adding
all nodes that it calls.

* If the node is in the boundary, we release all unnecessary
informations about it.  For varpool nodes, we have to declare it
external, otherwise we end up with multiple instances of the same
global variable in the program, which results in incorrect linking.

* Avoid duplicated release of function summaries (ipa-fnsummary).

* Finally, we had to delay the assembler file initialization,
delay any early assembler output to file, and remove any initialized
RTL code if a certain varaible requires to be renamed.

We also implemented a GNU Make Jobserver integration to this mechanism,
as implemented in jobserver.cc. This works as follows:

* If -fparallel-jobs=jobserver, then we will query the existence of a
jobserver by calling jobserver_initialize. This method will look if
the file descriptors provided by make are valid, and check the flags
of the read file descriptor are set to O_NONBLOCK.

* Then, the parent process will return the token which Make
originally gave to it, since the child is blocked awaiting for a
new token. To correctly block the child, there are two cases: (1)
when select is available in the host, and (2) when it is not. In
(1), we have to use it, since the read fd will have O_NONBLOCK. In
(2), we can simply read the fd, as the read is set to blocking mode.

* Once the child read a token, it will then compile its part, and return
the token before finalizing. If the compilation crash, however, the 
parent
process will correctly detect that a signal was sent to it, so there is
no need for any fancy crash control by the jobserver engine part.

gcc/ChangeLog:
2020-08-20  Giuliano Belinassi  

* jobserver.cc: New file.
* jobserver.h: New file.
* cgraph.c (cgraph_node::maybe_release_dominators): New function.
* cgraph.h (symtab_node::find_by_order): Declare.
(symtab_node::find_by_name): Declare.
(symtab_node::find_by_asm_name): Declare.
(maybe_release_dominators): Declare.
* cgraphunit.c (cgraph_node::expand): Quickly return if body removed.
(ipa_passes): Run all_regular_ipa_passes if split_outputs.
(is_number): New function.
(childno): New variable.
(maybe_compile_in_parallel): New function.
* ipa-fnsummary (pass_ipa_free_fn_summary::gate): Avoid running twice
when compiling in parallel.
* ipa-icf.c (sem_item_optimizer::filter_removed_items): Behaviour when
compiling in parallel should be the same as if in LTO.
* ipa-visibility (localize_node): Same as above.
lto-cgraph.c (handle_node_in_boundary): New function.
(compute_boundary): New function.
(lto_apply_partition_mask): New function.
symtab.c: (symbol_table::change_decl_assembler_name): Discard RTL decl
if name changed.
(symtab_node::dump_base): Dump aux2.
(symtab_node::find_by_order): New function.
(symtab_node::find_by_name): New function.
(symtab_node::find_by_asm_name): New function.
  

[PATCH 5/6] Add invoke documentation

2020-08-20 Thread Giuliano Belinassi via Gcc-patches
Add documentation about how to invoke GCC in order to use parallel
compilation.

gcc/ChangeLog:
20-08-2020  Giuliano Belinassi  

* doc/invoke.texi: Document -fparallel-jobs=.
---
 gcc/doc/invoke.texi | 32 +++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 70dc1ab73a1..18cebf99dfd 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -504,7 +504,8 @@ Objective-C and Objective-C++ Dialects}.
 -fno-sched-spec  -fno-signed-zeros @gol
 -fno-toplevel-reorder  -fno-trapping-math  -fno-zero-initialized-in-bss @gol
 -fomit-frame-pointer  -foptimize-sibling-calls @gol
--fpartial-inlining  -fpeel-loops  -fpredictive-commoning @gol
+-fpartial-inlining  -fparallel-jobs=@var{alg} @gol
+-fpeel-loops  -fpredictive-commoning @gol
 -fprefetch-loop-arrays @gol
 -fprofile-correction @gol
 -fprofile-use  -fprofile-use=@var{path} -fprofile-partial-training @gol
@@ -14511,6 +14512,35 @@ of the function name, it is considered to be a match.  
For C99 and C++
 extended identifiers, the function name must be given in UTF-8, not
 using universal character names.
 
+@item -fparallel-jobs=@var{n}
+@opindex parallel
+This option is experimental.
+
+This option enables parallel compilation of files using a maximum of
+@var{n} parallel jobs.  When invoked, it tries to distribute the symbols
+within the file into multiple partitions and compile them in parallel.
+
+For now, private symbols are paritioned together with public symbols
+if there are references to them to avoid code layout modifications
+when compiling.  This means that compiling a file
+with very few public symbols will not provide noticeable improvements
+in compilation time.  However, you can use
+@option{--param=promote-statics=1} to allow GCC to automatically
+promote a symbol to be globally available, improving compilation
+performance in exchange to changing code layout.
+
+You can also specify @option{-fparallel-jobs=jobserver} to use GNU make's
+job server mode to determine the number of parallel jobs.  This
+is useful when the Makefile calling GCC is already executing in parallel.
+You must prepend a @samp{+} to the command recipe in the parent Makefile
+for this to work.  This option likely only works if @env{MAKE} is
+GNU make.  If you specify @option{-fparallel-jobs=auto}, GCC will try to
+automatically detect a running GNU make's job server.
+
+An extra parameter, @option{--param=balance-partitions=0} can be used to
+avoid balancing created partitions.  This should only be used to debug
+the compiler.
+
 @item -fpatchable-function-entry=@var{N}[,@var{M}]
 @opindex fpatchable-function-entry
 Generate @var{N} NOPs right at the beginning
-- 
2.28.0



[PATCH 6/6] New tests for parallel compilation feature

2020-08-20 Thread Giuliano Belinassi via Gcc-patches
Adds new tests for testing the parallel compilation engine.
They mainly test issues with regard to symbol promotion clash and
incorrect early assembler output.

2020-08-20  Giuliano Belinassi  

* gcc.dg/parallel-early-constant.c: New test.
* gcc.dg/parallel-static-1.c: New test.
* gcc.dg/parallel-static-2.c: New test.
* gcc.dg/parallel-static-clash-1.c: New test.
* gcc.dg/parallel-static-clash-aux.c: New test.
---
 .../gcc.dg/parallel-early-constant.c  | 22 ++
 gcc/testsuite/gcc.dg/parallel-static-1.c  | 21 +
 gcc/testsuite/gcc.dg/parallel-static-2.c  | 21 +
 .../gcc.dg/parallel-static-clash-1.c  | 23 +++
 .../gcc.dg/parallel-static-clash-aux.c| 14 +++
 5 files changed, 101 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/parallel-early-constant.c
 create mode 100644 gcc/testsuite/gcc.dg/parallel-static-1.c
 create mode 100644 gcc/testsuite/gcc.dg/parallel-static-2.c
 create mode 100644 gcc/testsuite/gcc.dg/parallel-static-clash-1.c
 create mode 100644 gcc/testsuite/gcc.dg/parallel-static-clash-aux.c

diff --git a/gcc/testsuite/gcc.dg/parallel-early-constant.c 
b/gcc/testsuite/gcc.dg/parallel-early-constant.c
new file mode 100644
index 000..fc8c5a986ec
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/parallel-early-constant.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-fparallel-jobs=2 --param=balance-partitions=0" } */
+
+#define A "This is a long test that tests the structure initialization"
+#define B A,A
+#define C B,B,B,B
+#define D C,C,C,C
+
+const char *foo1 ()
+{
+  return A;
+}
+
+int foo2 ()
+{
+  return 42;
+}
+
+int main()
+{
+  char *subs[]={ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D};
+}
diff --git a/gcc/testsuite/gcc.dg/parallel-static-1.c 
b/gcc/testsuite/gcc.dg/parallel-static-1.c
new file mode 100644
index 000..cf1cc7df93d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/parallel-static-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-fparallel-jobs=2 --param=balance-partitions=0" } */
+
+static int global_var;
+
+int foo1(void)
+{
+  global_var = 1;
+}
+
+int foo2(void)
+{
+  global_var = 2;
+}
+
+int main ()
+{
+  foo1 ();
+  foo2 ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/parallel-static-2.c 
b/gcc/testsuite/gcc.dg/parallel-static-2.c
new file mode 100644
index 000..44f5b0d5a02
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/parallel-static-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-fparallel-jobs=2 --param=balance-partitions=0" } */
+
+int foo1(void)
+{
+  static int var;
+  var = 1;
+}
+
+int foo2(void)
+{
+  static int var;
+  var = 2;
+}
+
+int main ()
+{
+  foo1 ();
+  foo2 ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/parallel-static-clash-1.c 
b/gcc/testsuite/gcc.dg/parallel-static-clash-1.c
new file mode 100644
index 000..37a01e28b1b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/parallel-static-clash-1.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-options "-fparallel-jobs=2 --param=balance-partitions=0 
--param=promote-statics=1" } */
+/* { dg-additional-sources "parallel-static-clash-aux.c" } */
+
+int file2_c ();
+
+static int __attribute__ ((noinline))
+private ()
+{
+  return 42;
+}
+
+int
+file1_c ()
+{
+  return private ();
+}
+
+int
+main ()
+{
+  return file1_c () + file2_c ();
+}
diff --git a/gcc/testsuite/gcc.dg/parallel-static-clash-aux.c 
b/gcc/testsuite/gcc.dg/parallel-static-clash-aux.c
new file mode 100644
index 000..aac473933a9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/parallel-static-clash-aux.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-fparallel-jobs=2 --param=balance-partitions=0" } */
+
+static int __attribute__ ((noinline))
+private ()
+{
+  return -42;
+}
+
+int
+file2_c ()
+{
+  return private ();
+}
-- 
2.28.0



[PATCH 1/6] Modify gcc driver for parallel compilation

2020-08-20 Thread Giuliano Belinassi via Gcc-patches
Update the driver for parallel compilation. This process work as
follows:

When calling gcc, the driver will check if the flag
"-fparallel-jobs" was provided by the user. If yes, then we will
check what is the desired output, and if it can be parallelized.
There are the following cases, which is described:

1. -S or -E was provided: We can't run in parallel, as the output
   can not be easily merged together into one file.

2. -c was provided: When cc1* forks into multiple processes, it
   must tell the driver where it stored its generated assembler files.
   Therefore we pass a hidden "-fsplit-outputs=filename" to the compiler,
   and we check if "filename" was created by it. If yes, we open it,
   call assembler for each generated asm file
   (this file must not be empty), and link them together with
   partial linking to a single .o file. This process is done for each
   object file in the argument list.

3. -c was not provided, and the final product will be an binary: Here
   we proceed exactly as 2., but we avoid doing the partial
   linking, feeding the generated object files directly into the final link.

For that to work, we had to heavily modify how the "execute" function
works, extracting common code which is used multiple times, and
also detecting when the command is a call to a compiler or an
assembler, as can be seen in append_split_outputs.

Finally, we added some tests which reflects all cases found when
bootstrapping the compiler, so development of further features to the
driver get faster for now on.

gcc/ChangeLog
2020-08-20  Giuliano Belinassi  

* common.opt (fsplit-outputs): New flag.
(fparallel-jobs): New flag.
* gcc.c (extra_arg_storer): New class.
(have_S): New variable.
(struct command): Move from execute.
(is_compiler): New function.
(is_assembler): New function.
(get_number_of_args): New function.
(get_file_by_lines): New function.
(identify_asm_file): New function.
(struct infile): New attribute temp_additional_asm.
(current_infile): New variable.
(get_path_to_ld): New function.
(has_hidden_E): New function.
(sort_asm_files): New function.
(append_split_outputs): New function.
(print_command): New function.
(print_commands): New function.
(print_argbuf): New function.
(handle_verbose): Extracted from execute.
(append_valgrind): Same as above.
(async_launch_commands): Same as above.
(await_commands_to_finish): Same as above.
(split_commands): Same as above.
(parse_argbuf): Same as above.
(execute): Refator.
(fsplit_arg): New function.
(alloc_infile): Initialize infiles with 0.
(process_command): Remember when -S was passed.
(do_spec_on_infiles): Remember current infile being processed.
(maybe_run_linker): Replace object files when -o is a executable.
(finalize): Deinitialize temp_object_files.

gcc/testsuite/ChangeLog:
20-08-2020  Giuliano Belinassi  

* driver/driver.exp: New test.
* driver/a.c: New file.
* driver/b.c: New file.
* driver/empty.c: New file.
* driver/foo.c: New file.
---
 gcc/common.opt  |4 +
 gcc/gcc.c   | 1219 ---
 gcc/testsuite/driver/a.c|6 +
 gcc/testsuite/driver/b.c|6 +
 gcc/testsuite/driver/driver.exp |   80 ++
 gcc/testsuite/driver/empty.c|0
 gcc/testsuite/driver/foo.c  |7 +
 7 files changed, 1049 insertions(+), 273 deletions(-)
 create mode 100644 gcc/testsuite/driver/a.c
 create mode 100644 gcc/testsuite/driver/b.c
 create mode 100644 gcc/testsuite/driver/driver.exp
 create mode 100644 gcc/testsuite/driver/empty.c
 create mode 100644 gcc/testsuite/driver/foo.c

diff --git a/gcc/common.opt b/gcc/common.opt
index 4b08e91859f..4aa3ad8c95b 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3465,4 +3465,8 @@ fipa-ra
 Common Report Var(flag_ipa_ra) Optimization
 Use caller save register across calls if possible.
 
+fsplit-outputs=
+Common Joined Var(split_outputs)
+-fsplit-outputs=  Filename in which current Compilation Unit will be 
split to.
+
 ; This comment is to ensure we retain the blank line above.
diff --git a/gcc/gcc.c b/gcc/gcc.c
index 10bc9881aed..c276a11ca7a 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -343,6 +343,74 @@ static struct obstack obstack;
 
 static struct obstack collect_obstack;
 
+/* This is used to store new argv arrays created dinamically to avoid memory
+   leaks.  */
+
+class extra_arg_storer
+{
+  public:
+
+/* Initialize the vec with a default size.  */
+
+extra_arg_storer ()
+  {
+   string_vec.create (8);
+   extra_args.create (64);
+  }
+
+/* Create new array of strings of size N.  */
+const char **create_new (size_t n)
+  {
+   const char **ret = XNEWVEC (const char *, n);
+   

[PATCH 0/6] Parallelize Intra-Procedural Optimizations using the LTO Engine.

2020-08-20 Thread Giuliano Belinassi via Gcc-patches
This patch series add a new flag "-fparallel-jobs=" to control if the
compiler should try to compile the current file in parallel.

There are three modes which is supported by now:

1. -fparallel-jobs=: Try to compile the file using a maximum of N
jobs.

2. -fparallel-jobs=jobserver: Check if there is a running GNU Make
Jobserver. If positive, communicate with it in order to launch jobs,
but alert the user if the jobserver was not found, since it requires
modifications in the project Makefile.

3. -fparallel-jobs=auto: Same as 2., but quietly fall back to a maximum
of 2 jobs if the jobserver was not found.

The parallelization works by using a modified LTO engine, as no IR is
dumped into the disk, and a new partitioner is employed to find
symbols which must be partitioned together.

In order to implement the parallelism feature, we:

1. The driver will pass a hidden -fsplit-outputs= to cc1*.

2. After IPA, cc1* will search for symbols in which must be partitioned
together.  If the user allows GCC to automatically promote symbols to
globals through "--param=promote-statics=1" for a better parallel
compilation performance, it will also be done.  However, if it decides
that partitioning is a bad idea, it will continue with a default serial
compilation, and the additional  will not be created.  It will
avoid compiling in parallel if and only if:

  * File size exceeds the minimum file size specified by LTO default
  --param=lto-min-partition.

  * The partitioner is unable to find any point of partitioning in the
  file.

3. cc1* will fork itself; one fork for each partition. Each child
process will apply its partition mask generated by the partitioner
and write a new assembler name file to  pointed by the driver.

4. The driver will open each file and partially link them together into
a single .o file, if -c was requested, else into a binary.  -S and -E
is unsupported for now and probably will remain so.


Speedups ranged from 0.95x to 1.9x on a Quad-Core Intel Core-i7 8565U
when testing with two files in GCC, as stated in the following table.
The test was the result of a single execution with a previous warm up
execution. The compiled GCC had checking enabled, and therefore release
version might have better timings in both sequential and parallel, but the
speedup may remain the same.

||| Without Static | With Static |   Max   |
| File   | Sequential |Promotion   |  Promotion  | Speedup |
||||---|
| gimple-match.c | 60s|   63s  | 34s |   1.7x  |
| insn-emit.c| 37s|   19s  | 20s |   1.9x  |

Notice that we have a slowdown in some cases when it is enabled, that
is why the parallelism feature is enabled with a flag for now.

Bootstrapped and Regtested on Linux x86_64.

Giuliano Belinassi (6):
  Modify gcc driver for parallel compilation
  Implement a new partitioner for parallel compilation
  Implement fork-based parallelism engine
  Add `+' for Jobserver Integration
  Add invoke documentation
  New tests for parallel compilation feature

 gcc/Makefile.in   |6 +-
 gcc/cgraph.c  |   16 +
 gcc/cgraph.h  |   13 +
 gcc/cgraphunit.c  |  198 ++-
 gcc/common.opt|4 +
 gcc/doc/invoke.texi   |   32 +-
 gcc/gcc.c | 1219 +
 gcc/ipa-fnsummary.c   |2 +-
 gcc/ipa-icf.c |3 +-
 gcc/ipa-visibility.c  |3 +-
 gcc/ipa.c |4 +-
 gcc/jobserver.cc  |  168 +++
 gcc/jobserver.h   |   33 +
 gcc/lto-cgraph.c  |  172 +++
 gcc/{lto => }/lto-partition.c |  463 ++-
 gcc/{lto => }/lto-partition.h |4 +-
 gcc/lto-streamer.h|4 +
 gcc/lto/Make-lang.in  |4 +-
 gcc/lto/lto.c |2 +-
 gcc/params.opt|8 +
 gcc/symtab.c  |   46 +-
 gcc/testsuite/driver/a.c  |6 +
 gcc/testsuite/driver/b.c  |6 +
 gcc/testsuite/driver/driver.exp   |   80 ++
 gcc/testsuite/driver/empty.c  |0
 gcc/testsuite/driver/foo.c|7 +
 .../gcc.dg/parallel-early-constant.c  |   22 +
 gcc/testsuite/gcc.dg/parallel-static-1.c  |   21 +
 gcc/testsuite/gcc.dg/parallel-static-2.c  |   21 +
 .../gcc.dg/parallel-static-clash-1.c  |   23 +
 .../gcc.dg/parallel-static-clash-aux.c|   14 +
 gcc/toplev.c  |   58 +-
 gcc/toplev.h

[PATCH] Refactor `execute` from gcc.c

2020-05-18 Thread Giuliano Belinassi via Gcc-patches
Hi,

After having so much trouble working on the `execute' function inside
gcc.c, I decided to refactor it so that it could be more digestible.
Since I am using it on my branch, I am submitting this patch for
"battlefield" testing.

Bootstrapped and ran the testsuite on Linux x86_64.

-

Refactor `execute' function to avoid exposing unnecessary details of
how it works, encapsulating each important step into a function.

gcc/ChangeLog
2020-05-18  Giuliano Belinassi  

* gcc.c (struct command): Move from execute.
(await_commands_to_finish): New function.
(split_commands): Same as above.
(parse_argbuf): Same as above.
(execute): Refactor based on new functions.
>From df092d93b511a28180105a2db40777fb977e23da Mon Sep 17 00:00:00 2001
From: Giuliano Belinassi 
Date: Mon, 18 May 2020 21:08:43 -0300
Subject: [PATCH] Refactor execute in gcc.c

Refactor `execute' function to avoid exposing unnecessary details of
how it works, encapsulating each important step into a function.

gcc/ChangeLog
2020-05-18  Giuliano Belinassi  

	* gcc.c (struct command): Move from execute.
	(await_commands_to_finish): New function.
	(split_commands): Same as above.
	(parse_argbuf): Same as above.
	(execute): Refactor based on new functions.
---
 gcc/ChangeLog |   8 +
 gcc/gcc.c | 595 +-
 2 files changed, 353 insertions(+), 250 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7f484da04f0..3ad1b9f7fb9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2020-05-18  Giuliano Belinassi  
+
+	* gcc.c (struct command): Move from execute.
+	(await_commands_to_finish): New function.
+	(split_commands): Same as above.
+	(parse_argbuf): Same as above.
+	(execute): Refactor based on new functions.
+
 2020-05-18  Jason Merrill  
 
 	* aclocal.m4: Add ax_cxx_compile_stdcxx.m4.
diff --git a/gcc/gcc.c b/gcc/gcc.c
index b0d0308f127..082a5d1d4a8 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -3019,157 +3019,105 @@ add_sysrooted_hdrs_prefix (struct path_prefix *pprefix, const char *prefix,
 }
 
 
-/* Execute the command specified by the arguments on the current line of spec.
-   When using pipes, this includes several piped-together commands
-   with `|' between them.
 
-   Return 0 if successful, -1 if failed.  */
+struct command
+{
+  const char *prog;		/* program name.  */
+  const char **argv;		/* vector of args.  */
+};
+
+/* Print what commands will run.  Return 0 if success, anything else on
+   error.  */
 
 static int
-execute (void)
+print_verbose (int n_commands, struct command commands[])
 {
   int i;
-  int n_commands;		/* # of command.  */
-  char *string;
-  struct pex_obj *pex;
-  struct command
-  {
-const char *prog;		/* program name.  */
-const char **argv;		/* vector of args.  */
-  };
-  const char *arg;
-
-  struct command *commands;	/* each command buffer with above info.  */
-
-  gcc_assert (!processing_spec_function);
-
-  if (wrapper_string)
-{
-  string = find_a_file (_prefixes,
-			argbuf[0], X_OK, false);
-  if (string)
-	argbuf[0] = string;
-  insert_wrapper (wrapper_string);
-}
 
-  /* Count # of piped commands.  */
-  for (n_commands = 1, i = 0; argbuf.iterate (i, ); i++)
-if (strcmp (arg, "|") == 0)
-  n_commands++;
-
-  /* Get storage for each command.  */
-  commands = (struct command *) alloca (n_commands * sizeof (struct command));
-
-  /* Split argbuf into its separate piped processes,
- and record info about each one.
- Also search for the programs that are to be run.  */
-
-  argbuf.safe_push (0);
-
-  commands[0].prog = argbuf[0]; /* first command.  */
-  commands[0].argv = argbuf.address ();
-
-  if (!wrapper_string)
-{
-  string = find_a_file (_prefixes, commands[0].prog, X_OK, false);
-  if (string)
-	commands[0].argv[0] = string;
-}
-
-  for (n_commands = 1, i = 0; argbuf.iterate (i, ); i++)
-if (arg && strcmp (arg, "|") == 0)
-  {/* each command.  */
-#if defined (__MSDOS__) || defined (OS2) || defined (VMS)
-	fatal_error (input_location, "%<-pipe%> not supported");
-#endif
-	argbuf[i] = 0; /* Termination of command args.  */
-	commands[n_commands].prog = argbuf[i + 1];
-	commands[n_commands].argv
-	  = &(argbuf.address ())[i + 1];
-	string = find_a_file (_prefixes, commands[n_commands].prog,
-			  X_OK, false);
-	if (string)
-	  commands[n_commands].argv[0] = string;
-	n_commands++;
-  }
-
-  /* If -v, print what we are about to do, and maybe query.  */
+  /* For help listings, put a blank line between sub-processes.  */
+  if (print_help_list)
+fputc ('\n', stderr);
 
-  if (verbose_flag)
+  /* Print each piped command as a separate line.  */
+  for (i = 0; i < n_commands; i++)
 {
-  /* For help listings, put a blank line between sub-processes.  */
-  if (print_help_list)
-	fputc ('\n', stderr);
+  const char *const *j;
 
-  /* Print each piped command as a separate line.  */
-  

Re: [PATCH] Refactor tree-vrp.c

2020-05-12 Thread Giuliano Belinassi via Gcc-patches
Hi.

On 05/11, Richard Biener wrote:
> On Fri, May 8, 2020 at 7:11 PM Jeff Law via Gcc-patches
>  wrote:
> >
> > On Fri, 2020-05-08 at 13:06 -0300, Giuliano Belinassi via Gcc-patches wrote:
> > > Hi,
> > >
> > > This patch Refactors tree-vrp.c to eliminate all global variables except
> > > 'x_vrp_values', which will require that 'thread_outgoing_edges' to
> > > accept an extra argument and pass it to the 'simplify' callback.
> > >
> > > It also removes every access to 'cfun', retrieving the function being
> > > compiled from the pass engine.
> > >
> > > Bootstrapped and ran the testsuite on Linux x86_64.
> > >
> > > gcc/ChangeLog
> > > 2020-05-08  Giuliano Belinassi  
> > >
> > >   * tree-vrp.c (class liveness): New.
> > >   (insert_range_assertions): Move to class liveness.
> > >   (dump_all_asserts): Same as above.
> > >   (dump_asserts_for): Same as above.
> > >   (live): Same as above.
> > >   (need_assert_for): Same as above.
> > >   (live_on_edge): Same as above.
> > >   (finish_register_edge_assert_for): Same as above.
> > >   (find_switch_asserts): Same as above.
> > >   (find_assert_locations): Same as above.
> > >   (find_assert_locations_1): Same as above.
> > >   (find_conditional_asserts): Same as above.
> > >   (process_assert_insertions): Same as above.
> > >   (register_new_assert_for): Same as above.
> > >   (vrp_prop): New variable fun.
> > >   (vrp_initialize): New parameter.
> > >   (identify_jump_threads): Same as above.
> > >   (execute_vrp): Same as above.
> > Just a note.  While the old VRP implementation in tree-vrp.c is on the 
> > chopping
> > block, but it'll likely be the end of summer before we know if further work 
> > in
> > the new Ranger based implementation will be needed to totally replace 
> > tree-vrp
> > w/o introducing any performance regressions.
> >
> > Thus, IMHO, we should go forward with the review.
> 
> Agreed, so I went ahead and reviewed it.  The only comment I have is
> that 'liveness' is not a good match for the machinery which is about
> insertion of ASSERT_EXPR stmts for VRP.  I suggest to use
> vrp_insert or vrp_asserts instead.

Renamed to vrp_insert, bootstrapped, and commited to master as trivial.

Giuliano.

> 
> OK with that change.
> Richard.
> 
> >
> > Jeff
> >
> >
> >


[PATCH] Refactor tree-vrp.c

2020-05-08 Thread Giuliano Belinassi via Gcc-patches
Hi,

This patch Refactors tree-vrp.c to eliminate all global variables except
'x_vrp_values', which will require that 'thread_outgoing_edges' to
accept an extra argument and pass it to the 'simplify' callback.

It also removes every access to 'cfun', retrieving the function being
compiled from the pass engine.

Bootstrapped and ran the testsuite on Linux x86_64.

gcc/ChangeLog
2020-05-08  Giuliano Belinassi  

* tree-vrp.c (class liveness): New.
(insert_range_assertions): Move to class liveness.
(dump_all_asserts): Same as above.
(dump_asserts_for): Same as above.
(live): Same as above.
(need_assert_for): Same as above.
(live_on_edge): Same as above.
(finish_register_edge_assert_for): Same as above.
(find_switch_asserts): Same as above.
(find_assert_locations): Same as above.
(find_assert_locations_1): Same as above.
(find_conditional_asserts): Same as above.
(process_assert_insertions): Same as above.
(register_new_assert_for): Same as above.
(vrp_prop): New variable fun.
(vrp_initialize): New parameter.
(identify_jump_threads): Same as above.
(execute_vrp): Same as above.
>From 5a88c2ffef02414ed2c5f98f7808fdd3386f87be Mon Sep 17 00:00:00 2001
From: Giuliano Belinassi 
Date: Fri, 8 May 2020 11:30:23 -0300
Subject: [PATCH] Refactor tree-vrp.c

Refactor tree-vrp.c to eliminate all global variables except
'x_vrp_values', which will require that 'thread_outgoing_edges'
to accept an extra argument and pass it to the 'simplify' callback.

It also removes every access to 'cfun', retrieving the function being
compiled from the pass engine.

gcc/ChangeLog
2020-05-08  Giuliano Belinassi  

	* tree-vrp.c (class liveness): New.
	(insert_range_assertions): Move to class liveness.
	(dump_all_asserts): Same as above.
	(dump_asserts_for): Same as above.
	(live): Same as above.
	(need_assert_for): Same as above.
	(live_on_edge): Same as above.
	(finish_register_edge_assert_for): Same as above.
	(find_switch_asserts): Same as above.
	(find_assert_locations): Same as above.
	(find_assert_locations_1): Same as above.
	(find_conditional_asserts): Same as above.
	(process_assert_insertions): Same as above.
	(register_new_assert_for): Same as above.
	(vrp_prop): New variable fun.
	(vrp_initialize): New parameter.
	(identify_jump_threads): Same as above.
	(execute_vrp): Same as above.
---
 gcc/ChangeLog  |  22 
 gcc/tree-vrp.c | 309 +++--
 2 files changed, 220 insertions(+), 111 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 14605950a8b..8de20b09a2b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,25 @@
+2020-05-08  Giuliano Belinassi  
+
+	* tree-vrp.c (class liveness): New.
+	(insert_range_assertions): Move to class liveness.
+	(dump_all_asserts): Same as above.
+	(dump_asserts_for): Same as above.
+	(live): Same as above.
+	(need_assert_for): Same as above.
+	(live_on_edge): Same as above.
+	(finish_register_edge_assert_for): Same as above.
+	(find_switch_asserts): Same as above.
+	(find_assert_locations): Same as above.
+	(find_assert_locations_1): Same as above.
+	(find_conditional_asserts): Same as above.
+	(process_assert_insertions): Same as above.
+	(register_new_assert_for): Same as above.
+	(vrp_prop): New variable fun.
+	(vrp_initialize): New parameter.
+	(identify_jump_threads): Same as above.
+	(execute_vrp): Same as above.
+
+
 2020-05-08  Richard Biener  
 
 	* tree-ssa-sccvn.c (rpo_avail): Change type to
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index a8861670790..a8dc6fd0110 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -68,9 +68,149 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "range-op.h"
 
-/* Set of SSA names found live during the RPO traversal of the function
-   for still active basic-blocks.  */
-static sbitmap *live;
+
+
+/* Location information for ASSERT_EXPRs.  Each instance of this
+   structure describes an ASSERT_EXPR for an SSA name.  Since a single
+   SSA name may have more than one assertion associated with it, these
+   locations are kept in a linked list attached to the corresponding
+   SSA name.  */
+struct assert_locus
+{
+  /* Basic block where the assertion would be inserted.  */
+  basic_block bb;
+
+  /* Some assertions need to be inserted on an edge (e.g., assertions
+ generated by COND_EXPRs).  In those cases, BB will be NULL.  */
+  edge e;
+
+  /* Pointer to the statement that generated this assertion.  */
+  gimple_stmt_iterator si;
+
+  /* Predicate code for the ASSERT_EXPR.  Must be COMPARISON_CLASS_P.  */
+  enum tree_code comp_code;
+
+  /* Value being compared against.  */
+  tree val;
+
+  /* Expression to compare.  */
+  tree expr;
+
+  /* Next node in the linked list.  */
+  assert_locus *next;
+};
+
+class liveness
+{
+  public:
+
+liveness (struct function *fn)
+  {
+	fun = fn;
+  }
+
+

Re: [PATCH v3] [Stage1] Refactor tree-ssa-operands.c

2020-05-04 Thread Giuliano Belinassi via Gcc-patches
Hi,

This patch Refactors tree-ssa-operands.c by wrapping the global
variables into a class, and also removes unused code.

The difference between this version and v2 is:
  * Disable the copy of operands_scanner
  * remove void from empty arguments functions.


gcc/ChangeLog:
2020-05-04  Giuliano Belinassi  

* tree-ssa-operands.c (operands_scanner): New class.
(operands_bitmap_obstack): Remove.
(n_initialized): Remove.
(build_uses): Move to operands_scanner class.
(build_vuse): Same as above.
(build_vdef): Same as above.
(verify_ssa_operands): Same as above.
(finalize_ssa_uses): Same as above.
(cleanup_build_arrays): Same as above.
(finalize_ssa_stmt_operands): Same as above.
(start_ssa_stmt_operands): Same as above.
(append_use): Same as above.
(append_vdef): Same as above.
(add_virtual_operand): Same as above.
(add_stmt_operand): Same as above.
(get_mem_ref_operands): Same as above.
(get_tmr_operands): Same as above.
(maybe_add_call_vops): Same as above.
(get_asm_stmt_operands): Same as above.
(get_expr_operands): Same as above.
(parse_ssa_operands): Same as above.
(finalize_ssa_defs): Same as above.
(build_ssa_operands): Same as above, plus create a C-like wrapper.
(update_stmt_operands): Create an instance of operands_scanner.
>From d908e3184585cf0c1f7fcfb0490a3e51273559c6 Mon Sep 17 00:00:00 2001
From: Giuliano Belinassi 
Date: Mon, 4 May 2020 18:44:31 -0300
Subject: [PATCH v3] Refactor tree-ssa-operands.c

Refactor tree-ssa-operands.c by wrapping the global
variables into a class, and also removes unused code.

gcc/ChangeLog:
2020-05-04  Giuliano Belinassi  

	* tree-ssa-operands.c (operands_scanner): New class.
	(operands_bitmap_obstack): Remove.
	(n_initialized): Remove.
	(build_uses): Move to operands_scanner class.
	(build_vuse): Same as above.
	(build_vdef): Same as above.
	(verify_ssa_operands): Same as above.
	(finalize_ssa_uses): Same as above.
	(cleanup_build_arrays): Same as above.
	(finalize_ssa_stmt_operands): Same as above.
	(start_ssa_stmt_operands): Same as above.
	(append_use): Same as above.
	(append_vdef): Same as above.
	(add_virtual_operand): Same as above.
	(add_stmt_operand): Same as above.
	(get_mem_ref_operands): Same as above.
	(get_tmr_operands): Same as above.
	(maybe_add_call_vops): Same as above.
	(get_asm_stmt_operands): Same as above.
	(get_expr_operands): Same as above.
	(parse_ssa_operands): Same as above.
	(finalize_ssa_defs): Same as above.
	(build_ssa_operands): Same as above, plus create a C-like wrapper.
	(update_stmt_operands): Create an instance of operands_scanner.
---
 gcc/tree-ssa-operands.c | 306 +---
 1 file changed, 189 insertions(+), 117 deletions(-)

diff --git a/gcc/tree-ssa-operands.c b/gcc/tree-ssa-operands.c
index b525ee318a4..f4716d0e36f 100644
--- a/gcc/tree-ssa-operands.c
+++ b/gcc/tree-ssa-operands.c
@@ -99,23 +99,111 @@ along with GCC; see the file COPYING3.  If not see
 /* Operand is having its address taken.  */
 #define opf_address_taken (1 << 5)
 
-/* Array for building all the use operands.  */
-static vec build_uses;
+/* Class containing temporary per-stmt state.  */
 
-/* The built VDEF operand.  */
-static tree build_vdef;
+class operands_scanner
+{
+  public:
+operands_scanner (struct function *fun, gimple *statement)
+  {
+	build_vuse = NULL_TREE;
+	build_vdef = NULL_TREE;
+	fn = fun;
+	stmt = statement;
+  }
+
+/* Create an operands cache for STMT.  */
+void build_ssa_operands ();
+
+/* Verifies SSA statement operands.  */
+DEBUG_FUNCTION bool verify_ssa_operands ();
+
+  private:
+/* Disable copy and assign of this class, as it may have problems with
+   build_uses vec.  */
+DISABLE_COPY_AND_ASSIGN (operands_scanner);
+
+/* Array for building all the use operands.  */
+auto_vec build_uses;
+
+/* The built VDEF operand.  */
+tree build_vdef;
+
+/* The built VUSE operand.  */
+tree build_vuse;
+
+/* Function which STMT belongs to.  */
+struct function *fn;
+
+/* Statement to work on.  */
+gimple *stmt;
+
+/* Takes elements from build_uses and turns them into use operands of STMT.  */
+void finalize_ssa_uses ();
+
+/* Clear the in_list bits and empty the build array for VDEFs and
+   VUSEs.  */
+void cleanup_build_arrays ();
+
+/* Finalize all the build vectors, fill the new ones into INFO.  */
+void finalize_ssa_stmt_operands ();
+
+/* Start the process of building up operands vectors in INFO.  */
+void start_ssa_stmt_operands ();
+
+/* Add USE_P to the list of pointers to operands.  */
+void append_use (tree *use_p);
 
-/* The built VUSE operand.  */
-static tree build_vuse;
+/* Add VAR to the set of variables that require a VDEF operator.  */
+void append_vdef (tree var);
 
-/* 

Re: [PATCHv2] [Stage1] Refactor tree-ssa-operands.c

2020-04-23 Thread Giuliano Belinassi via Gcc-patches
Hi, Thank you for your quick review :)

This patch refactors tree-ssa-operands.c by wrapping the global
variables into a class, and also removes unused code.

In this version, we now move struct function and gimple stmt
arguments previously in the methods to the object constructor,
so that we avoid passing down these values to every function.

Bootstrapped and ran the testsuite in Linux x86_64.

gcc/ChangeLog:
2020-04-23  Giuliano Belinassi  

* tree-ssa-operands.c (operands_scanner): New class.
(operands_bitmap_obstack): Remove.
(n_initialized): Remove.
(build_uses): Move to operands_scanner class.
(build_vuse): Same as above.
(build_vdef): Same as above.
(verify_ssa_operands): Same as above.
(finalize_ssa_uses): Same as above.
(cleanup_build_arrays): Same as above.
(finalize_ssa_stmt_operands): Same as above.
(start_ssa_stmt_operands): Same as above.
(append_use): Same as above.
(append_vdef): Same as above.
(add_virtual_operand): Same as above.
(add_stmt_operand): Same as above.
(get_mem_ref_operands): Same as above.
(get_tmr_operands): Same as above.
(maybe_add_call_vops): Same as above.
(get_asm_stmt_operands): Same as above.
(get_expr_operands): Same as above.
(parse_ssa_operands): Same as above.
(finalize_ssa_defs): Same as above.
(build_ssa_operands): Same as above, plus create a C-like wrapper.
(update_stmt_operands): Create an instance of operands_scanner.
>From d6393c00d228164a40985288eae405b5130d676a Mon Sep 17 00:00:00 2001
From: Giuliano Belinassi 
Date: Tue, 21 Apr 2020 19:38:37 -0300
Subject: [PATCH] Refactor tree-ssa-operands.c

Refactor tree-ssa-operands.c by wrapping the global
variables into a class, and also removes unused code.

gcc/ChangeLog:
2020-04-23  Giuliano Belinassi  

	* tree-ssa-operands.c (operands_scanner): New class.
	(operands_bitmap_obstack): Remove.
	(n_initialized): Remove.
	(build_uses): Move to operands_scanner class.
	(build_vuse): Same as above.
	(build_vdef): Same as above.
	(verify_ssa_operands): Same as above.
	(finalize_ssa_uses): Same as above.
	(cleanup_build_arrays): Same as above.
	(finalize_ssa_stmt_operands): Same as above.
	(start_ssa_stmt_operands): Same as above.
	(append_use): Same as above.
	(append_vdef): Same as above.
	(add_virtual_operand): Same as above.
	(add_stmt_operand): Same as above.
	(get_mem_ref_operands): Same as above.
	(get_tmr_operands): Same as above.
	(maybe_add_call_vops): Same as above.
	(get_asm_stmt_operands): Same as above.
	(get_expr_operands): Same as above.
	(parse_ssa_operands): Same as above.
	(finalize_ssa_defs): Same as above.
	(build_ssa_operands): Same as above, plus create a C-like wrapper.
	(update_stmt_operands): Create an instance of operands_scanner.
---
 gcc/tree-ssa-operands.c | 302 
 1 file changed, 185 insertions(+), 117 deletions(-)

diff --git a/gcc/tree-ssa-operands.c b/gcc/tree-ssa-operands.c
index b525ee318a4..5cd5fc05dcc 100644
--- a/gcc/tree-ssa-operands.c
+++ b/gcc/tree-ssa-operands.c
@@ -99,23 +99,107 @@ along with GCC; see the file COPYING3.  If not see
 /* Operand is having its address taken.  */
 #define opf_address_taken (1 << 5)
 
-/* Array for building all the use operands.  */
-static vec build_uses;
+/* Class containing temporary per-stmt state.  */
 
-/* The built VDEF operand.  */
-static tree build_vdef;
+class operands_scanner
+{
+  public:
+operands_scanner (struct function *fun, gimple *statement)
+  {
+	build_vuse = NULL_TREE;
+	build_vdef = NULL_TREE;
+	fn = fun;
+	stmt = statement;
+  }
+
+/* Create an operands cache for STMT.  */
+void build_ssa_operands (void);
+
+/* Verifies SSA statement operands.  */
+DEBUG_FUNCTION bool verify_ssa_operands (void);
+
+  private:
+/* Array for building all the use operands.  */
+auto_vec build_uses;
+
+/* The built VDEF operand.  */
+tree build_vdef;
+
+/* The built VUSE operand.  */
+tree build_vuse;
+
+/* Function which STMT belongs to.  */
+struct function *fn;
+
+/* Statement to work on.  */
+gimple *stmt;
+
+/* Takes elements from build_uses and turns them into use operands of STMT.  */
+inline void finalize_ssa_uses (void);
+
+/* Clear the in_list bits and empty the build array for VDEFs and
+   VUSEs.  */
+inline void cleanup_build_arrays (void);
+
+/* Finalize all the build vectors, fill the new ones into INFO.  */
+inline void finalize_ssa_stmt_operands (void);
+
+/* Start the process of building up operands vectors in INFO.  */
+inline void start_ssa_stmt_operands (void);
+
+/* Add USE_P to the list of pointers to operands.  */
+inline void append_use (tree *use_p);
+
+/* Add VAR to the set of variables that require a VDEF operator.  */
+inline void append_vdef (tree var);
 
-/* The built 

[PATCH] [Stage1] Refactor tree-ssa-operands.c

2020-04-22 Thread Giuliano Belinassi via Gcc-patches
This patch refactors tree-ssa-operands.c by wrapping the global
variables into a class, and also removes unused code.

Just sending this for when Stage1 is back again.

I ran the testsuite and bootstraped in a x86_64 linux machine and
found no issues.

gcc/ChangeLog:
2020-04-22  Giuliano Belinassi  

* tree-ssa-operands.c (build_virtual_operands): New class.
(operands_bitmap_obstack): Remove.
(n_initialized): Remove.
(build_uses): Move to build_virtual_operands class.
(build_vuse): Same as above.
(build_vdef): Same as above.
(verify_ssa_operands): Same as above.
(finalize_ssa_uses): Same as above.
(cleanup_build_arrays): Same as above.
(finalize_ssa_stmt_operands): Same as above.
(start_ssa_stmt_operands): Same as above.
(append_use): Same as above.
(append_vdef): Same as above.
(add_virtual_operand): Same as above.
(add_stmt_operand): Same as above.
(get_mem_ref_operands): Same as above.
(get_tmr_operands): Same as above.
(maybe_add_call_vops): Same as above.
(get_asm_stmt_operands): Same as above.
(get_expr_operands): Same as above.
(parse_ssa_operands): Same as above.
(finalize_ssa_defs): Same as above.
(build_ssa_operands): Same as above, plus create a C-like wrapper.
(update_stmt_operands): Create an instance of build_virtual_operands.
>From b860b39045e1b90319caa7c75ad189514e4a5641 Mon Sep 17 00:00:00 2001
From: Giuliano Belinassi 
Date: Tue, 21 Apr 2020 19:38:37 -0300
Subject: [PATCH] [Stage1] Refactor tree-ssa-operands.c

Refactor tree-ssa-operands.c by wrapping the global
variables into a class, and also removes unused code.

gcc/ChangeLog:
2020-04-22  Giuliano Belinassi  

	* tree-ssa-operands.c (build_virtual_operands): New class.
	(operands_bitmap_obstack): Remove.
	(n_initialized): Remove.
	(build_uses): Move to build_virtual_operands class.
	(build_vuse): Same as above.
	(build_vdef): Same as above.
	(verify_ssa_operands): Same as above.
	(finalize_ssa_uses): Same as above.
	(cleanup_build_arrays): Same as above.
	(finalize_ssa_stmt_operands): Same as above.
	(start_ssa_stmt_operands): Same as above.
	(append_use): Same as above.
	(append_vdef): Same as above.
	(add_virtual_operand): Same as above.
	(add_stmt_operand): Same as above.
	(get_mem_ref_operands): Same as above.
	(get_tmr_operands): Same as above.
	(maybe_add_call_vops): Same as above.
	(get_asm_stmt_operands): Same as above.
	(get_expr_operands): Same as above.
	(parse_ssa_operands): Same as above.
	(finalize_ssa_defs): Same as above.
	(build_ssa_operands): Same as above, plus create a C-like wrapper.
	(update_stmt_operands): Create an instance of build_virtual_operands.
---
 gcc/tree-ssa-operands.c | 218 +++-
 1 file changed, 150 insertions(+), 68 deletions(-)

diff --git a/gcc/tree-ssa-operands.c b/gcc/tree-ssa-operands.c
index b525ee318a4..ae8734d17e2 100644
--- a/gcc/tree-ssa-operands.c
+++ b/gcc/tree-ssa-operands.c
@@ -99,23 +99,111 @@ along with GCC; see the file COPYING3.  If not see
 /* Operand is having its address taken.  */
 #define opf_address_taken (1 << 5)
 
-/* Array for building all the use operands.  */
-static vec build_uses;
+/* Class containing temporary per-stmt state.  */
 
-/* The built VDEF operand.  */
-static tree build_vdef;
+class build_virtual_operands
+{
+  public:
+build_virtual_operands ()
+  {
+	build_uses.create (10);
+	build_vuse = NULL_TREE;
+	build_vdef = NULL_TREE;
+  }
+
+~build_virtual_operands ()
+  {
+	cleanup_build_arrays ();
+	build_uses.release ();
+  }
+
+/* Create an operands cache for STMT.  */
+void build_ssa_operands (struct function *fn, gimple *stmt);
+
+/* Verifies SSA statement operands.  */
+DEBUG_FUNCTION bool verify_ssa_operands (struct function *fn, gimple *stmt);
+
+  private:
+/* Array for building all the use operands.  */
+vec build_uses;
+
+/* The built VDEF operand.  */
+tree build_vdef;
+
+/* The built VUSE operand.  */
+tree build_vuse;
+
+/* Takes elements from build_uses and turns them into use operands of STMT.  */
+inline void finalize_ssa_uses (struct function *fn, gimple *stmt);
+
+/* Clear the in_list bits and empty the build array for VDEFs and
+   VUSEs.  */
+inline void cleanup_build_arrays (void);
+
+/* Finalize all the build vectors, fill the new ones into INFO.  */
+inline void finalize_ssa_stmt_operands (struct function *fn, gimple *stmt);
+
+/* Start the process of building up operands vectors in INFO.  */
+inline void start_ssa_stmt_operands (void);
+
+/* Add USE_P to the list of pointers to operands.  */
+inline void append_use (tree *use_p);
 
-/* The built VUSE operand.  */
-static tree build_vuse;
+/* Add VAR to the set of variables that require a VDEF operator.  */
+inline void append_vdef (tree var);
 
-/*