[Backport 2/2] middle-end/114599 - fix bitmap allocation for check_ifunc_callee_symtab_nodes

2024-04-14 Thread H.J. Lu
From: Richard Biener 

There's no default bitmap obstack during global CTORs, so allocate the
bitmap locally.

PR middle-end/114599
PR gcov-profile/114115
* symtab.cc (ifunc_ref_map): Do not use auto_bitmap.
(is_caller_ifunc_resolver): Optimize bitmap_bit_p/bitmap_set_bit
pair.
(symtab_node::check_ifunc_callee_symtab_nodes): Properly
allocate ifunc_ref_map here.

(cherry picked from commit 9ab8fdfeef5b1a47b358e08a98177b2fad65fed9)
---
 gcc/symtab.cc | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/gcc/symtab.cc b/gcc/symtab.cc
index df09def81e9..10ec6d03842 100644
--- a/gcc/symtab.cc
+++ b/gcc/symtab.cc
@@ -1383,7 +1383,7 @@ check_ifunc_resolver (cgraph_node *node, void *data)
   return false;
 }
 
-static auto_bitmap ifunc_ref_map;
+static bitmap ifunc_ref_map;
 
 /* Return true if any caller of NODE is an ifunc resolver.  */
 
@@ -1404,9 +1404,8 @@ is_caller_ifunc_resolver (cgraph_node *node)
 
   /* Skip if it has been visited.  */
   unsigned int uid = e->caller->get_uid ();
-  if (bitmap_bit_p (ifunc_ref_map, uid))
+  if (!bitmap_set_bit (ifunc_ref_map, uid))
continue;
-  bitmap_set_bit (ifunc_ref_map, uid);
 
   if (is_caller_ifunc_resolver (e->caller))
{
@@ -1437,6 +1436,9 @@ symtab_node::check_ifunc_callee_symtab_nodes (void)
 {
   symtab_node *node;
 
+  bitmap_obstack_initialize (NULL);
+  ifunc_ref_map = BITMAP_ALLOC (NULL);
+
   FOR_EACH_SYMBOL (node)
 {
   cgraph_node *cnode = dyn_cast <cgraph_node *> (node);
@@ -1455,7 +1457,8 @@ symtab_node::check_ifunc_callee_symtab_nodes (void)
cnode->called_by_ifunc_resolver = true;
 }
 
-  bitmap_clear (ifunc_ref_map);
+  BITMAP_FREE (ifunc_ref_map);
+  bitmap_obstack_release (NULL);
 }
 
 /* Verify symbol table for internal consistency.  */
-- 
2.44.0



[Backport 1/2] tree-profile: Disable indirect call profiling for IFUNC resolvers

2024-04-14 Thread H.J. Lu
We can't profile indirect calls to IFUNC resolvers nor their callees as
it requires TLS which hasn't been set up yet when the dynamic linker is
resolving IFUNC symbols.

Add an IFUNC resolver caller marker to cgraph_node and set it if the
function is called by an IFUNC resolver.  Disable indirect call profiling
for IFUNC resolvers and their callees.

Tested with profiledbootstrap on Fedora 39/x86-64.

gcc/ChangeLog:

PR tree-optimization/114115
* cgraph.h (symtab_node): Add check_ifunc_callee_symtab_nodes.
(cgraph_node): Add called_by_ifunc_resolver.
* cgraphunit.cc (symbol_table::compile): Call
symtab_node::check_ifunc_callee_symtab_nodes.
* symtab.cc (check_ifunc_resolver): New.
(ifunc_ref_map): Likewise.
(is_caller_ifunc_resolver): Likewise.
(symtab_node::check_ifunc_callee_symtab_nodes): Likewise.
* tree-profile.cc (gimple_gen_ic_func_profiler): Disable indirect
call profiling for IFUNC resolvers and their callees.

gcc/testsuite/ChangeLog:

PR tree-optimization/114115
* gcc.dg/pr114115.c: New test.

(cherry picked from commit cab32bacaea268ec062b1fb4fc662d90c9d1cfce)
---
 gcc/cgraph.h|  6 +++
 gcc/cgraphunit.cc   |  2 +
 gcc/symtab.cc   | 89 +
 gcc/testsuite/gcc.dg/pr114115.c | 24 +
 gcc/tree-profile.cc |  8 ++-
 5 files changed, 128 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr114115.c

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index c1a3691b6f5..430c87d8bb7 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -479,6 +479,9 @@ public:
  Return NULL if there's no such node.  */
   static symtab_node *get_for_asmname (const_tree asmname);
 
+  /* Check symbol table for callees of IFUNC resolvers.  */
+  static void check_ifunc_callee_symtab_nodes (void);
+
   /* Verify symbol table for internal consistency.  */
   static DEBUG_FUNCTION void verify_symtab_nodes (void);
 
@@ -896,6 +899,7 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public 
symtab_node
   redefined_extern_inline (false), tm_may_enter_irr (false),
   ipcp_clone (false), declare_variant_alt (false),
   calls_declare_variant_alt (false), gc_candidate (false),
+  called_by_ifunc_resolver (false),
   m_uid (uid), m_summary_id (-1)
   {}
 
@@ -1491,6 +1495,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : 
public symtab_node
  is set for local SIMD clones when they are created and cleared if the
  vectorizer uses them.  */
   unsigned gc_candidate : 1;
+  /* Set if the function is called by an IFUNC resolver.  */
+  unsigned called_by_ifunc_resolver : 1;
 
 private:
   /* Unique id of the node.  */
diff --git a/gcc/cgraphunit.cc b/gcc/cgraphunit.cc
index bccd2f2abb5..40dcceccca5 100644
--- a/gcc/cgraphunit.cc
+++ b/gcc/cgraphunit.cc
@@ -2313,6 +2313,8 @@ symbol_table::compile (void)
 
   symtab_node::checking_verify_symtab_nodes ();
 
+  symtab_node::check_ifunc_callee_symtab_nodes ();
+
   timevar_push (TV_CGRAPHOPT);
   if (pre_ipa_mem_report)
 dump_memory_report ("Memory consumption before IPA");
diff --git a/gcc/symtab.cc b/gcc/symtab.cc
index 0470509a98d..df09def81e9 100644
--- a/gcc/symtab.cc
+++ b/gcc/symtab.cc
@@ -1369,6 +1369,95 @@ symtab_node::verify (void)
   timevar_pop (TV_CGRAPH_VERIFY);
 }
 
+/* Return true and set *DATA to true if NODE is an ifunc resolver.  */
+
+static bool
+check_ifunc_resolver (cgraph_node *node, void *data)
+{
+  if (node->ifunc_resolver)
+{
+  bool *is_ifunc_resolver = (bool *) data;
+  *is_ifunc_resolver = true;
+  return true;
+}
+  return false;
+}
+
+static auto_bitmap ifunc_ref_map;
+
+/* Return true if any caller of NODE is an ifunc resolver.  */
+
+static bool
+is_caller_ifunc_resolver (cgraph_node *node)
+{
+  bool is_ifunc_resolver = false;
+
+  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
+{
+  /* Return true if caller is known to be an IFUNC resolver.  */
+  if (e->caller->called_by_ifunc_resolver)
+   return true;
+
+  /* Check for recursive call.  */
+  if (e->caller == node)
+   continue;
+
+  /* Skip if it has been visited.  */
+  unsigned int uid = e->caller->get_uid ();
+  if (bitmap_bit_p (ifunc_ref_map, uid))
+   continue;
+  bitmap_set_bit (ifunc_ref_map, uid);
+
+  if (is_caller_ifunc_resolver (e->caller))
+   {
+ /* Return true if caller is an IFUNC resolver.  */
+ e->caller->called_by_ifunc_resolver = true;
+ return true;
+   }
+
+  /* Check if caller's alias is an IFUNC resolver.  */
+  e->caller->call_for_symbol_and_aliases (check_ifunc_resolver,
+ &is_ifunc_resolver,
+ true);
+  if (is_ifunc_resolver)
+   {
+ /* Return true if caller's alias is an IFUNC resolver.  */
+ 

[PATCH] x86: Allow TImode offsettable memory only with 8-bit constant

2024-04-12 Thread H.J. Lu
The x86 instruction size limit is 15 bytes.  If an NDD instruction has
a segment prefix byte, a 4-byte opcode prefix, a MODRM byte, a SIB byte,
a 4-byte displacement and a 4-byte immediate, adding an address size
prefix will exceed the size limit.  Change TImode ADD, AND, OR and XOR
to allow offsettable memory only with 8-bit signed integer constant,
which is encoded with a 1-byte immediate, if the address size prefix
is used.

gcc/

PR target/114696
* config/i386/i386.md (isa): Add apx_ndd_64.
(enabled): Likewise.
(*add<dwi>3_doubleword): Change rjO to r,ro,jO with 8-bit
signed integer constant and enable jO only for apx_ndd_64.
(*add<dwi>3_doubleword_cc_overflow_1): Likewise.
(*and<dwi>3_doubleword): Likewise.
(*<code><dwi>3_doubleword): Likewise.

gcc/testsuite/

PR target/114696
* gcc.target/i386/apx-ndd-x32-2a.c: New test.
* gcc.target/i386/apx-ndd-x32-2b.c: Likewise.
* gcc.target/i386/apx-ndd-x32-2c.c: Likewise.
* gcc.target/i386/apx-ndd-x32-2d.c: Likewise.
---
 gcc/config/i386/i386.md   | 36 ++-
 .../gcc.target/i386/apx-ndd-x32-2a.c  | 13 +++
 .../gcc.target/i386/apx-ndd-x32-2b.c  |  6 
 .../gcc.target/i386/apx-ndd-x32-2c.c  |  6 
 .../gcc.target/i386/apx-ndd-x32-2d.c  |  6 
 5 files changed, 50 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-x32-2a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-x32-2b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-x32-2c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-x32-2d.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d4ce3809e6d..adab1ef9e04 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -568,7 +568,7 @@ (define_attr "unit" "integer,i387,sse,mmx,unknown"
 
 ;; Used to control the "enabled" attribute on a per-instruction basis.
 (define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
-   x64_avx,x64_avx512bw,x64_avx512dq,apx_ndd,
+   x64_avx,x64_avx512bw,x64_avx512dq,apx_ndd,apx_ndd_64,
sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512,
noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq,
@@ -968,6 +968,8 @@ (define_attr "enabled" ""
   (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL")
 (eq_attr "isa" "apx_ndd")
   (symbol_ref "TARGET_APX_NDD")
+(eq_attr "isa" "apx_ndd_64")
+  (symbol_ref "TARGET_APX_NDD && Pmode == DImode")
 (eq_attr "isa" "vaes_avx512vl")
   (symbol_ref "TARGET_VAES && TARGET_AVX512VL")
 
@@ -6302,10 +6304,10 @@ (define_expand "add3"
 })
 
 (define_insn_and_split "*add3_doubleword"
-  [(set (match_operand: 0 "nonimmediate_operand" "=ro,r,,,")
+  [(set (match_operand: 0 "nonimmediate_operand" "=ro,r,")
(plus:
- (match_operand: 1 "nonimmediate_operand" "%0,0,ro,rjO,r")
- (match_operand: 2 "x86_64_hilo_general_operand" 
"r,o,r,,r")))
+ (match_operand: 1 "nonimmediate_operand" "%0,0,ro,r,ro,jO,r")
+ (match_operand: 2 "x86_64_hilo_general_operand" 
"r,o,r,,K,,r")))
(clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)"
   "#"
@@ -6344,7 +6346,7 @@ (define_insn_and_split "*add3_doubleword"
   DONE;
 }
 }
-[(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd")])
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])
 
 (define_insn_and_split "*add3_doubleword_zext"
   [(set (match_operand: 0 "nonimmediate_operand" "=r,o,,")
@@ -9515,10 +9517,10 @@ (define_insn_and_split 
"*add3_doubleword_cc_overflow_1"
   [(set (reg:CCC FLAGS_REG)
(compare:CCC
  (plus:
-   (match_operand: 1 "nonimmediate_operand" "%0,0,ro,rjO,r")
-   (match_operand: 2 "x86_64_hilo_general_operand" 
"r,o,r,,o"))
+   (match_operand: 1 "nonimmediate_operand" "%0,0,ro,r,ro,jO,r")
+   (match_operand: 2 "x86_64_hilo_general_operand" 
"r,o,r,,K,,o"))
  (match_dup 1)))
-   (set (match_operand: 0 "nonimmediate_operand" "=ro,r,,,")
+   (set (match_operand: 0 "nonimmediate_operand" "=ro,r,")
(plus: (match_dup 1) (match_dup 2)))]
   "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)"
   "#"
@@ -9560,7 +9562,7 @@ (define_insn_and_split 
"*add3_doubleword_cc_overflow_1"
   else
 operands[6] = gen_rtx_ZERO_EXTEND (mode, operands[5]);
 }
-[(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd")])
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])
 
 ;; x == 0 with zero flag test can be done also as x < 1U with carry flag
 ;; test, where the latter is preferrable if we have some carry consuming
@@ -11704,10 +11706,10 @@ (define_expand "and3"
 })
 
 (define_insn_and_split "*and3_doubleword"

[PATCH] libstdc++: Update some baseline_symbols.txt (x32)

2024-04-12 Thread H.J. Lu
* config/abi/post/x86_64-linux-gnu/x32/baseline_symbols.txt:
Updated.
---
 .../abi/post/x86_64-linux-gnu/x32/baseline_symbols.txt  | 6 ++
 1 file changed, 6 insertions(+)

diff --git 
a/libstdc++-v3/config/abi/post/x86_64-linux-gnu/x32/baseline_symbols.txt 
b/libstdc++-v3/config/abi/post/x86_64-linux-gnu/x32/baseline_symbols.txt
index dc69c47f4d7..ac11d5dba4d 100644
--- a/libstdc++-v3/config/abi/post/x86_64-linux-gnu/x32/baseline_symbols.txt
+++ b/libstdc++-v3/config/abi/post/x86_64-linux-gnu/x32/baseline_symbols.txt
@@ -497,6 +497,7 @@ FUNC:_ZNKSt11__timepunctIwE7_M_daysEPPKw@@GLIBCXX_3.4
 FUNC:_ZNKSt11__timepunctIwE8_M_am_pmEPPKw@@GLIBCXX_3.4
 FUNC:_ZNKSt11__timepunctIwE9_M_monthsEPPKw@@GLIBCXX_3.4
 FUNC:_ZNKSt11logic_error4whatEv@@GLIBCXX_3.4
+FUNC:_ZNKSt12__basic_fileIcE13native_handleEv@@GLIBCXX_3.4.33
 FUNC:_ZNKSt12__basic_fileIcE7is_openEv@@GLIBCXX_3.4
 
FUNC:_ZNKSt12__shared_ptrINSt10filesystem28recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EEcvbEv@@GLIBCXX_3.4.31
 
FUNC:_ZNKSt12__shared_ptrINSt10filesystem4_DirELN9__gnu_cxx12_Lock_policyE2EEcvbEv@@GLIBCXX_3.4.31
@@ -3214,6 +3215,7 @@ 
FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_disposeEv@@GLIBCX
 
FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEjjPKcj@@GLIBCXX_3.4.21
 
FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_S_compareEjj@@GLIBCXX_3.4.21
 
FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_capacityEj@@GLIBCXX_3.4.21
+FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_S_allocateERS3_j@@GLIBCXX_3.4.32
 
FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_Alloc_hiderC1EPcOS3_@@GLIBCXX_3.4.23
 
FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_Alloc_hiderC1EPcRKS3_@@GLIBCXX_3.4.21
 
FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_Alloc_hiderC2EPcOS3_@@GLIBCXX_3.4.23
@@ -3366,6 +3368,7 @@ 
FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE10_M_disposeEv@@GLIBCX
 
FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE10_M_replaceEjjPKwj@@GLIBCXX_3.4.21
 
FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE10_S_compareEjj@@GLIBCXX_3.4.21
 
FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE11_M_capacityEj@@GLIBCXX_3.4.21
+FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE11_S_allocateERS3_j@@GLIBCXX_3.4.32
 
FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE12_Alloc_hiderC1EPwOS3_@@GLIBCXX_3.4.23
 
FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE12_Alloc_hiderC1EPwRKS3_@@GLIBCXX_3.4.21
 
FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE12_Alloc_hiderC2EPwOS3_@@GLIBCXX_3.4.23
@@ -4531,6 +4534,7 @@ FUNC:__cxa_allocate_exception@@CXXABI_1.3
 FUNC:__cxa_bad_cast@@CXXABI_1.3
 FUNC:__cxa_bad_typeid@@CXXABI_1.3
 FUNC:__cxa_begin_catch@@CXXABI_1.3
+FUNC:__cxa_call_terminate@@CXXABI_1.3.15
 FUNC:__cxa_call_unexpected@@CXXABI_1.3
 FUNC:__cxa_current_exception_type@@CXXABI_1.3
 FUNC:__cxa_deleted_virtual@@CXXABI_1.3.6
@@ -4574,6 +4578,7 @@ OBJECT:0:CXXABI_1.3.11
 OBJECT:0:CXXABI_1.3.12
 OBJECT:0:CXXABI_1.3.13
 OBJECT:0:CXXABI_1.3.14
+OBJECT:0:CXXABI_1.3.15
 OBJECT:0:CXXABI_1.3.2
 OBJECT:0:CXXABI_1.3.3
 OBJECT:0:CXXABI_1.3.4
@@ -4611,6 +4616,7 @@ OBJECT:0:GLIBCXX_3.4.3
 OBJECT:0:GLIBCXX_3.4.30
 OBJECT:0:GLIBCXX_3.4.31
 OBJECT:0:GLIBCXX_3.4.32
+OBJECT:0:GLIBCXX_3.4.33
 OBJECT:0:GLIBCXX_3.4.4
 OBJECT:0:GLIBCXX_3.4.5
 OBJECT:0:GLIBCXX_3.4.6
-- 
2.44.0



Re: [PATCH 0/2] mmap: Avoid the sanitizer configure check failure

2024-04-10 Thread H.J. Lu
On Tue, Apr 9, 2024 at 10:39 PM Alan Modra  wrote:
>
> On Tue, Apr 09, 2024 at 07:24:33AM -0700, H.J. Lu wrote:
> > Define GCC_AC_FUNC_MMAP with export ASAN_OPTIONS=detect_leaks=0 to avoid
> > the sanitizer configure check failure.
>
> OK for binutils.  (I just fixed my local copy of autoconf so I
> wouldn't run into this again.)  The proper fix of course is to update
> autotools to something more recent.
>

This is what I am checking in with:

dnl
dnl Avoid the sanitizer run-time memory leak failure in the mmap configure
dnl test.  This should be removed when autoconf with commit:
dnl
dnl commit 09b6e78d1592ce10fdc975025d699ee41444aa3f
dnl Author: Paul Eggert 
dnl Date:   Fri Feb 5 21:06:20 2016 -0800
dnl Fix memory leak in AC_FUNC_MMAP
dnl
dnl * lib/autoconf/functions.m4 (AC_FUNC_MMAP): Fix memory leak
dnl in test case, found by configuring with gcc -fsanitize=address.
dnl
dnl is in use.
dnl

Thanks.

-- 
H.J.
From b0c2d5417fc216eeaacf7f2cd34109f438fa9aa7 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Tue, 9 Apr 2024 06:39:21 -0700
Subject: [PATCH] mmap: Avoid the sanitizer configure check failure

When -fsanitize=address,undefined is used to build, the mmap configure
check failed with

=
==231796==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 4096 byte(s) in 1 object(s) allocated from:
#0 0x7cdd3d0defdf in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69
#1 0x5750c7f6d72b in main /home/alan/build/gas-san/all/bfd/conftest.c:239

Direct leak of 4096 byte(s) in 1 object(s) allocated from:
#0 0x7cdd3d0defdf in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69
#1 0x5750c7f6d2e1 in main /home/alan/build/gas-san/all/bfd/conftest.c:190

SUMMARY: AddressSanitizer: 8192 byte(s) leaked in 2 allocation(s).

Define GCC_AC_FUNC_MMAP with export ASAN_OPTIONS=detect_leaks=0 to avoid
the sanitizer configure check failure.

config/

	* mmap.m4 (GCC_AC_FUNC_MMAP): New.
	* no-executables.m4 (AC_FUNC_MMAP): Renamed to GCC_AC_FUNC_MMAP.
	Change AC_FUNC_MMAP to GCC_AC_FUNC_MMAP.

libiberty/

	* Makefile.in (aclocal_deps): Add $(srcdir)/../config/mmap.m4.
	* acinclude.m4: Change AC_FUNC_MMAP to GCC_AC_FUNC_MMAP.
	* aclocal.m4: Regenerated.
	* configure: Likewise.

zlib/

	* acinclude.m4: Include ../config/mmap.m4.
	* Makefile.in: Regenerated.
	* configure: Likewise.
---
 config/mmap.m4   | 22 ++
 config/no-executables.m4 |  4 ++--
 libiberty/Makefile.in|  1 +
 libiberty/acinclude.m4   |  2 +-
 libiberty/aclocal.m4 |  1 +
 libiberty/configure  |  5 +
 zlib/Makefile.in |  2 +-
 zlib/acinclude.m4|  1 +
 zlib/configure   |  7 ---
 9 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/config/mmap.m4 b/config/mmap.m4
index fba0d9d3657..326b97b91f4 100644
--- a/config/mmap.m4
+++ b/config/mmap.m4
@@ -95,3 +95,25 @@ if test $gcc_cv_func_mmap_anon = yes; then
 	[Define if mmap with MAP_ANON(YMOUS) works.])
 fi
 ])
+
+dnl
+dnl Avoid the sanitizer run-time memory leak failure in the mmap configure
+dnl test.  This should be removed when autoconf with commit:
+dnl
+dnl commit 09b6e78d1592ce10fdc975025d699ee41444aa3f
+dnl Author: Paul Eggert 
+dnl Date:   Fri Feb 5 21:06:20 2016 -0800
+dnl Fix memory leak in AC_FUNC_MMAP
+dnl
+dnl * lib/autoconf/functions.m4 (AC_FUNC_MMAP): Fix memory leak
+dnl in test case, found by configuring with gcc -fsanitize=address.
+dnl
+dnl is in use.
+dnl
+AC_DEFUN([GCC_AC_FUNC_MMAP],
+  save_ASAN_OPTIONS="$ASAN_OPTIONS"
+  ASAN_OPTIONS=detect_leaks=0
+  export ASAN_OPTIONS
+  m4_defn([AC_FUNC_MMAP])
+  ASAN_OPTIONS="$save_ASAN_OPTIONS"
+)
diff --git a/config/no-executables.m4 b/config/no-executables.m4
index 6842f84fba3..e8e2537bde5 100644
--- a/config/no-executables.m4
+++ b/config/no-executables.m4
@@ -49,14 +49,14 @@ m4_defn([AC_LINK_IFELSE]))
 
 dnl This is a shame.  We have to provide a default for some link tests,
 dnl similar to the default for run tests.
-m4_define([AC_FUNC_MMAP],
+m4_define([GCC_AC_FUNC_MMAP],
 if test x$gcc_no_link = xyes; then
   if test "x${ac_cv_func_mmap_fixed_mapped+set}" != xset; then
 ac_cv_func_mmap_fixed_mapped=no
   fi
 fi
 if test "x${ac_cv_func_mmap_fixed_mapped}" != xno; then
-  m4_defn([AC_FUNC_MMAP])
+  m4_defn([GCC_AC_FUNC_MMAP])
 fi)
 
 m4_divert_pop()dnl
diff --git a/libiberty/Makefile.in b/libiberty/Makefile.in
index 85c4b6b6ef8..b77a41c781c 100644
--- a/libiberty/Makefile.in
+++ b/libiberty/Makefile.in
@@ -508,6 +508,7 @@ aclocal_deps = \
 	$(srcdir)/../config/cet.m4 \
 	$(srcdir)/../config/enable.m4 \
 	$(srcdir)/../config/gcc-plugin.m4 \
+	$(srcdir)/../config/mmap.m4 \
 	$(srcdir)/../config/no-executables.m4 \
 	$(srcdir)/../config/override.m4 \
 	$(srcdir)/../config/picflag.m4 \
diff --git a/libiberty/acinclude.m4 b/libiberty/ac

Re: [PATCH 0/2] mmap: Avoid the sanitizer configure check failure

2024-04-09 Thread H.J. Lu
On Tue, Apr 9, 2024 at 4:08 PM Sam James  wrote:
>
> "H.J. Lu"  writes:
>
> > When -fsanitize=address,undefined is used to build, the mmap configure
> > check failed with
>
> I think Paul fixed this in autoconf commit
> 09b6e78d1592ce10fdc975025d699ee41444aa3f, so we should add a comment
> about that so we can clean this up in future.

Sure.  That was in 2016.

> >
> > =
> > ==231796==ERROR: LeakSanitizer: detected memory leaks
> >
> > Direct leak of 4096 byte(s) in 1 object(s) allocated from:
> > #0 0x7cdd3d0defdf in __interceptor_malloc 
> > ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69
> > #1 0x5750c7f6d72b in main 
> > /home/alan/build/gas-san/all/bfd/conftest.c:239
> >
> > Direct leak of 4096 byte(s) in 1 object(s) allocated from:
> > #0 0x7cdd3d0defdf in __interceptor_malloc 
> > ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69
> > #1 0x5750c7f6d2e1 in main 
> > /home/alan/build/gas-san/all/bfd/conftest.c:190
> >
> > SUMMARY: AddressSanitizer: 8192 byte(s) leaked in 2 allocation(s).
> >
> > Define GCC_AC_FUNC_MMAP with export ASAN_OPTIONS=detect_leaks=0 to avoid
> > the sanitizer configure check failure.
> >
> > H.J. Lu (2):
> >   mmap: Avoid the sanitizer configure check failure
> >   mmap: Avoid the sanitizer configure check failure
> >
> >  bfd/Makefile.in  |  2 +-
> >  bfd/aclocal.m4   |  1 +
> >  bfd/configure|  5 +
> >  bfd/configure.ac |  2 +-
> >  binutils/Makefile.in |  2 +-
> >  binutils/aclocal.m4  |  1 +
> >  binutils/configure   |  5 +
> >  binutils/configure.ac|  2 +-
> >  config/mmap.m4   | 12 
> >  config/no-executables.m4 |  4 ++--
> >  ld/Makefile.in   |  2 +-
> >  ld/aclocal.m4|  1 +
> >  ld/configure |  5 +
> >  ld/configure.ac  |  2 +-
> >  libctf/Makefile.in   |  2 +-
> >  libctf/aclocal.m4|  1 +
> >  libctf/configure |  5 +
> >  libctf/configure.ac  |  2 +-
> >  libiberty/Makefile.in|  1 +
> >  libiberty/acinclude.m4   |  2 +-
> >  libiberty/aclocal.m4 |  1 +
> >  libiberty/configure  |  5 +
> >  libsframe/Makefile.in|  1 +
> >  libsframe/aclocal.m4 |  1 +
> >  libsframe/configure  |  5 +
> >  libsframe/configure.ac   |  2 +-
> >  zlib/Makefile.in |  2 +-
> >  zlib/acinclude.m4|  1 +
> >  zlib/configure   |  7 ---
> >  29 files changed, 64 insertions(+), 20 deletions(-)



-- 
H.J.


Re: [PATCH] libgfortran: Disable gthreads weak symbols for glibc 2.34

2024-04-09 Thread H.J. Lu
On Tue, Apr 9, 2024 at 10:25 AM Andrew Pinski  wrote:
>
>
>
> On Tue, Apr 9, 2024, 10:07 H.J. Lu  wrote:
>>
>> Since Glibc 2.34 all pthreads symbols are defined directly in libc not
>> libpthread, and since Glibc 2.32 we have used __libc_single_threaded to
>> avoid unnecessary locking in single-threaded programs. This means there
>> is no reason to avoid linking to libpthread now, and so no reason to use
>> weak symbols defined in gthr-posix.h for all the pthread_xxx functions.
>
>
>
> First you forgot to cc fortran@. Second the issue is in gthrd-posix.h which 
> should be fixed instead of libgfortran since the issue will also be seen with 
> libobjc, and the other users of gthrd.

Weak symbol reference to pthread doesn't fail for all static executables.
Fixing it on a per-library basis is one approach.

> Note the fix for libstdc++ was also done in the wrong location too and should 
> have done once and for all in gthrd-posix.h.
>
>
> Thanks,
> Andrew
>
>>
>> Also add prune_warnings to libgomp.exp to prune glibc static link warning:
>>
>> .*: warning: Using 'dlopen' in statically linked applications requires at 
>> runtime the shared libraries from the glibc version used for linking
>>
>> libgfortran/
>>
>> PR libgfortran/114646
>> * acinclude.m4: Define GTHREAD_USE_WEAK 0 for glibc 2.34 or
>> above on Linux.
>> * configure: Regenerated.
>>
>> libgomp/
>>
>> PR libgfortran/114646
>> * testsuite/lib/libgomp.exp (prune_warnings): New.
>> * testsuite/libgomp.fortran/pr114646-1.f90: New test.
>> * testsuite/libgomp.fortran/pr114646-2.f90: Likewise.
>> ---
>>  libgfortran/acinclude.m4  | 14 +
>>  libgfortran/configure | 29 +++
>>  libgomp/testsuite/lib/libgomp.exp | 14 +
>>  .../testsuite/libgomp.fortran/pr114646-1.f90  | 11 +++
>>  .../testsuite/libgomp.fortran/pr114646-2.f90  | 22 ++
>>  5 files changed, 90 insertions(+)
>>  create mode 100644 libgomp/testsuite/libgomp.fortran/pr114646-1.f90
>>  create mode 100644 libgomp/testsuite/libgomp.fortran/pr114646-2.f90
>>
>> diff --git a/libgfortran/acinclude.m4 b/libgfortran/acinclude.m4
>> index a73207e5465..f4642494c4f 100644
>> --- a/libgfortran/acinclude.m4
>> +++ b/libgfortran/acinclude.m4
>> @@ -92,6 +92,20 @@ void foo (void);
>>AC_DEFINE(GTHREAD_USE_WEAK, 0,
>> [Define to 0 if the target shouldn't use #pragma weak])
>>;;
>> +*-*-linux*)
>> +  AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
>> +#include <features.h>
>> +#if !__GLIBC_PREREQ(2, 34)
>> +#error glibc version is too old
>> +#endif
>> +]], [[]])],
>> +   libgfor_cv_use_pragma_weak=no,
>> +   libgfor_cv_use_pragma_weak=yes)
>> +  if test $libgfor_cv_use_pragma_weak = no; then
>> +AC_DEFINE(GTHREAD_USE_WEAK, 0,
>> + [Define to 0 if the target shouldn't use #pragma weak])
>> +  fi
>> +  ;;
>>esac])
>>
>>  dnl Check whether target effectively supports weakref
>> diff --git a/libgfortran/configure b/libgfortran/configure
>> index 774dd52fc95..1f477256b75 100755
>> --- a/libgfortran/configure
>> +++ b/libgfortran/configure
>> @@ -31057,6 +31057,35 @@ $as_echo "#define SUPPORTS_WEAK 1" >>confdefs.h
>>
>>  $as_echo "#define GTHREAD_USE_WEAK 0" >>confdefs.h
>>
>> +  ;;
>> +*-*-linux*)
>> +  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
>> +/* end confdefs.h.  */
>> +
>> +#include <features.h>
>> +#if !__GLIBC_PREREQ(2, 34)
>> +#error glibc version is too old
>> +#endif
>> +
>> +int
>> +main ()
>> +{
>> +
>> +  ;
>> +  return 0;
>> +}
>> +_ACEOF
>> +if ac_fn_c_try_compile "$LINENO"; then :
>> +  libgfor_cv_use_pragma_weak=no
>> +else
>> +  libgfor_cv_use_pragma_weak=yes
>> +fi
>> +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
>> +  if test $libgfor_cv_use_pragma_weak = no; then
>> +
>> +$as_echo "#define GTHREAD_USE_WEAK 0" >>confdefs.h
>> +
>> +  fi
>>;;
>>esac
>>
>> diff --git a/libgomp/testsuite/lib/libgomp.exp 
>> b/libgomp/testsuite/lib/libgomp.exp
>> index cab926a798b..9cfa6d7b31d 100644
>> --- a/libgomp/testsuite/lib/libgomp.exp
>> +++ b/libgomp/testsuite/li

[PATCH] libgfortran: Disable gthreads weak symbols for glibc 2.34

2024-04-09 Thread H.J. Lu
Since Glibc 2.34 all pthreads symbols are defined directly in libc not
libpthread, and since Glibc 2.32 we have used __libc_single_threaded to
avoid unnecessary locking in single-threaded programs. This means there
is no reason to avoid linking to libpthread now, and so no reason to use
weak symbols defined in gthr-posix.h for all the pthread_xxx functions.

Also add prune_warnings to libgomp.exp to prune glibc static link warning:

.*: warning: Using 'dlopen' in statically linked applications requires at 
runtime the shared libraries from the glibc version used for linking

libgfortran/

PR libgfortran/114646
* acinclude.m4: Define GTHREAD_USE_WEAK 0 for glibc 2.34 or
above on Linux.
* configure: Regenerated.

libgomp/

PR libgfortran/114646
* testsuite/lib/libgomp.exp (prune_warnings): New.
* testsuite/libgomp.fortran/pr114646-1.f90: New test.
* testsuite/libgomp.fortran/pr114646-2.f90: Likewise.
---
 libgfortran/acinclude.m4  | 14 +
 libgfortran/configure | 29 +++
 libgomp/testsuite/lib/libgomp.exp | 14 +
 .../testsuite/libgomp.fortran/pr114646-1.f90  | 11 +++
 .../testsuite/libgomp.fortran/pr114646-2.f90  | 22 ++
 5 files changed, 90 insertions(+)
 create mode 100644 libgomp/testsuite/libgomp.fortran/pr114646-1.f90
 create mode 100644 libgomp/testsuite/libgomp.fortran/pr114646-2.f90

diff --git a/libgfortran/acinclude.m4 b/libgfortran/acinclude.m4
index a73207e5465..f4642494c4f 100644
--- a/libgfortran/acinclude.m4
+++ b/libgfortran/acinclude.m4
@@ -92,6 +92,20 @@ void foo (void);
   AC_DEFINE(GTHREAD_USE_WEAK, 0,
[Define to 0 if the target shouldn't use #pragma weak])
   ;;
+*-*-linux*)
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+#include <features.h>
+#if !__GLIBC_PREREQ(2, 34)
+#error glibc version is too old
+#endif
+]], [[]])],
+   libgfor_cv_use_pragma_weak=no,
+   libgfor_cv_use_pragma_weak=yes)
+  if test $libgfor_cv_use_pragma_weak = no; then
+AC_DEFINE(GTHREAD_USE_WEAK, 0,
+ [Define to 0 if the target shouldn't use #pragma weak])
+  fi
+  ;;
   esac])
 
 dnl Check whether target effectively supports weakref
diff --git a/libgfortran/configure b/libgfortran/configure
index 774dd52fc95..1f477256b75 100755
--- a/libgfortran/configure
+++ b/libgfortran/configure
@@ -31057,6 +31057,35 @@ $as_echo "#define SUPPORTS_WEAK 1" >>confdefs.h
 
 $as_echo "#define GTHREAD_USE_WEAK 0" >>confdefs.h
 
+  ;;
+*-*-linux*)
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <features.h>
+#if !__GLIBC_PREREQ(2, 34)
+#error glibc version is too old
+#endif
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  libgfor_cv_use_pragma_weak=no
+else
+  libgfor_cv_use_pragma_weak=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  if test $libgfor_cv_use_pragma_weak = no; then
+
+$as_echo "#define GTHREAD_USE_WEAK 0" >>confdefs.h
+
+  fi
   ;;
   esac
 
diff --git a/libgomp/testsuite/lib/libgomp.exp 
b/libgomp/testsuite/lib/libgomp.exp
index cab926a798b..9cfa6d7b31d 100644
--- a/libgomp/testsuite/lib/libgomp.exp
+++ b/libgomp/testsuite/lib/libgomp.exp
@@ -54,6 +54,20 @@ set dg-do-what-default run
 
 set libgomp_compile_options ""
 
+# Prune messages that aren't useful.
+
+proc prune_warnings { text } {
+
+verbose "prune_warnings: entry: $text" 2
+
+# Ignore warning from -static: warning: Using 'dlopen' in statically 
linked applications requires at runtime the shared libraries from the glibc 
version used for linking
+regsub -all "(^|\n)\[^\n\]*: warning: Using 'dlopen' in statically 
linked\[^\n\]*" $text "" text
+
+verbose "prune_warnings: exit: $text" 2
+
+return $text
+}
+
 #
 # libgomp_init
 #
diff --git a/libgomp/testsuite/libgomp.fortran/pr114646-1.f90 
b/libgomp/testsuite/libgomp.fortran/pr114646-1.f90
new file mode 100644
index 000..a48e6103343
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/pr114646-1.f90
@@ -0,0 +1,11 @@
+! PR libgfortran/114646
+! { dg-do run }
+! { dg-additional-options "-static" }
+
+!$OMP PARALLEL
+!$OMP CRITICAL
+ write(6,*) "Hello world"
+!$OMP END CRITICAL
+!$OMP END PARALLEL
+ write(6,*) "Done!"
+END
diff --git a/libgomp/testsuite/libgomp.fortran/pr114646-2.f90 
b/libgomp/testsuite/libgomp.fortran/pr114646-2.f90
new file mode 100644
index 000..8c0d7526f95
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/pr114646-2.f90
@@ -0,0 +1,22 @@
+! PR libgfortran/114646
+! { dg-do run }
+! { dg-additional-options "-static" }
+
+  use omp_lib
+  implicit none
+  integer, parameter :: NT = 4
+  integer :: nThreads(NT)
+
+  print *, 'Call omp_set_dynamic'
+!$call omp_set_dynamic(.false.)
+  print *, 'Call omp_set_num_threads'
+!$call omp_set_num_threads(NT)
+  

[PATCH 1/2] mmap: Avoid the sanitizer configure check failure

2024-04-09 Thread H.J. Lu
When -fsanitize=address,undefined is used to build, the mmap configure
check failed with

=
==231796==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 4096 byte(s) in 1 object(s) allocated from:
#0 0x7cdd3d0defdf in __interceptor_malloc 
../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69
#1 0x5750c7f6d72b in main /home/alan/build/gas-san/all/bfd/conftest.c:239

Direct leak of 4096 byte(s) in 1 object(s) allocated from:
#0 0x7cdd3d0defdf in __interceptor_malloc 
../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69
#1 0x5750c7f6d2e1 in main /home/alan/build/gas-san/all/bfd/conftest.c:190

SUMMARY: AddressSanitizer: 8192 byte(s) leaked in 2 allocation(s).

Define GCC_AC_FUNC_MMAP with export ASAN_OPTIONS=detect_leaks=0 to avoid
the sanitizer configure check failure.

config/

* mmap.m4 (GCC_AC_FUNC_MMAP): New.
* no-executables.m4 (AC_FUNC_MMAP): Renamed to GCC_AC_FUNC_MMAP.
Change AC_FUNC_MMAP to GCC_AC_FUNC_MMAP.

libiberty/

* Makefile.in (aclocal_deps): Add $(srcdir)/../config/mmap.m4.
* acinclude.m4: Change AC_FUNC_MMAP to GCC_AC_FUNC_MMAP.
* aclocal.m4: Regenerated.
* configure: Likewise.

zlib/

* acinclude.m4: Include ../config/mmap.m4.
* Makefile.in: Regenerated.
* configure: Likewise.
---
 config/mmap.m4   | 12 
 config/no-executables.m4 |  4 ++--
 libiberty/Makefile.in|  1 +
 libiberty/acinclude.m4   |  2 +-
 libiberty/aclocal.m4 |  1 +
 libiberty/configure  |  5 +
 zlib/Makefile.in |  2 +-
 zlib/acinclude.m4|  1 +
 zlib/configure   |  7 ---
 9 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/config/mmap.m4 b/config/mmap.m4
index fba0d9d3657..5ebdb90e4ee 100644
--- a/config/mmap.m4
+++ b/config/mmap.m4
@@ -95,3 +95,15 @@ if test $gcc_cv_func_mmap_anon = yes; then
[Define if mmap with MAP_ANON(YMOUS) works.])
 fi
 ])
+
+dnl
+dnl Avoid the sanitizer run-time memory leak failure in the mmap configure
+dnl test.
+dnl
+AC_DEFUN([GCC_AC_FUNC_MMAP],
+  save_ASAN_OPTIONS="$ASAN_OPTIONS"
+  ASAN_OPTIONS=detect_leaks=0
+  export ASAN_OPTIONS
+  m4_defn([AC_FUNC_MMAP])
+  ASAN_OPTIONS="$save_ASAN_OPTIONS"
+)
diff --git a/config/no-executables.m4 b/config/no-executables.m4
index 6842f84fba3..e8e2537bde5 100644
--- a/config/no-executables.m4
+++ b/config/no-executables.m4
@@ -49,14 +49,14 @@ m4_defn([AC_LINK_IFELSE]))
 
 dnl This is a shame.  We have to provide a default for some link tests,
 dnl similar to the default for run tests.
-m4_define([AC_FUNC_MMAP],
+m4_define([GCC_AC_FUNC_MMAP],
 if test x$gcc_no_link = xyes; then
   if test "x${ac_cv_func_mmap_fixed_mapped+set}" != xset; then
 ac_cv_func_mmap_fixed_mapped=no
   fi
 fi
 if test "x${ac_cv_func_mmap_fixed_mapped}" != xno; then
-  m4_defn([AC_FUNC_MMAP])
+  m4_defn([GCC_AC_FUNC_MMAP])
 fi)
 
 m4_divert_pop()dnl
diff --git a/libiberty/Makefile.in b/libiberty/Makefile.in
index 85c4b6b6ef8..b77a41c781c 100644
--- a/libiberty/Makefile.in
+++ b/libiberty/Makefile.in
@@ -508,6 +508,7 @@ aclocal_deps = \
$(srcdir)/../config/cet.m4 \
$(srcdir)/../config/enable.m4 \
$(srcdir)/../config/gcc-plugin.m4 \
+   $(srcdir)/../config/mmap.m4 \
$(srcdir)/../config/no-executables.m4 \
$(srcdir)/../config/override.m4 \
$(srcdir)/../config/picflag.m4 \
diff --git a/libiberty/acinclude.m4 b/libiberty/acinclude.m4
index 9974dcd4ec5..d08e31bc0b5 100644
--- a/libiberty/acinclude.m4
+++ b/libiberty/acinclude.m4
@@ -19,7 +19,7 @@ dnl On some versions of SunOS4 at least, strncmp reads a word 
at a time
 dnl but erroneously reads past the end of strings.  This can cause
 dnl a SEGV in some cases.
 AC_DEFUN([libiberty_AC_FUNC_STRNCMP],
-[AC_REQUIRE([AC_FUNC_MMAP])
+[AC_REQUIRE([GCC_AC_FUNC_MMAP])
 AC_CACHE_CHECK([for working strncmp], ac_cv_func_strncmp_works,
 [AC_TRY_RUN([
 /* Test by Jim Wilson and Kaveh Ghazi.
diff --git a/libiberty/aclocal.m4 b/libiberty/aclocal.m4
index 364fb6bc3b4..9678b0acaba 100644
--- a/libiberty/aclocal.m4
+++ b/libiberty/aclocal.m4
@@ -17,6 +17,7 @@ m4_include([../config/cet.m4])
 m4_include([../config/enable.m4])
 m4_include([../config/gcc-plugin.m4])
 m4_include([../config/hwcaps.m4])
+m4_include([../config/mmap.m4])
 m4_include([../config/no-executables.m4])
 m4_include([../config/override.m4])
 m4_include([../config/picflag.m4])
diff --git a/libiberty/configure b/libiberty/configure
index 5c69fee56c1..18e98b84bb5 100755
--- a/libiberty/configure
+++ b/libiberty/configure
@@ -7724,6 +7724,9 @@ if test x$gcc_no_link = xyes; then
   fi
 fi
 if test "x${ac_cv_func_mmap_fixed_mapped}" != xno; then
+  save_ASAN_OPTIONS="$ASAN_OPTIONS"
+  ASAN_OPTIONS=detect_leaks=0
+  export ASAN_OPTIONS
 
 for ac_func in getpagesize
 do :
@@ -7902,6 +7905,8 @@ $as_echo "#define HAVE_MMAP 1" >>confdefs.h
 fi
 rm -f conftest.mmap conftest.txt
 
+  

[PATCH 2/2] mmap: Avoid the sanitizer configure check failure

2024-04-09 Thread H.J. Lu
When -fsanitize=address,undefined is used to build, the mmap configure
check fails with

=
==231796==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 4096 byte(s) in 1 object(s) allocated from:
#0 0x7cdd3d0defdf in __interceptor_malloc 
../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69
#1 0x5750c7f6d72b in main /home/alan/build/gas-san/all/bfd/conftest.c:239

Direct leak of 4096 byte(s) in 1 object(s) allocated from:
#0 0x7cdd3d0defdf in __interceptor_malloc 
../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69
#1 0x5750c7f6d2e1 in main /home/alan/build/gas-san/all/bfd/conftest.c:190

SUMMARY: AddressSanitizer: 8192 byte(s) leaked in 2 allocation(s).

Replace AC_FUNC_MMAP with GCC_AC_FUNC_MMAP to avoid the sanitizer
configure check failure.

bfd/

* configure.ac: Replace AC_FUNC_MMAP with GCC_AC_FUNC_MMAP.
* Makefile.in: Regenerated.
* aclocal.m4: Likewise.
* configure: Likewise.

binutils/

* configure.ac: Replace AC_FUNC_MMAP with GCC_AC_FUNC_MMAP.
* Makefile.in: Regenerated.
* aclocal.m4: Likewise.
* configure: Likewise.

ld/

* configure.ac: Replace AC_FUNC_MMAP with GCC_AC_FUNC_MMAP.
* Makefile.in: Regenerated.
* aclocal.m4: Likewise.
* configure: Likewise.

libctf/

* configure.ac: Replace AC_FUNC_MMAP with GCC_AC_FUNC_MMAP.
* Makefile.in: Regenerated.
* aclocal.m4: Likewise.
* configure: Likewise.

libsframe/

* configure.ac: Replace AC_FUNC_MMAP with GCC_AC_FUNC_MMAP.
* Makefile.in: Regenerated.
* aclocal.m4: Likewise.
* configure: Likewise.
---
 bfd/Makefile.in| 2 +-
 bfd/aclocal.m4 | 1 +
 bfd/configure  | 5 +
 bfd/configure.ac   | 2 +-
 binutils/Makefile.in   | 2 +-
 binutils/aclocal.m4| 1 +
 binutils/configure | 5 +
 binutils/configure.ac  | 2 +-
 ld/Makefile.in | 2 +-
 ld/aclocal.m4  | 1 +
 ld/configure   | 5 +
 ld/configure.ac| 2 +-
 libctf/Makefile.in | 2 +-
 libctf/aclocal.m4  | 1 +
 libctf/configure   | 5 +
 libctf/configure.ac| 2 +-
 libsframe/Makefile.in  | 1 +
 libsframe/aclocal.m4   | 1 +
 libsframe/configure| 5 +
 libsframe/configure.ac | 2 +-
 20 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/bfd/Makefile.in b/bfd/Makefile.in
index faaa0c424b8..3092bff2935 100644
--- a/bfd/Makefile.in
+++ b/bfd/Makefile.in
@@ -125,7 +125,7 @@ am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
$(top_srcdir)/../config/lib-ld.m4 \
$(top_srcdir)/../config/lib-link.m4 \
$(top_srcdir)/../config/lib-prefix.m4 \
-   $(top_srcdir)/../config/nls.m4 \
+   $(top_srcdir)/../config/mmap.m4 $(top_srcdir)/../config/nls.m4 \
$(top_srcdir)/../config/override.m4 \
$(top_srcdir)/../config/pkg.m4 \
$(top_srcdir)/../config/plugins.m4 \
diff --git a/bfd/aclocal.m4 b/bfd/aclocal.m4
index 4b3fd56cfc9..8364b5dba69 100644
--- a/bfd/aclocal.m4
+++ b/bfd/aclocal.m4
@@ -1180,6 +1180,7 @@ m4_include([../config/lead-dot.m4])
 m4_include([../config/lib-ld.m4])
 m4_include([../config/lib-link.m4])
 m4_include([../config/lib-prefix.m4])
+m4_include([../config/mmap.m4])
 m4_include([../config/nls.m4])
 m4_include([../config/override.m4])
 m4_include([../config/pkg.m4])
diff --git a/bfd/configure b/bfd/configure
index 210550ae042..89fe4388171 100755
--- a/bfd/configure
+++ b/bfd/configure
@@ -17332,6 +17332,9 @@ done
 
 
 
+save_ASAN_OPTIONS="$ASAN_OPTIONS"
+  ASAN_OPTIONS=detect_leaks=0
+  export ASAN_OPTIONS
 
 for ac_func in getpagesize
 do :
@@ -17510,6 +17513,8 @@ $as_echo "#define HAVE_MMAP 1" >>confdefs.h
 fi
 rm -f conftest.mmap conftest.txt
 
+  ASAN_OPTIONS="$save_ASAN_OPTIONS"
+
 for ac_func in madvise mprotect
 do :
   as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
diff --git a/bfd/configure.ac b/bfd/configure.ac
index 02d8ba96318..29ede92b993 100644
--- a/bfd/configure.ac
+++ b/bfd/configure.ac
@@ -1029,7 +1029,7 @@ AC_MSG_RESULT($bfd_file_ptr)
 AC_SUBST(bfd_file_ptr)
 AC_SUBST(bfd_ufile_ptr)
 
-AC_FUNC_MMAP
+GCC_AC_FUNC_MMAP
 AC_CHECK_FUNCS(madvise mprotect)
 case ${want_mmap}+${ac_cv_func_mmap_fixed_mapped} in
   true+yes )  AC_DEFINE(USE_MMAP, 1, [Use mmap if it's available?]) ;;
diff --git a/binutils/Makefile.in b/binutils/Makefile.in
index 842a6d99b54..67fa5b3b8d9 100644
--- a/binutils/Makefile.in
+++ b/binutils/Makefile.in
@@ -152,7 +152,7 @@ am__aclocal_m4_deps = $(top_srcdir)/../bfd/acinclude.m4 \
$(top_srcdir)/../config/lib-ld.m4 \
$(top_srcdir)/../config/lib-link.m4 \
$(top_srcdir)/../config/lib-prefix.m4 \
-   $(top_srcdir)/../config/nls.m4 \
+   $(top_srcdir)/../config/mmap.m4 $(top_srcdir)/../config/nls.m4 \
$(top_srcdir)/../config/override.m4 \
$(top_srcdir)/../config/pkg.m4 \

[PATCH 0/2] mmap: Avoid the sanitizer configure check failure

2024-04-09 Thread H.J. Lu
When -fsanitize=address,undefined is used to build, the mmap configure
check fails with

=
==231796==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 4096 byte(s) in 1 object(s) allocated from:
#0 0x7cdd3d0defdf in __interceptor_malloc 
../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69
#1 0x5750c7f6d72b in main /home/alan/build/gas-san/all/bfd/conftest.c:239

Direct leak of 4096 byte(s) in 1 object(s) allocated from:
#0 0x7cdd3d0defdf in __interceptor_malloc 
../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69
#1 0x5750c7f6d2e1 in main /home/alan/build/gas-san/all/bfd/conftest.c:190

SUMMARY: AddressSanitizer: 8192 byte(s) leaked in 2 allocation(s).

Define GCC_AC_FUNC_MMAP with export ASAN_OPTIONS=detect_leaks=0 to avoid
the sanitizer configure check failure.

H.J. Lu (2):
  mmap: Avoid the sanitizer configure check failure
  mmap: Avoid the sanitizer configure check failure

 bfd/Makefile.in  |  2 +-
 bfd/aclocal.m4   |  1 +
 bfd/configure|  5 +
 bfd/configure.ac |  2 +-
 binutils/Makefile.in |  2 +-
 binutils/aclocal.m4  |  1 +
 binutils/configure   |  5 +
 binutils/configure.ac|  2 +-
 config/mmap.m4   | 12 
 config/no-executables.m4 |  4 ++--
 ld/Makefile.in   |  2 +-
 ld/aclocal.m4|  1 +
 ld/configure |  5 +
 ld/configure.ac  |  2 +-
 libctf/Makefile.in   |  2 +-
 libctf/aclocal.m4|  1 +
 libctf/configure |  5 +
 libctf/configure.ac  |  2 +-
 libiberty/Makefile.in|  1 +
 libiberty/acinclude.m4   |  2 +-
 libiberty/aclocal.m4 |  1 +
 libiberty/configure  |  5 +
 libsframe/Makefile.in|  1 +
 libsframe/aclocal.m4 |  1 +
 libsframe/configure  |  5 +
 libsframe/configure.ac   |  2 +-
 zlib/Makefile.in |  2 +-
 zlib/acinclude.m4|  1 +
 zlib/configure   |  7 ---
 29 files changed, 64 insertions(+), 20 deletions(-)

-- 
2.44.0



[PATCH v2] x86: Define __APX_INLINE_ASM_USE_GPR32__

2024-04-08 Thread H.J. Lu
Define __APX_INLINE_ASM_USE_GPR32__ for -mapx-inline-asm-use-gpr32.
When __APX_INLINE_ASM_USE_GPR32__ is defined, inline asm statements
should contain only instructions compatible with r16-r31.

gcc/

PR target/114587
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__APX_INLINE_ASM_USE_GPR32__ for -mapx-inline-asm-use-gpr32.

gcc/testsuite/

PR target/114587
* gcc.target/i386/apx-3.c: New test.
---
 gcc/config/i386/i386-c.cc | 2 ++
 gcc/testsuite/gcc.target/i386/apx-3.c | 6 ++
 2 files changed, 8 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-3.c

diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index 226d277676c..07f4936ba91 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -751,6 +751,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
 def_or_undef (parse_in, "__AVX10_1_512__");
   if (isa_flag2 & OPTION_MASK_ISA2_APX_F)
 def_or_undef (parse_in, "__APX_F__");
+  if (ix86_apx_inline_asm_use_gpr32)
+def_or_undef (parse_in, "__APX_INLINE_ASM_USE_GPR32__");
   if (TARGET_IAMCU)
 {
   def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/testsuite/gcc.target/i386/apx-3.c 
b/gcc/testsuite/gcc.target/i386/apx-3.c
new file mode 100644
index 000..1ba4ac036fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-3.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mapx-inline-asm-use-gpr32" } */
+
+#ifndef __APX_INLINE_ASM_USE_GPR32__
+# error __APX_INLINE_ASM_USE_GPR32__ not defined
+#endif
-- 
2.44.0



[PATCH] x86: Define macros for APX options

2024-04-08 Thread H.J. Lu
Define the following macros for APX options:

1. __APX_EGPR__: -mapx-features=egpr.
2. __APX_PUSH2POP2__: -mapx-features=push2pop2.
3. __APX_NDD__: -mapx-features=ndd.
4. __APX_PPX__: -mapx-features=ppx.
5. __APX_INLINE_ASM_USE_GPR32__: -mapx-inline-asm-use-gpr32.

They can be used to make assembly code compatible with APX options.
Some use cases are:

1. When __APX_PUSH2POP2__ is defined, assembly code should always align
the outgoing stack to 16 bytes.
2. When __APX_INLINE_ASM_USE_GPR32__ is defined, inline asm statements
should contain only instructions compatible with r16-r31.

gcc/

PR target/114587
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__APX_XXX__ for APX options.

gcc/testsuite/

PR target/114587
* gcc.target/i386/apx-3a.c: New test.
* gcc.target/i386/apx-3b.c: Likewise.
* gcc.target/i386/apx-3c.c: Likewise.
* gcc.target/i386/apx-3d.c: Likewise.
* gcc.target/i386/apx-3e.c: Likewise.
* gcc.target/i386/apx-4.c: Likewise.
---
 gcc/config/i386/i386-c.cc  | 10 ++
 gcc/testsuite/gcc.target/i386/apx-3a.c |  6 ++
 gcc/testsuite/gcc.target/i386/apx-3b.c |  6 ++
 gcc/testsuite/gcc.target/i386/apx-3c.c |  6 ++
 gcc/testsuite/gcc.target/i386/apx-3d.c |  6 ++
 gcc/testsuite/gcc.target/i386/apx-3e.c | 18 ++
 gcc/testsuite/gcc.target/i386/apx-4.c  |  6 ++
 7 files changed, 58 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-3a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-3b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-3c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-3d.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-3e.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-4.c

diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index 226d277676c..b8cfba90fdc 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -751,6 +751,16 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
 def_or_undef (parse_in, "__AVX10_1_512__");
   if (isa_flag2 & OPTION_MASK_ISA2_APX_F)
 def_or_undef (parse_in, "__APX_F__");
+  if (TARGET_APX_EGPR)
+def_or_undef (parse_in, "__APX_EGPR__");
+  if (TARGET_APX_PUSH2POP2)
+def_or_undef (parse_in, "__APX_PUSH2POP2__");
+  if (TARGET_APX_NDD)
+def_or_undef (parse_in, "__APX_NDD__");
+  if (TARGET_APX_PPX)
+def_or_undef (parse_in, "__APX_PPX__");
+  if (ix86_apx_inline_asm_use_gpr32)
+def_or_undef (parse_in, "__APX_INLINE_ASM_USE_GPR32__");
   if (TARGET_IAMCU)
 {
   def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/testsuite/gcc.target/i386/apx-3a.c 
b/gcc/testsuite/gcc.target/i386/apx-3a.c
new file mode 100644
index 000..86d3ef2061d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-3a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mapx-features=egpr" } */
+
+#ifndef __APX_EGPR__
+# error __APX_EGPR__ not defined
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/apx-3b.c 
b/gcc/testsuite/gcc.target/i386/apx-3b.c
new file mode 100644
index 000..611727a389a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-3b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mapx-features=push2pop2" } */
+
+#ifndef __APX_PUSH2POP2__
+# error __APX_PUSH2POP2__ not defined
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/apx-3c.c 
b/gcc/testsuite/gcc.target/i386/apx-3c.c
new file mode 100644
index 000..52655b6cfa5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-3c.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mapx-features=ndd" } */
+
+#ifndef __APX_NDD__
+# error __APX_NDD__ not defined
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/apx-3d.c 
b/gcc/testsuite/gcc.target/i386/apx-3d.c
new file mode 100644
index 000..9b91af1d377
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-3d.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mapx-features=ppx" } */
+
+#ifndef __APX_PPX__
+# error __APX_PPX__ not defined
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/apx-3e.c 
b/gcc/testsuite/gcc.target/i386/apx-3e.c
new file mode 100644
index 000..7278428e5c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-3e.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx" } */
+
+#ifndef __APX_EGPR__
+# error __APX_EGPR__ not defined
+#endif
+
+#ifndef __APX_PUSH2POP2__
+# error __APX_PUSH2POP2__ not defined
+#endif
+
+#ifndef __APX_NDD__
+# error __APX_NDD__ not defined
+#endif
+
+#ifndef __APX_PPX__
+# error __APX_PPX__ not defined
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/apx-4.c 
b/gcc/testsuite/gcc.target/i386/apx-4.c
new file mode 100644
index 000..1ba4ac036fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-4.c
@@ -0,0 +1,6 @@
+/* { 

[PATCH] x86: Use explicit shift count in double-precision shifts

2024-04-05 Thread H.J. Lu
Don't use implicit shift count in double-precision shifts in AT&T syntax
since they aren't in Intel SDM.  Keep the 's' modifier for backward
compatibility with inline asm statements.

PR target/114590
* config/i386/i386.md (x86_64_shld): Use explicit shift count in
AT&T syntax.
(x86_64_shld_ndd): Likewise.
(x86_shld): Likewise.
(x86_shld_ndd): Likewise.
(x86_64_shrd): Likewise.
(x86_64_shrd_ndd): Likewise.
(x86_shrd): Likewise.
(x86_shrd_ndd): Likewise.
---
 gcc/config/i386/i386.md | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6ac401154e4..bb2c72f3473 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14503,7 +14503,7 @@ (define_insn "x86_64_shld"
  (and:QI (match_dup 2) (const_int 63 0)))
(clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
-  "shld{q}\t{%s2%1, %0|%0, %1, %2}"
+  "shld{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
@@ -14524,7 +14524,7 @@ (define_insn "x86_64_shld_ndd"
  (and:QI (match_dup 3) (const_int 63 0)))
(clobber (reg:CC FLAGS_REG))]
   "TARGET_APX_NDD"
-  "shld{q}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
+  "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
(set_attr "mode" "DI")])
 
@@ -14681,7 +14681,7 @@ (define_insn "x86_shld"
  (and:QI (match_dup 2) (const_int 31 0)))
(clobber (reg:CC FLAGS_REG))]
   ""
-  "shld{l}\t{%s2%1, %0|%0, %1, %2}"
+  "shld{l}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "SI")
@@ -14703,7 +14703,7 @@ (define_insn "x86_shld_ndd"
  (and:QI (match_dup 3) (const_int 31 0)))
(clobber (reg:CC FLAGS_REG))]
   "TARGET_APX_NDD"
-  "shld{l}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
+  "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
(set_attr "mode" "SI")])
 
@@ -15792,7 +15792,7 @@ (define_insn "x86_64_shrd"
  (and:QI (match_dup 2) (const_int 63 0)))
(clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
-  "shrd{q}\t{%s2%1, %0|%0, %1, %2}"
+  "shrd{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
@@ -15813,7 +15813,7 @@ (define_insn "x86_64_shrd_ndd"
  (and:QI (match_dup 3) (const_int 63 0)))
(clobber (reg:CC FLAGS_REG))]
   "TARGET_APX_NDD"
-  "shrd{q}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
+  "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
(set_attr "mode" "DI")])
 
@@ -15971,7 +15971,7 @@ (define_insn "x86_shrd"
  (and:QI (match_dup 2) (const_int 31 0)))
(clobber (reg:CC FLAGS_REG))]
   ""
-  "shrd{l}\t{%s2%1, %0|%0, %1, %2}"
+  "shrd{l}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "SI")
@@ -15993,7 +15993,7 @@ (define_insn "x86_shrd_ndd"
  (and:QI (match_dup 3) (const_int 31 0)))
(clobber (reg:CC FLAGS_REG))]
   "TARGET_APX_NDD"
-  "shrd{l}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
+  "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
(set_attr "mode" "SI")])
 
-- 
2.44.0



Re: [PATCH] middle-end/114599 - fix bitmap allocation for check_ifunc_callee_symtab_nodes

2024-04-05 Thread H.J. Lu
On Fri, Apr 5, 2024 at 6:52 AM Richard Biener  wrote:
>
>
>
> > Am 05.04.2024 um 15:46 schrieb H.J. Lu :
> >
> > On Fri, Apr 5, 2024 at 1:21 AM Richard Biener  wrote:
> >>
> >> There's no default bitmap obstack during global CTORs, so allocate the
> >> bitmap locally.
> >>
> >> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
> >>
> >> Richard.
> >>
> >>PR middle-end/114599
> >>* symtab.cc (ifunc_ref_map): Do not use auto_bitmap.
> >>(is_caller_ifunc_resolver): Optimize bitmap_bit_p/bitmap_set_bit
> >>pair.
> >>(symtab_node::check_ifunc_callee_symtab_nodes): Properly
> >>allocate ifunc_ref_map here.
> >> ---
> >> gcc/symtab.cc | 11 +++
> >> 1 file changed, 7 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/gcc/symtab.cc b/gcc/symtab.cc
> >> index 3256133891d..3b018ab3ea2 100644
> >> --- a/gcc/symtab.cc
> >> +++ b/gcc/symtab.cc
> >> @@ -1383,7 +1383,7 @@ check_ifunc_resolver (cgraph_node *node, void *data)
> >>   return false;
> >> }
> >>
> >> -static auto_bitmap ifunc_ref_map;
> >> +static bitmap ifunc_ref_map;
> >>
> >> /* Return true if any caller of NODE is an ifunc resolver.  */
> >>
> >> @@ -1404,9 +1404,8 @@ is_caller_ifunc_resolver (cgraph_node *node)
> >>
> >>   /* Skip if it has been visited.  */
> >>   unsigned int uid = e->caller->get_uid ();
> >> -  if (bitmap_bit_p (ifunc_ref_map, uid))
> >> +  if (!bitmap_set_bit (ifunc_ref_map, uid))
> >>continue;
> >> -  bitmap_set_bit (ifunc_ref_map, uid);
> >>
> >>   if (is_caller_ifunc_resolver (e->caller))
> >>{
> >> @@ -1437,6 +1436,9 @@ symtab_node::check_ifunc_callee_symtab_nodes (void)
> >> {
> >>   symtab_node *node;
> >>
> >> +  bitmap_obstack_initialize (NULL);
> >> +  ifunc_ref_map = BITMAP_ALLOC (NULL);
> >> +
> >>   FOR_EACH_SYMBOL (node)
> >> {
> >>   cgraph_node *cnode = dyn_cast <cgraph_node *> (node);
> >> @@ -1455,7 +1457,8 @@ symtab_node::check_ifunc_callee_symtab_nodes (void)
> >>cnode->called_by_ifunc_resolver = true;
> >> }
> >>
> >> -  bitmap_clear (ifunc_ref_map);
> >> +  BITMAP_FREE (ifunc_ref_map);
> >> +  bitmap_obstack_release (NULL);
> >> }
> >>
> >> /* Verify symbol table for internal consistency.  */
> >> --
> >> 2.35.3
> >
> > The bug isn't fixed:
> >
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114599#c5
>
> Ah, I reproduced with -coverage and that is fixed now.  The still existing 
> bug must be something unrelated.
>

It is a bug in the -fcondition-coverage patch.

-- 
H.J.


Re: [PATCH v10 1/2] Add condition coverage (MC/DC)

2024-04-05 Thread H.J. Lu
On Thu, Apr 4, 2024 at 5:54 AM Jørgen Kvalsvik  wrote:
>
> On 04/04/2024 14:10, Jan Hubicka wrote:
> >> gcc/ChangeLog:
> >>
> >>  * builtins.cc (expand_builtin_fork_or_exec): Check
> >>condition_coverage_flag.
> >>  * collect2.cc (main): Add -fno-condition-coverage to OBSTACK.
> >>  * common.opt: Add new options -fcondition-coverage and
> >>-Wcoverage-too-many-conditions.
> >>  * doc/gcov.texi: Add --conditions documentation.
> >>  * doc/invoke.texi: Add -fcondition-coverage documentation.
> >>  * function.cc (free_after_compilation): Free cond_uids.
> >>  * function.h (struct function): Add cond_uids.
> >>  * gcc.cc: Link gcov on -fcondition-coverage.
> >>  * gcov-counter.def (GCOV_COUNTER_CONDS): New.
> >>  * gcov-dump.cc (tag_conditions): New.
> >>  * gcov-io.h (GCOV_TAG_CONDS): New.
> >>  (GCOV_TAG_CONDS_LENGTH): New.
> >>  (GCOV_TAG_CONDS_NUM): New.
> >>  * gcov.cc (class condition_info): New.
> >>  (condition_info::condition_info): New.
> >>  (condition_info::popcount): New.
> >>  (struct coverage_info): New.
> >>  (add_condition_counts): New.
> >>  (output_conditions): New.
> >>  (print_usage): Add -g, --conditions.
> >>  (process_args): Likewise.
> >>  (output_intermediate_json_line): Output conditions.
> >>  (read_graph_file): Read condition counters.
> >>  (read_count_file): Likewise.
> >>  (file_summary): Print conditions.
> >>  (accumulate_line_info): Accumulate conditions.
> >>  (output_line_details): Print conditions.
> >>  * gimplify.cc (next_cond_uid): New.
> >>  (reset_cond_uid): New.
> >>  (shortcut_cond_r): Set condition discriminator.
> >>  (tag_shortcut_cond): New.
> >>  (gimple_associate_condition_with_expr): New.
> >>  (shortcut_cond_expr): Set condition discriminator.
> >>  (gimplify_cond_expr): Likewise.
> >>  (gimplify_function_tree): Call reset_cond_uid.
> >>  * ipa-inline.cc (can_early_inline_edge_p): Check
> >>condition_coverage_flag.
> >>  * ipa-split.cc (pass_split_functions::gate): Likewise.
> >>  * passes.cc (finish_optimization_passes): Likewise.
> >>  * profile.cc (struct condcov): New declaration.
> >>  (cov_length): Likewise.
> >>  (cov_blocks): Likewise.
> >>  (cov_masks): Likewise.
> >>  (cov_maps): Likewise.
> >>  (cov_free): Likewise.
> >>  (instrument_decisions): New.
> >>  (read_thunk_profile): Control output to file.
> >>  (branch_prob): Call find_conditions, instrument_decisions.
> >>  (init_branch_prob): Add total_num_conds.
> >>  (end_branch_prob): Likewise.
> >>  * tree-core.h (struct tree_exp): Add condition_uid.
> >>  * tree-profile.cc (struct conds_ctx): New.
> >>  (CONDITIONS_MAX_TERMS): New.
> >>  (EDGE_CONDITION): New.
> >>  (topological_cmp): New.
> >>  (index_of): New.
> >>  (single_p): New.
> >>  (single_edge): New.
> >>  (contract_edge_up): New.
> >>  (struct outcomes): New.
> >>  (conditional_succs): New.
> >>  (condition_index): New.
> >>  (condition_uid): New.
> >>  (masking_vectors): New.
> >>  (emit_assign): New.
> >>  (emit_bitwise_op): New.
> >>  (make_top_index_visit): New.
> >>  (make_top_index): New.
> >>  (paths_between): New.
> >>  (struct condcov): New.
> >>  (cov_length): New.
> >>  (cov_blocks): New.
> >>  (cov_masks): New.
> >>  (cov_maps): New.
> >>  (cov_free): New.
> >>  (find_conditions): New.
> >>  (struct counters): New.
> >>  (find_counters): New.
> >>  (resolve_counter): New.
> >>  (resolve_counters): New.
> >>  (instrument_decisions): New.
> >>  (tree_profiling): Check condition_coverage_flag.
> >>  (pass_ipa_tree_profile::gate): Likewise.
> >>  * tree.h (SET_EXPR_UID): New.
> >>  (EXPR_COND_UID): New.
> >>
> >> libgcc/ChangeLog:
> >>
> >>  * libgcov-merge.c (__gcov_merge_ior): New.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >>  * lib/gcov.exp: Add condition coverage test function.
> >>  * g++.dg/gcov/gcov-18.C: New test.
> >>  * gcc.misc-tests/gcov-19.c: New test.
> >>  * gcc.misc-tests/gcov-20.c: New test.
> >>  * gcc.misc-tests/gcov-21.c: New test.
> >>  * gcc.misc-tests/gcov-22.c: New test.
> >>  * gcc.misc-tests/gcov-23.c: New test.
> >> ---
> >>   gcc/builtins.cc|2 +-
> >>   gcc/collect2.cc|7 +-
> >>   gcc/common.opt |9 +
> >>   gcc/doc/gcov.texi  |   38 +
> >>   gcc/doc/invoke.texi|   21 +
> >>   gcc/function.cc|1 +
> >>   gcc/function.h |4 +
> >>   gcc/gcc.cc |4 +-
> >>   gcc/gcov-counter.def   |3 +
> >>   gcc/gcov-dump.cc   |   24 +
> >>   gcc/gcov-io.h  |3 

Re: [PATCH] middle-end/114599 - fix bitmap allocation for check_ifunc_callee_symtab_nodes

2024-04-05 Thread H.J. Lu
On Fri, Apr 5, 2024 at 1:21 AM Richard Biener  wrote:
>
> There's no default bitmap obstack during global CTORs, so allocate the
> bitmap locally.
>
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>
> Richard.
>
> PR middle-end/114599
> * symtab.cc (ifunc_ref_map): Do not use auto_bitmap.
> (is_caller_ifunc_resolver): Optimize bitmap_bit_p/bitmap_set_bit
> pair.
> (symtab_node::check_ifunc_callee_symtab_nodes): Properly
> allocate ifunc_ref_map here.
> ---
>  gcc/symtab.cc | 11 +++
>  1 file changed, 7 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/symtab.cc b/gcc/symtab.cc
> index 3256133891d..3b018ab3ea2 100644
> --- a/gcc/symtab.cc
> +++ b/gcc/symtab.cc
> @@ -1383,7 +1383,7 @@ check_ifunc_resolver (cgraph_node *node, void *data)
>return false;
>  }
>
> -static auto_bitmap ifunc_ref_map;
> +static bitmap ifunc_ref_map;
>
>  /* Return true if any caller of NODE is an ifunc resolver.  */
>
> @@ -1404,9 +1404,8 @@ is_caller_ifunc_resolver (cgraph_node *node)
>
>/* Skip if it has been visited.  */
>unsigned int uid = e->caller->get_uid ();
> -  if (bitmap_bit_p (ifunc_ref_map, uid))
> +  if (!bitmap_set_bit (ifunc_ref_map, uid))
> continue;
> -  bitmap_set_bit (ifunc_ref_map, uid);
>
>if (is_caller_ifunc_resolver (e->caller))
> {
> @@ -1437,6 +1436,9 @@ symtab_node::check_ifunc_callee_symtab_nodes (void)
>  {
>symtab_node *node;
>
> +  bitmap_obstack_initialize (NULL);
> +  ifunc_ref_map = BITMAP_ALLOC (NULL);
> +
>FOR_EACH_SYMBOL (node)
>  {
>cgraph_node *cnode = dyn_cast <cgraph_node *> (node);
> @@ -1455,7 +1457,8 @@ symtab_node::check_ifunc_callee_symtab_nodes (void)
> cnode->called_by_ifunc_resolver = true;
>  }
>
> -  bitmap_clear (ifunc_ref_map);
> +  BITMAP_FREE (ifunc_ref_map);
> +  bitmap_obstack_release (NULL);
>  }
>
>  /* Verify symbol table for internal consistency.  */
> --
> 2.35.3

The bug isn't fixed:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114599#c5

-- 
H.J.


Re: [PATCH v3] tree-profile: Disable indirect call profiling for IFUNC resolvers

2024-04-04 Thread H.J. Lu
On Thu, Apr 4, 2024 at 5:34 PM  wrote:
>
> On 3 April 2024 15:49:13 CEST, "H.J. Lu"  wrote:
>
>
> >> OK with that change.
> >> Honza
> >
> >I am checking in this patch with the updated comments:
> >
> >  /* Disable indirect call profiling for an IFUNC resolver and its
> > callees since it requires TLS which hasn't been set up yet when
> > the dynamic linker is resolving IFUNC symbols.  See
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114115
> >   */
> >
> >Thanks.
> >
>
> +  /* Skip if it has been visited.  */
> +  unsigned int uid = e->caller->get_uid ();
> +  if (bitmap_bit_p (ifunc_ref_map, uid))
> +   continue;
> +  bitmap_set_bit (ifunc_ref_map, uid);
>
> I think you could have written this as
> if (!bitmap_set_bit (ifunc_ref_map, uid))
>   continue;
>

Feel free to submit a patch.

Thanks.

-- 
H.J.


[PATCH] x86: Define __APX_F__ for -mapxf

2024-04-04 Thread H.J. Lu
Define __APX_F__ when APX is enabled.

gcc/

PR target/114587
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__APX_F__ when APX is enabled.

gcc/testsuite/

PR target/114587
* gcc.target/i386/apx-2.c: New test.
---
 gcc/config/i386/i386-c.cc | 2 ++
 gcc/testsuite/gcc.target/i386/apx-2.c | 6 ++
 2 files changed, 8 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-2.c

diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index 114908c7ec0..226d277676c 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -749,6 +749,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
 }
   if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1_512)
 def_or_undef (parse_in, "__AVX10_1_512__");
+  if (isa_flag2 & OPTION_MASK_ISA2_APX_F)
+def_or_undef (parse_in, "__APX_F__");
   if (TARGET_IAMCU)
 {
   def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/testsuite/gcc.target/i386/apx-2.c 
b/gcc/testsuite/gcc.target/i386/apx-2.c
new file mode 100644
index 000..2f6439e4b23
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mapxf" } */
+
+#ifndef __APX_F__
+# error __APX_F__ not defined
+#endif
-- 
2.44.0



Re: [PATCH v3] tree-profile: Disable indirect call profiling for IFUNC resolvers

2024-04-03 Thread H.J. Lu
On Wed, Apr 3, 2024 at 8:31 AM Peter Bergner  wrote:
>
> On 4/3/24 7:40 AM, H.J. Lu wrote:
> > We can't profile indirect calls to IFUNC resolvers nor their callees as
> > it requires TLS which hasn't been set up yet when the dynamic linker is
> > resolving IFUNC symbols.
> >
> > Add an IFUNC resolver caller marker to cgraph_node and set it if the
> > function is called by an IFUNC resolver.  Disable indirect call profiling
> > for IFUNC resolvers and their callees.
>
> The IFUNC resolvers on Power do not use TLS, so isn't this a little too
> conservative?  Should this be triggered via a target hook so architectures
> that don't use TLS in their IFUNC resolvers could still profile them?

It is not about IFUNC resolver using TLS.   TLS is used by indirect call
profiling which hasn't been set up yet when the dynamic linker is resolving
IFUNC symbols.   Doesn't Power need to set up TLS before using TLS?

-- 
H.J.


Re: [PATCH v3] tree-profile: Disable indirect call profiling for IFUNC resolvers

2024-04-03 Thread H.J. Lu
On Wed, Apr 3, 2024 at 6:38 AM Jan Hubicka  wrote:
>
> > We can't profile indirect calls to IFUNC resolvers nor their callees as
> > it requires TLS which hasn't been set up yet when the dynamic linker is
> > resolving IFUNC symbols.
> >
> > Add an IFUNC resolver caller marker to cgraph_node and set it if the
> > function is called by an IFUNC resolver.  Disable indirect call profiling
> > for IFUNC resolvers and their callees.
> >
> > Tested with profiledbootstrap on Fedora 39/x86-64.
> >
> > gcc/ChangeLog:
> >
> >   PR tree-optimization/114115
> >   * cgraph.h (symtab_node): Add check_ifunc_callee_symtab_nodes.
> >   (cgraph_node): Add called_by_ifunc_resolver.
> >   * cgraphunit.cc (symbol_table::compile): Call
> >   symtab_node::check_ifunc_callee_symtab_nodes.
> >   * symtab.cc (check_ifunc_resolver): New.
> >   (ifunc_ref_map): Likewise.
> >   (is_caller_ifunc_resolver): Likewise.
> >   (symtab_node::check_ifunc_callee_symtab_nodes): Likewise.
> >   * tree-profile.cc (gimple_gen_ic_func_profiler): Disable indirect
> >   call profiling for IFUNC resolvers and their callees.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   PR tree-optimization/114115
> >   * gcc.dg/pr114115.c: New test.
> > +/* { dg-final { scan-tree-dump-not "__gcov_indirect_call_profiler_v" 
> > "optimized" } } */
> > diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc
> > index aed13e2b1bc..373dbd60481 100644
> > --- a/gcc/tree-profile.cc
> > +++ b/gcc/tree-profile.cc
> > @@ -520,7 +520,10 @@ gimple_gen_ic_func_profiler (void)
> >gcall *stmt1;
> >tree tree_uid, cur_func, void0;
> >
> > -  if (c_node->only_called_directly_p ())
> > +  /* Disable indirect call profiling for an IFUNC resolver and its
> > + callees.  */
> Please add a comment here referring to the PR and need to have TLS
> initialized.
>
> OK with that change.
> Honza

I am checking in this patch with the updated comments:

  /* Disable indirect call profiling for an IFUNC resolver and its
     callees since it requires TLS which hasn't been set up yet when
     the dynamic linker is resolving IFUNC symbols.  See
     https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114115
   */

Thanks.

-- 
H.J.
From 65d320ea9652d6b5f68d8b9057838224a8e2322e Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Mon, 26 Feb 2024 08:38:58 -0800
Subject: [PATCH] tree-profile: Disable indirect call profiling for IFUNC
 resolvers

We can't profile indirect calls to IFUNC resolvers nor their callees as
it requires TLS which hasn't been set up yet when the dynamic linker is
resolving IFUNC symbols.

Add an IFUNC resolver caller marker to cgraph_node and set it if the
function is called by an IFUNC resolver.  Disable indirect call profiling
for IFUNC resolvers and their callees.

Tested with profiledbootstrap on Fedora 39/x86-64.

gcc/ChangeLog:

	PR tree-optimization/114115
	* cgraph.h (symtab_node): Add check_ifunc_callee_symtab_nodes.
	(cgraph_node): Add called_by_ifunc_resolver.
	* cgraphunit.cc (symbol_table::compile): Call
	symtab_node::check_ifunc_callee_symtab_nodes.
	* symtab.cc (check_ifunc_resolver): New.
	(ifunc_ref_map): Likewise.
	(is_caller_ifunc_resolver): Likewise.
	(symtab_node::check_ifunc_callee_symtab_nodes): Likewise.
	* tree-profile.cc (gimple_gen_ic_func_profiler): Disable indirect
	call profiling for IFUNC resolvers and their callees.

gcc/testsuite/ChangeLog:

	PR tree-optimization/114115
	* gcc.dg/pr114115.c: New test.
---
 gcc/cgraph.h|  6 +++
 gcc/cgraphunit.cc   |  2 +
 gcc/symtab.cc   | 89 +
 gcc/testsuite/gcc.dg/pr114115.c | 24 +
 gcc/tree-profile.cc |  8 ++-
 5 files changed, 128 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr114115.c

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 47f35e8078d..a8c3224802c 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -479,6 +479,9 @@ public:
  Return NULL if there's no such node.  */
   static symtab_node *get_for_asmname (const_tree asmname);
 
+  /* Check symbol table for callees of IFUNC resolvers.  */
+  static void check_ifunc_callee_symtab_nodes (void);
+
   /* Verify symbol table for internal consistency.  */
   static DEBUG_FUNCTION void verify_symtab_nodes (void);
 
@@ -896,6 +899,7 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node
   redefined_extern_inline (false), tm_may_enter_irr (false),
   ipcp_clone (false), declare_variant_alt (false),
   calls_declare_variant_alt (false), gc_candidate (false),
+  called_by_ifunc_resolver (false),
   m_uid (uid), m_summary_id (-1)
   {}

Re: PING: [PATCH v2] tree-profile: Don't instrument an IFUNC resolver nor its callees

2024-04-03 Thread H.J. Lu
On Tue, Apr 2, 2024 at 10:03 AM Jan Hubicka  wrote:
>
> > > I am a bit worried about commonly used functions getting "infected" by
> > > being called once from ifunc resolver.  I think we only use thread local
> > > storage for indirect call profiling, so we may just disable indirect
> > > call profiling for these functions.
> >
> > Will change it.
> >
> > > Also the patch will be noop with -flto -flto-partition=max, so probably
> > > we need to compute this flag at WPA time and stream to partitions.
> > >
> >
> > Why is it a nop with -flto -flto-partition=max? I got
> >
> > (gdb) bt
> > #0  symtab_node::check_ifunc_callee_symtab_nodes ()
> > at /export/gnu/import/git/gitlab/x86-gcc/gcc/symtab.cc:1440
> > #1  0x00e487d3 in symbol_table::compile (this=0x7fffea006000)
> > at /export/gnu/import/git/gitlab/x86-gcc/gcc/cgraphunit.cc:2320
> > #2  0x00d23ecf in lto_main ()
> > at /export/gnu/import/git/gitlab/x86-gcc/gcc/lto/lto.cc:687
> > #3  0x015254d2 in compile_file ()
> > at /export/gnu/import/git/gitlab/x86-gcc/gcc/toplev.cc:449
> > #4  0x015284a4 in do_compile ()
> > at /export/gnu/import/git/gitlab/x86-gcc/gcc/toplev.cc:2154
> > #5  0x01528864 in toplev::main (this=0x7fffd84a, argc=16,
> > argv=0x42261f0) at 
> > /export/gnu/import/git/gitlab/x86-gcc/gcc/toplev.cc:2310
> > #6  0x030a3fe2 in main (argc=16, argv=0x7fffd958)
> > at /export/gnu/import/git/gitlab/x86-gcc/gcc/main.cc:39
> >
> > Do you have a testcase to show that it is a nop?
> Aha, sorry.  I thought this was run during late optimization, but it is
> done early, so LTO partitioning does not mix things up.  So the current
> patch, modified to disable only instrumentation that needs TLS, should be
> fine.
>

Done.  Here is the v3 patch:

https://gcc.gnu.org/pipermail/gcc-patches/2024-April/648733.html

-- 
H.J.


[PATCH v3] tree-profile: Disable indirect call profiling for IFUNC resolvers

2024-04-03 Thread H.J. Lu
We can't profile indirect calls to IFUNC resolvers nor their callees as
it requires TLS which hasn't been set up yet when the dynamic linker is
resolving IFUNC symbols.

Add an IFUNC resolver caller marker to cgraph_node and set it if the
function is called by an IFUNC resolver.  Disable indirect call profiling
for IFUNC resolvers and their callees.

Tested with profiledbootstrap on Fedora 39/x86-64.

gcc/ChangeLog:

PR tree-optimization/114115
* cgraph.h (symtab_node): Add check_ifunc_callee_symtab_nodes.
(cgraph_node): Add called_by_ifunc_resolver.
* cgraphunit.cc (symbol_table::compile): Call
symtab_node::check_ifunc_callee_symtab_nodes.
* symtab.cc (check_ifunc_resolver): New.
(ifunc_ref_map): Likewise.
(is_caller_ifunc_resolver): Likewise.
(symtab_node::check_ifunc_callee_symtab_nodes): Likewise.
* tree-profile.cc (gimple_gen_ic_func_profiler): Disable indirect
call profiling for IFUNC resolvers and their callees.

gcc/testsuite/ChangeLog:

PR tree-optimization/114115
* gcc.dg/pr114115.c: New test.
---
 gcc/cgraph.h|  6 +++
 gcc/cgraphunit.cc   |  2 +
 gcc/symtab.cc   | 89 +
 gcc/testsuite/gcc.dg/pr114115.c | 24 +
 gcc/tree-profile.cc |  5 +-
 5 files changed, 125 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr114115.c

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 47f35e8078d..a8c3224802c 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -479,6 +479,9 @@ public:
  Return NULL if there's no such node.  */
   static symtab_node *get_for_asmname (const_tree asmname);
 
+  /* Check symbol table for callees of IFUNC resolvers.  */
+  static void check_ifunc_callee_symtab_nodes (void);
+
   /* Verify symbol table for internal consistency.  */
   static DEBUG_FUNCTION void verify_symtab_nodes (void);
 
@@ -896,6 +899,7 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public 
symtab_node
   redefined_extern_inline (false), tm_may_enter_irr (false),
   ipcp_clone (false), declare_variant_alt (false),
   calls_declare_variant_alt (false), gc_candidate (false),
+  called_by_ifunc_resolver (false),
   m_uid (uid), m_summary_id (-1)
   {}
 
@@ -1495,6 +1499,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : 
public symtab_node
  is set for local SIMD clones when they are created and cleared if the
  vectorizer uses them.  */
   unsigned gc_candidate : 1;
+  /* Set if the function is called by an IFUNC resolver.  */
+  unsigned called_by_ifunc_resolver : 1;
 
 private:
   /* Unique id of the node.  */
diff --git a/gcc/cgraphunit.cc b/gcc/cgraphunit.cc
index d200166f7e9..2bd0289ffba 100644
--- a/gcc/cgraphunit.cc
+++ b/gcc/cgraphunit.cc
@@ -2317,6 +2317,8 @@ symbol_table::compile (void)
 
   symtab_node::checking_verify_symtab_nodes ();
 
+  symtab_node::check_ifunc_callee_symtab_nodes ();
+
   timevar_push (TV_CGRAPHOPT);
   if (pre_ipa_mem_report)
 dump_memory_report ("Memory consumption before IPA");
diff --git a/gcc/symtab.cc b/gcc/symtab.cc
index 4c7e3c135ca..3256133891d 100644
--- a/gcc/symtab.cc
+++ b/gcc/symtab.cc
@@ -1369,6 +1369,95 @@ symtab_node::verify (void)
   timevar_pop (TV_CGRAPH_VERIFY);
 }
 
+/* Return true and set *DATA to true if NODE is an ifunc resolver.  */
+
+static bool
+check_ifunc_resolver (cgraph_node *node, void *data)
+{
+  if (node->ifunc_resolver)
+{
+  bool *is_ifunc_resolver = (bool *) data;
+  *is_ifunc_resolver = true;
+  return true;
+}
+  return false;
+}
+
+static auto_bitmap ifunc_ref_map;
+
+/* Return true if any caller of NODE is an ifunc resolver.  */
+
+static bool
+is_caller_ifunc_resolver (cgraph_node *node)
+{
+  bool is_ifunc_resolver = false;
+
+  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
+{
+  /* Return true if caller is known to be an IFUNC resolver.  */
+  if (e->caller->called_by_ifunc_resolver)
+   return true;
+
+  /* Check for recursive call.  */
+  if (e->caller == node)
+   continue;
+
+  /* Skip if it has been visited.  */
+  unsigned int uid = e->caller->get_uid ();
+  if (bitmap_bit_p (ifunc_ref_map, uid))
+   continue;
+  bitmap_set_bit (ifunc_ref_map, uid);
+
+  if (is_caller_ifunc_resolver (e->caller))
+   {
+ /* Return true if caller is an IFUNC resolver.  */
+ e->caller->called_by_ifunc_resolver = true;
+ return true;
+   }
+
+  /* Check if caller's alias is an IFUNC resolver.  */
+  e->caller->call_for_symbol_and_aliases (check_ifunc_resolver,
+ &is_ifunc_resolver,
+ true);
+  if (is_ifunc_resolver)
+   {
+ /* Return true if caller's alias is an IFUNC resolver.  */
+ e->caller->called_by_ifunc_resolver = true;
+ return true;
+   }

Re: PING: [PATCH v2] tree-profile: Don't instrument an IFUNC resolver nor its callees

2024-04-02 Thread H.J. Lu
On Tue, Apr 2, 2024 at 7:50 AM Jan Hubicka  wrote:
>
> > On Tue, Mar 5, 2024 at 1:45 PM H.J. Lu  wrote:
> > >
> > > We can't instrument an IFUNC resolver nor its callees as it may require
> > > TLS which hasn't been set up yet when the dynamic linker is resolving
> > > IFUNC symbols.
> > >
> > > Add an IFUNC resolver caller marker to cgraph_node and set it if the
> > > function is called by an IFUNC resolver.  Update tree_profiling to skip
> > > functions called by IFUNC resolver.
> > >
> > > Tested with profiledbootstrap on Fedora 39/x86-64.
> > >
> > > gcc/ChangeLog:
> > >
> > > PR tree-optimization/114115
> > > * cgraph.h (symtab_node): Add check_ifunc_callee_symtab_nodes.
> > > (cgraph_node): Add called_by_ifunc_resolver.
> > > * cgraphunit.cc (symbol_table::compile): Call
> > > symtab_node::check_ifunc_callee_symtab_nodes.
> > > * symtab.cc (check_ifunc_resolver): New.
> > > (ifunc_ref_map): Likewise.
> > > (is_caller_ifunc_resolver): Likewise.
> > > (symtab_node::check_ifunc_callee_symtab_nodes): Likewise.
> > > * tree-profile.cc (tree_profiling): Do not instrument an IFUNC
> > > resolver nor its callees.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > PR tree-optimization/114115
> > > * gcc.dg/pr114115.c: New test.
> >
> > PING.
>
> I am a bit worried about commonly used functions getting "infected" by
> being called once from ifunc resolver.  I think we only use thread local
> storage for indirect call profiling, so we may just disable indirect
> call profiling for these functions.

Will change it.

> Also the patch will be noop with -flto -flto-partition=max, so probably
> we need to compute this flag at WPA time and stream to partitions.
>

Why is it a nop with -flto -flto-partition=max? I got

(gdb) bt
#0  symtab_node::check_ifunc_callee_symtab_nodes ()
at /export/gnu/import/git/gitlab/x86-gcc/gcc/symtab.cc:1440
#1  0x00e487d3 in symbol_table::compile (this=0x7fffea006000)
at /export/gnu/import/git/gitlab/x86-gcc/gcc/cgraphunit.cc:2320
#2  0x00d23ecf in lto_main ()
at /export/gnu/import/git/gitlab/x86-gcc/gcc/lto/lto.cc:687
#3  0x015254d2 in compile_file ()
at /export/gnu/import/git/gitlab/x86-gcc/gcc/toplev.cc:449
#4  0x015284a4 in do_compile ()
at /export/gnu/import/git/gitlab/x86-gcc/gcc/toplev.cc:2154
#5  0x01528864 in toplev::main (this=0x7fffd84a, argc=16,
argv=0x42261f0) at /export/gnu/import/git/gitlab/x86-gcc/gcc/toplev.cc:2310
#6  0x030a3fe2 in main (argc=16, argv=0x7fffd958)
at /export/gnu/import/git/gitlab/x86-gcc/gcc/main.cc:39

Do you have a testcase to show that it is a nop?

-- 
H.J.


PING: [PATCH v2] tree-profile: Don't instrument an IFUNC resolver nor its callees

2024-04-02 Thread H.J. Lu
On Tue, Mar 5, 2024 at 1:45 PM H.J. Lu  wrote:
>
> We can't instrument an IFUNC resolver nor its callees as it may require
> TLS which hasn't been set up yet when the dynamic linker is resolving
> IFUNC symbols.
>
> Add an IFUNC resolver caller marker to cgraph_node and set it if the
> function is called by an IFUNC resolver.  Update tree_profiling to skip
> functions called by IFUNC resolver.
>
> Tested with profiledbootstrap on Fedora 39/x86-64.
>
> gcc/ChangeLog:
>
> PR tree-optimization/114115
> * cgraph.h (symtab_node): Add check_ifunc_callee_symtab_nodes.
> (cgraph_node): Add called_by_ifunc_resolver.
> * cgraphunit.cc (symbol_table::compile): Call
> symtab_node::check_ifunc_callee_symtab_nodes.
> * symtab.cc (check_ifunc_resolver): New.
> (ifunc_ref_map): Likewise.
> (is_caller_ifunc_resolver): Likewise.
> (symtab_node::check_ifunc_callee_symtab_nodes): Likewise.
> * tree-profile.cc (tree_profiling): Do not instrument an IFUNC
> resolver nor its callees.
>
> gcc/testsuite/ChangeLog:
>
> PR tree-optimization/114115
> * gcc.dg/pr114115.c: New test.
> ---
>  gcc/cgraph.h|  6 +++
>  gcc/cgraphunit.cc   |  2 +
>  gcc/symtab.cc   | 89 +
>  gcc/testsuite/gcc.dg/pr114115.c | 24 +
>  gcc/tree-profile.cc |  4 ++
>  5 files changed, 125 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/pr114115.c
>
> diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> index 47f35e8078d..a8c3224802c 100644
> --- a/gcc/cgraph.h
> +++ b/gcc/cgraph.h
> @@ -479,6 +479,9 @@ public:
>   Return NULL if there's no such node.  */
>static symtab_node *get_for_asmname (const_tree asmname);
>
> +  /* Check symbol table for callees of IFUNC resolvers.  */
> +  static void check_ifunc_callee_symtab_nodes (void);
> +
>/* Verify symbol table for internal consistency.  */
>static DEBUG_FUNCTION void verify_symtab_nodes (void);
>
> @@ -896,6 +899,7 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : 
> public symtab_node
>redefined_extern_inline (false), tm_may_enter_irr (false),
>ipcp_clone (false), declare_variant_alt (false),
>calls_declare_variant_alt (false), gc_candidate (false),
> +  called_by_ifunc_resolver (false),
>m_uid (uid), m_summary_id (-1)
>{}
>
> @@ -1495,6 +1499,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : 
> public symtab_node
>   is set for local SIMD clones when they are created and cleared if the
>   vectorizer uses them.  */
>unsigned gc_candidate : 1;
> +  /* Set if the function is called by an IFUNC resolver.  */
> +  unsigned called_by_ifunc_resolver : 1;
>
>  private:
>/* Unique id of the node.  */
> diff --git a/gcc/cgraphunit.cc b/gcc/cgraphunit.cc
> index d200166f7e9..2bd0289ffba 100644
> --- a/gcc/cgraphunit.cc
> +++ b/gcc/cgraphunit.cc
> @@ -2317,6 +2317,8 @@ symbol_table::compile (void)
>
>symtab_node::checking_verify_symtab_nodes ();
>
> +  symtab_node::check_ifunc_callee_symtab_nodes ();
> +
>timevar_push (TV_CGRAPHOPT);
>if (pre_ipa_mem_report)
>  dump_memory_report ("Memory consumption before IPA");
> diff --git a/gcc/symtab.cc b/gcc/symtab.cc
> index 4c7e3c135ca..3256133891d 100644
> --- a/gcc/symtab.cc
> +++ b/gcc/symtab.cc
> @@ -1369,6 +1369,95 @@ symtab_node::verify (void)
>timevar_pop (TV_CGRAPH_VERIFY);
>  }
>
> +/* Return true and set *DATA to true if NODE is an ifunc resolver.  */
> +
> +static bool
> +check_ifunc_resolver (cgraph_node *node, void *data)
> +{
> +  if (node->ifunc_resolver)
> +{
> +  bool *is_ifunc_resolver = (bool *) data;
> +  *is_ifunc_resolver = true;
> +  return true;
> +}
> +  return false;
> +}
> +
> +static auto_bitmap ifunc_ref_map;
> +
> +/* Return true if any caller of NODE is an ifunc resolver.  */
> +
> +static bool
> +is_caller_ifunc_resolver (cgraph_node *node)
> +{
> +  bool is_ifunc_resolver = false;
> +
> +  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
> +{
> +  /* Return true if caller is known to be an IFUNC resolver.  */
> +  if (e->caller->called_by_ifunc_resolver)
> +   return true;
> +
> +  /* Check for recursive call.  */
> +  if (e->caller == node)
> +   continue;
> +
> +  /* Skip if it has been visited.  */
> +  unsigned int uid = e->caller->get_uid ();
> +  if (bitmap_bit_p (ifunc_ref_map, uid))
> +   continue;
> +  bitmap_set_bit (

Re: libbacktrace patch committed: Don't assume compressed section aligned

2024-03-08 Thread H.J. Lu
On Fri, Mar 8, 2024 at 2:48 PM Fangrui Song  wrote:
>
> On ELF64, it looks like BFD uses 8-byte alignment for compressed
> `.debug_*` sections while gold/lld/mold use 1-byte alignment. I do not
> know how the Solaris linker sets the alignment.
>
> The specification's wording makes me confused whether it really
> requires 8-byte alignment, even if a non-packed `Elf64_Chdr` surely
> requires 8.

Since compressed sections begin with a compression header
structure that identifies the compression algorithm, compressed
sections must be aligned to the alignment of the compression
header.  I don't think there is any ambiguity here.

> > The sh_size and sh_addralign fields of the section header for a compressed 
> > section reflect the requirements of the compressed section.
>
> There are many `.debug_*` sections. So avoiding some alignment padding
> seems a very natural extension (a DWARF v5 -gsplit-dwarf relocatable
> file has ~10 `.debug_*` sections), even if the specification doesn't
> allow it with a very strict interpretation...
>
> (Off-topic: I wonder whether ELF control structures should use
> unaligned LEB128 more. REL/RELA can naturally be replaced with a
> LEB128 one similar to wasm.)
>
> On Fri, Mar 8, 2024 at 1:57 PM Ian Lance Taylor  wrote:
> >
> > Reportedly when lld compresses debug sections, it fails to set the
> > alignment of the compressed section such that the compressed header
> > can be read directly.  To me this seems like a bug in lld.  However,
> > libbacktrace needs to work around it.  This patch, originally by the
> > GitHub user ubyte, does that.  Bootstrapped and tested on
> > x86_64-pc-linux-gnu.  Committed to mainline.
> >
> > Ian
> >
> > * elf.c (elf_uncompress_chdr): Don't assume compressed section is
> > aligned.
>
>
>
> --
> 宋方睿



-- 
H.J.


Re: [C++ coroutines] Initial implementation pushed to master.

2024-03-06 Thread H.J. Lu
On Wed, Mar 6, 2024 at 1:03 AM Iain Sandoe  wrote:
>
>
>
> > On 5 Mar 2024, at 17:31, H.J. Lu  wrote:
> >
> > On Sat, Jan 18, 2020 at 4:54 AM Iain Sandoe  wrote:
> >>
>
> >> 2020-01-18  Iain Sandoe  
> >>
> >>* Makefile.in: Add coroutine-passes.o.
> >>* builtin-types.def (BT_CONST_SIZE): New.
> >>(BT_FN_BOOL_PTR): New.
> >>(BT_FN_PTR_PTR_CONST_SIZE_BOOL): New.
> >>* builtins.def (DEF_COROUTINE_BUILTIN): New.
> >>* coroutine-builtins.def: New file.
> >>* coroutine-passes.cc: New file.
> >
> > There are
> >
> >  tree res_tgt = TREE_OPERAND (gimple_call_arg (stmt, 2), 0);
> >  tree &res_dest = destinations.get_or_insert (idx, &existed);
> >  if (existed && dump_file)
> >Why does this behavior depend on dump_file?
>
> This was checking for a potential wrong-code error during development;
> there is no point in making it into a diagnostic (since the user could not fix
> the problem if it happened).  I guess changing to a gcc_checking_assert()
> would be reasonable but I’d prefer to do that once GCC-15 opens.
>
> Have you found any instance where this results in a reported bug?

No, I haven't.  I only noticed it by chance.

> (I do not recall anything on my coroutines bug list that would seem to 
> indicate this).
>
> thanks for noting it.
> Iain
>
>
> >{
> >  fprintf (
> >dump_file,
> >"duplicate YIELD RESUME point (" HOST_WIDE_INT_PRINT_DEC
> >") ?\n",
> >idx);
> >  print_gimple_stmt (dump_file, stmt, 0, 
> > TDF_VOPS|TDF_MEMSYMS);
> >}
> >  else
> >res_dest = res_tgt;
> >
> > H.J.
>


-- 
H.J.


Re: [PATCH] tree-profile: Don't instrument an IFUNC resolver nor its callees

2024-03-05 Thread H.J. Lu
On Thu, Feb 29, 2024 at 7:11 AM H.J. Lu  wrote:
>
> On Thu, Feb 29, 2024 at 7:06 AM Jan Hubicka  wrote:
> >
> > > > I am worried about scenario where ifunc selector calls function foo
> > > > defined locally and foo is also used from other places possibly in hot
> > > > loops.
> > > > >
> > > > > > So it is not really reliable fix (though I guess it will work a lot 
> > > > > > of
> > > > > > common code).  I wonder what would be alternatives.  In GCC 
> > > > > > generated
> > > > > > > profiling code we use TLS only for indirect call profiling (so there 
> > > > > > is
> > > > > > no need to turn off rest of profiling).  I wonder if there is any 
> > > > > > chance
> > > > > > > to not make it segfault when it is done before TLS is set up?
> > > > >
> > > > > IFUNC selector should make minimum external calls, none is preferred.
> > > >
> > > > Edge profiling only inserts (atomic) 64bit increments of counters.
> > > > If target supports these operations inline, no external calls will be
> > > > done.
> > > >
> > > > Indirect call profiling inserts the problematic TLS variable (to track
> > > > caller-callee pairs). Value profiling also inserts various additional
> > > > external calls to counters.
> > > >
> > > > I am perfectly fine with disabling instrumentation for ifunc selectors
> > > > and functions only reachable from them, but I am worried about callees
> > > > used also from non-ifunc path.
> > >
> > > Programmers need to understand not to do it.
> >
> > It would help to have this documented. Should we warn when ifunc
> > resolver calls external function, comdat of function reachable from
> > non-ifunc code?
>
> That will be nice.
>
> > >
> > > > For example selector implemented in C++ may do some string handling to
> > > > match CPU name and propagation will disable profiling for std::string
> > >
> > > On x86, they should use CPUID, not string functions.
> > >
> > > > member functions (which may not be effective if comdat section is
> > > > prevailed from other translation unit).
> > >
> > > String functions may lead to external function calls which is dangerous.
> > >
> > > > > Any external calls may lead to issues at run-time.  It is a very bad 
> > > > > idea
> > > > > to profile IFUNC selector via external function call.
> > > >
> > > > Looking at https://sourceware.org/glibc/wiki/GNU_IFUNC
> > > > there are other limitations on ifunc except for profiling, such as
> > > > -fstack-protector-all.  So perhaps your propagation can be used to
> > > > disable those features as well.
> > >
> > > So, it may not be tree-profile specific.  Where should these 2 bits
> > > be added?
> >
> > If we want to disable other transforms too, then I think having a bit in
> > cgraph_node for reachability from ifunc resolver makes sense.
> > I would still do the cycle detection using an on-the-side hash_map to
> > avoid pollution of the global data structure.
> >
>
> I will see what I can do.
>
>

The v2 patch is at

https://patchwork.sourceware.org/project/gcc/list/?series=31627

-- 
H.J.


[PATCH v2] tree-profile: Don't instrument an IFUNC resolver nor its callees

2024-03-05 Thread H.J. Lu
We can't instrument an IFUNC resolver nor its callees as it may require
TLS which hasn't been set up yet when the dynamic linker is resolving
IFUNC symbols.

Add an IFUNC resolver caller marker to cgraph_node and set it if the
function is called by an IFUNC resolver.  Update tree_profiling to skip
functions called by IFUNC resolver.

Tested with profiledbootstrap on Fedora 39/x86-64.

gcc/ChangeLog:

PR tree-optimization/114115
* cgraph.h (symtab_node): Add check_ifunc_callee_symtab_nodes.
(cgraph_node): Add called_by_ifunc_resolver.
* cgraphunit.cc (symbol_table::compile): Call
symtab_node::check_ifunc_callee_symtab_nodes.
* symtab.cc (check_ifunc_resolver): New.
(ifunc_ref_map): Likewise.
(is_caller_ifunc_resolver): Likewise.
(symtab_node::check_ifunc_callee_symtab_nodes): Likewise.
* tree-profile.cc (tree_profiling): Do not instrument an IFUNC
resolver nor its callees.

gcc/testsuite/ChangeLog:

PR tree-optimization/114115
* gcc.dg/pr114115.c: New test.
---
 gcc/cgraph.h|  6 +++
 gcc/cgraphunit.cc   |  2 +
 gcc/symtab.cc   | 89 +
 gcc/testsuite/gcc.dg/pr114115.c | 24 +
 gcc/tree-profile.cc |  4 ++
 5 files changed, 125 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr114115.c

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 47f35e8078d..a8c3224802c 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -479,6 +479,9 @@ public:
  Return NULL if there's no such node.  */
   static symtab_node *get_for_asmname (const_tree asmname);
 
+  /* Check symbol table for callees of IFUNC resolvers.  */
+  static void check_ifunc_callee_symtab_nodes (void);
+
   /* Verify symbol table for internal consistency.  */
   static DEBUG_FUNCTION void verify_symtab_nodes (void);
 
@@ -896,6 +899,7 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public 
symtab_node
   redefined_extern_inline (false), tm_may_enter_irr (false),
   ipcp_clone (false), declare_variant_alt (false),
   calls_declare_variant_alt (false), gc_candidate (false),
+  called_by_ifunc_resolver (false),
   m_uid (uid), m_summary_id (-1)
   {}
 
@@ -1495,6 +1499,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : 
public symtab_node
  is set for local SIMD clones when they are created and cleared if the
  vectorizer uses them.  */
   unsigned gc_candidate : 1;
+  /* Set if the function is called by an IFUNC resolver.  */
+  unsigned called_by_ifunc_resolver : 1;
 
 private:
   /* Unique id of the node.  */
diff --git a/gcc/cgraphunit.cc b/gcc/cgraphunit.cc
index d200166f7e9..2bd0289ffba 100644
--- a/gcc/cgraphunit.cc
+++ b/gcc/cgraphunit.cc
@@ -2317,6 +2317,8 @@ symbol_table::compile (void)
 
   symtab_node::checking_verify_symtab_nodes ();
 
+  symtab_node::check_ifunc_callee_symtab_nodes ();
+
   timevar_push (TV_CGRAPHOPT);
   if (pre_ipa_mem_report)
 dump_memory_report ("Memory consumption before IPA");
diff --git a/gcc/symtab.cc b/gcc/symtab.cc
index 4c7e3c135ca..3256133891d 100644
--- a/gcc/symtab.cc
+++ b/gcc/symtab.cc
@@ -1369,6 +1369,95 @@ symtab_node::verify (void)
   timevar_pop (TV_CGRAPH_VERIFY);
 }
 
+/* Return true and set *DATA to true if NODE is an ifunc resolver.  */
+
+static bool
+check_ifunc_resolver (cgraph_node *node, void *data)
+{
+  if (node->ifunc_resolver)
+{
+  bool *is_ifunc_resolver = (bool *) data;
+  *is_ifunc_resolver = true;
+  return true;
+}
+  return false;
+}
+
+static auto_bitmap ifunc_ref_map;
+
+/* Return true if any caller of NODE is an ifunc resolver.  */
+
+static bool
+is_caller_ifunc_resolver (cgraph_node *node)
+{
+  bool is_ifunc_resolver = false;
+
+  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
+{
+  /* Return true if caller is known to be an IFUNC resolver.  */
+  if (e->caller->called_by_ifunc_resolver)
+   return true;
+
+  /* Check for recursive call.  */
+  if (e->caller == node)
+   continue;
+
+  /* Skip if it has been visited.  */
+  unsigned int uid = e->caller->get_uid ();
+  if (bitmap_bit_p (ifunc_ref_map, uid))
+   continue;
+  bitmap_set_bit (ifunc_ref_map, uid);
+
+  if (is_caller_ifunc_resolver (e->caller))
+   {
+ /* Return true if caller is an IFUNC resolver.  */
+ e->caller->called_by_ifunc_resolver = true;
+ return true;
+   }
+
+  /* Check if caller's alias is an IFUNC resolver.  */
+  e->caller->call_for_symbol_and_aliases (check_ifunc_resolver,
+ &is_ifunc_resolver,
+ true);
+  if (is_ifunc_resolver)
+   {
+ /* Return true if caller's alias is an IFUNC resolver.  */
+ e->caller->called_by_ifunc_resolver = true;
+ return true;
+   }
+}
+
+  return false;
+}
+
+/* Check symbol table for 

Re: [C++ coroutines] Initial implementation pushed to master.

2024-03-05 Thread H.J. Lu
On Sat, Jan 18, 2020 at 4:54 AM Iain Sandoe  wrote:
>
> Hi,
>
> Thanks to:
>
>* the reviewers, the code was definitely improved by your reviews.
>
>* those folks who tested the branch and/or compiler explorer
>  instance and reported problems with reproducers.
>
>   * WG21 colleagues, especially Lewis and Gor for valuable input
> and discussions on the design.
>
> = TL;DR:
>
> * This is not enabled by default (even for -std=c++2a), it needs -fcoroutines.
>
> * Like all the C++20 support, it is experimental, perhaps more experimental
>   than some other pieces because wording is still being amended.
>
> * The FE/ME tests are run for ALL targets; in principle this should be target-
>   agnostic, if we see fails then that is probably interesting input for the 
> ABI
>  panel.
>
>  * I regstrapped on 64b LE and BE platforms and a 32b LE host with no observed
>   issues or regressions.
>
>  * it’s just slightly too big to send uncompressed so attached as a bz2.
>
>  * commit is r10-6063-g49789fd08
>
> thanks again to all those who helped,
> Iain
>
> ==  The full covering note:
>
> This is the squashed version of the first 6 patches that were split to
> facilitate review.
>
> The changes to libiberty (7th patch) to support demangling the co_await
> operator stand alone and are applied separately.
>
> The patch series is an initial implementation of a coroutine feature,
> expected to be standardised in C++20.
>
> Standardisation status (and potential impact on this implementation)
> 
>
> The facility was accepted into the working draft for C++20 by WG21 in
> February 2019.  During following WG21 meetings, design and national body
> comments have been reviewed, with no significant change resulting.
>
> The current GCC implementation is against n4835 [1].
>
> At this stage, the remaining potential for change comes from:
>
> * Areas of national body comments that were not resolved in the version we
>   have worked to:
>   (a) handling of the situation where aligned allocation is available.
>   (b) handling of the situation where a user wants coroutines, but does not
>   want exceptions (e.g. a GPU).
>
> * Agreed changes that have not yet been worded in a draft standard that we
>   have worked to.
>
> It is not expected that the resolution to these can produce any major
> change at this phase of the standardisation process.  Such changes should be
> limited to the coroutine-specific code.
>
> ABI
> ---
>
> The various compiler developers 'vendors' have discussed a minimal ABI to
> allow one implementation to call coroutines compiled by another.
>
> This amounts to:
>
> 1. The layout of a public portion of the coroutine frame.
>
>  Coroutines need to preserve state across suspension points, the storage for
>  this is called a "coroutine frame".
>
>  The ABI mandates that pointers into the coroutine frame point to an area
>  beginning with two function pointers (to the resume and destroy functions
>  described below); these are immediately followed by the "promise object"
>  described in the standard.
>
>  This is sufficient that the builtins can take a coroutine frame pointer and
>  determine the address of the promise (or call the resume/destroy functions).
>
> 2. A number of compiler builtins that the standard library might use.
>
>   These are implemented by this patch series.
>
> 3. This introduces a new operator 'co_await' the mangling for which is also
> agreed between vendors (and has an issue filed for that against the upstream
> c++abi).  Demangling for this is added to libiberty in a separate patch.
>
> The ABI has currently no target-specific content (a given psABI might elect
> to mandate alignment, but the common ABI does not do this).
>
> Standard Library impact
> ---
>
> The current implementations require addition of only a single header to
> the standard library (no change to the runtime).  This header is part of
> the patch.
>
> GCC Implementation outline
> --
>
> The standard's design for coroutines does not decorate the definition of
> a coroutine in any way, so that a function is only known to be a coroutine
> when one of the keywords (co_await, co_yield, co_return) is encountered.
>
> This means that we cannot special-case such functions from the outset, but
> must process them differently when they are finalised - which we do from
> "finish_function ()".
>
> At a high level, this design of coroutine produces four pieces from the
> original user's function:
>
>   1. A coroutine state frame (taking the logical place of the activation
>  record for a regular function).  One item stored in that state is the
>  index of the current suspend point.
>   2. A "ramp" function
>  This is what the user calls to construct the coroutine frame and start
>  the coroutine execution.  This will return some object representing the
>  coroutine's eventual 

Re: [PATCH] tree-profile: Don't instrument an IFUNC resolver nor its callees

2024-02-29 Thread H.J. Lu
On Thu, Feb 29, 2024 at 7:06 AM Jan Hubicka  wrote:
>
> > > I am worried about scenario where ifunc selector calls function foo
> > > defined locally and foo is also used from other places possibly in hot
> > > loops.
> > > >
> > > > > So it is not really reliable fix (though I guess it will work a lot of
> > > > > common code).  I wonder what would be alternatives.  In GCC generated
> > > > > profiling code we use TLS only for indirect call profiling (so there is
> > > > > no need to turn off rest of profiling).  I wonder if there is any 
> > > > > chance
> > > > > to not make it segfault when it is done before TLS is set up?
> > > >
> > > > IFUNC selector should make minimum external calls, none is preferred.
> > >
> > > Edge profiling only inserts (atomic) 64bit increments of counters.
> > > If target supports these operations inline, no external calls will be
> > > done.
> > >
> > > Indirect call profiling inserts the problematic TLS variable (to track
> > > caller-callee pairs). Value profiling also inserts various additional
> > > external calls to counters.
> > >
> > > I am perfectly fine with disabling instrumentation for ifunc selectors
> > > and functions only reachable from them, but I am worried about callees
> > > used also from non-ifunc path.
> >
> > Programmers need to understand not to do it.
>
> It would help to have this documented. Should we warn when ifunc
> resolver calls external function, comdat of function reachable from
> non-ifunc code?

That will be nice.

> >
> > > For example selector implemented in C++ may do some string handling to
> > > match CPU name and propagation will disable profiling for std::string
> >
> > On x86, they should use CPUID, not string functions.
> >
> > > member functions (which may not be effective if comdat section is
> > > prevailed from other translation unit).
> >
> > String functions may lead to external function calls which is dangerous.
> >
> > > > Any external calls may lead to issues at run-time.  It is a very bad 
> > > > idea
> > > > to profile IFUNC selector via external function call.
> > >
> > > Looking at https://sourceware.org/glibc/wiki/GNU_IFUNC
> > > there are other limitations on ifunc except for profiling, such as
> > > -fstack-protector-all.  So perhaps your propagation can be used to
> > > disable those features as well.
> >
> > So, it may not be tree-profile specific.  Where should these 2 bits
> > be added?
>
> If we want to disable other transforms too, then I think having a bit in
> cgraph_node for reachability from ifunc resolver makes sense.
> I would still do the cycle detection using on-side hash_map to avoid
> pollution of the global data structure.
>

I will see what I can do.

Thanks.

> Thanks,
> Honza
> >
> > > "Unfortunately there are actually a lot of restrictions placed on IFUNC
> > > usage which aren't entirely clear and the documentation needs to be
> > > updated." makes me wonder what other transformations are potentially
> > > dangerous.
> > >
> > > Honza
> >
> >
> > --
> > H.J.



-- 
H.J.


Re: [PATCH] tree-profile: Don't instrument an IFUNC resolver nor its callees

2024-02-29 Thread H.J. Lu
On Thu, Feb 29, 2024 at 6:34 AM Jan Hubicka  wrote:
>
> > On Thu, Feb 29, 2024 at 5:39 AM Jan Hubicka  wrote:
> > >
> > > > We can't instrument an IFUNC resolver nor its callees as it may require
> > > > TLS which hasn't been set up yet when the dynamic linker is resolving
> > > > IFUNC symbols.  Add an IFUNC resolver caller marker to symtab_node to
> > > > avoid recursive checking.
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > >   PR tree-optimization/114115
> > > >   * cgraph.h (enum ifunc_caller): New.
> > > >   (symtab_node): Add has_ifunc_caller.
> > > Unless we have users outside of tree-profile, I think it is better to
> > > avoid adding extra data to cgraph_node.  One can use node->get_uid() 
> > > indexed hash
> > > set to save the two bits needed for propagation.
> > > >   * tree-profile.cc (check_ifunc_resolver): New.
> > > >   (is_caller_ifunc_resolver): Likewise.
> > > >   (tree_profiling): Don't instrument an IFUNC resolver nor its
> > > >   callees.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > >   PR tree-optimization/114115
> > > >   * gcc.dg/pr114115.c: New test.
> > >
> > > The problem with this approach is that tracking callees of ifunc
> > > resolvers will stop on the translation unit boundary and also with
> > > indirect call.  Also while ifunc resolver itself is called only once,
> > > its callees may also be used from performance critical code.
> >
> > IFUNC selector shouldn't have any external dependencies which
> > can cause issues at run-time.
>
> I am worried about scenario where ifunc selector calls function foo
> defined locally and foo is also used from other places possibly in hot
> loops.
> >
> > > So it is not really reliable fix (though I guess it will work a lot of
> > > common code).  I wonder what would be alternatives.  In GCC generated
> > > profiling code we use TLS only for indirect call profiling (so there is
> > > no need to turn off rest of profiling).  I wonder if there is any chance
> > > to not make it segfault when it is done before TLS is set up?
> >
> > IFUNC selector should make minimum external calls, none is preferred.
>
> Edge profiling only inserts (atomic) 64bit increments of counters.
> If target supports these operations inline, no external calls will be
> done.
>
> Indirect call profiling inserts the problematic TLS variable (to track
> caller-callee pairs). Value profiling also inserts various additional
> external calls to counters.
>
> I am perfectly fine with disabling instrumentation for ifunc selectors
> and functions only reachable from them, but I am worried about callees
> used also from non-ifunc path.

Programmers need to understand not to do it.

> For example selector implemented in C++ may do some string handling to
> match CPU name and propagation will disable profiling for std::string

On x86, they should use CPUID, not string functions.

> member functions (which may not be effective if comdat section is
> prevailed from other translation unit).

String functions may lead to external function calls which is dangerous.

> > Any external calls may lead to issues at run-time.  It is a very bad idea
> > to profile IFUNC selector via external function call.
>
> Looking at https://sourceware.org/glibc/wiki/GNU_IFUNC
> there are other limitations on ifunc except for profiling, such as
> -fstack-protector-all.  So perhaps your propagation can be used to
> disable those features as well.

So, it may not be tree-profile specific.  Where should these 2 bits
be added?

> "Unfortunately there are actually a lot of restrictions placed on IFUNC
> usage which aren't entirely clear and the documentation needs to be
> updated." makes me wonder what other transformations are potentially
> dangerous.
>
> Honza


-- 
H.J.


Re: [PATCH] i386: Guard noreturn no-callee-saved-registers optimization with -mnoreturn-no-callee-saved-registers [PR38534]

2024-02-29 Thread H.J. Lu
On Thu, Feb 29, 2024 at 6:15 AM Jan Hubicka  wrote:
>
> > On Thu, Feb 29, 2024 at 02:31:05PM +0100, Jan Hubicka wrote:
> > > I agree that debugability of user core dumps is important here.
> > >
> > > I guess an ideal solution would be to change codegen of noreturn functions
> > > to callee save all registers. Performance of prologue of noreturn
> > > function is not too important. Then we can stop caller saving registers
> > > and still get reasonable backtraces.
> >
> > I don't think that is possible.
> > While both C and C++ require that if [[noreturn]] attribute is used on
> > some function declaration, it must be used on the first declaration and
> > also if some function is [[noreturn]] in one TU, it must be [[noreturn]]
> > in all other TUs which declare the same function.
> > But, we have no such requirement for __attribute__((noreturn)), there it
> > is a pure optimization, it can be declared just on the caller side as an
> > optimization hint the function will not return, or just on the callee side
> > where the compiler will actually verify it doesn't return, or both.
> > And, the attribute is not part of function type, so even in standard C/C++,
> > one can use
> > extern void bar ();
> > [[noreturn]] void foo ()
> > {
> >   for (;;) bar ();
> > }
> > void (*fn) () = foo;
> > void baz ()
> > {
> >   fn ();
> > }
> > As you can call the noreturn function directly or indirectly, changing
> > calling conventions based on noreturn vs. no-noreturn is IMHO not possible.
>
> I am not wed to the idea (just it appeared to me as an option to
> disabling this optimization by default). I still think it may make sense.
>
> Making noreturn callees to save caller saved register is compatible with
> the default ABI.  If noreturn is missing on caller side, then caller will
> save registers as usual. Noreturn callee will save them again, which is
> pointless, but everything should work as usual and extra cost of saving
> should not matter in practice.  This is also the case of indirect call
> of noreturn function where you miss annotation on caller side.
>
> If noreturn is missing on callee side, we will lose information on
> functions arguments in backtrace, but the code will still work
> (especially if we save BP register to make code backtraceable).  This is
> scenario that probably can be avoided in practice where it matters (such
> as in glibc abort whose implementation is annotated).
>
> Noreturn already leads to some information loss in backtraces. I tend to
> get surprised from time to time to see wrong call to abort due to tail
> merging. So it may be acceptable to lose info in a situation where user
> does a silly thing and only annotates caller.
>
> Since we auto-detect noreturn, we may need to be extra careful about noreturn
> comdats. Here auto-detection of prevailing def may have different
> outcome than auto-detection of prevailed defs. So we may want to disable
> the optimization for auto-detected comdats.
>

There are 2 kinds of noreturns.  One is abort which may require backtrace.
The other is a normal exit from the previous frame.  The latter case doesn't
require backtrace and can be performance critical.  Which one is more
important for users?

-- 
H.J.


Re: [PATCH] tree-profile: Don't instrument an IFUNC resolver nor its callees

2024-02-29 Thread H.J. Lu
On Thu, Feb 29, 2024 at 5:39 AM Jan Hubicka  wrote:
>
> > We can't instrument an IFUNC resolver nor its callees as it may require
> > TLS which hasn't been set up yet when the dynamic linker is resolving
> > IFUNC symbols.  Add an IFUNC resolver caller marker to symtab_node to
> > avoid recursive checking.
> >
> > gcc/ChangeLog:
> >
> >   PR tree-optimization/114115
> >   * cgraph.h (enum ifunc_caller): New.
> >   (symtab_node): Add has_ifunc_caller.
> Unless we have users outside of tree-profile, I think it is better to
> avoid adding extra data to cgraph_node.  One can use node->get_uid() indexed 
> hash
> set to save the two bits needed for propagation.
> >   * tree-profile.cc (check_ifunc_resolver): New.
> >   (is_caller_ifunc_resolver): Likewise.
> >   (tree_profiling): Don't instrument an IFUNC resolver nor its
> >   callees.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   PR tree-optimization/114115
> >   * gcc.dg/pr114115.c: New test.
>
> The problem with this approach is that tracking callees of ifunc
> resolvers will stop on the translation unit boundary and also with
> indirect call.  Also while ifunc resolver itself is called only once,
> its callees may also be used from performance critical code.

IFUNC selector shouldn't have any external dependencies which
can cause issues at run-time.

> So it is not really reliable fix (though I guess it will work a lot of
> common code).  I wonder what would be alternatives.  In GCC generated
> profiling code we use TLS only for indirect call profiling (so there is
> no need to turn off rest of profiling).  I wonder if there is any chance
> to not make it segfault when it is done before TLS is set up?

IFUNC selector should make minimum external calls, none is preferred.
Any external calls may lead to issues at run-time.  It is a very bad idea
to profile IFUNC selector via external function call.

> Honza
> > ---
> >  gcc/cgraph.h| 18 +++
> >  gcc/testsuite/gcc.dg/pr114115.c | 24 +
> >  gcc/tree-profile.cc | 92 +
> >  3 files changed, 134 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.dg/pr114115.c
> >
> > diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> > index 47f35e8078d..ce99f4a5114 100644
> > --- a/gcc/cgraph.h
> > +++ b/gcc/cgraph.h
> > @@ -100,6 +100,21 @@ enum symbol_partitioning_class
> > SYMBOL_DUPLICATE
> >  };
> >
> > +/* Classification whether a function has any IFUNC resolver caller.  */
> > +enum ifunc_caller
> > +{
> > +  /* It is unknown if this function has any IFUNC resolver caller.  */
> > +  IFUNC_CALLER_UNKNOWN,
> > +  /* Work in progress to check if this function has any IFUNC resolver
> > + caller.  */
> > +  IFUNC_CALLER_WIP,
> > +  /* This function has at least an IFUNC resolver caller, including
> > + itself.  */
> > +  IFUNC_CALLER_TRUE,
> > +  /* This function doesn't have any IFUNC resolver caller.  */
> > +  IFUNC_CALLER_FALSE
> > +};
> > +
> >  /* Base of all entries in the symbol table.
> > The symtab_node is inherited by cgraph and varpol nodes.  */
> >  struct GTY((desc ("%h.type"), tag ("SYMTAB_SYMBOL"),
> > @@ -121,6 +136,7 @@ public:
> >used_from_other_partition (false), in_other_partition (false),
> >address_taken (false), in_init_priority_hash (false),
> >need_lto_streaming (false), offloadable (false), ifunc_resolver 
> > (false),
> > +  has_ifunc_caller (IFUNC_CALLER_UNKNOWN),
> >order (false), next_sharing_asm_name (NULL),
> >previous_sharing_asm_name (NULL), same_comdat_group (NULL), ref_list 
> > (),
> >alias_target (NULL), lto_file_data (NULL), aux (NULL),
> > @@ -595,6 +611,8 @@ public:
> >/* Set when symbol is an IFUNC resolver.  */
> >unsigned ifunc_resolver : 1;
> >
> > +  /* Classification whether a function has any IFUNC resolver caller.  */
> > +  ENUM_BITFIELD (ifunc_caller) has_ifunc_caller : 2;
> >
> >/* Ordering of all symtab entries.  */
> >int order;
> > diff --git a/gcc/testsuite/gcc.dg/pr114115.c 
> > b/gcc/testsuite/gcc.dg/pr114115.c
> > new file mode 100644
> > index 000..2629f591877
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/pr114115.c
> > @@ -0,0 +1,24 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O0 -fprofile-generate -fdump-tree-optimized" } */
> > +/* { dg-require-profiling "-fprofile-generate" } */
> > +/* { dg-require-ifunc "" } */
> > +
> > +void *foo_ifunc2() __attribute__((ifunc("foo_resolver")));
> > +
> > +void bar(void)
> > +{
> > +}
> > +
> > +static int f3()
> > +{
> > +  bar ();
> > +  return 5;
> > +}
> > +
> > +void (*foo_resolver(void))(void)
> > +{
> > +  f3();
> > +  return bar;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-not "__gcov_indirect_call_profiler_v" 
> > "optimized" } } */
> > diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc
> > index aed13e2b1bc..46478648b32 100644
> > --- a/gcc/tree-profile.cc
> > +++ b/gcc/tree-profile.cc
> > @@ -738,6 

Re: [PATCH] i386: Guard noreturn no-callee-saved-registers optimization with -mnoreturn-no-callee-saved-registers [PR38534]

2024-02-29 Thread H.J. Lu
On Wed, Feb 28, 2024 at 10:20 PM Hongtao Liu  wrote:
>
> On Wed, Feb 28, 2024 at 4:54 PM Jakub Jelinek  wrote:
> >
> > Hi!
> >
> > Adding Hongtao and Honza into the loop as the ones who acked the original
> > patch.
> >
> > The no_callee_saved_registers by default for noreturn functions change can
> > break in-process backtrace(3) or backtraces from debugger or other process
> > (quite often, any time the noreturn function decides to use the bp register
> > and any of the parent frames uses a frame pointer; the unwinder just crashes
> > in the libgcc unwinder case, gdb prints stack corrupted message), so I'd
> > like to save bp register in that case:
> >
> > https://gcc.gnu.org/pipermail/gcc-patches/2024-February/646591.html
> I think this patch makes sense and LGTM, we save and restore frame
> pointer for noreturn.
> >
> > and additionally the no_callee_saved_registers by default for noreturn
> > functions change can make debugging harder, again not localized to the
> > noreturn function, but any of its callers.  So, if say glibc abort function
> > implementation needs a lot of normally callee-saved registers, no matter how
> > users recompile their apps, they will see garbage or optimized out
> > vars/parameters in their code unless they rebuild their glibc with -O0.
> > So, I think we should guard that by a non-default option:
> >
> > https://gcc.gnu.org/pipermail/gcc-patches/2024-February/646649.html
> So it turns off the optimization for noreturn functions by default,
> I'm not sure about this.
> Any comments, H.J?

We need BP for backtrace.  I don't think we need to save other
registers.  True, GDB may not see function parameters.  But
optimization always has this impact.  When I need to debug a
program, I always use -O0 or -Og.

> >
> > Plus we need to somehow make sure to emit DW_CFA_undefined for the modified
> > but not saved normally callee-saved registers, so that we at least don't get
> > garbage in debug info.  H.J. posted some patches for that, so far I wasn't
> > happy about the implementation but the actual change is desirable.
> >
> > Your thoughts on this?
> >
> > Jakub
> >
>
>
> --
> BR,
> Hongtao



-- 
H.J.


[PATCH] tree-profile: Don't instrument an IFUNC resolver nor its callees

2024-02-26 Thread H.J. Lu
We can't instrument an IFUNC resolver nor its callees as it may require
TLS which hasn't been set up yet when the dynamic linker is resolving
IFUNC symbols.  Add an IFUNC resolver caller marker to symtab_node to
avoid recursive checking.

gcc/ChangeLog:

PR tree-optimization/114115
* cgraph.h (enum ifunc_caller): New.
(symtab_node): Add has_ifunc_caller.
* tree-profile.cc (check_ifunc_resolver): New.
(is_caller_ifunc_resolver): Likewise.
(tree_profiling): Don't instrument an IFUNC resolver nor its
callees.

gcc/testsuite/ChangeLog:

PR tree-optimization/114115
* gcc.dg/pr114115.c: New test.
---
 gcc/cgraph.h| 18 +++
 gcc/testsuite/gcc.dg/pr114115.c | 24 +
 gcc/tree-profile.cc | 92 +
 3 files changed, 134 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr114115.c

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 47f35e8078d..ce99f4a5114 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -100,6 +100,21 @@ enum symbol_partitioning_class
SYMBOL_DUPLICATE
 };
 
+/* Classification whether a function has any IFUNC resolver caller.  */
+enum ifunc_caller
+{
+  /* It is unknown if this function has any IFUNC resolver caller.  */
+  IFUNC_CALLER_UNKNOWN,
+  /* Work in progress to check if this function has any IFUNC resolver
+ caller.  */
+  IFUNC_CALLER_WIP,
+  /* This function has at least an IFUNC resolver caller, including
+ itself.  */
+  IFUNC_CALLER_TRUE,
+  /* This function doesn't have any IFUNC resolver caller.  */
+  IFUNC_CALLER_FALSE
+};
+
 /* Base of all entries in the symbol table.
The symtab_node is inherited by cgraph and varpol nodes.  */
 struct GTY((desc ("%h.type"), tag ("SYMTAB_SYMBOL"),
@@ -121,6 +136,7 @@ public:
   used_from_other_partition (false), in_other_partition (false),
   address_taken (false), in_init_priority_hash (false),
   need_lto_streaming (false), offloadable (false), ifunc_resolver (false),
+  has_ifunc_caller (IFUNC_CALLER_UNKNOWN),
   order (false), next_sharing_asm_name (NULL),
   previous_sharing_asm_name (NULL), same_comdat_group (NULL), ref_list (),
   alias_target (NULL), lto_file_data (NULL), aux (NULL),
@@ -595,6 +611,8 @@ public:
   /* Set when symbol is an IFUNC resolver.  */
   unsigned ifunc_resolver : 1;
 
+  /* Classification whether a function has any IFUNC resolver caller.  */
+  ENUM_BITFIELD (ifunc_caller) has_ifunc_caller : 2;
 
   /* Ordering of all symtab entries.  */
   int order;
diff --git a/gcc/testsuite/gcc.dg/pr114115.c b/gcc/testsuite/gcc.dg/pr114115.c
new file mode 100644
index 000..2629f591877
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr114115.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -fprofile-generate -fdump-tree-optimized" } */
+/* { dg-require-profiling "-fprofile-generate" } */
+/* { dg-require-ifunc "" } */
+
+void *foo_ifunc2() __attribute__((ifunc("foo_resolver")));
+
+void bar(void)
+{
+}
+
+static int f3()
+{
+  bar ();
+  return 5;
+}
+
+void (*foo_resolver(void))(void)
+{
+  f3();
+  return bar;
+}
+
+/* { dg-final { scan-tree-dump-not "__gcov_indirect_call_profiler_v" 
"optimized" } } */
diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc
index aed13e2b1bc..46478648b32 100644
--- a/gcc/tree-profile.cc
+++ b/gcc/tree-profile.cc
@@ -738,6 +738,72 @@ include_source_file_for_profile (const char *filename)
   return false;
 }
 
+/* Return true and set *DATA to true if NODE is an ifunc resolver.  */
+
+static bool
+check_ifunc_resolver (cgraph_node *node, void *data)
+{
+  if (node->ifunc_resolver)
+{
+  bool *is_ifunc_resolver = (bool *) data;
+  *is_ifunc_resolver = true;
+  return true;
+}
+  return false;
+}
+
+/* Return true if any caller of NODE is an ifunc resolver.  */
+
+static bool
+is_caller_ifunc_resolver (cgraph_node *node)
+{
+  if (node->has_ifunc_caller == IFUNC_CALLER_WIP)
+gcc_unreachable ();
+
+  node->has_ifunc_caller = IFUNC_CALLER_WIP;
+  bool is_ifunc_resolver = false;
+
+  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
+{
+  /* Check for recursive call.  */
+  if (e->caller == node)
+   continue;
+
+  switch (e->caller->has_ifunc_caller)
+   {
+   case IFUNC_CALLER_UNKNOWN:
+ e->caller->call_for_symbol_and_aliases (check_ifunc_resolver,
+ &is_ifunc_resolver,
+ true);
+ if (is_ifunc_resolver)
+   {
+ e->caller->has_ifunc_caller = IFUNC_CALLER_TRUE;
+ return true;
+   }
+ break;
+   case IFUNC_CALLER_TRUE:
+ return true;
+   case IFUNC_CALLER_FALSE:
+ /* This caller doesn't have any IFUNC resolver call.  Check
+the next caller.  */
+ continue;
+
+   case IFUNC_CALLER_WIP:
+ continue;
+   }
+
+  if 

Re: [PATCH] x86: Properly implement AMX-TILE load/store intrinsics

2024-02-26 Thread H.J. Lu
On Sun, Feb 25, 2024 at 8:25 PM H.J. Lu  wrote:
>
> On Sun, Feb 25, 2024 at 7:03 PM Hongtao Liu  wrote:
> >
> > On Mon, Feb 26, 2024 at 10:37 AM H.J. Lu  wrote:
> > >
> > > On Sun, Feb 25, 2024 at 6:03 PM Hongtao Liu  wrote:
> > > >
> > > > On Mon, Feb 26, 2024 at 5:11 AM H.J. Lu  wrote:
> > > > >
> > > > > ldtilecfg and sttilecfg take a 512-byte memory block.  With
> > > > > _tile_loadconfig implemented as
> > > > >
> > > > > extern __inline void
> > > > > __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > > > _tile_loadconfig (const void *__config)
> > > > > {
> > > > >   __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void 
> > > > > **)__config)));
> > > > > }
> > > > >
> > > > > GCC sees:
> > > > >
> > > > > (parallel [
> > > > >   (asm_operands/v ("ldtilecfg   %X0") ("") 0
> > > > >[(mem/f/c:DI (plus:DI (reg/f:DI 77 virtual-stack-vars)
> > > > >  (const_int -64 [0xffc0])) [1 
> > > > > MEM[(const void * *)_data]+0 S8 A128])]
> > > > >[(asm_input:DI ("m"))]
> > > > >(clobber (reg:CC 17 flags))])
> > > > >
> > > > > and the memory operand size is 1 byte.  As the result, the rest of 511
> > > > > bytes is ignored by GCC.  Implement ldtilecfg and sttilecfg intrinsics
> > > > > with a pointer to BLKmode to honor the 512-byte memory block.
> > > > >
> > > > > gcc/ChangeLog:
> > > > >
> > > > > PR target/114098
> > > > > * config/i386/amxtileintrin.h (_tile_loadconfig): Use
> > > > > __builtin_ia32_ldtilecfg.
> > > > > (_tile_storeconfig): Use __builtin_ia32_sttilecfg.
> > > > > * config/i386/i386-builtin.def (BDESC): Add
> > > > > __builtin_ia32_ldtilecfg and __builtin_ia32_sttilecfg.
> > > > > * config/i386/i386-expand.cc (ix86_expand_builtin): Handle
> > > > > IX86_BUILTIN_LDTILECFG and IX86_BUILTIN_STTILECFG.
> > > > > * config/i386/i386.md (ldtilecfg): New pattern.
> > > > > (sttilecfg): Likewise.
> > > > >
> > > > > gcc/testsuite/ChangeLog:
> > > > >
> > > > > PR target/114098
> > > > > * gcc.target/i386/amxtile-4.c: New test.
> > > > > ---
> > > > >  gcc/config/i386/amxtileintrin.h   |  4 +-
> > > > >  gcc/config/i386/i386-builtin.def  |  4 ++
> > > > >  gcc/config/i386/i386-expand.cc| 19 
> > > > >  gcc/config/i386/i386.md   | 24 ++
> > > > >  gcc/testsuite/gcc.target/i386/amxtile-4.c | 55 
> > > > > +++
> > > > >  5 files changed, 104 insertions(+), 2 deletions(-)
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/amxtile-4.c
> > > > >
> > > > > diff --git a/gcc/config/i386/amxtileintrin.h 
> > > > > b/gcc/config/i386/amxtileintrin.h
> > > > > index d1a26e0fea5..5081b326498 100644
> > > > > --- a/gcc/config/i386/amxtileintrin.h
> > > > > +++ b/gcc/config/i386/amxtileintrin.h
> > > > > @@ -39,14 +39,14 @@ extern __inline void
> > > > >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > > >  _tile_loadconfig (const void *__config)
> > > > >  {
> > > > > -  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void 
> > > > > **)__config)));
> > > > > +  __builtin_ia32_ldtilecfg (__config);
> > > > >  }
> > > > >
> > > > >  extern __inline void
> > > > >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > > >  _tile_storeconfig (void *__config)
> > > > >  {
> > > > > -  __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
> > > > > +  __builtin_ia32_sttilecfg (__config);
> > > > >  }
> > > > >
> > > > >  extern __inline void
> > > > > diff --git a/gcc/config/i386/i386-builtin.def 
> > > > &

Re: [PATCH] x86: Properly implement AMX-TILE load/store intrinsics

2024-02-25 Thread H.J. Lu
On Sun, Feb 25, 2024 at 7:03 PM Hongtao Liu  wrote:
>
> On Mon, Feb 26, 2024 at 10:37 AM H.J. Lu  wrote:
> >
> > On Sun, Feb 25, 2024 at 6:03 PM Hongtao Liu  wrote:
> > >
> > > On Mon, Feb 26, 2024 at 5:11 AM H.J. Lu  wrote:
> > > >
> > > > ldtilecfg and sttilecfg take a 512-byte memory block.  With
> > > > _tile_loadconfig implemented as
> > > >
> > > > extern __inline void
> > > > __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > > _tile_loadconfig (const void *__config)
> > > > {
> > > >   __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void 
> > > > **)__config)));
> > > > }
> > > >
> > > > GCC sees:
> > > >
> > > > (parallel [
> > > >   (asm_operands/v ("ldtilecfg   %X0") ("") 0
> > > >[(mem/f/c:DI (plus:DI (reg/f:DI 77 virtual-stack-vars)
> > > >  (const_int -64 [0xffc0])) [1 
> > > > MEM[(const void * *)_data]+0 S8 A128])]
> > > >[(asm_input:DI ("m"))]
> > > >(clobber (reg:CC 17 flags))])
> > > >
> > > > and the memory operand size is 1 byte.  As the result, the rest of 511
> > > > bytes is ignored by GCC.  Implement ldtilecfg and sttilecfg intrinsics
> > > > with a pointer to BLKmode to honor the 512-byte memory block.
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > > PR target/114098
> > > > * config/i386/amxtileintrin.h (_tile_loadconfig): Use
> > > > __builtin_ia32_ldtilecfg.
> > > > (_tile_storeconfig): Use __builtin_ia32_sttilecfg.
> > > > * config/i386/i386-builtin.def (BDESC): Add
> > > > __builtin_ia32_ldtilecfg and __builtin_ia32_sttilecfg.
> > > > * config/i386/i386-expand.cc (ix86_expand_builtin): Handle
> > > > IX86_BUILTIN_LDTILECFG and IX86_BUILTIN_STTILECFG.
> > > > * config/i386/i386.md (ldtilecfg): New pattern.
> > > > (sttilecfg): Likewise.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > > PR target/114098
> > > > * gcc.target/i386/amxtile-4.c: New test.
> > > > ---
> > > >  gcc/config/i386/amxtileintrin.h   |  4 +-
> > > >  gcc/config/i386/i386-builtin.def  |  4 ++
> > > >  gcc/config/i386/i386-expand.cc| 19 
> > > >  gcc/config/i386/i386.md   | 24 ++
> > > >  gcc/testsuite/gcc.target/i386/amxtile-4.c | 55 +++
> > > >  5 files changed, 104 insertions(+), 2 deletions(-)
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/amxtile-4.c
> > > >
> > > > diff --git a/gcc/config/i386/amxtileintrin.h 
> > > > b/gcc/config/i386/amxtileintrin.h
> > > > index d1a26e0fea5..5081b326498 100644
> > > > --- a/gcc/config/i386/amxtileintrin.h
> > > > +++ b/gcc/config/i386/amxtileintrin.h
> > > > @@ -39,14 +39,14 @@ extern __inline void
> > > >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > >  _tile_loadconfig (const void *__config)
> > > >  {
> > > > -  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void 
> > > > **)__config)));
> > > > +  __builtin_ia32_ldtilecfg (__config);
> > > >  }
> > > >
> > > >  extern __inline void
> > > >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > >  _tile_storeconfig (void *__config)
> > > >  {
> > > > -  __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
> > > > +  __builtin_ia32_sttilecfg (__config);
> > > >  }
> > > >
> > > >  extern __inline void
> > > > diff --git a/gcc/config/i386/i386-builtin.def 
> > > > b/gcc/config/i386/i386-builtin.def
> > > > index 729355230b8..88dd7f8857f 100644
> > > > --- a/gcc/config/i386/i386-builtin.def
> > > > +++ b/gcc/config/i386/i386-builtin.def
> > > > @@ -126,6 +126,10 @@ BDESC (OPTION_MASK_ISA_XSAVES | 
> > > > OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__b
> > > >  BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, 
> > > > CODE_FOR_nothing, "_

Re: [PATCH] x86: Properly implement AMX-TILE load/store intrinsics

2024-02-25 Thread H.J. Lu
On Sun, Feb 25, 2024 at 6:03 PM Hongtao Liu  wrote:
>
> On Mon, Feb 26, 2024 at 5:11 AM H.J. Lu  wrote:
> >
> > ldtilecfg and sttilecfg take a 512-byte memory block.  With
> > _tile_loadconfig implemented as
> >
> > extern __inline void
> > __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > _tile_loadconfig (const void *__config)
> > {
> >   __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
> > }
> >
> > GCC sees:
> >
> > (parallel [
> >   (asm_operands/v ("ldtilecfg   %X0") ("") 0
> >[(mem/f/c:DI (plus:DI (reg/f:DI 77 virtual-stack-vars)
> >  (const_int -64 [0xffc0])) [1 
> > MEM[(const void * *)_data]+0 S8 A128])]
> >[(asm_input:DI ("m"))]
> >(clobber (reg:CC 17 flags))])
> >
> > and the memory operand size is 1 byte.  As the result, the rest of 511
> > bytes is ignored by GCC.  Implement ldtilecfg and sttilecfg intrinsics
> > with a pointer to BLKmode to honor the 512-byte memory block.
> >
> > gcc/ChangeLog:
> >
> > PR target/114098
> > * config/i386/amxtileintrin.h (_tile_loadconfig): Use
> > __builtin_ia32_ldtilecfg.
> > (_tile_storeconfig): Use __builtin_ia32_sttilecfg.
> > * config/i386/i386-builtin.def (BDESC): Add
> > __builtin_ia32_ldtilecfg and __builtin_ia32_sttilecfg.
> > * config/i386/i386-expand.cc (ix86_expand_builtin): Handle
> > IX86_BUILTIN_LDTILECFG and IX86_BUILTIN_STTILECFG.
> > * config/i386/i386.md (ldtilecfg): New pattern.
> > (sttilecfg): Likewise.
> >
> > gcc/testsuite/ChangeLog:
> >
> > PR target/114098
> > * gcc.target/i386/amxtile-4.c: New test.
> > ---
> >  gcc/config/i386/amxtileintrin.h   |  4 +-
> >  gcc/config/i386/i386-builtin.def  |  4 ++
> >  gcc/config/i386/i386-expand.cc| 19 
> >  gcc/config/i386/i386.md   | 24 ++
> >  gcc/testsuite/gcc.target/i386/amxtile-4.c | 55 +++
> >  5 files changed, 104 insertions(+), 2 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/amxtile-4.c
> >
> > diff --git a/gcc/config/i386/amxtileintrin.h 
> > b/gcc/config/i386/amxtileintrin.h
> > index d1a26e0fea5..5081b326498 100644
> > --- a/gcc/config/i386/amxtileintrin.h
> > +++ b/gcc/config/i386/amxtileintrin.h
> > @@ -39,14 +39,14 @@ extern __inline void
> >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> >  _tile_loadconfig (const void *__config)
> >  {
> > -  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
> > +  __builtin_ia32_ldtilecfg (__config);
> >  }
> >
> >  extern __inline void
> >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> >  _tile_storeconfig (void *__config)
> >  {
> > -  __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
> > +  __builtin_ia32_sttilecfg (__config);
> >  }
> >
> >  extern __inline void
> > diff --git a/gcc/config/i386/i386-builtin.def 
> > b/gcc/config/i386/i386-builtin.def
> > index 729355230b8..88dd7f8857f 100644
> > --- a/gcc/config/i386/i386-builtin.def
> > +++ b/gcc/config/i386/i386-builtin.def
> > @@ -126,6 +126,10 @@ BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 
> > 0, CODE_FOR_nothing, "__b
> >  BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, 
> > CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, 
> > UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
> >  BDESC (OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, 0, 
> > CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, 
> > UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
> >
> > +/* LDFILECFG and STFILECFG.  */
> > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, 
> > CODE_FOR_ldtilecfg, "__builtin_ia32_ldtilecfg", IX86_BUILTIN_LDTILECFG, 
> > UNKNOWN, (int) VOID_FTYPE_PCVOID)
> > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, 
> > CODE_FOR_ldtilecfg, "__builtin_ia32_sttilecfg", IX86_BUILTIN_STTILECFG, 
> > UNKNOWN, (int) VOID_FTYPE_PVOID)
> CODE_FOR_sttilecfg.

It is unused.  I changed both to CODE_FOR_nothing.

> > +
> >  /* SSE */
> >  BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_movv4sf_internal, 
> > "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKN

[PATCH v2] x86: Check interrupt instead of noreturn attribute

2024-02-25 Thread H.J. Lu
ix86_set_func_type checks noreturn attribute to avoid incompatible
attribute error in LTO1 on interrupt functions.  Since TREE_THIS_VOLATILE
is set also for _Noreturn without noreturn attribute, check interrupt
attribute for interrupt functions instead.

gcc/

PR target/114097
* config/i386/i386-options.cc (ix86_set_func_type): Check
interrupt instead of noreturn attribute.

gcc/testsuite/

PR target/114097
* gcc.target/i386/pr114097-1.c: New test.
---
 gcc/config/i386/i386-options.cc|  8 ---
 gcc/testsuite/gcc.target/i386/pr114097-1.c | 26 ++
 2 files changed, 31 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr114097-1.c

diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 93a01146db7..1301f6b913e 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -3391,11 +3391,13 @@ ix86_set_func_type (tree fndecl)
  into a noreturn function by setting TREE_THIS_VOLATILE.  Normally
  the local-pure-const pass is run after ix86_set_func_type is called.
  When the local-pure-const pass is enabled for LTO, the interrupt
- function is marked as noreturn in the IR output, which leads the
- incompatible attribute error in LTO1.  */
+ function is marked with TREE_THIS_VOLATILE in the IR output, which
+ leads to the incompatible attribute error in LTO1.  Ignore the
+ interrupt function in this case.  */
   bool has_no_callee_saved_registers
 = ((TREE_THIS_VOLATILE (fndecl)
-   && lookup_attribute ("noreturn", DECL_ATTRIBUTES (fndecl))
+   && !lookup_attribute ("interrupt",
+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))
&& optimize
&& !optimize_debug
&& (TREE_NOTHROW (fndecl) || !flag_exceptions))
diff --git a/gcc/testsuite/gcc.target/i386/pr114097-1.c b/gcc/testsuite/gcc.target/i386/pr114097-1.c
new file mode 100644
index 000..b14c7b6214d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr114097-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move -fomit-frame-pointer" } */
+
+#define ARRAY_SIZE 256
+
+extern int array[ARRAY_SIZE][ARRAY_SIZE][ARRAY_SIZE];
+extern int value (int, int, int)
+#ifndef __x86_64__
+__attribute__ ((regparm(3)))
+#endif
+;
+
+void
+_Noreturn
+no_return_to_caller (void)
+{
+  unsigned i, j, k;
+  for (i = ARRAY_SIZE; i > 0; --i)
+for (j = ARRAY_SIZE; j > 0; --j)
+  for (k = ARRAY_SIZE; k > 0; --k)
+   array[i - 1][j - 1][k - 1] = value (i, j, k);
+  while (1);
+}
+
+/* { dg-final { scan-assembler-not "push" } } */
+/* { dg-final { scan-assembler-not "pop" } } */
-- 
2.43.2



[PATCH] x86: Properly implement AMX-TILE load/store intrinsics

2024-02-25 Thread H.J. Lu
ldtilecfg and sttilecfg take a 512-byte memory block.  With
_tile_loadconfig implemented as

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tile_loadconfig (const void *__config)
{
  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
}

GCC sees:

(parallel [
  (asm_operands/v ("ldtilecfg   %X0") ("") 0
   [(mem/f/c:DI (plus:DI (reg/f:DI 77 virtual-stack-vars)
 (const_int -64 [0xffc0])) [1 MEM[(const 
void * *)_data]+0 S8 A128])]
   [(asm_input:DI ("m"))]
   (clobber (reg:CC 17 flags))])

and the memory operand size is 1 byte.  As a result, the remaining 511
bytes are ignored by GCC.  Implement ldtilecfg and sttilecfg intrinsics
with a pointer to BLKmode to honor the 512-byte memory block.

gcc/ChangeLog:

PR target/114098
* config/i386/amxtileintrin.h (_tile_loadconfig): Use
__builtin_ia32_ldtilecfg.
(_tile_storeconfig): Use __builtin_ia32_sttilecfg.
* config/i386/i386-builtin.def (BDESC): Add
__builtin_ia32_ldtilecfg and __builtin_ia32_sttilecfg.
* config/i386/i386-expand.cc (ix86_expand_builtin): Handle
IX86_BUILTIN_LDTILECFG and IX86_BUILTIN_STTILECFG.
* config/i386/i386.md (ldtilecfg): New pattern.
(sttilecfg): Likewise.

gcc/testsuite/ChangeLog:

PR target/114098
* gcc.target/i386/amxtile-4.c: New test.
---
 gcc/config/i386/amxtileintrin.h   |  4 +-
 gcc/config/i386/i386-builtin.def  |  4 ++
 gcc/config/i386/i386-expand.cc| 19 
 gcc/config/i386/i386.md   | 24 ++
 gcc/testsuite/gcc.target/i386/amxtile-4.c | 55 +++
 5 files changed, 104 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/amxtile-4.c

diff --git a/gcc/config/i386/amxtileintrin.h b/gcc/config/i386/amxtileintrin.h
index d1a26e0fea5..5081b326498 100644
--- a/gcc/config/i386/amxtileintrin.h
+++ b/gcc/config/i386/amxtileintrin.h
@@ -39,14 +39,14 @@ extern __inline void
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _tile_loadconfig (const void *__config)
 {
-  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
+  __builtin_ia32_ldtilecfg (__config);
 }
 
 extern __inline void
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _tile_storeconfig (void *__config)
 {
-  __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
+  __builtin_ia32_sttilecfg (__config);
 }
 
 extern __inline void
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 729355230b8..88dd7f8857f 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -126,6 +126,10 @@ BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, 
CODE_FOR_nothing, "__b
 BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, 
"__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) 
VOID_FTYPE_PVOID_INT64)
 BDESC (OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, 
"__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) 
VOID_FTYPE_PVOID_INT64)
 
+/* LDTILECFG and STTILECFG.  */
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, CODE_FOR_ldtilecfg, 
"__builtin_ia32_ldtilecfg", IX86_BUILTIN_LDTILECFG, UNKNOWN, (int) 
VOID_FTYPE_PCVOID)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, CODE_FOR_ldtilecfg, 
"__builtin_ia32_sttilecfg", IX86_BUILTIN_STTILECFG, UNKNOWN, (int) 
VOID_FTYPE_PVOID)
+
 /* SSE */
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_movv4sf_internal, 
"__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) 
VOID_FTYPE_PFLOAT_V4SF)
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_movntv4sf, 
"__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) 
VOID_FTYPE_PFLOAT_V4SF)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a4d3369f01b..17993eb837f 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -14152,6 +14152,25 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
emit_insn (pat);
   return 0;
 
+case IX86_BUILTIN_LDTILECFG:
+case IX86_BUILTIN_STTILECFG:
+  arg0 = CALL_EXPR_ARG (exp, 0);
+  op0 = expand_normal (arg0);
+
+  if (!address_operand (op0, VOIDmode))
+   {
+ op0 = convert_memory_address (Pmode, op0);
+ op0 = copy_addr_to_reg (op0);
+   }
+  op0 = gen_rtx_MEM (BLKmode, op0);
+  if (fcode == IX86_BUILTIN_LDTILECFG)
+   icode = CODE_FOR_ldtilecfg;
+  else
+   icode = CODE_FOR_sttilecfg;
+  pat = GEN_FCN (icode) (op0);
+  emit_insn (pat);
+  return 0;
+
 case IX86_BUILTIN_LLWPCB:
   arg0 = CALL_EXPR_ARG (exp, 0);
   op0 = expand_normal (arg0);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6a26d966a0e..0ede6adac2f 100644
--- a/gcc/config/i386/i386.md
+++ 

Re: [PATCH] x86: Check interrupt instead of noreturn attribute

2024-02-25 Thread H.J. Lu
On Sun, Feb 25, 2024 at 8:54 AM Uros Bizjak  wrote:
>
> On Sun, Feb 25, 2024 at 5:01 PM H.J. Lu  wrote:
> >
> > ix86_set_func_type checks noreturn attribute to avoid incompatible
> > attribute error in LTO1 on interrupt functions.  Since TREE_THIS_VOLATILE
> > is set also for _Noreturn without noreturn attribute, check interrupt
> > attribute for interrupt functions instead.
>
> Please also adjust the comment above the change. The current comment
> even explains why the "noreturn" attribute is checked instead of
> "interrupt" attribute.

How about this?

 NB: Can't use just TREE_THIS_VOLATILE to check if this is a noreturn
 function.  The local-pure-const pass turns an interrupt function
 into a noreturn function by setting TREE_THIS_VOLATILE.  Normally
 the local-pure-const pass is run after ix86_set_func_type is called.
 When the local-pure-const pass is enabled for LTO, the interrupt
 function is marked with TREE_THIS_VOLATILE in the IR output, which
 leads to the incompatible attribute error in LTO1.  Ignore the
 interrupt function in this case.

Thanks.

> Uros.
>
> >
> > gcc/
> >
> > PR target/114097
> > * config/i386/i386-options.cc (ix86_set_func_type): Check
> > interrupt instead of noreturn attribute.
> >
> > gcc/testsuite/
> >
> > PR target/114097
> > * gcc.target/i386/pr114097-1.c: New test.
> > ---
> >  gcc/config/i386/i386-options.cc|  3 ++-
> >  gcc/testsuite/gcc.target/i386/pr114097-1.c | 26 ++
> >  2 files changed, 28 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr114097-1.c
> >
> > diff --git a/gcc/config/i386/i386-options.cc 
> > b/gcc/config/i386/i386-options.cc
> > index 93a01146db7..82fe0d228cd 100644
> > --- a/gcc/config/i386/i386-options.cc
> > +++ b/gcc/config/i386/i386-options.cc
> > @@ -3395,7 +3395,8 @@ ix86_set_func_type (tree fndecl)
> >   incompatible attribute error in LTO1.  */
> >bool has_no_callee_saved_registers
> >  = ((TREE_THIS_VOLATILE (fndecl)
> > -   && lookup_attribute ("noreturn", DECL_ATTRIBUTES (fndecl))
> > +   && !lookup_attribute ("interrupt",
> > + TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))
> > && optimize
> > && !optimize_debug
> > && (TREE_NOTHROW (fndecl) || !flag_exceptions))
> > diff --git a/gcc/testsuite/gcc.target/i386/pr114097-1.c 
> > b/gcc/testsuite/gcc.target/i386/pr114097-1.c
> > new file mode 100644
> > index 000..b14c7b6214d
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr114097-1.c
> > @@ -0,0 +1,26 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move 
> > -fomit-frame-pointer" } */
> > +
> > +#define ARRAY_SIZE 256
> > +
> > +extern int array[ARRAY_SIZE][ARRAY_SIZE][ARRAY_SIZE];
> > +extern int value (int, int, int)
> > +#ifndef __x86_64__
> > +__attribute__ ((regparm(3)))
> > +#endif
> > +;
> > +
> > +void
> > +_Noreturn
> > +no_return_to_caller (void)
> > +{
> > +  unsigned i, j, k;
> > +  for (i = ARRAY_SIZE; i > 0; --i)
> > +for (j = ARRAY_SIZE; j > 0; --j)
> > +  for (k = ARRAY_SIZE; k > 0; --k)
> > +   array[i - 1][j - 1][k - 1] = value (i, j, k);
> > +  while (1);
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "push" } } */
> > +/* { dg-final { scan-assembler-not "pop" } } */
> > --
> > 2.43.2
> >



-- 
H.J.


[PATCH] x86: Check interrupt instead of noreturn attribute

2024-02-25 Thread H.J. Lu
ix86_set_func_type checks noreturn attribute to avoid incompatible
attribute error in LTO1 on interrupt functions.  Since TREE_THIS_VOLATILE
is set also for _Noreturn without noreturn attribute, check interrupt
attribute for interrupt functions instead.

gcc/

PR target/114097
* config/i386/i386-options.cc (ix86_set_func_type): Check
interrupt instead of noreturn attribute.

gcc/testsuite/

PR target/114097
* gcc.target/i386/pr114097-1.c: New test.
---
 gcc/config/i386/i386-options.cc|  3 ++-
 gcc/testsuite/gcc.target/i386/pr114097-1.c | 26 ++
 2 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr114097-1.c

diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 93a01146db7..82fe0d228cd 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -3395,7 +3395,8 @@ ix86_set_func_type (tree fndecl)
  incompatible attribute error in LTO1.  */
   bool has_no_callee_saved_registers
 = ((TREE_THIS_VOLATILE (fndecl)
-   && lookup_attribute ("noreturn", DECL_ATTRIBUTES (fndecl))
+   && !lookup_attribute ("interrupt",
+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))
&& optimize
&& !optimize_debug
&& (TREE_NOTHROW (fndecl) || !flag_exceptions))
diff --git a/gcc/testsuite/gcc.target/i386/pr114097-1.c b/gcc/testsuite/gcc.target/i386/pr114097-1.c
new file mode 100644
index 000..b14c7b6214d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr114097-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune-ctrl=^prologue_using_move,^epilogue_using_move -fomit-frame-pointer" } */
+
+#define ARRAY_SIZE 256
+
+extern int array[ARRAY_SIZE][ARRAY_SIZE][ARRAY_SIZE];
+extern int value (int, int, int)
+#ifndef __x86_64__
+__attribute__ ((regparm(3)))
+#endif
+;
+
+void
+_Noreturn
+no_return_to_caller (void)
+{
+  unsigned i, j, k;
+  for (i = ARRAY_SIZE; i > 0; --i)
+for (j = ARRAY_SIZE; j > 0; --j)
+  for (k = ARRAY_SIZE; k > 0; --k)
+   array[i - 1][j - 1][k - 1] = value (i, j, k);
+  while (1);
+}
+
+/* { dg-final { scan-assembler-not "push" } } */
+/* { dg-final { scan-assembler-not "pop" } } */
-- 
2.43.2



Re: PING: [PATCH] x86-64: Check R_X86_64_CODE_6_GOTTPOFF support

2024-02-23 Thread H.J. Lu
On Fri, Feb 23, 2024 at 11:12:41AM +0100, Uros Bizjak wrote:
> On Fri, Feb 23, 2024 at 3:45 AM H.J. Lu  wrote:
> >
> > On Thu, Feb 22, 2024 at 6:39 PM Hongtao Liu  wrote:
> > >
> > > On Thu, Feb 22, 2024 at 10:33 PM H.J. Lu  wrote:
> > > >
> > > > On Sun, Feb 18, 2024 at 8:02 AM H.J. Lu  wrote:
> > > > >
> > > > > If assembler and linker supports
> > > > >
> > > > > add %reg1, name@gottpoff(%rip), %reg2
> > > > >
> > > > > with R_X86_64_CODE_6_GOTTPOFF, we can generate it instead of
> > > > >
> > > > > mov name@gottpoff(%rip), %reg2
> > > > > add %reg1, %reg2
> > > x86 part LGTM, but I'm not familiar with the changes in config related 
> > > files.
> >
> > Jakub, Uros, Alexandre, can you review the configure.ac change in this 
> > patch?
> >
> > https://patchwork.sourceware.org/project/gcc/list/?series=31075
> >
> > Thanks.
> >
> > > > >
> > > > > gcc/
> > > > >
> > > > > * configure.ac (HAVE_AS_R_X86_64_CODE_6_GOTTPOFF): Defined as 
> > > > > 1
> > > > > if R_X86_64_CODE_6_GOTTPOFF is supported.
> > > > > * config.in: Regenerated.
> > > > > * configure: Likewise.
> > > > > * config/i386/predicates.md (apx_ndd_add_memory_operand): 
> > > > > Allow
> > > > > UNSPEC_GOTNTPOFF if R_X86_64_CODE_6_GOTTPOFF is supported.
> > > > >
> > > > > gcc/testsuite/
> > > > >
> > > > > * gcc.target/i386/apx-ndd-tls-1b.c: New test.
> > > > > * lib/target-supports.exp
> > > > > (check_effective_target_code_6_gottpoff_reloc): New.
> > > > > ---
> > > > >  gcc/config.in |  7 +++
> > > > >  gcc/config/i386/predicates.md |  6 +-
> > > > >  gcc/configure | 62 
> > > > > +++
> > > > >  gcc/configure.ac  | 37 +++
> > > > >  .../gcc.target/i386/apx-ndd-tls-1b.c  |  9 +++
> > > > >  gcc/testsuite/lib/target-supports.exp | 48 ++
> > > > >  6 files changed, 168 insertions(+), 1 deletion(-)
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-tls-1b.c
> > > > >
> > > > > diff --git a/gcc/config.in b/gcc/config.in
> > > > > index ce1d073833f..f3de4ba6776 100644
> > > > > --- a/gcc/config.in
> > > > > +++ b/gcc/config.in
> > > > > @@ -737,6 +737,13 @@
> > > > >  #endif
> > > > >
> > > > >
> > > > > +/* Define 0/1 if your assembler and linker support 
> > > > > R_X86_64_CODE_6_GOTTPOFF.
> > > > > +   */
> > > > > +#ifndef USED_FOR_TARGET
> > > > > +#undef HAVE_AS_R_X86_64_CODE_6_GOTTPOFF
> > > > > +#endif
> > > > > +
> > > > > +
> > > > >  /* Define if your assembler supports relocs needed by -fpic. */
> > > > >  #ifndef USED_FOR_TARGET
> > > > >  #undef HAVE_AS_SMALL_PIC_RELOCS
> > > > > diff --git a/gcc/config/i386/predicates.md 
> > > > > b/gcc/config/i386/predicates.md
> > > > > index 4c1aedd7e70..391f108c360 100644
> > > > > --- a/gcc/config/i386/predicates.md
> > > > > +++ b/gcc/config/i386/predicates.md
> > > > > @@ -2299,10 +2299,14 @@ (define_predicate "apx_ndd_memory_operand"
> > > > >
> > > > >  ;; Return true if OP is a memory operand which can be used in APX NDD
> > > > >  ;; ADD with register source operand.  UNSPEC_GOTNTPOFF memory operand
> > > > > -;; isn't allowed with APX NDD ADD.
> > > > > +;; is allowed with APX NDD ADD only if R_X86_64_CODE_6_GOTTPOFF 
> > > > > works.
> > > > >  (define_predicate "apx_ndd_add_memory_operand"
> > > > >(match_operand 0 "memory_operand")
> > > > >  {
> > > > > +  /* OK if "add %reg1, name@gottpoff(%rip), %reg2" is supported.  */
> > > > > +  if (HAVE_AS_R_X86_64_CODE_6_GOTTPOFF)
> > > > > +return true;
> > > > > +
> > > > >op = XEXP (op, 0);
> >

Re: PING: [PATCH] x86-64: Check R_X86_64_CODE_6_GOTTPOFF support

2024-02-22 Thread H.J. Lu
On Thu, Feb 22, 2024 at 6:39 PM Hongtao Liu  wrote:
>
> On Thu, Feb 22, 2024 at 10:33 PM H.J. Lu  wrote:
> >
> > On Sun, Feb 18, 2024 at 8:02 AM H.J. Lu  wrote:
> > >
> > > If assembler and linker supports
> > >
> > > add %reg1, name@gottpoff(%rip), %reg2
> > >
> > > with R_X86_64_CODE_6_GOTTPOFF, we can generate it instead of
> > >
> > > mov name@gottpoff(%rip), %reg2
> > > add %reg1, %reg2
> x86 part LGTM, but I'm not familiar with the changes in config related files.

Jakub, Uros, Alexandre, can you review the configure.ac change in this patch?

https://patchwork.sourceware.org/project/gcc/list/?series=31075

Thanks.

> > >
> > > gcc/
> > >
> > > * configure.ac (HAVE_AS_R_X86_64_CODE_6_GOTTPOFF): Defined as 1
> > > if R_X86_64_CODE_6_GOTTPOFF is supported.
> > > * config.in: Regenerated.
> > > * configure: Likewise.
> > > * config/i386/predicates.md (apx_ndd_add_memory_operand): Allow
> > > UNSPEC_GOTNTPOFF if R_X86_64_CODE_6_GOTTPOFF is supported.
> > >
> > > gcc/testsuite/
> > >
> > > * gcc.target/i386/apx-ndd-tls-1b.c: New test.
> > > * lib/target-supports.exp
> > > (check_effective_target_code_6_gottpoff_reloc): New.
> > > ---
> > >  gcc/config.in |  7 +++
> > >  gcc/config/i386/predicates.md |  6 +-
> > >  gcc/configure | 62 +++
> > >  gcc/configure.ac  | 37 +++
> > >  .../gcc.target/i386/apx-ndd-tls-1b.c  |  9 +++
> > >  gcc/testsuite/lib/target-supports.exp | 48 ++
> > >  6 files changed, 168 insertions(+), 1 deletion(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-tls-1b.c
> > >
> > > diff --git a/gcc/config.in b/gcc/config.in
> > > index ce1d073833f..f3de4ba6776 100644
> > > --- a/gcc/config.in
> > > +++ b/gcc/config.in
> > > @@ -737,6 +737,13 @@
> > >  #endif
> > >
> > >
> > > +/* Define 0/1 if your assembler and linker support 
> > > R_X86_64_CODE_6_GOTTPOFF.
> > > +   */
> > > +#ifndef USED_FOR_TARGET
> > > +#undef HAVE_AS_R_X86_64_CODE_6_GOTTPOFF
> > > +#endif
> > > +
> > > +
> > >  /* Define if your assembler supports relocs needed by -fpic. */
> > >  #ifndef USED_FOR_TARGET
> > >  #undef HAVE_AS_SMALL_PIC_RELOCS
> > > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> > > index 4c1aedd7e70..391f108c360 100644
> > > --- a/gcc/config/i386/predicates.md
> > > +++ b/gcc/config/i386/predicates.md
> > > @@ -2299,10 +2299,14 @@ (define_predicate "apx_ndd_memory_operand"
> > >
> > >  ;; Return true if OP is a memory operand which can be used in APX NDD
> > >  ;; ADD with register source operand.  UNSPEC_GOTNTPOFF memory operand
> > > -;; isn't allowed with APX NDD ADD.
> > > +;; is allowed with APX NDD ADD only if R_X86_64_CODE_6_GOTTPOFF works.
> > >  (define_predicate "apx_ndd_add_memory_operand"
> > >(match_operand 0 "memory_operand")
> > >  {
> > > +  /* OK if "add %reg1, name@gottpoff(%rip), %reg2" is supported.  */
> > > +  if (HAVE_AS_R_X86_64_CODE_6_GOTTPOFF)
> > > +return true;
> > > +
> > >op = XEXP (op, 0);
> > >
> > >/* Disallow APX NDD ADD with UNSPEC_GOTNTPOFF.  */
> > > diff --git a/gcc/configure b/gcc/configure
> > > index 41b978b0380..c59c971862c 100755
> > > --- a/gcc/configure
> > > +++ b/gcc/configure
> > > @@ -29834,6 +29834,68 @@ cat >>confdefs.h <<_ACEOF
> > >  _ACEOF
> > >
> > >
> > > +if echo "$ld_ver" | grep GNU > /dev/null; then
> > > +  if $gcc_cv_ld -V 2>/dev/null | grep elf_x86_64_sol2 > /dev/null; 
> > > then
> > > +ld_ix86_gld_64_opt="-melf_x86_64_sol2"
> > > +  else
> > > +ld_ix86_gld_64_opt="-melf_x86_64"
> > > +  fi
> > > +fi
> > > +conftest_s='
> > > +   .text
> > > +   .globl  _start
> > > +   .type _start, @function
> > > +_start:
> > > +   addq%r23,foo@GOTTPOFF(%rip), %r15
> > > +   .section .tdata,"awT",@p

PING: [PATCH] x86-64: Check R_X86_64_CODE_6_GOTTPOFF support

2024-02-22 Thread H.J. Lu
On Sun, Feb 18, 2024 at 8:02 AM H.J. Lu  wrote:
>
> If assembler and linker supports
>
> add %reg1, name@gottpoff(%rip), %reg2
>
> with R_X86_64_CODE_6_GOTTPOFF, we can generate it instead of
>
> mov name@gottpoff(%rip), %reg2
> add %reg1, %reg2
>
> gcc/
>
> * configure.ac (HAVE_AS_R_X86_64_CODE_6_GOTTPOFF): Defined as 1
> if R_X86_64_CODE_6_GOTTPOFF is supported.
> * config.in: Regenerated.
> * configure: Likewise.
> * config/i386/predicates.md (apx_ndd_add_memory_operand): Allow
> UNSPEC_GOTNTPOFF if R_X86_64_CODE_6_GOTTPOFF is supported.
>
> gcc/testsuite/
>
> * gcc.target/i386/apx-ndd-tls-1b.c: New test.
> * lib/target-supports.exp
> (check_effective_target_code_6_gottpoff_reloc): New.
> ---
>  gcc/config.in |  7 +++
>  gcc/config/i386/predicates.md |  6 +-
>  gcc/configure | 62 +++
>  gcc/configure.ac  | 37 +++
>  .../gcc.target/i386/apx-ndd-tls-1b.c  |  9 +++
>  gcc/testsuite/lib/target-supports.exp | 48 ++
>  6 files changed, 168 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-tls-1b.c
>
> diff --git a/gcc/config.in b/gcc/config.in
> index ce1d073833f..f3de4ba6776 100644
> --- a/gcc/config.in
> +++ b/gcc/config.in
> @@ -737,6 +737,13 @@
>  #endif
>
>
> +/* Define 0/1 if your assembler and linker support R_X86_64_CODE_6_GOTTPOFF.
> +   */
> +#ifndef USED_FOR_TARGET
> +#undef HAVE_AS_R_X86_64_CODE_6_GOTTPOFF
> +#endif
> +
> +
>  /* Define if your assembler supports relocs needed by -fpic. */
>  #ifndef USED_FOR_TARGET
>  #undef HAVE_AS_SMALL_PIC_RELOCS
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index 4c1aedd7e70..391f108c360 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -2299,10 +2299,14 @@ (define_predicate "apx_ndd_memory_operand"
>
>  ;; Return true if OP is a memory operand which can be used in APX NDD
>  ;; ADD with register source operand.  UNSPEC_GOTNTPOFF memory operand
> -;; isn't allowed with APX NDD ADD.
> +;; is allowed with APX NDD ADD only if R_X86_64_CODE_6_GOTTPOFF works.
>  (define_predicate "apx_ndd_add_memory_operand"
>(match_operand 0 "memory_operand")
>  {
> +  /* OK if "add %reg1, name@gottpoff(%rip), %reg2" is supported.  */
> +  if (HAVE_AS_R_X86_64_CODE_6_GOTTPOFF)
> +return true;
> +
>op = XEXP (op, 0);
>
>/* Disallow APX NDD ADD with UNSPEC_GOTNTPOFF.  */
> diff --git a/gcc/configure b/gcc/configure
> index 41b978b0380..c59c971862c 100755
> --- a/gcc/configure
> +++ b/gcc/configure
> @@ -29834,6 +29834,68 @@ cat >>confdefs.h <<_ACEOF
>  _ACEOF
>
>
> +if echo "$ld_ver" | grep GNU > /dev/null; then
> +  if $gcc_cv_ld -V 2>/dev/null | grep elf_x86_64_sol2 > /dev/null; then
> +ld_ix86_gld_64_opt="-melf_x86_64_sol2"
> +  else
> +ld_ix86_gld_64_opt="-melf_x86_64"
> +  fi
> +fi
> +conftest_s='
> +   .text
> +   .globl  _start
> +   .type _start, @function
> +_start:
> +   addq%r23,foo@GOTTPOFF(%rip), %r15
> +   .section .tdata,"awT",@progbits
> +   .type foo, @object
> +foo:
> +   .quad 0'
> +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for 
> R_X86_64_CODE_6_GOTTPOFF reloc" >&5
> +$as_echo_n "checking assembler for R_X86_64_CODE_6_GOTTPOFF reloc... " >&6; }
> +if ${gcc_cv_as_x86_64_code_6_gottpoff+:} false; then :
> +  $as_echo_n "(cached) " >&6
> +else
> +  gcc_cv_as_x86_64_code_6_gottpoff=no
> +  if test x$gcc_cv_as != x; then
> +$as_echo "$conftest_s" > conftest.s
> +if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s >&5'
> +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> +  (eval $ac_try) 2>&5
> +  ac_status=$?
> +  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> +  test $ac_status = 0; }; }
> +then
> +   if test x$gcc_cv_ld != x && test x$gcc_cv_objdump != x \
> +   && test x$gcc_cv_readelf != x \
> +   && $gcc_cv_readelf --relocs --wide conftest.o 2>&1 \
> +  | grep R_X86_64_CODE_6_GOTTPOFF > /dev/null 2>&1 \
> +   && $gcc_cv_ld $ld_ix86_gld_64_opt -o conftest conftest.o > 
> /dev/null 

[PATCH] x86-64: Check R_X86_64_CODE_6_GOTTPOFF support

2024-02-18 Thread H.J. Lu
If assembler and linker support

add %reg1, name@gottpoff(%rip), %reg2

with R_X86_64_CODE_6_GOTTPOFF, we can generate it instead of

mov name@gottpoff(%rip), %reg2
add %reg1, %reg2

gcc/

* configure.ac (HAVE_AS_R_X86_64_CODE_6_GOTTPOFF): Defined as 1
if R_X86_64_CODE_6_GOTTPOFF is supported.
* config.in: Regenerated.
* configure: Likewise.
* config/i386/predicates.md (apx_ndd_add_memory_operand): Allow
UNSPEC_GOTNTPOFF if R_X86_64_CODE_6_GOTTPOFF is supported.

gcc/testsuite/

* gcc.target/i386/apx-ndd-tls-1b.c: New test.
* lib/target-supports.exp
(check_effective_target_code_6_gottpoff_reloc): New.
---
 gcc/config.in |  7 +++
 gcc/config/i386/predicates.md |  6 +-
 gcc/configure | 62 +++
 gcc/configure.ac  | 37 +++
 .../gcc.target/i386/apx-ndd-tls-1b.c  |  9 +++
 gcc/testsuite/lib/target-supports.exp | 48 ++
 6 files changed, 168 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-tls-1b.c

diff --git a/gcc/config.in b/gcc/config.in
index ce1d073833f..f3de4ba6776 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -737,6 +737,13 @@
 #endif
 
 
+/* Define 0/1 if your assembler and linker support R_X86_64_CODE_6_GOTTPOFF.
+   */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_R_X86_64_CODE_6_GOTTPOFF
+#endif
+
+
 /* Define if your assembler supports relocs needed by -fpic. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_SMALL_PIC_RELOCS
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 4c1aedd7e70..391f108c360 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -2299,10 +2299,14 @@ (define_predicate "apx_ndd_memory_operand"
 
 ;; Return true if OP is a memory operand which can be used in APX NDD
 ;; ADD with register source operand.  UNSPEC_GOTNTPOFF memory operand
-;; isn't allowed with APX NDD ADD.
+;; is allowed with APX NDD ADD only if R_X86_64_CODE_6_GOTTPOFF works.
 (define_predicate "apx_ndd_add_memory_operand"
   (match_operand 0 "memory_operand")
 {
+  /* OK if "add %reg1, name@gottpoff(%rip), %reg2" is supported.  */
+  if (HAVE_AS_R_X86_64_CODE_6_GOTTPOFF)
+return true;
+
   op = XEXP (op, 0);
 
   /* Disallow APX NDD ADD with UNSPEC_GOTNTPOFF.  */
diff --git a/gcc/configure b/gcc/configure
index 41b978b0380..c59c971862c 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -29834,6 +29834,68 @@ cat >>confdefs.h <<_ACEOF
 _ACEOF
 
 
+if echo "$ld_ver" | grep GNU > /dev/null; then
+  if $gcc_cv_ld -V 2>/dev/null | grep elf_x86_64_sol2 > /dev/null; then
+ld_ix86_gld_64_opt="-melf_x86_64_sol2"
+  else
+ld_ix86_gld_64_opt="-melf_x86_64"
+  fi
+fi
+conftest_s='
+   .text
+   .globl  _start
+   .type _start, @function
+_start:
+   addq%r23,foo@GOTTPOFF(%rip), %r15
+   .section .tdata,"awT",@progbits
+   .type foo, @object
+foo:
+   .quad 0'
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for 
R_X86_64_CODE_6_GOTTPOFF reloc" >&5
+$as_echo_n "checking assembler for R_X86_64_CODE_6_GOTTPOFF reloc... " >&6; }
+if ${gcc_cv_as_x86_64_code_6_gottpoff+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  gcc_cv_as_x86_64_code_6_gottpoff=no
+  if test x$gcc_cv_as != x; then
+$as_echo "$conftest_s" > conftest.s
+if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s >&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+then
+   if test x$gcc_cv_ld != x && test x$gcc_cv_objdump != x \
+   && test x$gcc_cv_readelf != x \
+   && $gcc_cv_readelf --relocs --wide conftest.o 2>&1 \
+  | grep R_X86_64_CODE_6_GOTTPOFF > /dev/null 2>&1 \
+   && $gcc_cv_ld $ld_ix86_gld_64_opt -o conftest conftest.o > 
/dev/null 2>&1; then
+  if $gcc_cv_objdump -dw conftest 2>&1 \
+ | grep "add \+\$0xf\+8,%r23,%r15" > /dev/null 2>&1; then
+gcc_cv_as_x86_64_code_6_gottpoff=yes
+  else
+gcc_cv_as_x86_64_code_6_gottpoff=no
+  fi
+fi
+rm -f conftest
+else
+  echo "configure: failed program was" >&5
+  cat conftest.s >&5
+fi
+rm -f conftest.o conftest.s
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 
$gcc_cv_as_x86_64_code_6_gottpoff" >&5
+$as_echo "$gcc_cv_as_x86_64_code_6_gottpoff" >&6; }
+
+
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_AS_R_X86_64_CODE_6_GOTTPOFF `if test 
x"$gcc_cv_as_x86_64_code_6_gottpoff" = xyes; then echo 1; else echo 0; fi`
+_ACEOF
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for GOTOFF in 
data" >&5
 $as_echo_n "checking assembler for GOTOFF in data... " >&6; }
 if 

Re: [PATCH v2] x86: Support x32 and IBT in heap trampoline

2024-02-14 Thread H.J. Lu
On Wed, Feb 14, 2024 at 11:59 AM Iain Sandoe  wrote:
>
>
>
> > On 14 Feb 2024, at 18:12, H.J. Lu  wrote:
> >
> > On Tue, Feb 13, 2024 at 8:46 AM Jakub Jelinek  wrote:
> >>
> >> On Tue, Feb 13, 2024 at 08:40:52AM -0800, H.J. Lu wrote:
> >>> Add x32 and IBT support to x86 heap trampoline implementation with a
> >>> testcase.
> >>>
> >>> 2024-02-13  Jakub Jelinek  
> >>>  H.J. Lu  
> >>>
> >>> libgcc/
> >>>
> >>>  PR target/113855
> >>>  * config/i386/heap-trampoline.c (trampoline_insns): Add IBT
> >>>  support and pad to the multiple of 4 bytes.  Use movabsq
> >>>  instead of movabs in comments.  Add -mx32 variant.
> >>>
> >>> gcc/testsuite/
> >>>
> >>>  PR target/113855
> >>>  * gcc.dg/heap-trampoline-1.c: New test.
> >>>  * lib/target-supports.exp (check_effective_target_heap_trampoline):
> >>>  New.
> >>
> >> LGTM, but please give Iain a day or two to chime in.
> >>
> >>Jakub
> >>
> >
> > I am checking it in today.
>
> I have just one question;
>
>  from your patch the use of endbr* seems to be unconditionally based on the
>  flags used to build libgcc.
>
>  However, I was expecting that the use of extended trampolines like this would
>  depend on command line flags used to compile the end-user’s code.

We only ship ONE libgcc binary.   You get the same libgcc binary regardless
of what options one uses to compile an application.   Since ENDBR64 is a NOP
if IBT isn't enabled, it isn't an issue.

>  As per the discussion in 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113855#c4
>  I was expecting that we would need to extend this implementation to cover 
> more
>  cases (i.e. the GCC-14 implementation is “base”).
>
>  any comments?
> Iain
>
>
> >
> > --
> > H.J.
>


-- 
H.J.


Re: [PATCH v2] x86: Support x32 and IBT in heap trampoline

2024-02-14 Thread H.J. Lu
On Tue, Feb 13, 2024 at 8:46 AM Jakub Jelinek  wrote:
>
> On Tue, Feb 13, 2024 at 08:40:52AM -0800, H.J. Lu wrote:
> > Add x32 and IBT support to x86 heap trampoline implementation with a
> > testcase.
> >
> > 2024-02-13  Jakub Jelinek  
> >   H.J. Lu  
> >
> > libgcc/
> >
> >   PR target/113855
> >   * config/i386/heap-trampoline.c (trampoline_insns): Add IBT
> >   support and pad to the multiple of 4 bytes.  Use movabsq
> >   instead of movabs in comments.  Add -mx32 variant.
> >
> > gcc/testsuite/
> >
> >   PR target/113855
> >   * gcc.dg/heap-trampoline-1.c: New test.
> >   * lib/target-supports.exp (check_effective_target_heap_trampoline):
> >   New.
>
> LGTM, but please give Iain a day or two to chime in.
>
> Jakub
>

I am checking it in today.

-- 
H.J.


[PATCH] x86-64: Generate push2/pop2 only if the incoming stack is 16-byte aligned

2024-02-13 Thread H.J. Lu
Since push2/pop2 requires 16-byte stack alignment, don't generate them
if the incoming stack isn't 16-byte aligned.

gcc/

PR target/113912
* config/i386/i386.cc (ix86_can_use_push2pop2): New.
(ix86_pro_and_epilogue_can_use_push2pop2): Use it.
(ix86_emit_save_regs): Don't generate push2 if
ix86_can_use_push2pop2 returns false.
(ix86_expand_epilogue): Don't generate pop2 if
ix86_can_use_push2pop2 returns false.

gcc/testsuite/

PR target/113912
* gcc.target/i386/apx-push2pop2-2.c: New test.
---
 gcc/config/i386/i386.cc   | 24 ++-
 .../gcc.target/i386/apx-push2pop2-2.c | 24 +++
 2 files changed, 42 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-push2pop2-2.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index a4e12602f70..46f238651a6 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -6802,16 +6802,24 @@ get_probe_interval (void)
 
 #define SPLIT_STACK_AVAILABLE 256
 
-/* Helper function to determine whether push2/pop2 can be used in prologue or
-   epilogue for register save/restore.  */
+/* Return true if push2/pop2 can be generated.  */
+
 static bool
-ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
+ix86_can_use_push2pop2 (void)
 {
   /* Use push2/pop2 only if the incoming stack is 16-byte aligned.  */
   unsigned int incoming_stack_boundary
 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
-  if (incoming_stack_boundary % 128 != 0)
+  return incoming_stack_boundary % 128 == 0;
+}
+
+/* Helper function to determine whether push2/pop2 can be used in prologue or
+   epilogue for register save/restore.  */
+static bool
+ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
+{
+  if (!ix86_can_use_push2pop2 ())
 return false;
   int aligned = cfun->machine->fs.sp_offset % 16 == 0;
   return TARGET_APX_PUSH2POP2
@@ -7401,7 +7409,9 @@ ix86_emit_save_regs (void)
   int regno;
   rtx_insn *insn;
 
-  if (!TARGET_APX_PUSH2POP2 || cfun->machine->func_type != TYPE_NORMAL)
+  if (!TARGET_APX_PUSH2POP2
+  || !ix86_can_use_push2pop2 ()
+  || cfun->machine->func_type != TYPE_NORMAL)
 {
   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
@@ -10039,7 +10049,9 @@ ix86_expand_epilogue (int style)
 m->fs.cfa_reg == stack_pointer_rtx);
}
 
-  if (TARGET_APX_PUSH2POP2 && m->func_type == TYPE_NORMAL)
+  if (TARGET_APX_PUSH2POP2
+ && ix86_can_use_push2pop2 ()
+ && m->func_type == TYPE_NORMAL)
ix86_emit_restore_regs_using_pop2 ();
   else
ix86_emit_restore_regs_using_pop (TARGET_APX_PPX);
diff --git a/gcc/testsuite/gcc.target/i386/apx-push2pop2-2.c 
b/gcc/testsuite/gcc.target/i386/apx-push2pop2-2.c
new file mode 100644
index 000..975a6212b30
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-push2pop2-2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mpreferred-stack-boundary=3 -mapx-features=push2pop2 
-fomit-frame-pointer" } */
+
+extern int bar (int);
+
+void foo ()
+{
+  int a,b,c,d,e,f,i;
+  a = bar (5);
+  b = bar (a);
+  c = bar (b);
+  d = bar (c);
+  e = bar (d);
+  f = bar (e);
+  for (i = 1; i < 10; i++)
+  {
+a += bar (a + i) + bar (b + i) +
+ bar (c + i) + bar (d + i) +
+ bar (e + i) + bar (f + i);
+  }
+}
+
+/* { dg-final { scan-assembler-not "push2(|p)\[\\t \]*%r" } } */
+/* { dg-final { scan-assembler-not "pop2(|p)\[\\t \]*%r" } } */
-- 
2.43.0



[PATCH] x86-64: Use push2/pop2 only if the incoming stack is 16-byte aligned

2024-02-13 Thread H.J. Lu
Since push2/pop2 requires 16-byte stack alignment, don't use them if the
incoming stack isn't 16-byte aligned.

gcc/

PR target/113876
* config/i386/i386.cc (ix86_pro_and_epilogue_can_use_push2pop2):
Return false if the incoming stack isn't 16-byte aligned.

gcc/testsuite/

PR target/113876
* gcc.target/i386/pr113876.c: New test.
---
 gcc/config/i386/i386.cc  |  6 ++
 gcc/testsuite/gcc.target/i386/pr113876.c | 10 ++
 2 files changed, 16 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113876.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index dbb26e8f76a..a4e12602f70 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -6807,6 +6807,12 @@ get_probe_interval (void)
 static bool
 ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
 {
+  /* Use push2/pop2 only if the incoming stack is 16-byte aligned.  */
+  unsigned int incoming_stack_boundary
+= (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
+   ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
+  if (incoming_stack_boundary % 128 != 0)
+return false;
   int aligned = cfun->machine->fs.sp_offset % 16 == 0;
   return TARGET_APX_PUSH2POP2
 && !cfun->machine->frame.save_regs_using_mov
diff --git a/gcc/testsuite/gcc.target/i386/pr113876.c 
b/gcc/testsuite/gcc.target/i386/pr113876.c
new file mode 100644
index 000..fbf26f6ab8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113876.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O -mapxf -mpreferred-stack-boundary=3 -finstrument-functions 
-mcmodel=large" } */
+
+void
+bar (unsigned long *p)
+{
+  p[0] = 0;
+  p[1] = 0;
+  p[2] = 0;
+}
-- 
2.43.0



[PATCH v2] x86: Support x32 and IBT in heap trampoline

2024-02-13 Thread H.J. Lu
Add x32 and IBT support to x86 heap trampoline implementation with a
testcase.

2024-02-13  Jakub Jelinek  
H.J. Lu  

libgcc/

PR target/113855
* config/i386/heap-trampoline.c (trampoline_insns): Add IBT
support and pad to the multiple of 4 bytes.  Use movabsq
instead of movabs in comments.  Add -mx32 variant.

gcc/testsuite/

PR target/113855
* gcc.dg/heap-trampoline-1.c: New test.
* lib/target-supports.exp (check_effective_target_heap_trampoline):
New.
---
 gcc/testsuite/gcc.dg/heap-trampoline-1.c | 23 +
 gcc/testsuite/lib/target-supports.exp| 12 +++
 libgcc/config/i386/heap-trampoline.c | 42 ++--
 3 files changed, 74 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/heap-trampoline-1.c

diff --git a/gcc/testsuite/gcc.dg/heap-trampoline-1.c 
b/gcc/testsuite/gcc.dg/heap-trampoline-1.c
new file mode 100644
index 000..1aebe00d731
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/heap-trampoline-1.c
@@ -0,0 +1,23 @@
+/* { dg-do run { target heap_trampoline } } */
+/* { dg-options "-ftrampoline-impl=heap" } */
+
+__attribute__((noipa)) int
+bar (int (*fn) (int))
+{
+  return fn (42) + 1;
+}
+
+int
+main ()
+{
+  int a = 0;
+  int foo (int x) { if (x != 42) __builtin_abort (); return ++a; }
+  if (bar (foo) != 2 || a != 1)
+__builtin_abort ();
+  if (bar (foo) != 3 || a != 2)
+__builtin_abort ();
+  a = 42;
+  if (bar (foo) != 44 || a != 43)
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 6ce8557c9a9..81715999f87 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -13477,3 +13477,15 @@ proc dg-require-python-h { args } {
 eval lappend extra-tool-flags $python_flags
 verbose "After appending, extra-tool-flags: ${extra-tool-flags}" 3
 }
+
+# Return 1 if the target supports heap-trampoline, 0 otherwise.
+proc check_effective_target_heap_trampoline {} {
+if { [istarget aarch64*-*-linux*]
+|| [istarget i?86-*-darwin*]
+|| [istarget x86_64-*-darwin*]
+|| [istarget i?86-*-linux*]
+|| [istarget x86_64-*-linux*] } {
+   return 1
+}
+return 0
+}
diff --git a/libgcc/config/i386/heap-trampoline.c 
b/libgcc/config/i386/heap-trampoline.c
index 1df0aa06108..a8637dc92d3 100644
--- a/libgcc/config/i386/heap-trampoline.c
+++ b/libgcc/config/i386/heap-trampoline.c
@@ -30,28 +30,64 @@ void __gcc_nested_func_ptr_created (void *chain, void 
*func, void *dst);
 void __gcc_nested_func_ptr_deleted (void);
 
 #if __x86_64__
+
+#ifdef __LP64__
 static const uint8_t trampoline_insns[] = {
-  /* movabs $,%r11  */
+#if defined __CET__ && (__CET__ & 1) != 0
+  /* endbr64.  */
+  0xf3, 0x0f, 0x1e, 0xfa,
+#endif
+
+  /* movabsq $,%r11  */
   0x49, 0xbb,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 
-  /* movabs $,%r10  */
+  /* movabsq $,%r10  */
   0x49, 0xba,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 
   /* rex.WB jmpq *%r11  */
-  0x41, 0xff, 0xe3
+  0x41, 0xff, 0xe3,
+
+  /* Pad to the multiple of 4 bytes.  */
+  0x90
 };
+#else
+static const uint8_t trampoline_insns[] = {
+#if defined __CET__ && (__CET__ & 1) != 0
+  /* endbr64.  */
+  0xf3, 0x0f, 0x1e, 0xfa,
+#endif
+
+  /* movl $,%r11d  */
+  0x41, 0xbb,
+  0x00, 0x00, 0x00, 0x00,
+
+  /* movl $,%r10d  */
+  0x41, 0xba,
+  0x00, 0x00, 0x00, 0x00,
+
+  /* rex.WB jmpq *%r11  */
+  0x41, 0xff, 0xe3,
+
+  /* Pad to the multiple of 4 bytes.  */
+  0x90
+};
+#endif
 
 union ix86_trampoline {
   uint8_t insns[sizeof(trampoline_insns)];
 
   struct __attribute__((packed)) fields {
+#if defined __CET__ && (__CET__ & 1) != 0
+uint8_t endbr64[4];
+#endif
 uint8_t insn_0[2];
 void *func_ptr;
 uint8_t insn_1[2];
 void *chain_ptr;
 uint8_t insn_2[3];
+uint8_t pad;
   } fields;
 };
 
-- 
2.43.0



[PATCH] x86: Support x32 and IBT in heap trampoline

2024-02-13 Thread H.J. Lu
On Tue, Feb 13, 2024 at 10:42:52AM +0100, Jakub Jelinek wrote:
> On Sat, Feb 10, 2024 at 10:05:34AM -0800, H.J. Lu wrote:
> > > I bet it probably doesn't work properly for -mx32 (which defines
> > > __x86_64__), CCing H.J. on that, but that is a preexisting issue
> > > (and I don't have any experience with it; I guess one would either
> > > need to add 4 bytes of padding after the func_ptr so that those
> > > bits remain zeros as sizeof (void *) is 4, but presumably it would be
> > > better to just use movl (but into %r10) and maybe the jmpl instead
> > > of movabsq.
> > 
> > Are there any testcases to exercise this code on Linux?
> 
> Here is an untested attempt to implement it for -mx32 (well, I've compiled
> it with -mx32 in libgcc by hand after stubbing
> /usr/include/gnu/stubs-x32.h).
> 
> Testcase could be something like:
> 
> /* { dg-do run } */
> /* { dg-options "-ftrampoline-impl=heap" } */
> 
> __attribute__((noipa)) int
> bar (int (*fn) (int))
> {
>   return fn (42) + 1;
> }
> 
> int
> main ()
> {
>   int a = 0;
>   int foo (int x) { if (x != 42) __builtin_abort (); return ++a; }
>   if (bar (foo) != 2 || a != 1)
> __builtin_abort ();
>   if (bar (foo) != 3 || a != 2)
> __builtin_abort ();
>   a = 42;
>   if (bar (foo) != 44 || a != 43)
> __builtin_abort ();
>   return 0;
> }
> but I must say I'm also surprised we have no tests for this in the
> testsuite.  Sure, we'd also need to add some effective target whether
> -ftrampoline-impl=heap can be used for a link/runtime test or not.
> 
> 2024-02-13  Jakub Jelinek  
> 
>   PR target/113855
>   * config/i386/heap-trampoline.c (trampoline_insns): Use movabsq
>   instead of movabs in comments.  Add -mx32 variant.
> 

It works on x32.  I modified your patch to add IBT support and pad the
trampoline to the multiple of 4 bytes.

Thanks.

H.J.
---
2024-02-13  Jakub Jelinek  
H.J. Lu  

PR target/113855
* config/i386/heap-trampoline.c (trampoline_insns): Add IBT
support and pad to the multiple of 4 bytes.  Use movabsq
instead of movabs in comments.  Add -mx32 variant.
---
 libgcc/config/i386/heap-trampoline.c | 42 ++--
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/libgcc/config/i386/heap-trampoline.c 
b/libgcc/config/i386/heap-trampoline.c
index 1df0aa06108..a8637dc92d3 100644
--- a/libgcc/config/i386/heap-trampoline.c
+++ b/libgcc/config/i386/heap-trampoline.c
@@ -30,28 +30,64 @@ void __gcc_nested_func_ptr_created (void *chain, void 
*func, void *dst);
 void __gcc_nested_func_ptr_deleted (void);
 
 #if __x86_64__
+
+#ifdef __LP64__
 static const uint8_t trampoline_insns[] = {
-  /* movabs $,%r11  */
+#if defined __CET__ && (__CET__ & 1) != 0
+  /* endbr64.  */
+  0xf3, 0x0f, 0x1e, 0xfa,
+#endif
+
+  /* movabsq $,%r11  */
   0x49, 0xbb,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 
-  /* movabs $,%r10  */
+  /* movabsq $,%r10  */
   0x49, 0xba,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 
   /* rex.WB jmpq *%r11  */
-  0x41, 0xff, 0xe3
+  0x41, 0xff, 0xe3,
+
+  /* Pad to the multiple of 4 bytes.  */
+  0x90
 };
+#else
+static const uint8_t trampoline_insns[] = {
+#if defined __CET__ && (__CET__ & 1) != 0
+  /* endbr64.  */
+  0xf3, 0x0f, 0x1e, 0xfa,
+#endif
+
+  /* movl $,%r11d  */
+  0x41, 0xbb,
+  0x00, 0x00, 0x00, 0x00,
+
+  /* movl $,%r10d  */
+  0x41, 0xba,
+  0x00, 0x00, 0x00, 0x00,
+
+  /* rex.WB jmpq *%r11  */
+  0x41, 0xff, 0xe3,
+
+  /* Pad to the multiple of 4 bytes.  */
+  0x90
+};
+#endif
 
 union ix86_trampoline {
   uint8_t insns[sizeof(trampoline_insns)];
 
   struct __attribute__((packed)) fields {
+#if defined __CET__ && (__CET__ & 1) != 0
+uint8_t endbr64[4];
+#endif
 uint8_t insn_0[2];
 void *func_ptr;
 uint8_t insn_1[2];
 void *chain_ptr;
 uint8_t insn_2[3];
+uint8_t pad;
   } fields;
 };
 
-- 
2.43.0



Re: [PATCH] x86, libgcc: Implement ia32 basic heap trampoline [PR113855].

2024-02-10 Thread H.J. Lu
On Sat, Feb 10, 2024 at 9:46 AM Jakub Jelinek  wrote:
>
> On Sat, Feb 10, 2024 at 05:14:44PM +, Iain Sandoe wrote:
> >   PR target/113855
> >
> > gcc/ChangeLog:
> >
> >   * config/i386/darwin.h (DARWIN_HEAP_T_LIB): Moved to be
> >   available to all sub-targets.
> >   * config/i386/darwin32-biarch.h (DARWIN_HEAP_T_LIB): Delete.
> >   * config/i386/darwin64-biarch.h (DARWIN_HEAP_T_LIB): Delete.
> >
> > libgcc/ChangeLog:
> >
> >   * config.host: Add trampoline support to x?86-linux.
> >   * config/i386/heap-trampoline.c (trampoline_insns): Provide
> >   a variant for IA32.
> >   (union ix86_trampoline): Likewise.
> >   (__gcc_nested_func_ptr_created): Implement a basic trampoline
> >   for IA32.
>
> LGTM.
>
> I bet it probably doesn't work properly for -mx32 (which defines
> __x86_64__), CCing H.J. on that, but that is a preexisting issue
> (and I don't have any experience with it; I guess one would either
> need to add 4 bytes of padding after the func_ptr so that those
> bits remain zeros as sizeof (void *) is 4, but presumably it would be
> better to just use movl (but into %r10) and maybe the jmpl instead
> of movabsq.
>
> Jakub
>

Are there any testcases to exercise this code on Linux?

-- 
H.J.


[PATCH] x86-64: Return R10_REG if there is no scratch register

2024-02-06 Thread H.J. Lu
If we can't find a scratch register for large model profiling, return
R10_REG.

PR target/113689
* config/i386/i386.cc (x86_64_select_profile_regnum): Return
R10_REG after sorry.
---
 gcc/config/i386/i386.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index f02c6c02ac6..10bd5347dcf 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22788,7 +22788,7 @@ x86_64_select_profile_regnum (bool r11_ok 
ATTRIBUTE_UNUSED)
   sorry ("no register available for profiling %<-mcmodel=large%s%>",
 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
 
-  return INVALID_REGNUM;
+  return R10_REG;
 }
 
 /* Output assembler code to FILE to increment profiler label # LABELNO
-- 
2.43.0



[PATCH] x86: Update constraints for APX NDD instructions

2024-02-05 Thread H.J. Lu
1. The only supported TLS code sequence with ADD is

addq foo@gottpoff(%rip),%reg

Change je constraint to a memory operand in APX NDD ADD pattern with
register source operand.

2. The instruction length of APX NDD instructions with immediate operand:

op imm, mem, reg

may exceed the size limit of 15 bytes when non-default address space,
segment register or address size prefix are used.

Add jM constraint which is a memory operand valid for APX NDD instructions
with immediate operand and add jO constraint which is an offsetable memory
operand valid for APX NDD instructions with immediate operand.  Update
APX NDD patterns with jM and jO constraints.

gcc/

PR target/113711
PR target/113733
* config/i386/constraints.md: List all constraints with j prefix.
(j>): Change auto-dec to auto-inc in documentation.
(je): Changed to a memory constraint with APX NDD TLS operand
check.
(jM): New memory constraint for APX NDD instructions.
(jO): Likewise.
* config/i386/i386-protos.h (x86_poff_operand_p): Removed.
* config/i386/i386.cc (x86_poff_operand_p): Likewise.
* config/i386/i386.md (*add3_doubleword): Use rjO.
(*add_1[SWI48]): Use je and jM.
(addsi_1_zext): Use jM.
(*addv4_doubleword_1[DWI]): Likewise.
(*sub_1[SWI]): Use jM.
(@add3_cc_overflow_1[SWI]): Likewise.
(*add3_doubleword_cc_overflow_1): Use rjO.
(*and3_doubleword): Likewise.
(*anddi_1): Use jM.
(*andsi_1_zext): Likewise.
(*and_1[SWI24]): Likewise.
(*3_doubleword[any_or]): Use rjO.
(*code_1[any_or SWI248]): Use jM.
(*si_1_zext[zero_extend + any_or]): Likewise.
* config/i386/predicates.md (apx_ndd_memory_operand): New.
(apx_ndd_add_memory_operand): Likewise.

gcc/testsuite/

PR target/113711
PR target/113733
* gcc.target/i386/apx-ndd-2.c: New test.
* gcc.target/i386/apx-ndd-base-index-1.c: Likewise.
* gcc.target/i386/apx-ndd-no-seg-global-1.c: Likewise.
* gcc.target/i386/apx-ndd-seg-1.c: Likewise.
* gcc.target/i386/apx-ndd-seg-2.c: Likewise.
* gcc.target/i386/apx-ndd-seg-3.c: Likewise.
* gcc.target/i386/apx-ndd-seg-4.c: Likewise.
* gcc.target/i386/apx-ndd-seg-5.c: Likewise.
* gcc.target/i386/apx-ndd-tls-1a.c: Likewise.
* gcc.target/i386/apx-ndd-tls-2.c: Likewise.
* gcc.target/i386/apx-ndd-tls-3.c: Likewise.
* gcc.target/i386/apx-ndd-tls-4.c: Likewise.
* gcc.target/i386/apx-ndd-x32-1.c: Likewise.
---
 gcc/config/i386/constraints.md|  36 -
 gcc/config/i386/i386-protos.h |   1 -
 gcc/config/i386/i386.cc   |  25 
 gcc/config/i386/i386.md   | 129 +-
 gcc/config/i386/predicates.md |  65 +
 gcc/testsuite/gcc.target/i386/apx-ndd-2.c |  17 +++
 .../gcc.target/i386/apx-ndd-base-index-1.c|  50 +++
 .../gcc.target/i386/apx-ndd-no-seg-global-1.c |  74 ++
 gcc/testsuite/gcc.target/i386/apx-ndd-seg-1.c |  98 +
 gcc/testsuite/gcc.target/i386/apx-ndd-seg-2.c |  98 +
 gcc/testsuite/gcc.target/i386/apx-ndd-seg-3.c |  14 ++
 gcc/testsuite/gcc.target/i386/apx-ndd-seg-4.c |   9 ++
 gcc/testsuite/gcc.target/i386/apx-ndd-seg-5.c |  13 ++
 .../gcc.target/i386/apx-ndd-tls-1a.c  |  41 ++
 gcc/testsuite/gcc.target/i386/apx-ndd-tls-2.c |  38 ++
 gcc/testsuite/gcc.target/i386/apx-ndd-tls-3.c |  16 +++
 gcc/testsuite/gcc.target/i386/apx-ndd-tls-4.c |  31 +
 gcc/testsuite/gcc.target/i386/apx-ndd-x32-1.c |  49 +++
 18 files changed, 712 insertions(+), 92 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-base-index-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-no-seg-global-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-seg-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-seg-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-seg-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-seg-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-seg-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-tls-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-tls-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-tls-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-tls-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-x32-1.c

diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 280e4c8e36c..64702d9c0a8 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -372,6 +372,24 @@ (define_address_constraint "Ts"
   "Address operand without segment register"
   (match_operand 0 "address_no_seg_operand"))
 
+;; j prefix is used for 

Re: [PATCH v6] x86-64: Find a scratch register for large model profiling

2024-02-05 Thread H.J. Lu
On Mon, Feb 5, 2024 at 10:01 AM Uros Bizjak  wrote:
>
> On Mon, Feb 5, 2024 at 5:43 PM H.J. Lu  wrote:
> >
> > Changes in v6:
> >
> > 1. Use ix86_save_reg and accessible_reg_set in
> > x86_64_select_profile_regnum.
> > 2. Construct a complete reg name in x86_function_profiler.
> >
> > Changes in v5:
> >
> > 1. Add pr113689-3.c.
> > 2. Use %r10 if ix86_profile_before_prologue () returns true.
> > 3. Try a callee-saved register which has been saved on stack in the
> > prologue.
> >
> > Changes in v4:
> >
> > 1. Remove pr113689-3.c.
> > 2. Use df_get_live_out.
> >
> > Changes in v3:
> >
> > 1. Remove r10_ok.
> >
> > Changes in v2:
> >
> > 1. Add int_parameter_registers to machine_function to track integer
> > registers used for parameter passing.
> > 2. Update x86_64_select_profile_regnum to try %r10 first and use a
> > caller-saved register, which isn't used for parameter passing.
> >
> > ---
> > 2 scratch registers, %r10 and %r11, are available at function entry for
> > large model profiling.  But %r10 may be used by stack realignment and we
> > can't use %r10 in this case.  Add x86_64_select_profile_regnum to find
> > a caller-saved register which isn't live or a callee-saved register
> > which has been saved on stack in the prologue at entry for large model
> > profiling and sorry if we can't find one.
> >
> > gcc/
> >
> > PR target/113689
> > * config/i386/i386.cc (x86_64_select_profile_regnum): New.
> > (x86_function_profiler): Call x86_64_select_profile_regnum to
> > get a scratch register for large model profiling.
> >
> > gcc/testsuite/
> >
> > PR target/113689
> > * gcc.target/i386/pr113689-1.c: New file.
> > * gcc.target/i386/pr113689-2.c: Likewise.
> > * gcc.target/i386/pr113689-3.c: Likewise.
> > ---
> >  gcc/config/i386/i386.cc| 91 ++
> >  gcc/testsuite/gcc.target/i386/pr113689-1.c | 49 
> >  gcc/testsuite/gcc.target/i386/pr113689-2.c | 41 ++
> >  gcc/testsuite/gcc.target/i386/pr113689-3.c | 48 
> >  4 files changed, 214 insertions(+), 15 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-3.c
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index b3e7c74846e..08aad32af85 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -22749,6 +22749,48 @@ current_fentry_section (const char **name)
> >return true;
> >  }
> >
> > +/* Return a caller-saved register which isn't live or a callee-saved
> > +   register which has been saved on stack in the prologue at entry for
> > +   profile.  */
> > +
> > +static int
> > +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
> > +{
> > +  /* Use %r10 if the profiler is emitted before the prologue or it isn't
> > + used by DRAP.  */
> > +  if (ix86_profile_before_prologue ()
> > +  || !crtl->drap_reg
> > +  || REGNO (crtl->drap_reg) != R10_REG)
> > +return R10_REG;
> > +
> > +  /* The profiler is emitted after the prologue.  If there is a
> > + caller-saved register which isn't live or a callee-saved
> > + register saved on stack in the prologue, use it.  */
> > +
> > +  bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
> > +
> > +  int i;
> > +  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> > +if (GENERAL_REGNO_P (i)
> > +   && i != R10_REG
> > +#ifdef NO_PROFILE_COUNTERS
> > +   && (r11_ok || i != R11_REG)
> > +#else
> > +   && i != R11_REG
> > +#endif
> > +   && TEST_HARD_REG_BIT (accessible_reg_set, i)
> > +   && !fixed_regs[i]
> > +   && (ix86_save_reg (i, true, true)
> > +   || (call_used_regs[i]
> > +   && !REGNO_REG_SET_P (reg_live, i))))
> > +  return i;
>
> ix86_save_reg will never save fixed regs, so the above can be optimized a bit:
>
>&& TEST_HARD_REG_BIT (accessible_reg_set, i)
>&& (ix86_save_reg (i, true, true)
>|| (call_used_regs[i] && !fixed_regs[i]
>&& !REGNO_REG_SET_P (reg_live, i))))
>
> OK 

Re: [PATCH v5] x86-64: Find a scratch register for large model profiling

2024-02-05 Thread H.J. Lu
On Mon, Feb 5, 2024 at 2:56 AM Uros Bizjak  wrote:
>
> On Fri, Feb 2, 2024 at 11:47 PM H.J. Lu  wrote:
> >
> > Changes in v5:
> >
> > 1. Add pr113689-3.c.
> > 2. Use %r10 if ix86_profile_before_prologue () returns true.
> > 3. Try a callee-saved register which has been saved on stack in the
> > prologue.
> >
> > Changes in v4:
> >
> > 1. Remove pr113689-3.c.
> > 2. Use df_get_live_out.
> >
> > Changes in v3:
> >
> > 1. Remove r10_ok.
> >
> > Changes in v2:
> >
> > 1. Add int_parameter_registers to machine_function to track integer
> > registers used for parameter passing.
> > 2. Update x86_64_select_profile_regnum to try %r10 first and use a
> > caller-saved register, which isn't used for parameter passing.
> >
> > ---
> > 2 scratch registers, %r10 and %r11, are available at function entry for
> > large model profiling.  But %r10 may be used by stack realignment and we
> > can't use %r10 in this case.  Add x86_64_select_profile_regnum to find
> > a caller-saved register which isn't live or a callee-saved register
> > which has been saved on stack in the prologue at entry for large model
> > profiling and sorry if we can't find one.
> >
> > gcc/
> >
> > PR target/113689
> > * config/i386/i386.cc (set_saved_int_registers_bit): New.
> > (test_saved_int_registers_bit): Likewise.
> > (ix86_emit_save_regs): Call set_saved_int_registers_bit on
> > saved register.
> > (ix86_emit_save_regs_using_mov): Likewise.
> > (x86_64_select_profile_regnum): New.
> > (x86_function_profiler): Call x86_64_select_profile_regnum to
> > get a scratch register for large model profiling.
> > * config/i386/i386.h (machine_function): Add
> > saved_int_registers.
> >
> > gcc/testsuite/
> >
> > PR target/113689
> > * gcc.target/i386/pr113689-1.c: New file.
> > * gcc.target/i386/pr113689-2.c: Likewise.
> > * gcc.target/i386/pr113689-3.c: Likewise.
> > ---
> >  gcc/config/i386/i386.cc| 119 ++---
> >  gcc/config/i386/i386.h |   5 +
> >  gcc/testsuite/gcc.target/i386/pr113689-1.c |  49 +
> >  gcc/testsuite/gcc.target/i386/pr113689-2.c |  41 +++
> >  gcc/testsuite/gcc.target/i386/pr113689-3.c |  48 +
> >  5 files changed, 247 insertions(+), 15 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-3.c
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index b3e7c74846e..1c7aaa4535e 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -7387,6 +7387,32 @@ choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned 
> > int *align,
> >return plus_constant (Pmode, base_reg, base_offset);
> >  }
> >
> > +/* Set the integer register REGNO bit in saved_int_registers.  */
> > +
> > +static void
> > +set_saved_int_registers_bit (int regno)
> > +{
> > +  if (LEGACY_INT_REGNO_P (regno))
> > +cfun->machine->saved_int_registers |= 1 << regno;
> > +  else
> > +cfun->machine->saved_int_registers
> > +  |= 1 << (regno - FIRST_REX_INT_REG + 8);
> > +}
> > +
> > +/* Return true if the integer register REGNO bit in saved_int_registers
> > +   is set.  */
> > +
> > +static bool
> > +test_saved_int_registers_bit (int regno)
> > +{
> > +  if (LEGACY_INT_REGNO_P (regno))
> > +return (cfun->machine->saved_int_registers
> > +   & (1 << regno)) != 0;
> > +  else
> > +return (cfun->machine->saved_int_registers
> > +   & (1 << (regno - FIRST_REX_INT_REG + 8))) != 0;
> > +}
> > +
> >  /* Emit code to save registers in the prologue.  */
> >
> >  static void
> > @@ -7403,6 +7429,7 @@ ix86_emit_save_regs (void)
> > insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> > TARGET_APX_PPX));
> > RTX_FRAME_RELATED_P (insn) = 1;
> > +   set_saved_int_registers_bit (regno);
> >   }
> >  }
> >else
> > @@ -7415,6 +7442,7 @@ ix86_emit_save_regs (void)
> >for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
> >   

[PATCH v6] x86-64: Find a scratch register for large model profiling

2024-02-05 Thread H.J. Lu
Changes in v6:

1. Use ix86_save_reg and accessible_reg_set in
x86_64_select_profile_regnum.
2. Construct a complete reg name in x86_function_profiler.

Changes in v5:

1. Add pr113689-3.c.
2. Use %r10 if ix86_profile_before_prologue () returns true.
3. Try a callee-saved register which has been saved on stack in the
prologue.

Changes in v4:

1. Remove pr113689-3.c.
2. Use df_get_live_out.

Changes in v3:

1. Remove r10_ok.

Changes in v2:

1. Add int_parameter_registers to machine_function to track integer
registers used for parameter passing.
2. Update x86_64_select_profile_regnum to try %r10 first and use a
caller-saved register, which isn't used for parameter passing.

---
2 scratch registers, %r10 and %r11, are available at function entry for
large model profiling.  But %r10 may be used by stack realignment and we
can't use %r10 in this case.  Add x86_64_select_profile_regnum to find
a caller-saved register which isn't live or a callee-saved register
which has been saved on stack in the prologue at entry for large model
profiling and sorry if we can't find one.

gcc/

PR target/113689
* config/i386/i386.cc (x86_64_select_profile_regnum): New.
(x86_function_profiler): Call x86_64_select_profile_regnum to
get a scratch register for large model profiling.

gcc/testsuite/

PR target/113689
* gcc.target/i386/pr113689-1.c: New file.
* gcc.target/i386/pr113689-2.c: Likewise.
* gcc.target/i386/pr113689-3.c: Likewise.
---
 gcc/config/i386/i386.cc| 91 ++
 gcc/testsuite/gcc.target/i386/pr113689-1.c | 49 
 gcc/testsuite/gcc.target/i386/pr113689-2.c | 41 ++
 gcc/testsuite/gcc.target/i386/pr113689-3.c | 48 
 4 files changed, 214 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-3.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b3e7c74846e..08aad32af85 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22749,6 +22749,48 @@ current_fentry_section (const char **name)
   return true;
 }
 
+/* Return a caller-saved register which isn't live or a callee-saved
+   register which has been saved on stack in the prologue at entry for
+   profile.  */
+
+static int
+x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
+{
+  /* Use %r10 if the profiler is emitted before the prologue or it isn't
+ used by DRAP.  */
+  if (ix86_profile_before_prologue ()
+  || !crtl->drap_reg
+  || REGNO (crtl->drap_reg) != R10_REG)
+return R10_REG;
+
+  /* The profiler is emitted after the prologue.  If there is a
+ caller-saved register which isn't live or a callee-saved
+ register saved on stack in the prologue, use it.  */
+
+  bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+
+  int i;
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+if (GENERAL_REGNO_P (i)
+   && i != R10_REG
+#ifdef NO_PROFILE_COUNTERS
+   && (r11_ok || i != R11_REG)
+#else
+   && i != R11_REG
+#endif
+   && TEST_HARD_REG_BIT (accessible_reg_set, i)
+   && !fixed_regs[i]
+   && (ix86_save_reg (i, true, true)
+   || (call_used_regs[i]
+   && !REGNO_REG_SET_P (reg_live, i))))
+  return i;
+
+  sorry ("no register available for profiling %<-mcmodel=large%s%>",
+ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
+
+  return INVALID_REGNUM;
+}
+
 /* Output assembler code to FILE to increment profiler label # LABELNO
for profiling a function entry.  */
 void
@@ -22783,42 +22825,61 @@ x86_function_profiler (FILE *file, int labelno 
ATTRIBUTE_UNUSED)
fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
 #endif
 
+  int scratch;
+  const char *reg;
+  char legacy_reg[4] = { 0 };
+
   if (!TARGET_PECOFF)
{
  switch (ix86_cmodel)
{
case CM_LARGE:
- /* NB: R10 is caller-saved.  Although it can be used as a
-static chain register, it is preserved when calling
-mcount for nested functions.  */
+ scratch = x86_64_select_profile_regnum (true);
+ reg = hi_reg_name[scratch];
+ if (LEGACY_INT_REGNO_P (scratch))
+   {
+ legacy_reg[0] = 'r';
+ legacy_reg[1] = reg[0];
+ legacy_reg[2] = reg[1];
+ reg = legacy_reg;
+   }
  if (ASSEMBLER_DIALECT == ASM_INTEL)
-   fprintf (file, "1:\tmovabs\tr10, OFFSET FLAT:%s\n"
-  "\tcall\tr10\n", mcount_name);
+   fprintf (file, "1:\tmovabs\t%s, OFFSET FLAT:%s\n"
+  "\tcall\t%s\n", reg, mcount_name, reg);
  else
-   fprintf (file, "1:\tmovabsq\t$%s, 

Re: [PATCH] x86-64: Update gcc.target/i386/apx-ndd.c

2024-02-05 Thread H.J. Lu
On Mon, Feb 5, 2024 at 3:53 AM H.J. Lu <>  wrote:
>
> Fix the following issues:
>
> 1. Replace long with int64_t to support x32.
> 2. Replace \\(%rdi\\) with \\(%(?:r|e)di\\) for memory operand since x32
> uses (%edi).
> 3. Replace %(?:|r|e)al with %al in negb scan.
>
> * gcc.target/i386/apx-ndd.c: Updated.
> ---
>  gcc/testsuite/gcc.target/i386/apx-ndd.c | 68 -
>  1 file changed, 34 insertions(+), 34 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c 
> b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> index b215f66d3e2..0eb751ad225 100644
> --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
> +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> @@ -75,9 +75,9 @@ FOO2 (short, add, +)
>  FOO (int, add, +)
>  FOO1 (int, add, +)
>  FOO2 (int, add, +)
> -FOO (long, add, +)
> -FOO1 (long, add, +)
> -FOO2 (long, add, +)
> +FOO (int64_t, add, +)
> +FOO1 (int64_t, add, +)
> +FOO2 (int64_t, add, +)
>
>  FOO (char, sub, -)
>  FOO1 (char, sub, -)
> @@ -85,8 +85,8 @@ FOO (short, sub, -)
>  FOO1 (short, sub, -)
>  FOO (int, sub, -)
>  FOO1 (int, sub, -)
> -FOO (long, sub, -)
> -FOO1 (long, sub, -)
> +FOO (int64_t, sub, -)
> +FOO1 (int64_t, sub, -)
>
>  F (char, neg, -)
>  F1 (char, neg, -)
> @@ -94,8 +94,8 @@ F (short, neg, -)
>  F1 (short, neg, -)
>  F (int, neg, -)
>  F1 (int, neg, -)
> -F (long, neg, -)
> -F1 (long, neg, -)
> +F (int64_t, neg, -)
> +F1 (int64_t, neg, -)
>
>  F (char, not, ~)
>  F1 (char, not, ~)
> @@ -103,8 +103,8 @@ F (short, not, ~)
>  F1 (short, not, ~)
>  F (int, not, ~)
>  F1 (int, not, ~)
> -F (long, not, ~)
> -F1 (long, not, ~)
> +F (int64_t, not, ~)
> +F1 (int64_t, not, ~)
>
>  FOO (char, and, &)
>  FOO1 (char, and, &)
> @@ -112,8 +112,8 @@ FOO (short, and, &)
>  FOO1 (short, and, &)
>  FOO (int, and, &)
>  FOO1 (int, and, &)
> -FOO (long, and, &)
> -FOO1 (long, and, &)
> +FOO (int64_t, and, &)
> +FOO1 (int64_t, and, &)
>
>  FOO (char, or, |)
>  FOO1 (char, or, |)
> @@ -121,8 +121,8 @@ FOO (short, or, |)
>  FOO1 (short, or, |)
>  FOO (int, or, |)
>  FOO1 (int, or, |)
> -FOO (long, or, |)
> -FOO1 (long, or, |)
> +FOO (int64_t, or, |)
> +FOO1 (int64_t, or, |)
>
>  FOO (char, xor, ^)
>  FOO1 (char, xor, ^)
> @@ -130,8 +130,8 @@ FOO (short, xor, ^)
>  FOO1 (short, xor, ^)
>  FOO (int, xor, ^)
>  FOO1 (int, xor, ^)
> -FOO (long, xor, ^)
> -FOO1 (long, xor, ^)
> +FOO (int64_t, xor, ^)
> +FOO1 (int64_t, xor, ^)
>
>  FOO (char, shl, <<)
>  FOO3 (char, shl, <<, 7)
> @@ -139,8 +139,8 @@ FOO (short, shl, <<)
>  FOO3 (short, shl, <<, 7)
>  FOO (int, shl, <<)
>  FOO3 (int, shl, <<, 7)
> -FOO (long, shl, <<)
> -FOO3 (long, shl, <<, 7)
> +FOO (int64_t, shl, <<)
> +FOO3 (int64_t, shl, <<, 7)
>
>  FOO (char, sar, >>)
>  FOO3 (char, sar, >>, 7)
> @@ -148,8 +148,8 @@ FOO (short, sar, >>)
>  FOO3 (short, sar, >>, 7)
>  FOO (int, sar, >>)
>  FOO3 (int, sar, >>, 7)
> -FOO (long, sar, >>)
> -FOO3 (long, sar, >>, 7)
> +FOO (int64_t, sar, >>)
> +FOO3 (int64_t, sar, >>, 7)
>
>  FOO (uint8_t, shr, >>)
>  FOO3 (uint8_t, shr, >>, 7)
> @@ -170,33 +170,33 @@ FOO4 (uint16_t, rol, <<, >>, 1)
>  FOO4 (uint32_t, rol, <<, >>, 1)
>  FOO4 (uint64_t, rol, <<, >>, 1)
>
> -/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), 
> %(?:|r|e)a(?:x|l)" 4 } } */
> +/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, 
> \\(%(?:r|e)di\\), %(?:|r|e)a(?:x|l)" 4 } } */
>  /* { dg-final { scan-assembler-times 
> "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
> -/* { dg-final { scan-assembler-times 
> "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } 
> } */
> -/* { dg-final { scan-assembler-times "sub(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), 
> %(?:|r|e)a(?:x|l)" 4 } } */
> +/* { dg-final { scan-assembler-times 
> "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%(?:r|e)di\\), 
> %(?:|r|e)a(?:x|l)" 4 } } */
> +/* { dg-final { scan-assembler-times "sub(?:b|l|w|q)\[^\n\r]*1, 
> \\(%(?:r|e)di\\), %(?:|r|e)a(?:x|l)" 4 } } */
>  /* { dg-final { scan-assembler-times 
> "sub(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), %(?:|r|e)di, %(?:|r|e)a(?:x|l)" 4 } 
> } */
> -/* { dg-final { scan-assembler-times "negb\[^\n\r]\\(%rdi\\), %(?:|r|e)al" 1 
> } } */
> -/* { dg-final { scan-assembler-times "neg(?:l|w|q)\[^\n\r]\\(%rdi\\)

[PATCH] x86-64: Update gcc.target/i386/apx-ndd.c

2024-02-05 Thread H.J. Lu <>
Fix the following issues:

1. Replace long with int64_t to support x32.
2. Replace \\(%rdi\\) with \\(%(?:r|e)di\\) for memory operand since x32
uses (%edi).
3. Replace %(?:|r|e)al with %al in negb scan.

* gcc.target/i386/apx-ndd.c: Updated.
---
 gcc/testsuite/gcc.target/i386/apx-ndd.c | 68 -
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c 
b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index b215f66d3e2..0eb751ad225 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -75,9 +75,9 @@ FOO2 (short, add, +)
 FOO (int, add, +)
 FOO1 (int, add, +)
 FOO2 (int, add, +)
-FOO (long, add, +)
-FOO1 (long, add, +)
-FOO2 (long, add, +)
+FOO (int64_t, add, +)
+FOO1 (int64_t, add, +)
+FOO2 (int64_t, add, +)
 
 FOO (char, sub, -)
 FOO1 (char, sub, -)
@@ -85,8 +85,8 @@ FOO (short, sub, -)
 FOO1 (short, sub, -)
 FOO (int, sub, -)
 FOO1 (int, sub, -)
-FOO (long, sub, -)
-FOO1 (long, sub, -)
+FOO (int64_t, sub, -)
+FOO1 (int64_t, sub, -)
 
 F (char, neg, -)
 F1 (char, neg, -)
@@ -94,8 +94,8 @@ F (short, neg, -)
 F1 (short, neg, -)
 F (int, neg, -)
 F1 (int, neg, -)
-F (long, neg, -)
-F1 (long, neg, -)
+F (int64_t, neg, -)
+F1 (int64_t, neg, -)
 
 F (char, not, ~)
 F1 (char, not, ~)
@@ -103,8 +103,8 @@ F (short, not, ~)
 F1 (short, not, ~)
 F (int, not, ~)
 F1 (int, not, ~)
-F (long, not, ~)
-F1 (long, not, ~)
+F (int64_t, not, ~)
+F1 (int64_t, not, ~)
 
 FOO (char, and, &)
 FOO1 (char, and, &)
@@ -112,8 +112,8 @@ FOO (short, and, &)
 FOO1 (short, and, &)
 FOO (int, and, &)
 FOO1 (int, and, &)
-FOO (long, and, &)
-FOO1 (long, and, &)
+FOO (int64_t, and, &)
+FOO1 (int64_t, and, &)
 
 FOO (char, or, |)
 FOO1 (char, or, |)
@@ -121,8 +121,8 @@ FOO (short, or, |)
 FOO1 (short, or, |)
 FOO (int, or, |)
 FOO1 (int, or, |)
-FOO (long, or, |)
-FOO1 (long, or, |)
+FOO (int64_t, or, |)
+FOO1 (int64_t, or, |)
 
 FOO (char, xor, ^)
 FOO1 (char, xor, ^)
@@ -130,8 +130,8 @@ FOO (short, xor, ^)
 FOO1 (short, xor, ^)
 FOO (int, xor, ^)
 FOO1 (int, xor, ^)
-FOO (long, xor, ^)
-FOO1 (long, xor, ^)
+FOO (int64_t, xor, ^)
+FOO1 (int64_t, xor, ^)
 
 FOO (char, shl, <<)
 FOO3 (char, shl, <<, 7)
@@ -139,8 +139,8 @@ FOO (short, shl, <<)
 FOO3 (short, shl, <<, 7)
 FOO (int, shl, <<)
 FOO3 (int, shl, <<, 7)
-FOO (long, shl, <<)
-FOO3 (long, shl, <<, 7)
+FOO (int64_t, shl, <<)
+FOO3 (int64_t, shl, <<, 7)
 
 FOO (char, sar, >>)
 FOO3 (char, sar, >>, 7)
@@ -148,8 +148,8 @@ FOO (short, sar, >>)
 FOO3 (short, sar, >>, 7)
 FOO (int, sar, >>)
 FOO3 (int, sar, >>, 7)
-FOO (long, sar, >>)
-FOO3 (long, sar, >>, 7)
+FOO (int64_t, sar, >>)
+FOO3 (int64_t, sar, >>, 7)
 
 FOO (uint8_t, shr, >>)
 FOO3 (uint8_t, shr, >>, 7)
@@ -170,33 +170,33 @@ FOO4 (uint16_t, rol, <<, >>, 1)
 FOO4 (uint32_t, rol, <<, >>, 1)
 FOO4 (uint64_t, rol, <<, >>, 1)
 
-/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), 
%(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, 
\\(%(?:r|e)di\\), %(?:|r|e)a(?:x|l)" 4 } } */
 /* { dg-final { scan-assembler-times 
"lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
-/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), 
\\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
-/* { dg-final { scan-assembler-times "sub(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), 
%(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), 
\\(%(?:r|e)di\\), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "sub(?:b|l|w|q)\[^\n\r]*1, 
\\(%(?:r|e)di\\), %(?:|r|e)a(?:x|l)" 4 } } */
 /* { dg-final { scan-assembler-times "sub(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), 
%(?:|r|e)di, %(?:|r|e)a(?:x|l)" 4 } } */
-/* { dg-final { scan-assembler-times "negb\[^\n\r]\\(%rdi\\), %(?:|r|e)al" 1 } 
} */
-/* { dg-final { scan-assembler-times "neg(?:l|w|q)\[^\n\r]\\(%rdi\\), 
%(?:|r|e)ax" 3 } } */
+/* { dg-final { scan-assembler-times "negb\[^\n\r]\\(%(?:r|e)di\\), %al" 1 } } 
*/
+/* { dg-final { scan-assembler-times "neg(?:l|w|q)\[^\n\r]\\(%(?:r|e)di\\), 
%(?:|r|e)ax" 3 } } */
 /* { dg-final { scan-assembler-times "neg(?:l|w|q)\[^\n\r]%(?:|r|e)di, 
%(?:|r|e)ax" 4 } } */
-/* { dg-final { scan-assembler-times "not(?:b|l|w|q)\[^\n\r]\\(%rdi\\), 
%(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "not(?:b|l|w|q)\[^\n\r]\\(%(?:r|e)di\\), 
%(?:|r|e)a(?:x|l)" 4 } } */
 /* { dg-final { scan-assembler-times "not(?:l|w|q)\[^\n\r]%(?:|r|e)di, 
%(?:|r|e)ax" 4 } } */
-/* { dg-final { scan-assembler-times "andb\[^\n\r]*1, \\(%rdi\\), %al" 1 } } */
-/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), 
%(?:|r|e)ax" 3 } } */
+/* { dg-final { scan-assembler-times "andb\[^\n\r]*1, \\(%(?:r|e)di\\), %al" 1 
} } */
+/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]*1, 
\\(%(?:r|e)di\\), %(?:|r|e)ax" 3 } } */
 /* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)di, 

[PATCH v5] x86-64: Find a scratch register for large model profiling

2024-02-02 Thread H.J. Lu
Changes in v5:

1. Add pr113689-3.c.
2. Use %r10 if ix86_profile_before_prologue () returns true.
3. Try a callee-saved register which has been saved on stack in the
prologue.

Changes in v4:

1. Remove pr113689-3.c.
2. Use df_get_live_out.

Changes in v3:

1. Remove r10_ok.

Changes in v2:

1. Add int_parameter_registers to machine_function to track integer
registers used for parameter passing.
2. Update x86_64_select_profile_regnum to try %r10 first and use a
caller-saved register, which isn't used for parameter passing.

---
2 scratch registers, %r10 and %r11, are available at function entry for
large model profiling.  But %r10 may be used by stack realignment and we
can't use %r10 in this case.  Add x86_64_select_profile_regnum to find
a caller-saved register which isn't live or a callee-saved register
which has been saved on stack in the prologue at entry for large model
profiling and sorry if we can't find one.

gcc/

PR target/113689
* config/i386/i386.cc (set_saved_int_registers_bit): New.
(test_saved_int_registers_bit): Likewise.
(ix86_emit_save_regs): Call set_saved_int_registers_bit on
saved register.
(ix86_emit_save_regs_using_mov): Likewise.
(x86_64_select_profile_regnum): New.
(x86_function_profiler): Call x86_64_select_profile_regnum to
get a scratch register for large model profiling.
* config/i386/i386.h (machine_function): Add
saved_int_registers.

gcc/testsuite/

PR target/113689
* gcc.target/i386/pr113689-1.c: New file.
* gcc.target/i386/pr113689-2.c: Likewise.
* gcc.target/i386/pr113689-3.c: Likewise.
---
 gcc/config/i386/i386.cc| 119 ++---
 gcc/config/i386/i386.h |   5 +
 gcc/testsuite/gcc.target/i386/pr113689-1.c |  49 +
 gcc/testsuite/gcc.target/i386/pr113689-2.c |  41 +++
 gcc/testsuite/gcc.target/i386/pr113689-3.c |  48 +
 5 files changed, 247 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-3.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b3e7c74846e..1c7aaa4535e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -7387,6 +7387,32 @@ choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int 
*align,
   return plus_constant (Pmode, base_reg, base_offset);
 }
 
+/* Set the integer register REGNO bit in saved_int_registers.  */
+
+static void
+set_saved_int_registers_bit (int regno)
+{
+  if (LEGACY_INT_REGNO_P (regno))
+cfun->machine->saved_int_registers |= 1 << regno;
+  else
+cfun->machine->saved_int_registers
+  |= 1 << (regno - FIRST_REX_INT_REG + 8);
+}
+
+/* Return true if the integer register REGNO bit in saved_int_registers
+   is set.  */
+
+static bool
+test_saved_int_registers_bit (int regno)
+{
+  if (LEGACY_INT_REGNO_P (regno))
+return (cfun->machine->saved_int_registers
+   & (1 << regno)) != 0;
+  else
+return (cfun->machine->saved_int_registers
+   & (1 << (regno - FIRST_REX_INT_REG + 8))) != 0;
+}
+
 /* Emit code to save registers in the prologue.  */
 
 static void
@@ -7403,6 +7429,7 @@ ix86_emit_save_regs (void)
insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
TARGET_APX_PPX));
RTX_FRAME_RELATED_P (insn) = 1;
+   set_saved_int_registers_bit (regno);
  }
 }
   else
@@ -7415,6 +7442,7 @@ ix86_emit_save_regs (void)
   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
  {
+   set_saved_int_registers_bit (regno);
if (aligned)
  {
regno_list[loaded_regnum++] = regno;
@@ -7567,6 +7595,7 @@ ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
   {
 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
cfa_offset -= UNITS_PER_WORD;
+   set_saved_int_registers_bit (regno);
   }
 }
 
@@ -22749,6 +22778,48 @@ current_fentry_section (const char **name)
   return true;
 }
 
+/* Return a caller-saved register which isn't live or a callee-saved
+   register which has been saved on stack in the prologue at entry for
+   profile.  */
+
+static int
+x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
+{
+  /* Use %r10 if the profiler is emitted before the prologue or it isn't
+ used by DRAP.  */
+  if (ix86_profile_before_prologue ()
+  || !crtl->drap_reg
+  || REGNO (crtl->drap_reg) != R10_REG)
+return R10_REG;
+
+  /* The profiler is emitted after the prologue.  If there is a
+ caller-saved register which isn't live or a callee-saved
+ register saved on stack in the prologue, use it.  */
+
+  bitmap reg_live = df_get_live_out 

Re: [PATCH v4] x86-64: Find a scratch register for large model profiling

2024-02-02 Thread H.J. Lu
On Fri, Feb 02, 2024 at 05:10:05PM +0100, Jakub Jelinek wrote:
> On Fri, Feb 02, 2024 at 07:42:00AM -0800, H.J. Lu wrote:
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -22749,6 +22749,39 @@ current_fentry_section (const char **name)
> >return true;
> >  }
> >  
> > +/* Return an caller-saved register, which isn't live, at entry for
> > +   profile.  */
> > +
> > +static int
> > +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
> > +{
> > +  /* Use %r10 if it isn't used by DRAP.  */
> > +  if (!crtl->drap_reg || REGNO (crtl->drap_reg) != R10_REG)
> 
> I'd really like to see flag_fentry != 0 || here, if the profiler is
> emitted before the prologue (so before initializing the drap register),
> %r10 is a fine choice.

Fixed in v5.

> > +return R10_REG;
> > +
> > +  bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
> 
> I meant at pro_and_epilogue time, but perhaps doing it here
> can discover arguments of the function which are used somewhere in
> the body too.

My patch works when an argument is unused in the function body.  The
unused argument register will be used for the profiler.  I will add
a testcase in v5 to verify it.

> 
> > +  int i;
> > +  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> > +if (GENERAL_REGNO_P (i)
> > +   && i != R10_REG
> > +#ifdef NO_PROFILE_COUNTERS
> > +   && (r11_ok || i != R11_REG)
> > +#else
> > +   && i != R11_REG
> > +#endif
> > +   && (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
> > +   && !fixed_regs[i]
> > +   && call_used_regs[i]
> 
> I wonder if this shouldn't be && (call_used_regs[i] || X)
> where X would cover registers known to be saved in the prologue
> which aren't live from the prologue to the body (stuff like hard frame
> pointer if used).
> Because if the prologue say saves %r12 or %rbx to stack but doesn't
> yet set it to something, why couldn't the profiler use it?
> I'd expect cfun->machine contains something what has been saved there.

Added

 /* Bit mask for integer registers saved on stack in prologue.  The
 lower 8 bits are for legacy registers and the upper 8 bits are
 for r8-r15.  */
  unsigned int saved_int_registers : 16;

to track them.

> 
> > +   && !REGNO_REG_SET_P (reg_live, i))
> > +  return i;
> > +
> > +  sorry ("no register available for profiling %<-mcmodel=large%s%>",
> > +ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
> > +
> > +  return INVALID_REGNUM;
> > +}
> > +
> >  /* Output assembler code to FILE to increment profiler label # LABELNO
> > for profiling a function entry.  */
> >  void
> > @@ -22783,42 +22816,60 @@ x86_function_profiler (FILE *file, int labelno 
> > ATTRIBUTE_UNUSED)
> > fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
> >  #endif
> >  
> > +  int scratch;
> > +  const char *reg_prefix;
> > +  const char *reg;
> > +
> >if (!TARGET_PECOFF)
> > {
> >   switch (ix86_cmodel)
> > {
> > case CM_LARGE:
> > - /* NB: R10 is caller-saved.  Although it can be used as a
> > -static chain register, it is preserved when calling
> > -mcount for nested functions.  */
> > + scratch = x86_64_select_profile_regnum (true);
> > + reg = hi_reg_name[scratch];
> > + reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
> >   if (ASSEMBLER_DIALECT == ASM_INTEL)
> > -   fprintf (file, "1:\tmovabs\tr10, OFFSET FLAT:%s\n"
> > -  "\tcall\tr10\n", mcount_name);
> > +   fprintf (file,
> > +"1:\tmovabs\t%s%s, OFFSET FLAT:%s\n"
> > +"\tcall\t%s%s\n",
> > +reg_prefix, reg, mcount_name, reg_prefix, reg);
> >   else
> > -   fprintf (file, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
> > -mcount_name);
> > +   fprintf (file,
> > +"1:\tmovabsq\t$%s, %%%s%s\n\tcall\t*%%%s%s\n",
> > +mcount_name, reg_prefix, reg, reg_prefix, reg);
> >   break;
> > case CM_LARGE_PIC:
> >  #ifdef NO_PROFILE_COUNTERS
> > + scratch = x86_64_select_profile_regnum (false);
> > + reg = hi_reg_name[scratch];
> > +

Re: [PATCH v2] x86-64: Find a scratch register for large model profiling

2024-02-02 Thread H.J. Lu
On Fri, Feb 2, 2024 at 4:22 AM Jakub Jelinek  wrote:
>
> On Thu, Feb 01, 2024 at 03:02:47PM -0800, H.J. Lu wrote:
> > @@ -2763,6 +2789,8 @@ construct_container (machine_mode mode, machine_mode 
> > orig_mode,
> >{
> >case X86_64_INTEGER_CLASS:
> >case X86_64_INTEGERSI_CLASS:
> > + if (!in_return)
> > +   set_int_parameter_registers_bit (intreg[0]);
> >   return gen_rtx_REG (mode, intreg[0]);
> >case X86_64_SSE_CLASS:
> >case X86_64_SSEHF_CLASS:
> > @@ -2821,6 +2849,11 @@ construct_container (machine_mode mode, machine_mode 
> > orig_mode,
> >if (mode == BLKmode)
> >   {
> > /* Use TImode for BLKmode values in 2 integer registers.  */
> > +   if (!in_return)
> > + {
> > +   set_int_parameter_registers_bit (intreg[0]);
> > +   set_int_parameter_registers_bit (intreg[1]);
> > + }
> > exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
> > gen_rtx_REG (TImode, intreg[0]),
> > GEN_INT (0));
>
> Isn't the above (computed from just whether a function has such an argument)
> already available from cum->nregs or similar plus the sequence of argument
> registers?  Or df which certainly also needs to know what registers contain
> arguments?

Fixed in v4 to use df_get_live_out instead.

> Though, the above means a function argument register which isn't used in a
> function will be impossible to use for mcount.
> We certainly can use it (although var-tracking will not know it got
> clobbered).
> So, wouldn't it be better to ask at the start of prologue generation df what
> registers are live at the start of the function (i.e. at the point of the
> NOTE_INSN_PROLOG_END because rest of prologue is emitted before that) and
> remember a suitable register for the profiling there?
> > @@ -22749,6 +22789,38 @@ current_fentry_section (const char **name)
> >return true;
> >  }
> >
> > +/* Return an caller-saved register, which isn't used for parameter
> > +   passing, at entry for profile.  */
> > +
> > +static int
> > +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
> > +{
> > +  /* Use %r10 if it isn't used by DRAP.  */
> > +  bool r10_ok = !crtl->drap_reg || REGNO (crtl->drap_reg) != R10_REG;
> > +  if (r10_ok)
> > +return R10_REG;
> > +
> > +  int i;
> > +  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> > +if (GENERAL_REGNO_P (i)
> > + && (r10_ok || i != R10_REG)
>
> r10_ok is false here, so just
>   if (flag_fentry != 0 || !crtl->drap_reg || REGNO (crtl->drap_reg) != 
> R10_REG)
> return R10_REG;
> at the start and don't declare r10_ok.
>
> > +#ifdef NO_PROFILE_COUNTERS
> > + && (r11_ok || i != R11_REG)
> > +#else
> > + && i != R11_REG
> > +#endif
> > + && (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
> > + && !fixed_regs[i]
> > + && call_used_regs[i]
> > + && !test_int_parameter_registers_bit (i))
> > +  return i;
> > +
> > +  sorry ("no register available for profiling %<-mcmodel=large%s%>",
> > +  ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
> > +
> > +  return INVALID_REGNUM;
> > +}
> > +
> >  /* Output assembler code to FILE to increment profiler label # LABELNO
> > for profiling a function entry.  */
> >  void
> > @@ -22783,42 +22855,60 @@ x86_function_profiler (FILE *file, int labelno 
> > ATTRIBUTE_UNUSED)
> >   fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
> >  #endif
> >
> > +  int scratch;
> > +  const char *reg_prefix;
> > +  const char *reg;
> > +
> >if (!TARGET_PECOFF)
> >   {
> > switch (ix86_cmodel)
> >   {
> >   case CM_LARGE:
> > -   /* NB: R10 is caller-saved.  Although it can be used as a
> > -  static chain register, it is preserved when calling
> > -  mcount for nested functions.  */
> > +   scratch = x86_64_select_profile_regnum (true);
> > +   reg = hi_reg_name[scratch];
> > +   reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
> > if (ASSEMBLER_DIALECT == ASM_INTEL)
> > - fprintf (file, "1:\tmovabs\tr10, OFFSET FLAT:%s\n"
> > -"\tcall\tr10\n", mcount_nam

[PATCH v4] x86-64: Find a scratch register for large model profiling

2024-02-02 Thread H.J. Lu
Changes in v4:

1. Remove pr113689-2.c.
2. Use df_get_live_out.

Changes in v3:

1. Remove r10_ok.

Changes in v2:

1. Add int_parameter_registers to machine_function to track integer
registers used for parameter passing.
2. Update x86_64_select_profile_regnum to try %r10 first and otherwise use
a caller-saved register that isn't used for parameter passing.

---
Two scratch registers, %r10 and %r11, are available at function entry for
large model profiling.  But %r10 may be used by stack realignment and we
can't use %r10 in this case.  Add x86_64_select_profile_regnum to find
a caller-saved register, which isn't live at entry, for large model
profiling and call sorry if we can't find one.

gcc/

PR target/113689
* config/i386/i386.cc (x86_64_select_profile_regnum): New.
(x86_function_profiler): Call x86_64_select_profile_regnum to
get a scratch register for large model profiling.
* config/i386/i386.h (machine_function): Add
int_parameter_registers.

gcc/testsuite/

PR target/113689
* gcc.target/i386/pr113689-1.c: New file.
* gcc.target/i386/pr113689-2.c: Likewise.
---
 gcc/config/i386/i386.cc| 81 ++
 gcc/testsuite/gcc.target/i386/pr113689-1.c | 49 +
 gcc/testsuite/gcc.target/i386/pr113689-2.c | 41 +++
 3 files changed, 156 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-2.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b3e7c74846e..b27445b8371 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22749,6 +22749,39 @@ current_fentry_section (const char **name)
   return true;
 }
 
+/* Return an caller-saved register, which isn't live, at entry for
+   profile.  */
+
+static int
+x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
+{
+  /* Use %r10 if it isn't used by DRAP.  */
+  if (!crtl->drap_reg || REGNO (crtl->drap_reg) != R10_REG)
+return R10_REG;
+
+  bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+
+  int i;
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+if (GENERAL_REGNO_P (i)
+   && i != R10_REG
+#ifdef NO_PROFILE_COUNTERS
+   && (r11_ok || i != R11_REG)
+#else
+   && i != R11_REG
+#endif
+   && (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
+   && !fixed_regs[i]
+   && call_used_regs[i]
+   && !REGNO_REG_SET_P (reg_live, i))
+  return i;
+
+  sorry ("no register available for profiling %<-mcmodel=large%s%>",
+ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
+
+  return INVALID_REGNUM;
+}
+
 /* Output assembler code to FILE to increment profiler label # LABELNO
for profiling a function entry.  */
 void
@@ -22783,42 +22816,60 @@ x86_function_profiler (FILE *file, int labelno 
ATTRIBUTE_UNUSED)
fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
 #endif
 
+  int scratch;
+  const char *reg_prefix;
+  const char *reg;
+
   if (!TARGET_PECOFF)
{
  switch (ix86_cmodel)
{
case CM_LARGE:
- /* NB: R10 is caller-saved.  Although it can be used as a
-static chain register, it is preserved when calling
-mcount for nested functions.  */
+ scratch = x86_64_select_profile_regnum (true);
+ reg = hi_reg_name[scratch];
+ reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
  if (ASSEMBLER_DIALECT == ASM_INTEL)
-   fprintf (file, "1:\tmovabs\tr10, OFFSET FLAT:%s\n"
-  "\tcall\tr10\n", mcount_name);
+   fprintf (file,
+"1:\tmovabs\t%s%s, OFFSET FLAT:%s\n"
+"\tcall\t%s%s\n",
+reg_prefix, reg, mcount_name, reg_prefix, reg);
  else
-   fprintf (file, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
-mcount_name);
+   fprintf (file,
+"1:\tmovabsq\t$%s, %%%s%s\n\tcall\t*%%%s%s\n",
+mcount_name, reg_prefix, reg, reg_prefix, reg);
  break;
case CM_LARGE_PIC:
 #ifdef NO_PROFILE_COUNTERS
+ scratch = x86_64_select_profile_regnum (false);
+ reg = hi_reg_name[scratch];
+ reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
  if (ASSEMBLER_DIALECT == ASM_INTEL)
{
  fprintf (file, "1:movabs\tr11, "
 "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
- fprintf (file, "\tlea\tr10, 1b[rip]\n");
- fprintf (file, "\tadd\tr10, r11\n");
+ fprintf (file, "\tlea\t%s%s, 1b[rip]\n",
+  reg_prefix, reg);
+ fprintf (file, "\tadd\t%s%s, r11\n",
+  reg_prefix, reg);
 

Re: [PATCH] x86-64: Find a scratch register for large model profiling

2024-02-02 Thread H.J. Lu
On Fri, Feb 2, 2024 at 4:07 AM  wrote:
>
> On 2 February 2024 00:02:54 CET, "H.J. Lu"  wrote:
> >On Thu, Feb 1, 2024 at 10:32 AM Jakub Jelinek  wrote:
> >>
> >> On Thu, Feb 01, 2024 at 10:15:30AM -0800, H.J. Lu wrote:
> >> > --- a/gcc/config/i386/i386.cc
> >> > +++ b/gcc/config/i386/i386.cc
> >> > @@ -22749,6 +22749,31 @@ current_fentry_section (const char **name)
> >> >return true;
> >> >  }
> >> >
> >> > +/* Return an unused caller-saved register at entry for profile.  */
> >> > +
> >> > +static int
> >> > +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
> >> > +{
> >> > +  int i;
> >>
> >> Why not just return R10_REG here if flag_fentry != 0 (i.e. keep existing
> >> behavior unless emitting profiler after prologue)?
> >
> >Fixed in v2.
>
> Nit: r10_ok is now superfluous, but let's wait for Jakub.
> thanks

Fixed in v3.

Thanks.

-- 
H.J.


[PATCH v3] x86-64: Find a scratch register for large model profiling

2024-02-02 Thread H.J. Lu
Changes in v3:

1. Remove r10_ok.

Changes in v2:

1. Add int_parameter_registers to machine_function to track integer
registers used for parameter passing.
2. Update x86_64_select_profile_regnum to try %r10 first and otherwise use
a caller-saved register that isn't used for parameter passing.

---
Two scratch registers, %r10 and %r11, are available at function entry for
large model profiling.  But %r10 may be used by stack realignment and we
can't use %r10 in this case.  Add x86_64_select_profile_regnum to find
a caller-saved register, which isn't used for parameter passing, for
large model profiling and call sorry if we can't find one.

gcc/

PR target/113689
* config/i386/i386.cc (set_int_parameter_registers_bit): New.
(test_int_parameter_registers_bit): Likewise.
(x86_64_select_profile_regnum): New.
(construct_container): Call set_int_parameter_registers_bit for
integer register parameter passing.
(function_arg_32): Likewise.
(x86_function_profiler): Call x86_64_select_profile_regnum to
get a scratch register for large model profiling.
* config/i386/i386.h (machine_function): Add
int_parameter_registers.

gcc/testsuite/

PR target/113689
* gcc.target/i386/pr113689-1.c: New file.
* gcc.target/i386/pr113689-2.c: Likewise.
* gcc.target/i386/pr113689-3.c: Likewise.
---
 gcc/config/i386/i386.cc| 121 ++---
 gcc/config/i386/i386.h |   5 +
 gcc/testsuite/gcc.target/i386/pr113689-1.c |  49 +
 gcc/testsuite/gcc.target/i386/pr113689-2.c |  41 +++
 gcc/testsuite/gcc.target/i386/pr113689-3.c |  24 
 5 files changed, 224 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-3.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b3e7c74846e..483d74a0811 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -2628,6 +2628,32 @@ classify_argument (machine_mode mode, const_tree type,
   return n;
 }
 
+/* Set the integer register REGNO bit in int_parameter_registers.  */
+
+static void
+set_int_parameter_registers_bit (int regno)
+{
+  if (LEGACY_INT_REGNO_P (regno))
+cfun->machine->int_parameter_registers |= 1 << regno;
+  else
+cfun->machine->int_parameter_registers
+  |= 1 << (regno - FIRST_REX_INT_REG + 8);
+}
+
+/* Return true if the integer register REGNO bit in
+   int_parameter_registers is set.  */
+
+static bool
+test_int_parameter_registers_bit (int regno)
+{
+  if (LEGACY_INT_REGNO_P (regno))
+return (cfun->machine->int_parameter_registers
+   & (1 << regno)) != 0;
+  else
+return (cfun->machine->int_parameter_registers
+   & (1 << (regno - FIRST_REX_INT_REG + 8))) != 0;
+}
+
 /* Examine the argument and return set number of register required in each
class.  Return true iff parameter should be passed in memory.  */
 
@@ -2763,6 +2789,8 @@ construct_container (machine_mode mode, machine_mode 
orig_mode,
   {
   case X86_64_INTEGER_CLASS:
   case X86_64_INTEGERSI_CLASS:
+   if (!in_return)
+ set_int_parameter_registers_bit (intreg[0]);
return gen_rtx_REG (mode, intreg[0]);
   case X86_64_SSE_CLASS:
   case X86_64_SSEHF_CLASS:
@@ -2821,6 +2849,11 @@ construct_container (machine_mode mode, machine_mode 
orig_mode,
   if (mode == BLKmode)
{
  /* Use TImode for BLKmode values in 2 integer registers.  */
+ if (!in_return)
+   {
+ set_int_parameter_registers_bit (intreg[0]);
+ set_int_parameter_registers_bit (intreg[1]);
+   }
  exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
  gen_rtx_REG (TImode, intreg[0]),
  GEN_INT (0));
@@ -2829,7 +2862,11 @@ construct_container (machine_mode mode, machine_mode 
orig_mode,
  return ret;
}
   else
-   return gen_rtx_REG (mode, intreg[0]);
+   {
+ if (!in_return)
+   set_int_parameter_registers_bit (intreg[0]);
+ return gen_rtx_REG (mode, intreg[0]);
+   }
 }
 
   /* Otherwise figure out the entries of the PARALLEL.  */
@@ -2860,6 +2897,8 @@ construct_container (machine_mode mode, machine_mode 
orig_mode,
  = gen_rtx_EXPR_LIST (VOIDmode,
   gen_rtx_REG (tmpmode, *intreg),
   GEN_INT (i*8));
+   if (!in_return)
+ set_int_parameter_registers_bit (*intreg);
intreg++;
break;
  case X86_64_SSEHF_CLASS:
@@ -3241,6 +3280,7 @@ pass_in_reg:
  if (regno == AX_REG)
regno = CX_REG;
}
+ set_int_parameter_registers_bit (regno);
  return gen_rtx_REG (mode, regno);

Re: [PATCH] x86-64: Find a scratch register for large model profiling

2024-02-01 Thread H.J. Lu
On Thu, Feb 1, 2024 at 10:32 AM Jakub Jelinek  wrote:
>
> On Thu, Feb 01, 2024 at 10:15:30AM -0800, H.J. Lu wrote:
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -22749,6 +22749,31 @@ current_fentry_section (const char **name)
> >return true;
> >  }
> >
> > +/* Return an unused caller-saved register at entry for profile.  */
> > +
> > +static int
> > +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
> > +{
> > +  int i;
>
> Why not just return R10_REG here if flag_fentry != 0 (i.e. keep existing
> behavior unless emitting profiler after prologue)?

Fixed in v2.

> > +  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> > +if (GENERAL_REGNO_P (i)
> > +#ifdef NO_PROFILE_COUNTERS
> > + && (r11_ok || i != R11_REG)
> > +#else
> > + && i != R11_REG
> > +#endif
> > + && (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
> > + && !fixed_regs[i]
> > + && call_used_regs[i]
> > + && !df_regs_ever_live_p (i))
>
> Also, isn't this too restrictive?
> I mean, all we care about is whether there is some register
> which is not live across the NOTE_INSN_PROLOG_END note, no?
> I.e. doesn't contain any of function's argument that are used later,
> and isn't set in the prologue to be used later.  E.g. call used
> register which is just saved to stack in the prologue might be just fine.

Fixed in v2.

> > +  return i;
> > +
> > +  sorry ("No register available for profiling %<-mcmodel=large%s%>",
>
> Diagnostics shouldn't start with capital letter.
>
> > +  ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
> > +
> > +  return INVALID_REGNUM;
> > +}
> > +
> >  /* Output assembler code to FILE to increment profiler label # LABELNO
> > for profiling a function entry.  */
> >  void
>
> Jakub
>


-- 
H.J.


[PATCH v2] x86-64: Find a scratch register for large model profiling

2024-02-01 Thread H.J. Lu
Changes in v2:

1. Add int_parameter_registers to machine_function to track integer
registers used for parameter passing.
2. Update x86_64_select_profile_regnum to try %r10 first and otherwise use
a caller-saved register that isn't used for parameter passing.

---
Two scratch registers, %r10 and %r11, are available at function entry for
large model profiling.  But %r10 may be used by stack realignment and we
can't use %r10 in this case.  Add x86_64_select_profile_regnum to find
a caller-saved register, which isn't used for parameter passing, for
large model profiling and call sorry if we can't find one.

gcc/

PR target/113689
* config/i386/i386.cc (set_int_parameter_registers_bit): New.
(test_int_parameter_registers_bit): Likewise.
(x86_64_select_profile_regnum): New.
(construct_container): Call set_int_parameter_registers_bit for
integer register parameter passing.
(function_arg_32): Likewise.
(x86_function_profiler): Call x86_64_select_profile_regnum to
get a scratch register for large model profiling.
* config/i386/i386.h (machine_function): Add
int_parameter_registers.

gcc/testsuite/

PR target/113689
* gcc.target/i386/pr113689-1.c: New file.
* gcc.target/i386/pr113689-2.c: Likewise.
* gcc.target/i386/pr113689-3.c: Likewise.
---
 gcc/config/i386/i386.cc| 122 ++---
 gcc/config/i386/i386.h |   5 +
 gcc/testsuite/gcc.target/i386/pr113689-1.c |  49 +
 gcc/testsuite/gcc.target/i386/pr113689-2.c |  41 +++
 gcc/testsuite/gcc.target/i386/pr113689-3.c |  24 
 5 files changed, 225 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-3.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b3e7c74846e..d0538f138e9 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -2628,6 +2628,32 @@ classify_argument (machine_mode mode, const_tree type,
   return n;
 }
 
+/* Set the integer register REGNO bit in int_parameter_registers.  */
+
+static void
+set_int_parameter_registers_bit (int regno)
+{
+  if (LEGACY_INT_REGNO_P (regno))
+cfun->machine->int_parameter_registers |= 1 << regno;
+  else
+cfun->machine->int_parameter_registers
+  |= 1 << (regno - FIRST_REX_INT_REG + 8);
+}
+
+/* Return true if the integer register REGNO bit in
+   int_parameter_registers is set.  */
+
+static bool
+test_int_parameter_registers_bit (int regno)
+{
+  if (LEGACY_INT_REGNO_P (regno))
+return (cfun->machine->int_parameter_registers
+   & (1 << regno)) != 0;
+  else
+return (cfun->machine->int_parameter_registers
+   & (1 << (regno - FIRST_REX_INT_REG + 8))) != 0;
+}
+
 /* Examine the argument and return set number of register required in each
class.  Return true iff parameter should be passed in memory.  */
 
@@ -2763,6 +2789,8 @@ construct_container (machine_mode mode, machine_mode 
orig_mode,
   {
   case X86_64_INTEGER_CLASS:
   case X86_64_INTEGERSI_CLASS:
+   if (!in_return)
+ set_int_parameter_registers_bit (intreg[0]);
return gen_rtx_REG (mode, intreg[0]);
   case X86_64_SSE_CLASS:
   case X86_64_SSEHF_CLASS:
@@ -2821,6 +2849,11 @@ construct_container (machine_mode mode, machine_mode 
orig_mode,
   if (mode == BLKmode)
{
  /* Use TImode for BLKmode values in 2 integer registers.  */
+ if (!in_return)
+   {
+ set_int_parameter_registers_bit (intreg[0]);
+ set_int_parameter_registers_bit (intreg[1]);
+   }
  exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
  gen_rtx_REG (TImode, intreg[0]),
  GEN_INT (0));
@@ -2829,7 +2862,11 @@ construct_container (machine_mode mode, machine_mode 
orig_mode,
  return ret;
}
   else
-   return gen_rtx_REG (mode, intreg[0]);
+   {
+ if (!in_return)
+   set_int_parameter_registers_bit (intreg[0]);
+ return gen_rtx_REG (mode, intreg[0]);
+   }
 }
 
   /* Otherwise figure out the entries of the PARALLEL.  */
@@ -2860,6 +2897,8 @@ construct_container (machine_mode mode, machine_mode 
orig_mode,
  = gen_rtx_EXPR_LIST (VOIDmode,
   gen_rtx_REG (tmpmode, *intreg),
   GEN_INT (i*8));
+   if (!in_return)
+ set_int_parameter_registers_bit (*intreg);
intreg++;
break;
  case X86_64_SSEHF_CLASS:
@@ -3241,6 +3280,7 @@ pass_in_reg:
  if (regno == AX_REG)
regno = CX_REG;
}
+ set_int_parameter_registers_bit (regno);
  return gen_rtx_REG (mode, regno);
}
   break;
@@ -22749,6 

[PATCH] x86-64: Find a scratch register for large model profiling

2024-02-01 Thread H.J. Lu
2 scratch registers, %r10 and %r11, are available at function entry for
large model profiling.  But %r10 may be used by stack realignment and we
can't use %r10 in this case.  Add x86_64_select_profile_regnum to find
a scratch register for large model profiling and sorry if we can't find
one.

gcc/

PR target/113689
* config/i386/i386.cc (x86_64_select_profile_regnum): New.
(x86_function_profiler): Call x86_64_select_profile_regnum to
get a scratch register for large model profiling.

gcc/testsuite/

PR target/113689
* gcc.target/i386/pr113689-1.c: New file.
* gcc.target/i386/pr113689-2.c: Likewise.
* gcc.target/i386/pr113689-3.c: Likewise.
* gcc.target/i386/pr98482-1.c: Updated.
* gcc.target/i386/pr98482-2.c: Likewise.
---
 gcc/config/i386/i386.cc| 73 +-
 gcc/testsuite/gcc.target/i386/pr113689-1.c | 41 
 gcc/testsuite/gcc.target/i386/pr113689-2.c | 32 ++
 gcc/testsuite/gcc.target/i386/pr113689-3.c | 24 +++
 gcc/testsuite/gcc.target/i386/pr98482-1.c  |  4 +-
 gcc/testsuite/gcc.target/i386/pr98482-2.c  |  2 +-
 6 files changed, 158 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-3.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b3e7c74846e..04f88a7162b 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22749,6 +22749,31 @@ current_fentry_section (const char **name)
   return true;
 }
 
+/* Return an unused caller-saved register at entry for profile.  */
+
+static int
+x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
+{
+  int i;
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+if (GENERAL_REGNO_P (i)
+#ifdef NO_PROFILE_COUNTERS
+   && (r11_ok || i != R11_REG)
+#else
+   && i != R11_REG
+#endif
+   && (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
+   && !fixed_regs[i]
+   && call_used_regs[i]
+   && !df_regs_ever_live_p (i))
+  return i;
+
+  sorry ("No register available for profiling %<-mcmodel=large%s%>",
+ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
+
+  return INVALID_REGNUM;
+}
+
 /* Output assembler code to FILE to increment profiler label # LABELNO
for profiling a function entry.  */
 void
@@ -22783,42 +22808,60 @@ x86_function_profiler (FILE *file, int labelno 
ATTRIBUTE_UNUSED)
fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
 #endif
 
+  int scratch;
+  const char *reg_prefix;
+  const char *reg;
+
   if (!TARGET_PECOFF)
{
  switch (ix86_cmodel)
{
case CM_LARGE:
- /* NB: R10 is caller-saved.  Although it can be used as a
-static chain register, it is preserved when calling
-mcount for nested functions.  */
+ scratch = x86_64_select_profile_regnum (true);
+ reg = hi_reg_name[scratch];
+ reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
  if (ASSEMBLER_DIALECT == ASM_INTEL)
-   fprintf (file, "1:\tmovabs\tr10, OFFSET FLAT:%s\n"
-  "\tcall\tr10\n", mcount_name);
+   fprintf (file,
+"1:\tmovabs\t%s%s, OFFSET FLAT:%s\n"
+"\tcall\t%s%s\n",
+reg_prefix, reg, mcount_name, reg_prefix, reg);
  else
-   fprintf (file, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
-mcount_name);
+   fprintf (file,
+"1:\tmovabsq\t$%s, %%%s%s\n\tcall\t*%%%s%s\n",
+mcount_name, reg_prefix, reg, reg_prefix, reg);
  break;
case CM_LARGE_PIC:
 #ifdef NO_PROFILE_COUNTERS
+ scratch = x86_64_select_profile_regnum (false);
+ reg = hi_reg_name[scratch];
+ reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
  if (ASSEMBLER_DIALECT == ASM_INTEL)
{
  fprintf (file, "1:movabs\tr11, "
 "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
- fprintf (file, "\tlea\tr10, 1b[rip]\n");
- fprintf (file, "\tadd\tr10, r11\n");
+ fprintf (file, "\tlea\t%s%s, 1b[rip]\n",
+  reg_prefix, reg);
+ fprintf (file, "\tadd\t%s%s, r11\n",
+  reg_prefix, reg);
  fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
   mcount_name);
- fprintf (file, "\tadd\tr10, r11\n");
- fprintf (file, "\tcall\tr10\n");
+ fprintf (file, "\tadd\t%s%s, r11\n",
+  reg_prefix, reg);
+ fprintf (file, "\tcall\t%s%s\n",

Re: [PATCH v2] Handle private COMDAT function symbol reference in readonly data section

2024-01-31 Thread H.J. Lu
On Wed, Jan 31, 2024 at 10:11 AM Jakub Jelinek  wrote:
>
> On Wed, Jan 31, 2024 at 09:39:12AM -0800, H.J. Lu wrote:
> > GNU binutils has no issues with it:
>
> I know, I meant gcc.
> If I try the proposed:
> --- gcc/varasm.cc.jj2024-01-30 08:44:43.304175273 +0100
> +++ gcc/varasm.cc   2024-01-31 18:45:57.271087170 +0100
> @@ -7459,15 +7459,46 @@ default_elf_select_rtx_section (machine_
>  {
>int reloc = compute_reloc_for_rtx (x);
>
> +  tree decl = nullptr;
> +
> +  /* If it is a private COMDAT function symbol reference, call
> + function_rodata_section for the read-only or relocated read-only
> + data section associated with function DECL so that the COMDAT
> + section will be used for the private COMDAT function symbol.  */
> +  if (HAVE_COMDAT_GROUP)
> +{
> +  if (GET_CODE (x) == CONST
> +&& GET_CODE (XEXP (x, 0)) == PLUS
> +&& CONST_INT_P (XEXP (XEXP (x, 0), 1)))
> +   x = XEXP (XEXP (x, 0), 0);
> +
> +  if (GET_CODE (x) == SYMBOL_REF)
> +   {
> +decl = SYMBOL_REF_DECL (x);
> +if (decl
> +&& (TREE_CODE (decl) != FUNCTION_DECL
> +|| !DECL_COMDAT_GROUP (decl)
> +|| TREE_PUBLIC (decl)))
> +  decl = nullptr;
> +   }
> +}
> +
>/* ??? Handle small data here somehow.  */
>
>if (reloc & targetm.asm_out.reloc_rw_mask ())
>  {
> +  if (decl)
> +   return get_section (reloc == 1
> +   ? ".data.rel.ro.local" : ".data.rel.ro",
> +   SECTION_WRITE | SECTION_RELRO | SECTION_LINKONCE,
> +   decl);
>if (reloc == 1)
> return get_named_section (NULL, ".data.rel.ro.local", 1);
>else
> return get_named_section (NULL, ".data.rel.ro", 3);
>  }
> +  else if (decl)
> +return get_section (".rodata", SECTION_LINKONCE, decl);
>
>return mergeable_constant_section (mode, align, 0);
>  }
>
> and append
> typedef unsigned long int VV __attribute__((vector_size (2 * sizeof (long))));
> VV vv;
> __attribute__((noipa)) static void fn1 (void) {}
> __attribute__((noipa)) static void fn2 (void) {}
>
> void
> fn3 ()
> {
>   VV a = { (unsigned long) &fn1, (unsigned long) &fn2 };
>   vv = a;
> }
> to the first testcase (this is just to get a normal non-comdat
> .data.rel.ro.local section referencing non-comdat non-public symbol),
> then I get the
> pr113617.C:19:1: error: section type conflict with ‘static bool R::B<_R(_A 
> ...), _F>::F(R::H&, const R::H&, R::G) [with _R = void; _F = R::I (*(N1::N2::N3::C<{anonymous}::D, false>*, long long int, long 
> long int, long long int))(void*, long long int, long long int, long long 
> int)>; _A = {}]’
>19 | }
>   | ^
> In file included from pr113617.C:1:
> pr113617.h:21:15: note: ‘static bool R::B<_R(_A ...), _F>::F(R::H&, const 
> R::H&, R::G) [with _R = void; _F = R::I (*(N1::N2::N3::C<{anonymous}::D, false>*, long long int, long 
> long int, long long int))(void*, long long int, long long int, long long 
> int)>; _A = {}]’ was declared here
>21 |   static bool F(H &, const H &, G) { return false; }
>   |   ^
> I feared.
> So, it seems get_section handles section purely by name lookup
> and isn't prepared to deal with multiple different sections
> of the same name, but different comdat group.
>
> Thus, maybe at least temporarily we need to use unique
> section names here, say
> .data.rel.ro.local.pool.COMDAT_NAME
> .data.rel.ro.pool.COMDAT_NAME
> .rodata.pool.COMDAT_NAME
> where COMDAT_NAME would be the name of the comdat group, i.e.
> IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl))
>
> Jakub
>

I am testing this patch.

-- 
H.J.
From 3c8b9ad67383d645e19746720deb6e8f020fccd0 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Fri, 26 Jan 2024 12:20:11 -0800
Subject: [PATCH v3] Handle private COMDAT function symbol reference in readonly data section

Changes in v3:

1. Add get_comdat_function_rodata_section.
2. Add a new test.

Changes in v2:

1. Check decl non-null before dereferencing it.
2. Update PR rtl-optimization/113617 from

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113617#c14

---
For a private COMDAT function symbol reference in readonly data section,
instead of putting it in .data.rel.ro or .rodata.cst section, call
get_comdat_function_rodata_section to put the private COMDAT function
symbol reference in .data.rel.ro or .rodata section in the same COMDAT
group as the function DECL.

gcc/

	PR rtl-optimization/113617
	* varasm.cc (get_comdat_function_rodata_section): New.
	(default_elf_select_rtx_sectio

[PATCH] Assuming the working GNU assembler with --with-gnu-as

2024-01-31 Thread H.J. Lu
When configuring GCC with

--target=TARGET

to build a cross compiler to reproduce a compiler bug, as and collect have

ORIGINAL_AS_FOR_TARGET=""

As a result, many target features are disabled, which makes it almost
impossible to reproduce the bug.  Without an assembler, the GCC build won't
finish and the run-time libraries won't build.  But GCC itself can
be built.  It is very useful for debugging GCC with a cross compiler.
Enable GNU assembler features for --with-gnu-as if an assembler isn't
available.

PR target/113684
* acinclude.m4 (gcc_GAS_CHECK_FEATURE): Assuming the working GNU
assembler for --with-gnu-as if assembler isn't available.
* configure: Regenerated.
---
 gcc/acinclude.m4 |   3 +
 gcc/configure| 492 +++
 2 files changed, 495 insertions(+)

diff --git a/gcc/acinclude.m4 b/gcc/acinclude.m4
index 425146719cf..3c959748f97 100644
--- a/gcc/acinclude.m4
+++ b/gcc/acinclude.m4
@@ -493,6 +493,9 @@ AC_CACHE_CHECK([assembler for $1], [$2],
   cat conftest.s >_MESSAGE_LOG_FD
 fi
 rm -f conftest.o conftest.s
+  elif test x$gcc_cv_as = x -a x$gas_flag = xyes ; then
+AC_MSG_WARN([Assume the working GNU assembler])
+[$2]=yes
   fi])
 ifelse([$6],,,[dnl
 if test $[$2] = yes; then
diff --git a/gcc/configure b/gcc/configure
index c83e09beea9..10eef180dbe 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -25753,6 +25753,10 @@ else
   cat conftest.s >&5
 fi
 rm -f conftest.o conftest.s
+  elif test x$gcc_cv_as = x -a x$gas_flag = xyes ; then
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Assume the working GNU 
assembler" >&5
+$as_echo "$as_me: WARNING: Assume the working GNU assembler" >&2;}
+gcc_cv_as_balign_and_p2align=yes
   fi
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: 
$gcc_cv_as_balign_and_p2align" >&5
@@ -25785,6 +25789,10 @@ else
   cat conftest.s >&5
 fi
 rm -f conftest.o conftest.s
+  elif test x$gcc_cv_as = x -a x$gas_flag = xyes ; then
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Assume the working GNU 
assembler" >&5
+$as_echo "$as_me: WARNING: Assume the working GNU assembler" >&2;}
+gcc_cv_as_max_skip_p2align=yes
   fi
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_max_skip_p2align" 
>&5
@@ -25817,6 +25825,10 @@ else
   cat conftest.s >&5
 fi
 rm -f conftest.o conftest.s
+  elif test x$gcc_cv_as = x -a x$gas_flag = xyes ; then
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Assume the working GNU 
assembler" >&5
+$as_echo "$as_me: WARNING: Assume the working GNU assembler" >&2;}
+gcc_cv_as_literal16=yes
   fi
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_literal16" >&5
@@ -25860,6 +25872,10 @@ conftest_label2: .word 0
   cat conftest.s >&5
 fi
 rm -f conftest.o conftest.s
+  elif test x$gcc_cv_as = x -a x$gas_flag = xyes ; then
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Assume the working GNU 
assembler" >&5
+$as_echo "$as_me: WARNING: Assume the working GNU assembler" >&2;}
+gcc_cv_as_subsection_m1=yes
   fi
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_subsection_m1" >&5
@@ -25892,6 +25908,10 @@ else
   cat conftest.s >&5
 fi
 rm -f conftest.o conftest.s
+  elif test x$gcc_cv_as = x -a x$gas_flag = xyes ; then
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Assume the working GNU 
assembler" >&5
+$as_echo "$as_me: WARNING: Assume the working GNU assembler" >&2;}
+gcc_cv_as_weak=yes
   fi
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_weak" >&5
@@ -25924,6 +25944,10 @@ else
   cat conftest.s >&5
 fi
 rm -f conftest.o conftest.s
+  elif test x$gcc_cv_as = x -a x$gas_flag = xyes ; then
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Assume the working GNU 
assembler" >&5
+$as_echo "$as_me: WARNING: Assume the working GNU assembler" >&2;}
+gcc_cv_as_weakref=yes
   fi
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_weakref" >&5
@@ -25957,6 +25981,10 @@ else
   cat conftest.s >&5
 fi
 rm -f conftest.o conftest.s
+  elif test x$gcc_cv_as = x -a x$gas_flag = xyes ; then
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Assume the working GNU 
assembler" >&5
+$as_echo "$as_me: WARNING: Assume the working GNU assembler" >&2;}
+gcc_cv_as_nsubspa_comdat=yes
   fi
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_nsubspa_comdat" >&5
@@ -26005,6 +26033,10 @@ else
   cat conftest.s >&5
 fi
 rm -f conftest.o conftest.s
+  elif test x$gcc_cv_as = x -a x$gas_flag = xyes ; then
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Assume the working GNU 
assembler" >&5
+$as_echo "$as_me: WARNING: Assume the working GNU assembler" >&2;}
+gcc_cv_as_hidden=yes
   fi
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_hidden" >&5
@@ -26465,6 +26497,10 @@ else
   cat conftest.s >&5
 fi
 rm -f 

[PATCH] Assuming the working GNU assembler with --with-gnu-as

2024-01-31 Thread H.J. Lu
When configuring GCC with

--target=TARGET

to build a cross compiler to reproduce a compiler bug, as and collect have

ORIGINAL_AS_FOR_TARGET=""

As a result, many target features are disabled, which makes it almost
impossible to reproduce the bug.  Without an assembler, the GCC build won't
finish and the run-time libraries won't build.  But GCC itself can
be built.  It is very useful for debugging GCC with a cross compiler.
Enable GNU assembler features for --with-gnu-as if an assembler isn't
available.

PR target/113684
* acinclude.m4 (gcc_GAS_CHECK_FEATURE): Assuming the working GNU
assembler for --with-gnu-as if assembler isn't available.
* configure: Regenerated.
---
 gcc/acinclude.m4 |   3 +
 gcc/configure| 492 +++
 2 files changed, 495 insertions(+)

diff --git a/gcc/acinclude.m4 b/gcc/acinclude.m4
index 425146719cf..3c959748f97 100644
--- a/gcc/acinclude.m4
+++ b/gcc/acinclude.m4
@@ -493,6 +493,9 @@ AC_CACHE_CHECK([assembler for $1], [$2],
   cat conftest.s >_MESSAGE_LOG_FD
 fi
 rm -f conftest.o conftest.s
+  elif test x$gcc_cv_as = x -a x$gas_flag = xyes ; then
+AC_MSG_WARN([Assume the working GNU assembler])
+[$2]=yes
   fi])
 ifelse([$6],,,[dnl
 if test $[$2] = yes; then


Re: [PATCH v2] Handle private COMDAT function symbol reference in readonly data section

2024-01-31 Thread H.J. Lu
On Wed, Jan 31, 2024 at 9:10 AM Jakub Jelinek  wrote:
>
> On Wed, Jan 31, 2024 at 08:48:33AM -0800, H.J. Lu wrote:
> > Which function (target hook) can I use to generate
> >
> >  .section .data.rel.ro.local,"awG",@progbits,_ZN1AIxE3fooExx,comdat
>
> Just
>   if (decl)
> return get_section (reloc == 1
> ? ".data.rel.ro.local" : ".data.rel.ro",
> SECTION_WRITE | SECTION_RELRO | SECTION_LINKONCE,
> decl);
> for the first hunk and
>   else if (decl)
> return get_section (".rodata", SECTION_LINKONCE, decl);
> in the second case?
>
> Haven't tried it though, maybe the get_section section conflict stuff
> isn't able to handle it (and perhaps that is the reason why we simply don't
> emit functions into
> .section .text,"axG",@progbits,whatever,comdat
> sections instead of
> .section .text.function_name,"axG",@progbits,whatever,comdat

GNU binutils has no issues with it:

[hjl@gnu-cfl-3 tmp]$ cat x.s
.section .text,"axG",@progbits,whatever,comdat
nop
.text
nop
[hjl@gnu-cfl-3 tmp]$ gcc -c x.s
[hjl@gnu-cfl-3 tmp]$ readelf -SW x.o | grep text
  [ 2] .text PROGBITS 48
01 00  AX  0   0  1
  [ 5] .text PROGBITS 49
01 00 AXG  0   0  1
[hjl@gnu-cfl-3 tmp]$

If it doesn't work for some targets, we can use
targetm.asm_out.function_rodata_section.

> In such case, we could append something to those section names,
> like const.pool (or whatever else that couldn't clash with function names,
> so needs probably a dot somewhere).  Could be
> .data.rel.ro.local.const.pool
> .data.rel.ro.const.pool
> or
> .data.rel.ro.local..shared
> .data.rel.ro..shared
> or something similar, but .data.rel.ro.local.shared would not be ok,
> because it could clash with .data.rel.ro.local section for shared function.
>
> > > Another question is if we need to do anything about the
> > > DECL_COMDAT_GROUP (decl) && DECL_SECTION_NAME (decl)
> > > && startswith (DECL_SECTION_NAME (decl), ".gnu.linkonce.t.")
> > > case (older linkers) (i.e. when using years old GNU linkers).
> > >
> >
> > Should we support such targets? It is not easy for me to test it.
>
> Perhaps let's wait if somebody files an issue with such configuration.
>
> Jakub
>


-- 
H.J.


Re: [PATCH v2] Handle private COMDAT function symbol reference in readonly data section

2024-01-31 Thread H.J. Lu
On Wed, Jan 31, 2024 at 8:30 AM Jakub Jelinek  wrote:
>
> On Tue, Jan 30, 2024 at 06:21:36PM -0800, H.J. Lu wrote:
> > Changes in v2:
> >
> > 1. Check decl non-null before dereferencing it.
> > 2. Update PR rtl-optimization/113617 from
>
> Thanks for updating the testcase.
>
> > --- a/gcc/varasm.cc
> > +++ b/gcc/varasm.cc
> > @@ -7459,16 +7459,46 @@ default_elf_select_rtx_section (machine_mode mode, 
> > rtx x,
> >  {
> >int reloc = compute_reloc_for_rtx (x);
> >
> > +  tree decl = nullptr;
> > +
> > +  /* If it is a private COMDAT function symbol reference, call
> > + function_rodata_section for the read-only or relocated read-only
> > + data section associated with function DECL so that the COMDAT
> > + section will be used for the private COMDAT function symbol.  */
> > +  if (HAVE_COMDAT_GROUP)
> > +{
> > +  if (GET_CODE (x) == CONST
> > +   && GET_CODE (XEXP (x, 0)) == PLUS
> > +   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
> > + x = XEXP (XEXP (x, 0), 0);
> > +
> > +  if (GET_CODE (x) == SYMBOL_REF)
> > + {
> > +   decl = SYMBOL_REF_DECL (x);
> > +   if (decl
> > +   && (TREE_CODE (decl) != FUNCTION_DECL
> > +   || !DECL_COMDAT_GROUP (decl)
> > +   || TREE_PUBLIC (decl)))
> > + decl = nullptr;
> > + }
> > +}
> > +
> >/* ??? Handle small data here somehow.  */
> >
> >if (reloc & targetm.asm_out.reloc_rw_mask ())
> >  {
> > +  if (decl)
> > + return targetm.asm_out.function_rodata_section (decl, true);
>
> As I wrote before, I still very much dislike this.
> We want to refer to the
> _ZN1R1BIFvvENS_1IIFPFvPvxxxEPN2N12N22N31CIN12_GLOBAL__N_11DIxEELb0EEExxx1FERNS_1HERKSI_NS_1GE
> private symbol defined in
> .text._ZN1R1BIFvvENS_1IIFPFvPvxxxEPN2N12N22N31CIN12_GLOBAL__N_11DIxEELb0EEExxx1FERNS_1HERKSI_NS_1GE
> section in _ZN1AIxE3fooExx comdat group from some readonly data
> memory, and read that from
> _ZN2N12N22N31XILi1EE3booINS1_1CIN12_GLOBAL__N_11DIxEELb0EvxxxRT_ function
> defined in 
> .text._ZN2N12N22N31XILi1EE3booINS1_1CIN12_GLOBAL__N_11DIxEELb0EvxxxRT_
> section in the same comdat group.
>
> The patch puts that into
> .data.rel.ro.local._ZN1R1BIFvvENS_1IIFPFvPvxxxEPN2N12N22N31CIN12_GLOBAL__N_11DIxEELb0EEExxx1FERNS_1HERKSI_NS_1GE
> section in the same comdat group, but that just looks weird and for
> targets which use section anchors also inefficient.
>
> If we have a shared constant pool (otherwise the constants would be emitted
> into a per-function constant pool of that
> _ZN2N12N22N31XILi1EE3booINS1_1CIN12_GLOBAL__N_11DIxEELb0EvxxxRT_
> function and would live in something based on that function name.
> But in case it is shared, it is normally just .data.rel.ro.local or
> .data.rel.ro section, shared by whatever refers to it.
> These comdat private symbols are kind of exception, they can still be
> shared, but have to be shared only within the containing comdat group
> because it isn't valid to refer to them from other comdat groups.
> So, it is ok if say two different functions in the same comdat group
> actually share those MEM constants.
> Thus, I think for the DECL_COMDAT_GROUP (decl) && HAVE_COMDAT_GROUP
> case it would be best to make it clear in the section name that it
> is a .data.rel.ro.local or .data.rel.ro section shared by everything
> in the comdat group.  So, shouldn't it be just
> .section
> .data.rel.ro.local,"awG",@progbits,_ZN1AIxE3fooExx,comdat
> and emit that directly in this function rather than using
> targetm.asm_out.function_rodata_section?

Which function (target hook) can I use to generate

 .section .data.rel.ro.local,"awG",@progbits,_ZN1AIxE3fooExx,comdat

> Looking at targetm.asm_out.function_rodata_section, it is
> default_function_rodata_section on most targets, then on darwin,
> cygwin, AIX and mcore default_no_function_rodata_section which just
> returns the shared readonly_data_section (I hope those targets don't
> DECL_COMDAT_GROUP (decl) && HAVE_COMDAT_GROUP, otherwise it will simply not
> work) and then loongarch does some ugly magic (which is related to
> jumptables and so nothing we need to care about here hopefully).
>
> Another question is if we need to do anything about the
> DECL_COMDAT_GROUP (decl) && DECL_SECTION_NAME (decl)
> && startswith (DECL_SECTION_NAME (decl), ".gnu.linkonce.t.")
> case (older linkers) (i.e. when using years old GNU linkers).
>

Should we support such targets? It is not easy for me to test it.

Thanks.

-- 
H.J.


[PATCH v2] Handle private COMDAT function symbol reference in readonly data section

2024-01-30 Thread H.J. Lu
Changes in v2:

1. Check decl non-null before dereferencing it.
2. Update PR rtl-optimization/113617 from

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113617#c14

---
For a private COMDAT function symbol reference in readonly data section,
instead of putting it in .data.rel.ro or .rodata.cst section, call
function_rodata_section to get the read-only or relocated read-only
data section associated with the function DECL so that the COMDAT
section will be used for the private COMDAT function symbol.

gcc/

PR rtl-optimization/113617
* varasm.cc (default_elf_select_rtx_section): Call
function_rodata_section to get the read-only or relocated
read-only data section for private COMDAT function symbol
reference.

gcc/testsuite/

PR rtl-optimization/113617
* g++.dg/pr113617-1a.C: New test.
* g++.dg/pr113617-1b.C: Likewise.
---
 gcc/testsuite/g++.dg/pr113617-1a.C | 145 +
 gcc/testsuite/g++.dg/pr113617-1b.C |   8 ++
 gcc/varasm.cc  |  30 ++
 3 files changed, 183 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/pr113617-1a.C
 create mode 100644 gcc/testsuite/g++.dg/pr113617-1b.C

diff --git a/gcc/testsuite/g++.dg/pr113617-1a.C 
b/gcc/testsuite/g++.dg/pr113617-1a.C
new file mode 100644
index 000..c93f08b5068
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr113617-1a.C
@@ -0,0 +1,145 @@
+// { dg-do compile { target fpic } }
+// { dg-require-visibility "" }
+// { dg-options "-O2 -std=c++11 -fPIC -fvisibility=hidden 
-fvisibility-inlines-hidden" }
+
+namespace {
+template  struct J { static constexpr int value = V; };
+template  using K = J;
+using M = K;
+template  struct L { template  using type = _Tp; 
};
+template  using N = typename 
L<_Cond>::type<_If, _Else>;
+M k;
+template  struct O { using type = _Tp; };
+template 
+struct P : N, _Up> {};
+template  struct Q { using type = typename P<_Tp>::type; };
+}
+namespace R {
+struct H;
+enum G {};
+template  class S;
+struct T { using U = bool (*) (H &, const H &, G); U F; };
+template  class B;
+template 
+struct B<_R(_A...), _F> {
+  static bool F(H &, const H &, G) { return false; }
+  __attribute__((noipa)) static _R bar(const H &) {}
+};
+template 
+struct S<_R(_A...)> : T {
+  template  using AH = B<_R(), _F>;
+  template  S(_F) {
+using AG = AH<_F>;
+barr = AG::bar;
+F = AG::F;
+  }
+  using AF = _R (*)(const H &);
+  AF barr;
+};
+template  class I;
+template 
+struct I<_F(_B...)> {};
+template  using W = decltype(k);
+template  struct V {
+  typedef I::type(typename Q<_B>::type...)> type;
+};
+template 
+__attribute__((noipa)) typename V::value, _F, _B...>::type
+baz(_F, _B...) { return typename V::value, _F, _B...>::type (); }
+template  struct AJ {
+  template  struct _Ptr { using type = _Up *; };
+  using AI = typename _Ptr<_Tp>::type;
+};
+template  struct Y {
+  using AI = typename AJ<_Tp>::AI;
+  AI operator->();
+};
+}
+extern int z;
+namespace N1 {
+namespace N2 {
+namespace N3 {
+enum Z { Z1, Z2 };
+template  struct X {
+  template 
+  __attribute__((noipa)) void boo(long long, long long, long long, _F &) {}
+};
+struct AC {
+  AC(int);
+  void m1(R::S);
+};
+template 
+__attribute__((noipa)) void garply(void *, long long, long long, long long) {}
+template <>
+template 
+void X::boo(long long, long long x, long long y, _F ) {
+  AC pool(z);
+  for (;;) {
+auto job = R::baz(garply<_F>, , y, y, x);
+pool.m1(job);
+  }
+}
+struct AB {
+  static AB ();
+  template 
+  void boo(long first, long x, long y, _F fi) {
+switch (ab1) {
+case Z1:
+  ab2->boo(first, x, y, fi);
+case Z2:
+  ab3->boo(first, x, y, fi);
+}
+  }
+  Z ab1;
+  R::Y> ab2;
+  R::Y> ab3;
+};
+template  struct C;
+template  struct C<_F, false> {
+  __attribute__((noipa)) C(_F) {}
+  void boo(long first, long x, long y) {
+auto u = AB::bleh();
+u.boo(first, x, y, *this);
+  }
+};
+template  struct AA { typedef C<_F, 0> type; };
+}
+}
+}
+struct AD {
+  template 
+  static void boo(long first, long x, long y, _F f) {
+typename N1::N2::N3::AA<_F>::type fi(f);
+fi.boo(first, x, y);
+  }
+  template 
+  static void boo(long first, long x, _F f) {
+boo(first, x, 0, f);
+  }
+};
+template  struct A {
+  void foo(long long, long long);
+  int *c;
+};
+namespace {
+template  struct D { __attribute__((noipa)) D(int *) {} };
+}
+template 
+void A::foo(long long x, long long y)
+{
+  int e;
+  D d();
+  AD::boo(0, y, d);
+  long p;
+  for (p = 0; p < x; p++)
+c[p] = c[p - 1];
+}
+int z;
+long xx1;
+void corge() {
+  A a;
+  a.foo(xx1, 0);
+}
+
+// { dg-final { scan-assembler-not ".section\t\.data\.rel\.ro\.local,\"aw\"" { 
target { { i?86-*-linux* x86_64-*-linux* } && { ! ia32 } } } } }
+// { dg-final { scan-assembler ".section\t.data\.rel\.ro\.local\..*,\"awG\"" { 
target { { i?86-*-linux* x86_64-*-linux* } && { ! ia32 } } } } }
diff --git a/gcc/testsuite/g++.dg/pr113617-1b.C 
b/gcc/testsuite/g++.dg/pr113617-1b.C

Re: [PATCH] i386: Add "Ws" constraint for symbolic address/label reference [PR105576]

2024-01-30 Thread H.J. Lu
On Tue, Jan 16, 2024 at 11:47 PM Uros Bizjak  wrote:
>
> On Thu, Jan 11, 2024 at 7:24 PM Fangrui Song  wrote:
> >
> > Printing the raw symbol is useful in inline asm (e.g. in C++ to get the
> > mangled name).  Similar constraints are available in other targets (e.g.
> > "S" for aarch64/riscv, "Cs" for m68k).
> >
> > There isn't a good way for x86 yet, e.g. "i" doesn't work for
> > PIC/-mcmodel=large.  This patch adds "Ws".  Here are possible use cases:
> >
> > ```
> > namespace ns { extern int var; }
> > asm (".pushsection .xxx,\"aw\"; .dc.a %0; .popsection" :: "Ws"());
> > asm (".reloc ., BFD_RELOC_NONE, %0" :: "Ws"());
> > ```
> >
> > gcc/ChangeLog:
> >
> > PR target/105576
> > * config/i386/constraints.md: Define constraint "Ws".
> > * doc/md.texi: Document it.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/asm-raw-symbol.c: New testcase.
>
> OK.

Hi Fangrui,

I pushed it for you with indentation changes in the commit log.

Thanks.

> Thanks,
> Uros.
>
> >
> > ---
> >
> > This obsoletes 
> > https://gcc.gnu.org/pipermail/gcc-patches/2024-January/642580.html
> > I initially tried 'z', but Uros requested that a W prefix is used.
> > ---
> >  gcc/config/i386/constraints.md |  4 
> >  gcc/doc/md.texi|  4 
> >  gcc/testsuite/gcc.target/i386/asm-raw-symbol.c | 13 +
> >  3 files changed, 21 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/asm-raw-symbol.c
> >
> > diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
> > index 0c6e662df25..280e4c8e36c 100644
> > --- a/gcc/config/i386/constraints.md
> > +++ b/gcc/config/i386/constraints.md
> > @@ -348,6 +348,10 @@ (define_constraint "Wf"
> > to double word size."
> >(match_operand 0 "x86_64_dwzext_immediate_operand"))
> >
> > +(define_constraint "Ws"
> > +  "A symbolic reference or label reference."
> > +  (match_code "const,symbol_ref,label_ref"))
> > +
> >  (define_constraint "Z"
> >"32-bit unsigned integer constant, or a symbolic reference known
> > to fit that range (for immediate operands in zero-extending x86-64
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> > index 47a87d6ceec..b0c61925120 100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -4275,6 +4275,10 @@ require non-@code{VOIDmode} immediate operands).
> >  128-bit integer constant where both the high and low 64-bit word
> >  satisfy the @code{e} constraint.
> >
> > +@item Ws
> > +A symbolic reference or label reference.
> > +You can use the @code{%p} modifier to print the raw symbol.
> > +
> >  @item Z
> >  32-bit unsigned integer constant, or a symbolic reference known
> >  to fit that range (for immediate operands in zero-extending x86-64
> > diff --git a/gcc/testsuite/gcc.target/i386/asm-raw-symbol.c 
> > b/gcc/testsuite/gcc.target/i386/asm-raw-symbol.c
> > new file mode 100644
> > index 000..b7854567dd9
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/asm-raw-symbol.c
> > @@ -0,0 +1,13 @@
> > +/* { dg-do compile } */
> > +
> > +extern int var;
> > +
> > +void
> > +func (void)
> > +{
> > +  __asm__ ("@ %p0" : : "Ws" (func));
> > +  __asm__ ("@ %p0" : : "Ws" ( + 1));
> > +}
> > +
> > +/* { dg-final { scan-assembler "@ func" } } */
> > +/* { dg-final { scan-assembler "@ var\\+4" } } */
> > --
> > 2.43.0.275.g3460e3d667-goog
> >



-- 
H.J.


Re: [PATCH] Handle function symbol reference in readonly data section

2024-01-30 Thread H.J. Lu
On Tue, Jan 30, 2024 at 4:58 AM H.J. Lu  wrote:
>
> On Tue, Jan 30, 2024 at 4:51 AM Jakub Jelinek  wrote:
> >
> > On Mon, Jan 29, 2024 at 06:05:25PM -0800, H.J. Lu wrote:
> > > LRA may call force_const_mem on this insn in the function
> > >
> > > (gdb) call debug_tree (func_decl)
> > >   > > type  > > type  > > align:8 warn_if_not_align:0 symtab:0 alias-set -1
> > > canonical-type 0x77690f18
> > > pointer_to_this >
> > > QI
> > > size 
> > > unit-size 
> > > align:8 warn_if_not_align:0 symtab:0 alias-set -1
> > > canonical-type 0x727512a0 method basetype  > > 0x7264b0a8 function_summary>
> > > arg-types  > > 0x726887e0>
> > > chain  > > 0x7264b2a0>
> > > chain  > > purpose 
> > > value 
> > > chain  > > 0x77690f18 void>>>>>
> > > pointer_to_this >
> > > addressable asm_written used nothrow public static weak decl_5 QI
> > > /export/gnu/import/git/gitlab/x86-gcc/gcc/symbol-summary.h:268:1
> > > align:16 warn_if_not_align:0 context  > > function_summary> initial  abstract_origin
> > > 
> > > result  > > 0x77690f18 void>
> > > ignored VOID
> > > /export/gnu/import/git/gitlab/x86-gcc/gcc/symbol-summary.h:269:20
> > > align:8 warn_if_not_align:0 context  > > 0x72770900 __ct_base >>
> > > full-name "function_summary::function_summary(symbol_table*,
> > > bool = false) [with T = clone_info]"
> > > template-info  > > template  > > 0x72701540>
> > > VOID
> > > /export/gnu/import/git/gitlab/x86-gcc/gcc/symbol-summary.h:268:1
> > > align:1 warn_if_not_align:0 context  > > 0x727010a8 function_summary> result  > > __ct >
> > > parms  > > 0x7768a2d0 1>
> > > value  > > 0x72646e00 function_summary>
> > > length:1
> > > elt:0  > > 0x726f5c78 T>>>>
> > > full-name "template
> > > function_summary::function_summary(symbol_table*, bool)">
> > > args  > > 0x72987930 clone_info>>>
> > > use_template=1
> > > arguments  > > type  > > 0x7264b0a8 function_summary>
> > > readonly sizes-gimplified public unsigned DI
> > > size 
> > > unit-size 
> > > align:64 warn_if_not_align:0 symtab:0 alias-set -1
> > > canonical-type 0x72751348>
> > > readonly used unsigned read DI
> > > /export/gnu/import/git/gitlab/x86-gcc/gcc/symbol-summary.h:269:20 size
> > >  unit-size  > > 8>
> > > align:64 warn_if_not_align:0 context  > > 0x72770900 __ct_base > abstract_origin  > > this>
> > > (reg/f:DI 117 [ this ]) arg-type 
> > > incoming-rtl (reg:DI 5 di [ this ])
> > > chain  > > 0x7264b2a0>
> > > used unsigned DI
> > > /export/gnu/import/git/gitlab/x86-gcc/gcc/symbol-summary.h:268:56 size
> > >  unit-size  > > 8>
> > > align:64 warn_if_not_align:0 context  > > 0x72770900 __ct_base > abstract_origin  > > symtab>
> > > (reg/v/f:DI 118 [ symtab ]) arg-type  > > 0x7264b2a0>
> > > incoming-rtl (reg:DI 4 si [ symtab ]) chain  > > 0x72773880 ggc>>>
> > > struct-function 0x722cb228 chain  > > __ct_comp >>
> > > (gdb)
> > >
> > >  in gcc master branch tree:
> > >
> > > (gdb) call debug_rtx (curr_insn)
> > > (insn 14 128 15 2 (set (reg:V2DI 121 [ _21 ])
> > > (vec_concat:V2DI (symbol_ref/i:DI
> > > ("_ZN16function_summaryIP10clone_infoE16symtab_insertionEP11cgraph_nodePv")
> > > [flags 0x3] )
> > > (symbol_ref/i:DI
> > > ("_ZN16function_summaryIP10clone_infoE14symtab_removalEP11cgraph_nodePv")
> > > [flags 0x3] )))
> > > "/export/gnu/import/git/gitlab/x86-gcc/gcc/symbol-summary.h":36:22
> > > 7521 {vec_concatv2di}
> > >  (expr_list:REG_DEAD (reg/f:DI 123)
> > >

Re: [PATCH] Handle COMDAT function symbol reference in readonly data section

2024-01-30 Thread H.J. Lu
On Mon, Jan 29, 2024 at 3:08 PM H.J. Lu  wrote:
>
> For a COMDAT function symbol reference in readonly data section,
> instead of putting it in .data.rel.ro or .rodata.cst section, call
> function_rodata_section to get the read-only or relocated read-only
> data section associated with the function DECL so that the COMDAT
> section will be used for the COMDAT function symbol.
>
> gcc/
>
> PR rtl-optimization/113617
> * varasm.cc (default_elf_select_rtx_section): Call
> function_rodata_section to get the read-only or relocated
> read-only data section for COMDAT function symbol reference.
>
> gcc/testsuite/
>
> PR rtl-optimization/113617
> * g++.dg/pr113617-1a.C: New test.
> * g++.dg/pr113617-1b.C: Likewise.
> ---
>  gcc/testsuite/g++.dg/pr113617-1a.C | 170 +
>  gcc/testsuite/g++.dg/pr113617-1b.C |   8 ++
>  gcc/varasm.cc  |  28 +
>  3 files changed, 206 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/pr113617-1a.C
>  create mode 100644 gcc/testsuite/g++.dg/pr113617-1b.C
>
> diff --git a/gcc/testsuite/g++.dg/pr113617-1a.C 
> b/gcc/testsuite/g++.dg/pr113617-1a.C
> new file mode 100644
> index 000..effd50841c0
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/pr113617-1a.C
> @@ -0,0 +1,170 @@
> +// { dg-do compile { target fpic } }
> +// { dg-require-visibility "" }
> +// { dg-options "-O2 -std=c++11 -fPIC -fvisibility=hidden 
> -fvisibility-inlines-hidden" }
> +
> +namespace {
> +template  struct integral_constant {
> +  static constexpr int value = __v;
> +};
> +template  using __bool_constant = integral_constant<__v>;
> +using true_type = __bool_constant;
> +template  struct __conditional {
> +  template  using type = _Tp;
> +};
> +template 
> +using __conditional_t = typename __conditional<_Cond>::type<_If, _Else>;
> +true_type __trans_tmp_1;
> +template  struct remove_cv { using type = _Tp; };
> +template 
> +struct __decay_selector
> +: __conditional_t, _Up> {};
> +template  struct decay {
> +  using type = typename __decay_selector<_Tp>::type;
> +};
> +}
> +struct vtkCellArray {};
> +namespace blah {
> +struct _Any_data;
> +enum _Manager_operation {};
> +template  class function;
> +struct _Function_base {
> +  using _Manager_type = bool (*)(_Any_data &, const _Any_data &,
> + _Manager_operation);
> +  _Manager_type _M_manager;
> +};
> +template  class _Function_handler;
> +template 
> +struct _Function_handler<_Res(_ArgTypes...), _Functor> {
> +  static bool _M_manager(_Any_data &, const _Any_data &, _Manager_operation) 
> {
> +return false;
> +  }
> +  __attribute__((noipa)) static _Res _M_invoke(const _Any_data &) {}
> +};
> +template 
> +struct function<_Res(_ArgTypes...)> : _Function_base {
> +  template 
> +  using _Handler = _Function_handler<_Res(), _Functor>;
> +  template  function(_Functor) {
> +using _My_handler = _Handler<_Functor>;
> +_M_invoker = _My_handler::_M_invoke;
> +_M_manager = _My_handler::_M_manager;
> +  }
> +  using _Invoker_type = _Res (*)(const _Any_data &);
> +  _Invoker_type _M_invoker;
> +};
> +template  class _Bind;
> +template 
> +struct _Bind<_Functor(_Bound_args...)> {};
> +template  using __is_socketlike = decltype(__trans_tmp_1);
> +template  struct _Bind_helper {
> +  typedef _Bind::type(
> +  typename decay<_BoundArgs>::type...)>
> +  type;
> +};
> +template 
> +__attribute__((noipa)) typename _Bind_helper<__is_socketlike<_Func>::value, 
> _Func, _BoundArgs...>::type
> +bind(_Func, _BoundArgs...) { return typename 
> _Bind_helper<__is_socketlike<_Func>::value, _Func, _BoundArgs...>::type (); }
> +template  struct __uniq_ptr_impl {
> +  template  struct _Ptr { using type = _Up *; };
> +  using pointer = typename _Ptr<_Tp>::type;
> +};
> +template  struct unique_ptr {
> +  using pointer = typename __uniq_ptr_impl<_Tp>::pointer;
> +  pointer operator->();
> +};
> +}
> +extern int For_threadNumber;
> +namespace vtk {
> +namespace detail {
> +namespace smp {
> +enum BackendType { Sequential, STDThread };
> +template  struct vtkSMPToolsImpl {
> +  template 
> +  __attribute__((noipa)) void For(long long, long long, long long, 
> FunctorInternal &) {}
> +};
> +struct vtkSMPThreadPool {
> +  vtkSMPThreadPool(int);
> +  void DoJob(blah::function);
> +};
> +template 
> +__attribute__((noipa)) void ExecuteFunctorSTDThread(void *, long long, long 
> long

Re: [PATCH] Handle function symbol reference in readonly data section

2024-01-30 Thread H.J. Lu
On Tue, Jan 30, 2024 at 4:51 AM Jakub Jelinek  wrote:
>
> On Mon, Jan 29, 2024 at 06:05:25PM -0800, H.J. Lu wrote:
> > LRA may call force_const_mem on this insn in the function
> >
> > (gdb) call debug_tree (func_decl)
> >   > type  > type  > align:8 warn_if_not_align:0 symtab:0 alias-set -1
> > canonical-type 0x77690f18
> > pointer_to_this >
> > QI
> > size 
> > unit-size 
> > align:8 warn_if_not_align:0 symtab:0 alias-set -1
> > canonical-type 0x727512a0 method basetype  > 0x7264b0a8 function_summary>
> > arg-types  > 0x726887e0>
> > chain  > 0x7264b2a0>
> > chain  > purpose 
> > value 
> > chain  > 0x77690f18 void>>>>>
> > pointer_to_this >
> > addressable asm_written used nothrow public static weak decl_5 QI
> > /export/gnu/import/git/gitlab/x86-gcc/gcc/symbol-summary.h:268:1
> > align:16 warn_if_not_align:0 context  > function_summary> initial  abstract_origin
> > 
> > result  > 0x77690f18 void>
> > ignored VOID
> > /export/gnu/import/git/gitlab/x86-gcc/gcc/symbol-summary.h:269:20
> > align:8 warn_if_not_align:0 context  > 0x72770900 __ct_base >>
> > full-name "function_summary::function_summary(symbol_table*,
> > bool = false) [with T = clone_info]"
> > template-info  > template  > 0x72701540>
> > VOID
> > /export/gnu/import/git/gitlab/x86-gcc/gcc/symbol-summary.h:268:1
> > align:1 warn_if_not_align:0 context  > 0x727010a8 function_summary> result  > __ct >
> > parms  > 0x7768a2d0 1>
> > value  > 0x72646e00 function_summary>
> > length:1
> > elt:0  > 0x726f5c78 T>>>>
> > full-name "template
> > function_summary::function_summary(symbol_table*, bool)">
> > args  > 0x72987930 clone_info>>>
> > use_template=1
> > arguments  > type  > 0x7264b0a8 function_summary>
> > readonly sizes-gimplified public unsigned DI
> > size 
> > unit-size 
> > align:64 warn_if_not_align:0 symtab:0 alias-set -1
> > canonical-type 0x72751348>
> > readonly used unsigned read DI
> > /export/gnu/import/git/gitlab/x86-gcc/gcc/symbol-summary.h:269:20 size
> >  unit-size  > 8>
> > align:64 warn_if_not_align:0 context  > 0x72770900 __ct_base > abstract_origin  > this>
> > (reg/f:DI 117 [ this ]) arg-type 
> > incoming-rtl (reg:DI 5 di [ this ])
> > chain  > 0x7264b2a0>
> > used unsigned DI
> > /export/gnu/import/git/gitlab/x86-gcc/gcc/symbol-summary.h:268:56 size
> >  unit-size  > 8>
> > align:64 warn_if_not_align:0 context  > 0x72770900 __ct_base > abstract_origin  > symtab>
> > (reg/v/f:DI 118 [ symtab ]) arg-type  > 0x7264b2a0>
> > incoming-rtl (reg:DI 4 si [ symtab ]) chain  > 0x72773880 ggc>>>
> > struct-function 0x722cb228 chain  > __ct_comp >>
> > (gdb)
> >
> >  in gcc master branch tree:
> >
> > (gdb) call debug_rtx (curr_insn)
> > (insn 14 128 15 2 (set (reg:V2DI 121 [ _21 ])
> > (vec_concat:V2DI (symbol_ref/i:DI
> > ("_ZN16function_summaryIP10clone_infoE16symtab_insertionEP11cgraph_nodePv")
> > [flags 0x3] )
> > (symbol_ref/i:DI
> > ("_ZN16function_summaryIP10clone_infoE14symtab_removalEP11cgraph_nodePv")
> > [flags 0x3] )))
> > "/export/gnu/import/git/gitlab/x86-gcc/gcc/symbol-summary.h":36:22
> > 7521 {vec_concatv2di}
> >  (expr_list:REG_DEAD (reg/f:DI 123)
> > (expr_list:REG_DEAD (reg/f:DI 122)
> > (expr_list:REG_EQUIV (vec_concat:V2DI (symbol_ref/i:DI
> > ("_ZN16function_summaryIP10clone_infoE16symtab_insertionEP11cgraph_nodePv")
> > [flags 0x3] )
> > (symbol_ref/i:DI
> > ("_ZN16function_summaryIP10clone_infoE14symtab_removalEP11cgraph_nodePv")
> > [flags 0x3] ))
> > (nil)
> > (gdb)
> >
> > The referenced symbol,
> > function_summary::symtab_removal(cgraph_node*, void*),
> > and the referencing function are in different COMDAT groups.
>
> And is the referenced symbol non-public?  If so, how does that work?
>

I didn't check if it is public or private.  It is OK for public, but not OK
for private if they are in different comdat groups.

-- 
H.J.


[PATCH] Handle private COMDAT function symbol reference in readonly data section

2024-01-30 Thread H.J. Lu
For a private COMDAT function symbol reference in readonly data section,
instead of putting it in .data.rel.ro or .rodata.cst section, call
function_rodata_section to get the read-only or relocated read-only
data section associated with the function DECL so that the COMDAT
section will be used for the private COMDAT function symbol.

gcc/

PR rtl-optimization/113617
* varasm.cc (default_elf_select_rtx_section): Call
function_rodata_section to get the read-only or relocated
read-only data section for private COMDAT function symbol
reference.

gcc/testsuite/

PR rtl-optimization/113617
* g++.dg/pr113617-1a.C: New test.
* g++.dg/pr113617-1b.C: Likewise.
---
 gcc/testsuite/g++.dg/pr113617-1a.C | 170 +
 gcc/testsuite/g++.dg/pr113617-1b.C |   8 ++
 gcc/varasm.cc  |  29 +
 3 files changed, 207 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/pr113617-1a.C
 create mode 100644 gcc/testsuite/g++.dg/pr113617-1b.C

diff --git a/gcc/testsuite/g++.dg/pr113617-1a.C 
b/gcc/testsuite/g++.dg/pr113617-1a.C
new file mode 100644
index 000..effd50841c0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr113617-1a.C
@@ -0,0 +1,170 @@
+// { dg-do compile { target fpic } }
+// { dg-require-visibility "" }
+// { dg-options "-O2 -std=c++11 -fPIC -fvisibility=hidden 
-fvisibility-inlines-hidden" }
+
+namespace {
+template  struct integral_constant {
+  static constexpr int value = __v;
+};
+template  using __bool_constant = integral_constant<__v>;
+using true_type = __bool_constant;
+template  struct __conditional {
+  template  using type = _Tp;
+};
+template 
+using __conditional_t = typename __conditional<_Cond>::type<_If, _Else>;
+true_type __trans_tmp_1;
+template  struct remove_cv { using type = _Tp; };
+template 
+struct __decay_selector
+: __conditional_t, _Up> {};
+template  struct decay {
+  using type = typename __decay_selector<_Tp>::type;
+};
+}
+struct vtkCellArray {};
+namespace blah {
+struct _Any_data;
+enum _Manager_operation {};
+template  class function;
+struct _Function_base {
+  using _Manager_type = bool (*)(_Any_data &, const _Any_data &,
+ _Manager_operation);
+  _Manager_type _M_manager;
+};
+template  class _Function_handler;
+template 
+struct _Function_handler<_Res(_ArgTypes...), _Functor> {
+  static bool _M_manager(_Any_data &, const _Any_data &, _Manager_operation) {
+return false;
+  }
+  __attribute__((noipa)) static _Res _M_invoke(const _Any_data &) {}
+};
+template 
+struct function<_Res(_ArgTypes...)> : _Function_base {
+  template 
+  using _Handler = _Function_handler<_Res(), _Functor>;
+  template  function(_Functor) {
+using _My_handler = _Handler<_Functor>;
+_M_invoker = _My_handler::_M_invoke;
+_M_manager = _My_handler::_M_manager;
+  }
+  using _Invoker_type = _Res (*)(const _Any_data &);
+  _Invoker_type _M_invoker;
+};
+template  class _Bind;
+template 
+struct _Bind<_Functor(_Bound_args...)> {};
+template  using __is_socketlike = decltype(__trans_tmp_1);
+template  struct _Bind_helper {
+  typedef _Bind::type(
+  typename decay<_BoundArgs>::type...)>
+  type;
+};
+template 
+__attribute__((noipa)) typename _Bind_helper<__is_socketlike<_Func>::value, 
_Func, _BoundArgs...>::type
+bind(_Func, _BoundArgs...) { return typename 
_Bind_helper<__is_socketlike<_Func>::value, _Func, _BoundArgs...>::type (); }
+template  struct __uniq_ptr_impl {
+  template  struct _Ptr { using type = _Up *; };
+  using pointer = typename _Ptr<_Tp>::type;
+};
+template  struct unique_ptr {
+  using pointer = typename __uniq_ptr_impl<_Tp>::pointer;
+  pointer operator->();
+};
+}
+extern int For_threadNumber;
+namespace vtk {
+namespace detail {
+namespace smp {
+enum BackendType { Sequential, STDThread };
+template  struct vtkSMPToolsImpl {
+  template 
+  __attribute__((noipa)) void For(long long, long long, long long, 
FunctorInternal &) {}
+};
+struct vtkSMPThreadPool {
+  vtkSMPThreadPool(int);
+  void DoJob(blah::function);
+};
+template 
+__attribute__((noipa)) void ExecuteFunctorSTDThread(void *, long long, long 
long, long long) {}
+template <>
+template 
+void vtkSMPToolsImpl::For(long long, long long last, long long 
grain,
+ FunctorInternal ) {
+  vtkSMPThreadPool pool(For_threadNumber);
+  for (;;) {
+auto job = blah::bind(ExecuteFunctorSTDThread, , grain,
+ grain, last);
+pool.DoJob(job);
+  }
+}
+struct vtkSMPToolsAPI {
+  static vtkSMPToolsAPI ();
+  template 
+  void For(long first, long last, long grain, FunctorInternal fi) {
+switch (ActivatedBackend) {
+case Sequential:
+  SequentialBackend->For(first, last, grain, fi);
+case STDThread:
+  STDThreadBackend->For(first, last, grain, fi);
+}
+  }
+  BackendType ActivatedBackend;
+  blah::unique_ptr> SequentialBackend;
+  blah::unique_ptr> STDThreadBackend;
+};
+template  struct 

[PATCH] x86: Limit -mcmodel=large tests to lp64 target

2024-01-29 Thread H.J. Lu <>
-mcmodel=large is only supported for lp64 targets.  Limit -mcmodel=large
tests of libcall-1.c and pr107057.c to lp64 target.

* gcc.target/i386/libcall-1.c: Limit to lp64 target.
* gcc.target/i386/pr107057.c: Likewise.
---
 gcc/testsuite/gcc.target/i386/libcall-1.c | 2 +-
 gcc/testsuite/gcc.target/i386/pr107057.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/libcall-1.c 
b/gcc/testsuite/gcc.target/i386/libcall-1.c
index cb95aca7a29..34a31dd23c6 100644
--- a/gcc/testsuite/gcc.target/i386/libcall-1.c
+++ b/gcc/testsuite/gcc.target/i386/libcall-1.c
@@ -1,7 +1,7 @@
 /* Make sure that external refences for libcalls are generated even for
indirect calls.  */
 
-/* { dg-do compile { target int128 } } */
+/* { dg-do compile { target { int128 && lp64 } } } */
 /* { dg-options "-O2 -mcmodel=large" } */
 /* { dg-final { scan-assembler "globl\t__divti3" } } */
 /* { dg-skip-if "PR90698" { *-*-darwin* } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr107057.c 
b/gcc/testsuite/gcc.target/i386/pr107057.c
index 40b49ac21ec..f4ebc3deef3 100644
--- a/gcc/testsuite/gcc.target/i386/pr107057.c
+++ b/gcc/testsuite/gcc.target/i386/pr107057.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile { target lp64 } } */
 /* { dg-options "-mavx -mcmodel=large -O3" } */
 
 typedef double v2df __attribute__ ((vector_size (16)));
-- 
2.43.0



Re: [PATCH] Handle function symbol reference in readonly data section

2024-01-29 Thread H.J. Lu
On Mon, Jan 29, 2024 at 3:12 PM H.J. Lu  wrote:
>
> On Mon, Jan 29, 2024 at 2:51 PM Jakub Jelinek  wrote:
> >
> > On Mon, Jan 29, 2024 at 11:29:22PM +0100, Jakub Jelinek wrote:
> > > On Mon, Jan 29, 2024 at 11:22:44PM +0100, Jakub Jelinek wrote:
> > > > On Mon, Jan 29, 2024 at 02:01:56PM -0800, H.J. Lu wrote:
> > > > > > A function accesses a function symbol defined in a comdat group.
> > > > > > If the function symbol is public, any comdat definition of the same 
> > > > > > group
> > > > > > signature should provide the function definition.  If the function 
> > > > > > symbol
> > > > > > is private to the comdat group, only functions in the same comdat
> > > > > > group can access the private function symbol.  If a function in a 
> > > > > > different
> > > > > > comdat group accesses a private symbol, it is a compiler bug and
> > > > > > link may catch it like in this case.
> > > > > >
> > > > >
> > > > > My patch simply puts the constant pool of the function symbol 
> > > > > reference
> > > > > in the same comdat group as the function definition.  I believe it is 
> > > > > the
> > > > > right thing to do.
> > > >
> > > > I disagree, I think we should use something like
> > > >   if (current_function_decl)
> > >
> > > Or perhaps && DECL_COMDAT_GROUP (current_function_decl) added here as 
> > > well,
> > > just to make it change things less often.
> > >
> > > > return targetm.asm_out.function_rodata_section 
> > > > (current_function_decl,
> > > > true);
> > > >
> > > > Obviously, for non-reloc or non-pic, we don't want an unconditional
> > > >   if (current_function_decl)
> > > > return targetm.asm_out.function_rodata_section 
> > > > (current_function_decl,
> > > > false);
> > > > that would kill mergeable sections, so perhaps
> > > >   if (current_function_decl
> > > >   && reloc
> > > >   && DECL_COMDAT_GROUP (current_function_decl))
> > > > return targetm.asm_out.function_rodata_section 
> > > > (current_function_decl,
> > > > false);
> >
> > Now, that doesn't actually work, because current_function_decl is always
> > NULL when the constant pool entries are emitted.
> > But basing the output section on what it refers rather than what refers to
> > it seems wrong, plus there is the section anchors support, which treats them
> > yet differently.
> > So, I wonder if force_const_mem shouldn't punt if asked to emit from

LRA may call force_const_mem on this insn in the function

(gdb) call debug_tree (func_decl)
 >
QI
size 
unit-size 
align:8 warn_if_not_align:0 symtab:0 alias-set -1
canonical-type 0x727512a0 method basetype 
arg-types 
chain 
chain 
value 
chain >>>>
pointer_to_this >
addressable asm_written used nothrow public static weak decl_5 QI
/export/gnu/import/git/gitlab/x86-gcc/gcc/symbol-summary.h:268:1
align:16 warn_if_not_align:0 context  initial  abstract_origin

result 
ignored VOID
/export/gnu/import/git/gitlab/x86-gcc/gcc/symbol-summary.h:269:20
align:8 warn_if_not_align:0 context >
full-name "function_summary::function_summary(symbol_table*,
bool = false) [with T = clone_info]"
template-info 
VOID
/export/gnu/import/git/gitlab/x86-gcc/gcc/symbol-summary.h:268:1
align:1 warn_if_not_align:0 context  result 
parms 
value 
length:1
elt:0 >>>
full-name "template
function_summary::function_summary(symbol_table*, bool)">
args >>
use_template=1
arguments 
readonly sizes-gimplified public unsigned DI
size 
unit-size 
align:64 warn_if_not_align:0 symtab:0 alias-set -1
canonical-type 0x72751348>
readonly used unsigned read DI
/export/gnu/import/git/gitlab/x86-gcc/gcc/symbol-summary.h:269:20 size
 unit-size 
align:64 warn_if_not_align:0 context  abstract_origin 
(reg/f:DI 117 [ this ]) arg-type 
incoming-rtl (reg:DI 5 di [ this ])
chain 
used unsigned DI
/expor

Re: [PATCH] Handle function symbol reference in readonly data section

2024-01-29 Thread H.J. Lu
On Mon, Jan 29, 2024 at 2:51 PM Jakub Jelinek  wrote:
>
> On Mon, Jan 29, 2024 at 11:29:22PM +0100, Jakub Jelinek wrote:
> > On Mon, Jan 29, 2024 at 11:22:44PM +0100, Jakub Jelinek wrote:
> > > On Mon, Jan 29, 2024 at 02:01:56PM -0800, H.J. Lu wrote:
> > > > > A function accesses a function symbol defined in a comdat group.
> > > > > If the function symbol is public, any comdat definition of the same 
> > > > > group
> > > > > signature should provide the function definition.  If the function 
> > > > > symbol
> > > > > is private to the comdat group, only functions in the same comdat
> > > > > group can access the private function symbol.  If a function in a 
> > > > > different
> > > > > comdat group accesses a private symbol, it is a compiler bug and
> > > > > link may catch it like in this case.
> > > > >
> > > >
> > > > My patch simply puts the constant pool of the function symbol reference
> > > > in the same comdat group as the function definition.  I believe it is 
> > > > the
> > > > right thing to do.
> > >
> > > I disagree, I think we should use something like
> > >   if (current_function_decl)
> >
> > Or perhaps && DECL_COMDAT_GROUP (current_function_decl) added here as well,
> > just to make it change things less often.
> >
> > > return targetm.asm_out.function_rodata_section (current_function_decl,
> > > true);
> > >
> > > Obviously, for non-reloc or non-pic, we don't want an unconditional
> > >   if (current_function_decl)
> > > return targetm.asm_out.function_rodata_section (current_function_decl,
> > > false);
> > > that would kill mergeable sections, so perhaps
> > >   if (current_function_decl
> > >   && reloc
> > >   && DECL_COMDAT_GROUP (current_function_decl))
> > > return targetm.asm_out.function_rodata_section (current_function_decl,
> > > false);
>
> Now, that doesn't actually work, because current_function_decl is always
> NULL when the constant pool entries are emitted.
> But basing the output section on what it refers rather than what refers to
> it seems wrong, plus there is the section anchors support, which treats them
> yet differently.
> So, I wonder if force_const_mem shouldn't punt if asked to emit from
> DECL_COMDAT_GROUP (current_function_decl) a SYMBOL_REF (or CONST PLUS
> SYMBOL_REF ...) with the same DECL_COMDAT_GROUP with a private symbol,
> or shouldn't punt unless using a per-function (i.e. non-shared) constant
> pool, or force a per-function constant pool in that case somehow.
>

Here is the patch to only call function_rodata_section for COMDAT
function symbol reference:

https://patchwork.sourceware.org/project/gcc/list/?series=30329
-- 
H.J.


[PATCH] Handle COMDAT function symbol reference in readonly data section

2024-01-29 Thread H.J. Lu
For a COMDAT function symbol reference in readonly data section,
instead of putting it in .data.rel.ro or .rodata.cst section, call
function_rodata_section to get the read-only or relocated read-only
data section associated with the function DECL so that the COMDAT
section will be used for the COMDAT function symbol.

gcc/

PR rtl-optimization/113617
* varasm.cc (default_elf_select_rtx_section): Call
function_rodata_section to get the read-only or relocated
read-only data section for COMDAT function symbol reference.

gcc/testsuite/

PR rtl-optimization/113617
* g++.dg/pr113617-1a.C: New test.
* g++.dg/pr113617-1b.C: Likewise.
---
 gcc/testsuite/g++.dg/pr113617-1a.C | 170 +
 gcc/testsuite/g++.dg/pr113617-1b.C |   8 ++
 gcc/varasm.cc  |  28 +
 3 files changed, 206 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/pr113617-1a.C
 create mode 100644 gcc/testsuite/g++.dg/pr113617-1b.C

diff --git a/gcc/testsuite/g++.dg/pr113617-1a.C 
b/gcc/testsuite/g++.dg/pr113617-1a.C
new file mode 100644
index 000..effd50841c0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr113617-1a.C
@@ -0,0 +1,170 @@
+// { dg-do compile { target fpic } }
+// { dg-require-visibility "" }
+// { dg-options "-O2 -std=c++11 -fPIC -fvisibility=hidden 
-fvisibility-inlines-hidden" }
+
+namespace {
+template  struct integral_constant {
+  static constexpr int value = __v;
+};
+template  using __bool_constant = integral_constant<__v>;
+using true_type = __bool_constant;
+template  struct __conditional {
+  template  using type = _Tp;
+};
+template 
+using __conditional_t = typename __conditional<_Cond>::type<_If, _Else>;
+true_type __trans_tmp_1;
+template  struct remove_cv { using type = _Tp; };
+template 
+struct __decay_selector
+: __conditional_t, _Up> {};
+template  struct decay {
+  using type = typename __decay_selector<_Tp>::type;
+};
+}
+struct vtkCellArray {};
+namespace blah {
+struct _Any_data;
+enum _Manager_operation {};
+template  class function;
+struct _Function_base {
+  using _Manager_type = bool (*)(_Any_data &, const _Any_data &,
+ _Manager_operation);
+  _Manager_type _M_manager;
+};
+template  class _Function_handler;
+template 
+struct _Function_handler<_Res(_ArgTypes...), _Functor> {
+  static bool _M_manager(_Any_data &, const _Any_data &, _Manager_operation) {
+return false;
+  }
+  __attribute__((noipa)) static _Res _M_invoke(const _Any_data &) {}
+};
+template 
+struct function<_Res(_ArgTypes...)> : _Function_base {
+  template 
+  using _Handler = _Function_handler<_Res(), _Functor>;
+  template  function(_Functor) {
+using _My_handler = _Handler<_Functor>;
+_M_invoker = _My_handler::_M_invoke;
+_M_manager = _My_handler::_M_manager;
+  }
+  using _Invoker_type = _Res (*)(const _Any_data &);
+  _Invoker_type _M_invoker;
+};
+template  class _Bind;
+template 
+struct _Bind<_Functor(_Bound_args...)> {};
+template  using __is_socketlike = decltype(__trans_tmp_1);
+template  struct _Bind_helper {
+  typedef _Bind::type(
+  typename decay<_BoundArgs>::type...)>
+  type;
+};
+template 
+__attribute__((noipa)) typename _Bind_helper<__is_socketlike<_Func>::value, 
_Func, _BoundArgs...>::type
+bind(_Func, _BoundArgs...) { return typename 
_Bind_helper<__is_socketlike<_Func>::value, _Func, _BoundArgs...>::type (); }
+template  struct __uniq_ptr_impl {
+  template  struct _Ptr { using type = _Up *; };
+  using pointer = typename _Ptr<_Tp>::type;
+};
+template  struct unique_ptr {
+  using pointer = typename __uniq_ptr_impl<_Tp>::pointer;
+  pointer operator->();
+};
+}
+extern int For_threadNumber;
+namespace vtk {
+namespace detail {
+namespace smp {
+enum BackendType { Sequential, STDThread };
+template  struct vtkSMPToolsImpl {
+  template 
+  __attribute__((noipa)) void For(long long, long long, long long, 
FunctorInternal &) {}
+};
+struct vtkSMPThreadPool {
+  vtkSMPThreadPool(int);
+  void DoJob(blah::function);
+};
+template 
+__attribute__((noipa)) void ExecuteFunctorSTDThread(void *, long long, long 
long, long long) {}
+template <>
+template 
+void vtkSMPToolsImpl::For(long long, long long last, long long 
grain,
+ FunctorInternal ) {
+  vtkSMPThreadPool pool(For_threadNumber);
+  for (;;) {
+auto job = blah::bind(ExecuteFunctorSTDThread, , grain,
+ grain, last);
+pool.DoJob(job);
+  }
+}
+struct vtkSMPToolsAPI {
+  static vtkSMPToolsAPI ();
+  template 
+  void For(long first, long last, long grain, FunctorInternal fi) {
+switch (ActivatedBackend) {
+case Sequential:
+  SequentialBackend->For(first, last, grain, fi);
+case STDThread:
+  STDThreadBackend->For(first, last, grain, fi);
+}
+  }
+  BackendType ActivatedBackend;
+  blah::unique_ptr> SequentialBackend;
+  blah::unique_ptr> STDThreadBackend;
+};
+template  struct vtkSMPTools_FunctorInternal;

Re: [PATCH] Handle function symbol reference in readonly data section

2024-01-29 Thread H.J. Lu
On Mon, Jan 29, 2024 at 2:01 PM H.J. Lu  wrote:
>
> On Mon, Jan 29, 2024 at 1:42 PM H.J. Lu  wrote:
> >
> > On Mon, Jan 29, 2024 at 1:22 PM H.J. Lu  wrote:
> > >
> > > On Mon, Jan 29, 2024 at 1:00 PM H.J. Lu  wrote:
> > > >
> > > > On Mon, Jan 29, 2024 at 9:34 AM H.J. Lu  wrote:
> > > > >
> > > > > On Mon, Jan 29, 2024 at 9:00 AM Jakub Jelinek  
> > > > > wrote:
> > > > > >
> > > > > > On Mon, Jan 29, 2024 at 08:45:45AM -0800, H.J. Lu wrote:
> > > > > > > In this case, these are internal to the same comdat group:
> > > > > >
> > > > > > But that is only by accident, no?
> > > > >
> > > > > This may be by luck.  I don't know if gcc checks it when
> > > > > generating such references.
> > > > >
> > > > > > I mean, if you need to refer to such a symbol from
> > > > > > non-comdat function or comdat function in a different comdat group
> > > > > > and RA decides it wants the constant in memory rather than code?
> > > > > > Your patch uses
> > > > > >   if (decl)
> > > > > > return targetm.asm_out.function_rodata_section (decl, ???);
> > > > > > and default_function_rodata_section only looks at comdat group of 
> > > > > > the
> > > > > > passed in decl.  But the decl here is what the constant refers to, 
> > > > > > not
> > > > > > who is referring it.
> > > >
> > > > LRA puts a function symbol reference in a constant pool via
> > > >
> > > > #0  force_const_mem (in_mode=E_DImode, x=0x7fffe9e7e000)
> > > > at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/varasm.cc:3951
> > > > #1  0x01833870 in curr_insn_transform (check_only_p=false)
> > > > at 
> > > > /export/gnu/import/git/gitlab/x86-gcc-test/gcc/lra-constraints.cc:4473
> > > > #2  0x01836eae in lra_constraints (first_p=true)
> > > > at 
> > > > /export/gnu/import/git/gitlab/x86-gcc-test/gcc/lra-constraints.cc:5462
> > > > #3  0x0181fcf1 in lra (f=0x0, verbose=5)
> > > > at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/lra.cc:2442
> > > > #4  0x017c8828 in do_reload ()
> > > > at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/ira.cc:5973
> > > > #5  0x017c8d25 in (anonymous namespace)::pass_reload::execute (
> > > > this=0x48d8730)
> > > > at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/ira.cc:6161
> > > >
> > > > for
> > > >
> > > > (gdb) call debug_rtx (curr_insn)
> > > > (insn 12 57 15 2 (set (reg:V2DI 101 [ _16 ])
> > > > (vec_concat:V2DI (symbol_ref:DI
> > > > ("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE")
> > > > [flags 0x3] )
> > > > (reg/f:DI 109))) 7521 {vec_concatv2di}
> > > >  (expr_list:REG_DEAD (reg/f:DI 110)
> > > > (expr_list:REG_DEAD (reg/f:DI 109)
> > > > (expr_list:REG_EQUIV (vec_concat:V2DI (symbol_ref:DI
> > > > ("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE")
> > > > [flags 0x3] )
> > > > (symbol_ref:DI
> > > > ("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx9_M_invokeERKNS_9_Any_dataE")
> > > > [flags 0x3] ))
> > > > (nil)
> > > > (gdb)
> > > >
> > > > CONST_POOL_OK_P doesn't check if it is safe to do so for function
> > > > symbols.   Here is a patch to add the check.
> > > >
> > > > --
> > > > H.J.
> > >
> > > On the other hand, does C++ even allow access to non-public members
> > > from different classes?  So my patch should be safe and linker should
> > > catch all invalid comdat usages like this bug.
> >
> > A function accesses a function symbol defined in a comdat group.
> > If the function symbol is public, any comdat definition of the same group
> > signature should provide the function definition.  If the function symbol
> > is private to the comdat group, only functions in the same comdat
> > group can access the private function symbol.  If a function in a different
> > comdat group accesses a private symbol, it is a compiler bug and
> > link may catch it like in this case.
> >
>
> My patch simply puts the constant pool of the function symbol reference
> in the same comdat group as the function definition.  I believe it is the
> right thing to do.

If we are concerned that not all comdat definitions provide such a constant
pool, we can change LRA to only allow such a constant pool when it is safe
to do so.

-- 
H.J.


Re: [PATCH] Handle function symbol reference in readonly data section

2024-01-29 Thread H.J. Lu
On Mon, Jan 29, 2024 at 1:42 PM H.J. Lu  wrote:
>
> On Mon, Jan 29, 2024 at 1:22 PM H.J. Lu  wrote:
> >
> > On Mon, Jan 29, 2024 at 1:00 PM H.J. Lu  wrote:
> > >
> > > On Mon, Jan 29, 2024 at 9:34 AM H.J. Lu  wrote:
> > > >
> > > > On Mon, Jan 29, 2024 at 9:00 AM Jakub Jelinek  wrote:
> > > > >
> > > > > On Mon, Jan 29, 2024 at 08:45:45AM -0800, H.J. Lu wrote:
> > > > > > In this case, these are internal to the same comdat group:
> > > > >
> > > > > But that is only by accident, no?
> > > >
> > > > This may be by luck.  I don't know if gcc checks it when
> > > > generating such references.
> > > >
> > > > > I mean, if you need to refer to such a symbol from
> > > > > non-comdat function or comdat function in a different comdat group
> > > > > and RA decides it wants the constant in memory rather than code?
> > > > > Your patch uses
> > > > >   if (decl)
> > > > > return targetm.asm_out.function_rodata_section (decl, ???);
> > > > > and default_function_rodata_section only looks at comdat group of the
> > > > > passed in decl.  But the decl here is what the constant refers to, not
> > > > > who is referring it.
> > >
> > > LRA puts a function symbol reference in a constant pool via
> > >
> > > #0  force_const_mem (in_mode=E_DImode, x=0x7fffe9e7e000)
> > > at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/varasm.cc:3951
> > > #1  0x01833870 in curr_insn_transform (check_only_p=false)
> > > at 
> > > /export/gnu/import/git/gitlab/x86-gcc-test/gcc/lra-constraints.cc:4473
> > > #2  0x01836eae in lra_constraints (first_p=true)
> > > at 
> > > /export/gnu/import/git/gitlab/x86-gcc-test/gcc/lra-constraints.cc:5462
> > > #3  0x0181fcf1 in lra (f=0x0, verbose=5)
> > > at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/lra.cc:2442
> > > #4  0x017c8828 in do_reload ()
> > > at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/ira.cc:5973
> > > #5  0x017c8d25 in (anonymous namespace)::pass_reload::execute (
> > > this=0x48d8730)
> > > at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/ira.cc:6161
> > >
> > > for
> > >
> > > (gdb) call debug_rtx (curr_insn)
> > > (insn 12 57 15 2 (set (reg:V2DI 101 [ _16 ])
> > > (vec_concat:V2DI (symbol_ref:DI
> > > ("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE")
> > > [flags 0x3] )
> > > (reg/f:DI 109))) 7521 {vec_concatv2di}
> > >  (expr_list:REG_DEAD (reg/f:DI 110)
> > > (expr_list:REG_DEAD (reg/f:DI 109)
> > > (expr_list:REG_EQUIV (vec_concat:V2DI (symbol_ref:DI
> > > ("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE")
> > > [flags 0x3] )
> > > (symbol_ref:DI
> > > ("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx9_M_invokeERKNS_9_Any_dataE")
> > > [flags 0x3] ))
> > > (nil)
> > > (gdb)
> > >
> > > CONST_POOL_OK_P doesn't check if it is safe to do so for function
> > > symbols.   Here is a patch to add the check.
> > >
> > > --
> > > H.J.
> >
> > On the other hand, does C++ even allow access to non-public members
> > from different classes?  So my patch should be safe and linker should
> > catch all invalid comdat usages like this bug.
>
> A function accesses a function symbol defined in a comdat group.
> If the function symbol is public, any comdat definition of the same group
> signature should provide the function definition.  If the function symbol
> is private to the comdat group, only functions in the same comdat
> group can access the private function symbol.  If a function in a different
> comdat group accesses a private symbol, it is a compiler bug and
> link may catch it like in this case.
>

My patch simply puts the constant pool of the function symbol reference
in the same comdat group as the function definition.  I believe it is the
right thing to do.

-- 
H.J.


[PATCH v3] x86: Generate REG_CFA_UNDEFINED for unsaved callee-saved registers

2024-01-29 Thread H.J. Lu
Changes in v3:

1. Fix a typo in REG_CFA_UNDEFINED note comment.
2. Replace assemble with compile in tests and remove -save-temps since
".cfi_undefined regno" is generated now.

Changes in v2:

1. Add REG_CFA_UNDEFINED notes to a frame-related instruction in prologue.
2. Add comments for add_cfi_undefined.

---
Attach REG_CFA_UNDEFINED notes for unsaved callee-saved registers which
have been used in the function to a frame-related instruction in prologue.

gcc/

PR target/38534
* dwarf2cfi.cc (add_cfi_undefined): New.
(dwarf2out_frame_debug_cfa_undefined): Likewise.
(dwarf2out_frame_debug): Handle REG_CFA_UNDEFINED.
* reg-notes.def (REG_CFA_UNDEFINED): New.
* config/i386/i386.cc (ix86_expand_prologue): Attach
REG_CFA_UNDEFINED notes for unsaved callee-saved registers
which have been used in the function to a frame-related
instruction in prologue.

gcc/testsuite/

PR target/38534
* gcc.target/i386/no-callee-saved-19.c: New test.
* gcc.target/i386/no-callee-saved-20.c: Likewise.
* gcc.target/i386/pr38534-7.c: Likewise.
* gcc.target/i386/pr38534-8.c: Likewise.
---
 gcc/config/i386/i386.cc   | 29 ++
 gcc/dwarf2cfi.cc  | 58 +++
 gcc/reg-notes.def |  4 ++
 .../gcc.target/i386/no-callee-saved-19.c  | 17 ++
 .../gcc.target/i386/no-callee-saved-20.c  | 12 
 gcc/testsuite/gcc.target/i386/pr38534-7.c | 18 ++
 gcc/testsuite/gcc.target/i386/pr38534-8.c | 13 +
 7 files changed, 151 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/no-callee-saved-19.c
 create mode 100644 gcc/testsuite/gcc.target/i386/no-callee-saved-20.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-8.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b3e7c74846e..4b7026f3ab4 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -9304,6 +9304,35 @@ ix86_expand_prologue (void)
  combined with prologue modifications.  */
   if (TARGET_SEH)
 emit_insn (gen_prologue_use (stack_pointer_rtx));
+
+  if (cfun->machine->call_saved_registers
+  != TYPE_NO_CALLEE_SAVED_REGISTERS)
+return;
+
+  /* Attach REG_CFA_UNDEFINED notes for unsaved callee-saved registers
+ which have been used in the function to a frame-related instruction
+ in prologue.  */
+
+  insn = nullptr;
+  rtx_insn *next;
+  for (next = get_insns (); next; next = NEXT_INSN (next))
+{
+  if (!RTX_FRAME_RELATED_P (next))
+   continue;
+  insn = next;
+}
+
+  if (!insn)
+return;
+
+  for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+if (df_regs_ever_live_p (i)
+   && !fixed_regs[i]
+   && !call_used_regs[i]
+   && !STACK_REGNO_P (i)
+   && !MMX_REGNO_P (i))
+  add_reg_note (insn, REG_CFA_UNDEFINED,
+   gen_rtx_REG (word_mode, i));
 }
 
 /* Emit code to restore REG using a POP or POPP insn.  */
diff --git a/gcc/dwarf2cfi.cc b/gcc/dwarf2cfi.cc
index 1231b5bb5f0..9ba0ac07ee7 100644
--- a/gcc/dwarf2cfi.cc
+++ b/gcc/dwarf2cfi.cc
@@ -517,6 +517,20 @@ add_cfi_restore (unsigned reg)
   add_cfi (cfi);
 }
 
+/* Add DW_CFA_undefined either to the current insn stream or to a vector,
+   or both.  */
+
+static void
+add_cfi_undefined (unsigned reg)
+{
+  dw_cfi_ref cfi = new_cfi ();
+
+  cfi->dw_cfi_opc = DW_CFA_undefined;
+  cfi->dw_cfi_oprnd1.dw_cfi_reg_num = reg;
+
+  add_cfi (cfi);
+}
+
 /* Perform ROW->REG_SAVE[COLUMN] = CFI.  CFI may be null, indicating
that the register column is no longer saved.  */
 
@@ -1532,6 +1546,37 @@ dwarf2out_frame_debug_cfa_restore (rtx reg, bool 
emit_cfi)
 }
 }
 
+/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_UNDEFINED
+   note.  */
+
+static void
+dwarf2out_frame_debug_cfa_undefined (rtx reg)
+{
+  gcc_assert (REG_P (reg));
+
+  rtx span = targetm.dwarf_register_span (reg);
+  if (!span)
+{
+  unsigned int regno = dwf_regno (reg);
+  add_cfi_undefined (regno);
+}
+  else
+{
+  /* We have a PARALLEL describing where the contents of REG live.
+Restore the register for each piece of the PARALLEL.  */
+  gcc_assert (GET_CODE (span) == PARALLEL);
+
+  const int par_len = XVECLEN (span, 0);
+  for (int par_index = 0; par_index < par_len; par_index++)
+   {
+ reg = XVECEXP (span, 0, par_index);
+ gcc_assert (REG_P (reg));
+ unsigned int regno = dwf_regno (reg);
+ add_cfi_undefined (regno);
+   }
+}
+}
+
 /* A subroutine of dwarf2out_frame_debug, process a REG_CFA_WINDOW_SAVE.
 
??? Perhaps we should note in the CIE where windows are saved (instead
@@ -2326,6 +2371,19 @@ dwarf2out_frame_debug (rtx_insn *insn)
handled_one = true;
break;
 
+  case REG_CFA_UNDEFINED:
+   n = XEXP (note, 0);
+   if (n 

Re: [PATCH] Handle function symbol reference in readonly data section

2024-01-29 Thread H.J. Lu
On Mon, Jan 29, 2024 at 1:22 PM H.J. Lu  wrote:
>
> On Mon, Jan 29, 2024 at 1:00 PM H.J. Lu  wrote:
> >
> > On Mon, Jan 29, 2024 at 9:34 AM H.J. Lu  wrote:
> > >
> > > On Mon, Jan 29, 2024 at 9:00 AM Jakub Jelinek  wrote:
> > > >
> > > > On Mon, Jan 29, 2024 at 08:45:45AM -0800, H.J. Lu wrote:
> > > > > In this case, these are internal to the same comdat group:
> > > >
> > > > But that is only by accident, no?
> > >
> > > This may be by luck.  I don't know if gcc checks it when
> > > generating such references.
> > >
> > > > I mean, if you need to refer to such a symbol from
> > > > non-comdat function or comdat function in a different comdat group
> > > > and RA decides it wants the constant in memory rather than code?
> > > > Your patch uses
> > > >   if (decl)
> > > > return targetm.asm_out.function_rodata_section (decl, ???);
> > > > and default_function_rodata_section only looks at comdat group of the
> > > > passed in decl.  But the decl here is what the constant refers to, not
> > > > who is referring it.
> >
> > LRA puts a function symbol reference in a constant pool via
> >
> > #0  force_const_mem (in_mode=E_DImode, x=0x7fffe9e7e000)
> > at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/varasm.cc:3951
> > #1  0x01833870 in curr_insn_transform (check_only_p=false)
> > at 
> > /export/gnu/import/git/gitlab/x86-gcc-test/gcc/lra-constraints.cc:4473
> > #2  0x01836eae in lra_constraints (first_p=true)
> > at 
> > /export/gnu/import/git/gitlab/x86-gcc-test/gcc/lra-constraints.cc:5462
> > #3  0x0181fcf1 in lra (f=0x0, verbose=5)
> > at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/lra.cc:2442
> > #4  0x017c8828 in do_reload ()
> > at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/ira.cc:5973
> > #5  0x017c8d25 in (anonymous namespace)::pass_reload::execute (
> > this=0x48d8730)
> > at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/ira.cc:6161
> >
> > for
> >
> > (gdb) call debug_rtx (curr_insn)
> > (insn 12 57 15 2 (set (reg:V2DI 101 [ _16 ])
> > (vec_concat:V2DI (symbol_ref:DI
> > ("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE")
> > [flags 0x3] )
> > (reg/f:DI 109))) 7521 {vec_concatv2di}
> >  (expr_list:REG_DEAD (reg/f:DI 110)
> > (expr_list:REG_DEAD (reg/f:DI 109)
> > (expr_list:REG_EQUIV (vec_concat:V2DI (symbol_ref:DI
> > ("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE")
> > [flags 0x3] )
> > (symbol_ref:DI
> > ("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx9_M_invokeERKNS_9_Any_dataE")
> > [flags 0x3] ))
> > (nil)
> > (gdb)
> >
> > CONST_POOL_OK_P doesn't check if it is safe to do so for function
> > symbols.   Here is a patch to add the check.
> >
> > --
> > H.J.
>
> On the other hand, does C++ even allow access to non-public members
> from different classes?  So my patch should be safe and linker should
> catch all invalid comdat usages like this bug.

A function accesses a function symbol defined in a comdat group.
If the function symbol is public, any comdat definition of the same group
signature should provide the function definition.  If the function symbol
is private to the comdat group, only functions in the same comdat
group can access the private function symbol.  If a function in a different
comdat group accesses a private symbol, it is a compiler bug and
the linker may catch it like in this case.

-- 
H.J.


Re: [PATCH] Handle function symbol reference in readonly data section

2024-01-29 Thread H.J. Lu
On Mon, Jan 29, 2024 at 1:00 PM H.J. Lu  wrote:
>
> On Mon, Jan 29, 2024 at 9:34 AM H.J. Lu  wrote:
> >
> > On Mon, Jan 29, 2024 at 9:00 AM Jakub Jelinek  wrote:
> > >
> > > On Mon, Jan 29, 2024 at 08:45:45AM -0800, H.J. Lu wrote:
> > > > In this case, these are internal to the same comdat group:
> > >
> > > But that is only by accident, no?
> >
> > This may be by luck.  I don't know if gcc checks it when
> > generating such references.
> >
> > > I mean, if you need to refer to such a symbol from
> > > non-comdat function or comdat function in a different comdat group
> > > and RA decides it wants the constant in memory rather than code?
> > > Your patch uses
> > >   if (decl)
> > > return targetm.asm_out.function_rodata_section (decl, ???);
> > > and default_function_rodata_section only looks at comdat group of the
> > > passed in decl.  But the decl here is what the constant refers to, not
> > > who is referring it.
>
> LRA puts a function symbol reference in a constant pool via
>
> #0  force_const_mem (in_mode=E_DImode, x=0x7fffe9e7e000)
> at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/varasm.cc:3951
> #1  0x01833870 in curr_insn_transform (check_only_p=false)
> at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/lra-constraints.cc:4473
> #2  0x01836eae in lra_constraints (first_p=true)
> at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/lra-constraints.cc:5462
> #3  0x0181fcf1 in lra (f=0x0, verbose=5)
> at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/lra.cc:2442
> #4  0x017c8828 in do_reload ()
> at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/ira.cc:5973
> #5  0x017c8d25 in (anonymous namespace)::pass_reload::execute (
> this=0x48d8730)
> at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/ira.cc:6161
>
> for
>
> (gdb) call debug_rtx (curr_insn)
> (insn 12 57 15 2 (set (reg:V2DI 101 [ _16 ])
> (vec_concat:V2DI (symbol_ref:DI
> ("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE")
> [flags 0x3] )
> (reg/f:DI 109))) 7521 {vec_concatv2di}
>  (expr_list:REG_DEAD (reg/f:DI 110)
> (expr_list:REG_DEAD (reg/f:DI 109)
> (expr_list:REG_EQUIV (vec_concat:V2DI (symbol_ref:DI
> ("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE")
> [flags 0x3] )
> (symbol_ref:DI
> ("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx9_M_invokeERKNS_9_Any_dataE")
> [flags 0x3] ))
> (nil)
> (gdb)
>
> CONST_POOL_OK_P doesn't check if it is safe to do so for function
> symbols.   Here is a patch to add the check.
>
> --
> H.J.

On the other hand, does C++ even allow access to non-public members
from different classes?  So my patch should be safe and the linker should
catch all invalid comdat usages like this bug.

-- 
H.J.


Re: [PATCH] Handle function symbol reference in readonly data section

2024-01-29 Thread H.J. Lu
On Mon, Jan 29, 2024 at 9:34 AM H.J. Lu  wrote:
>
> On Mon, Jan 29, 2024 at 9:00 AM Jakub Jelinek  wrote:
> >
> > On Mon, Jan 29, 2024 at 08:45:45AM -0800, H.J. Lu wrote:
> > > In this case, these are internal to the same comdat group:
> >
> > But that is only by accident, no?
>
> This may be by luck.  I don't know if gcc checks it when
> generating such references.
>
> > I mean, if you need to refer to such a symbol from
> > non-comdat function or comdat function in a different comdat group
> > and RA decides it wants the constant in memory rather than code?
> > Your patch uses
> >   if (decl)
> > return targetm.asm_out.function_rodata_section (decl, ???);
> > and default_function_rodata_section only looks at comdat group of the
> > passed in decl.  But the decl here is what the constant refers to, not
> > who is referring it.

LRA puts a function symbol reference in a constant pool via

#0  force_const_mem (in_mode=E_DImode, x=0x7fffe9e7e000)
at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/varasm.cc:3951
#1  0x01833870 in curr_insn_transform (check_only_p=false)
at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/lra-constraints.cc:4473
#2  0x01836eae in lra_constraints (first_p=true)
at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/lra-constraints.cc:5462
#3  0x0181fcf1 in lra (f=0x0, verbose=5)
at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/lra.cc:2442
#4  0x017c8828 in do_reload ()
at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/ira.cc:5973
#5  0x017c8d25 in (anonymous namespace)::pass_reload::execute (
this=0x48d8730)
at /export/gnu/import/git/gitlab/x86-gcc-test/gcc/ira.cc:6161

for

(gdb) call debug_rtx (curr_insn)
(insn 12 57 15 2 (set (reg:V2DI 101 [ _16 ])
(vec_concat:V2DI (symbol_ref:DI
("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE")
[flags 0x3] )
(reg/f:DI 109))) 7521 {vec_concatv2di}
 (expr_list:REG_DEAD (reg/f:DI 110)
(expr_list:REG_DEAD (reg/f:DI 109)
(expr_list:REG_EQUIV (vec_concat:V2DI (symbol_ref:DI
("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE")
[flags 0x3] )
(symbol_ref:DI
("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx9_M_invokeERKNS_9_Any_dataE")
[flags 0x3] ))
(nil)
(gdb)

CONST_POOL_OK_P doesn't check if it is safe to do so for function
symbols.   Here is a patch to add the check.

-- 
H.J.
From 1947920740e48cdc8076299f8cc58e797ec39a7c Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Mon, 29 Jan 2024 12:53:32 -0800
Subject: [PATCH] lra: Add const_pool_reference_ok

LRA may put a function symbol reference in

(gdb) call debug_rtx (curr_insn)
(insn 12 57 15 2 (set (reg:V2DI 101 [ _16 ])
(vec_concat:V2DI (symbol_ref:DI ("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE") [flags 0x3] )
(reg/f:DI 109))) 7521 {vec_concatv2di}
 (expr_list:REG_DEAD (reg/f:DI 110)
(expr_list:REG_DEAD (reg/f:DI 109)
(expr_list:REG_EQUIV (vec_concat:V2DI (symbol_ref:DI ("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE") [flags 0x3] )
(symbol_ref:DI ("_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx9_M_invokeERKNS_9_Any_dataE") [flags 0x3] ))
(nil)
(gdb)

in the constant pool.  But this isn't safe when the referenced function
symbol isn't public and is in a different COMDAT group from the function
that contains the current instruction.

Add const_pool_reference_ok to check if a function symbol can be forced
into the constant pool.

	PR rtl-optimization/113617
	* lra-constraints.cc (const_pool_reference_ok): New.
	(CONST_POOL_OK_P): Use.
---
 gcc/lra-constraints.cc | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 0ae81c1ff9c..59e6944c245 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -925,6 +925,35 @@ operands_match_p (rtx x, rtx y, int y_hard_regno)
   return true;
 }
 
+/* Return true if

[PATCH v2] x86: Generate REG_CFA_UNDEFINED for unsaved callee-saved registers

2024-01-29 Thread H.J. Lu
Changes in v2:

1. Add REG_CFA_UNDEFINED notes to a frame-related instruction in prologue.
2. Add comments for add_cfi_undefined.

---
Attach REG_CFA_UNDEFINED notes for unsaved callee-saved registers which
have been used in the function to a frame-related instruction in prologue.

gcc/

PR target/38534
* dwarf2cfi.cc (add_cfi_undefined): New.
(dwarf2out_frame_debug_cfa_undefined): Likewise.
(dwarf2out_frame_debug): Handle REG_CFA_UNDEFINED.
* reg-notes.def (REG_CFA_UNDEFINED): New.
* config/i386/i386.cc (ix86_expand_prologue): Attach
REG_CFA_UNDEFINED notes for unsaved callee-saved registers
which have been used in the function to a frame-related
instruction in prologue.

gcc/testsuite/

PR target/38534
* gcc.target/i386/no-callee-saved-19.c: New test.
* gcc.target/i386/no-callee-saved-20.c: Likewise.
* gcc.target/i386/pr38534-7.c: Likewise.
* gcc.target/i386/pr38534-8.c: Likewise.
---
 gcc/config/i386/i386.cc   | 29 ++
 gcc/dwarf2cfi.cc  | 58 +++
 gcc/reg-notes.def |  4 ++
 .../gcc.target/i386/no-callee-saved-19.c  | 17 ++
 .../gcc.target/i386/no-callee-saved-20.c  | 12 
 gcc/testsuite/gcc.target/i386/pr38534-7.c | 18 ++
 gcc/testsuite/gcc.target/i386/pr38534-8.c | 13 +
 7 files changed, 151 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/no-callee-saved-19.c
 create mode 100644 gcc/testsuite/gcc.target/i386/no-callee-saved-20.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-8.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b3e7c74846e..4b7026f3ab4 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -9304,6 +9304,35 @@ ix86_expand_prologue (void)
  combined with prologue modifications.  */
   if (TARGET_SEH)
 emit_insn (gen_prologue_use (stack_pointer_rtx));
+
+  if (cfun->machine->call_saved_registers
+  != TYPE_NO_CALLEE_SAVED_REGISTERS)
+return;
+
+  /* Attach REG_CFA_UNDEFINED notes for unsaved callee-saved registers
+ which have been used in the function to a frame-related instruction
+ in prologue.  */
+
+  insn = nullptr;
+  rtx_insn *next;
+  for (next = get_insns (); next; next = NEXT_INSN (next))
+{
+  if (!RTX_FRAME_RELATED_P (next))
+   continue;
+  insn = next;
+}
+
+  if (!insn)
+return;
+
+  for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+if (df_regs_ever_live_p (i)
+   && !fixed_regs[i]
+   && !call_used_regs[i]
+   && !STACK_REGNO_P (i)
+   && !MMX_REGNO_P (i))
+  add_reg_note (insn, REG_CFA_UNDEFINED,
+   gen_rtx_REG (word_mode, i));
 }
 
 /* Emit code to restore REG using a POP or POPP insn.  */
diff --git a/gcc/dwarf2cfi.cc b/gcc/dwarf2cfi.cc
index 1231b5bb5f0..9ba0ac07ee7 100644
--- a/gcc/dwarf2cfi.cc
+++ b/gcc/dwarf2cfi.cc
@@ -517,6 +517,20 @@ add_cfi_restore (unsigned reg)
   add_cfi (cfi);
 }
 
+/* Add DW_CFA_undefined either to the current insn stream or to a vector,
+   or both.  */
+
+static void
+add_cfi_undefined (unsigned reg)
+{
+  dw_cfi_ref cfi = new_cfi ();
+
+  cfi->dw_cfi_opc = DW_CFA_undefined;
+  cfi->dw_cfi_oprnd1.dw_cfi_reg_num = reg;
+
+  add_cfi (cfi);
+}
+
 /* Perform ROW->REG_SAVE[COLUMN] = CFI.  CFI may be null, indicating
that the register column is no longer saved.  */
 
@@ -1532,6 +1546,37 @@ dwarf2out_frame_debug_cfa_restore (rtx reg, bool 
emit_cfi)
 }
 }
 
+/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_UNDEFINED
+   note.  */
+
+static void
+dwarf2out_frame_debug_cfa_undefined (rtx reg)
+{
+  gcc_assert (REG_P (reg));
+
+  rtx span = targetm.dwarf_register_span (reg);
+  if (!span)
+{
+  unsigned int regno = dwf_regno (reg);
+  add_cfi_undefined (regno);
+}
+  else
+{
+  /* We have a PARALLEL describing where the contents of REG live.
+Mark the register undefined for each piece of the PARALLEL.  */
+  gcc_assert (GET_CODE (span) == PARALLEL);
+
+  const int par_len = XVECLEN (span, 0);
+  for (int par_index = 0; par_index < par_len; par_index++)
+   {
+ reg = XVECEXP (span, 0, par_index);
+ gcc_assert (REG_P (reg));
+ unsigned int regno = dwf_regno (reg);
+ add_cfi_undefined (regno);
+   }
+}
+}
+
 /* A subroutine of dwarf2out_frame_debug, process a REG_CFA_WINDOW_SAVE.
 
??? Perhaps we should note in the CIE where windows are saved (instead
@@ -2326,6 +2371,19 @@ dwarf2out_frame_debug (rtx_insn *insn)
handled_one = true;
break;
 
+  case REG_CFA_UNDEFINED:
+   n = XEXP (note, 0);
+   if (n == nullptr)
+ {
+   n = PATTERN (insn);
+   if (GET_CODE (n) == PARALLEL)
+ n = XVECEXP (n, 0, 0);
+   n = XEXP (n, 0);
+ }
+  

Re: [PATCH] x86: Generate REG_CFA_UNDEFINED for unsaved callee-saved registers

2024-01-29 Thread H.J. Lu
On Mon, Jan 29, 2024 at 8:30 AM Jakub Jelinek  wrote:
>
> On Mon, Jan 29, 2024 at 08:00:26AM -0800, H.J. Lu wrote:
> > Attach REG_CFA_UNDEFINED notes for unsaved callee-saved registers which
> > have been used in the function to an instruction in prologue.
> >
> > gcc/
> >
> >   PR target/38534
> >   * dwarf2cfi.cc (add_cfi_undefined): New.
> >   (dwarf2out_frame_debug_cfa_undefined): Likewise.
> >   (dwarf2out_frame_debug): Handle REG_CFA_UNDEFINED.
> >   * reg-notes.def (REG_CFA_UNDEFINED): New.
> >   * config/i386/i386.cc (ix86_expand_prologue): Attach
> >   REG_CFA_UNDEFINED notes for unsaved callee-saved registers
> >   which have been used in the function to an instruction in
> >   prologue.
> >
> > gcc/testsuite/
> >
> >   PR target/38534
> >   * gcc.target/i386/no-callee-saved-19.c: New test.
> >   * gcc.target/i386/no-callee-saved-20.c: Likewise.
> >   * gcc.target/i386/pr38534-7.c: Likewise.
> >   * gcc.target/i386/pr38534-8.c: Likewise.
> > ---
> >  gcc/config/i386/i386.cc   | 20 +++
> >  gcc/dwarf2cfi.cc  | 55 +++
> >  gcc/reg-notes.def |  4 ++
> >  .../gcc.target/i386/no-callee-saved-19.c  | 17 ++
> >  .../gcc.target/i386/no-callee-saved-20.c  | 12 
> >  gcc/testsuite/gcc.target/i386/pr38534-7.c | 18 ++
> >  gcc/testsuite/gcc.target/i386/pr38534-8.c | 13 +
> >  7 files changed, 139 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/no-callee-saved-19.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/no-callee-saved-20.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-7.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-8.c
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index b3e7c74846e..6ec87b6a16f 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -9304,6 +9304,26 @@ ix86_expand_prologue (void)
> >   combined with prologue modifications.  */
> >if (TARGET_SEH)
> >  emit_insn (gen_prologue_use (stack_pointer_rtx));
> > +
> > +  if (cfun->machine->call_saved_registers
> > +  != TYPE_NO_CALLEE_SAVED_REGISTERS)
> > +return;
> > +
> > +  insn = get_insns ();
> > +  if (!insn)
> > +return;
>
> You can't attach the notes to a random instruction that happens to be first
> in the function.
> 1) it needs to be a real instruction, not a note

Will fix it.

> 2) it needs to be RTX_FRAME_RELATED_P

This should work:

  insn = nullptr;
  rtx_insn *next;
  for (next = get_insns (); next; next = NEXT_INSN (next))
{
  if (!RTX_FRAME_RELATED_P (next))
continue;
  insn = next;
}

  if (!insn)
return;

> 3) if it is RTX_FRAME_RELATED_P, but doesn't contain any previous REG_CFA_*
>notes:
>3a) if it has REG_FRAME_RELATED_EXPR note, then I believe just that
>note argument is processed instead of the instruction pattern and
>I think REG_CFA_* notes which precede REG_FRAME_RELATED_EXPR are
>processed, but REG_CFA_* notes after it are not; so adding
>REG_CFA_UNDEFINED notes at least if the adding is after the existing
>notes instead of before them may be problematic

Since a register note is added to the head of the note list:

/* Add register note with kind KIND and datum DATUM to INSN.  */

void
add_reg_note (rtx insn, enum reg_note kind, rtx datum)
{
  REG_NOTES (insn) = alloc_reg_note (kind, datum, REG_NOTES (insn));
}

it isn't an issue.

>3b) if it has neither REG_CFA_* nor REG_FRAME_RELATED_EXPR notes, then
>normally the pattern of the insn would be processed in dwarf2cfi.
>But with the REG_CFA_* notes that part will be ignored.
>
> > --- a/gcc/dwarf2cfi.cc
> > +++ b/gcc/dwarf2cfi.cc
> > @@ -517,6 +517,17 @@ add_cfi_restore (unsigned reg)
> >add_cfi (cfi);
> >  }
> >
>
> Function comment missing.

Will fix it in the v3 patch.

Thanks.

> > +static void
> > +add_cfi_undefined (unsigned reg)
> > +{
> > +  dw_cfi_ref cfi = new_cfi ();
> > +
> > +  cfi->dw_cfi_opc = DW_CFA_undefined;
> > +  cfi->dw_cfi_oprnd1.dw_cfi_reg_num = reg;
> > +
> > +  add_cfi (cfi);
> > +}
> > +
> >  /* Perform ROW->REG_SAVE[COLUMN] = CFI.  CFI may be null, indicating
> > that the register column is no longer saved.  */
>
> Jakub
>


-- 
H.J.


Re: [PATCH] Handle function symbol reference in readonly data section

2024-01-29 Thread H.J. Lu
On Mon, Jan 29, 2024 at 9:00 AM Jakub Jelinek  wrote:
>
> On Mon, Jan 29, 2024 at 08:45:45AM -0800, H.J. Lu wrote:
> > In this case, these are internal to the same comdat group:
>
> But that is only by accident, no?

This may be by luck.  I don't know if gcc checks it when
generating such references.

> I mean, if you need to refer to such a symbol from
> non-comdat function or comdat function in a different comdat group
> and RA decides it wants the constant in memory rather than code?
> Your patch uses
>   if (decl)
> return targetm.asm_out.function_rodata_section (decl, ???);
> and default_function_rodata_section only looks at comdat group of the
> passed in decl.  But the decl here is what the constant refers to, not
> who is referring it.
>
> Jakub
>


-- 
H.J.


Re: [PATCH] Handle function symbol reference in readonly data section

2024-01-29 Thread H.J. Lu
On Mon, Jan 29, 2024 at 8:34 AM Jakub Jelinek  wrote:
>
> On Mon, Jan 29, 2024 at 08:23:21AM -0800, H.J. Lu wrote:
> > > baz:
> > > movq.LC0(%rip), %xmm0
> > > ret
> >
> > I don't think this is valid.  We can't reference a non-public
> > symbol outside of a COMDAT group.  It is OK to reference
> > foo or foo + 1, but not .LC0.
>
> But that is exactly what your patch does, e.g. on the first testcase:
> --- pr113617-1a.s   2024-01-29 11:29:55.831512974 +0100
> +++ pr113617-1a.s   2024-01-29 11:30:04.335394116 +0100
> @@ -51,28 +-51,28 @@
> .section
> .text._ZN3vtk6detail3smp15vtkSMPToolsImplILi1EE3ForINS1_27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EvxxxRT_,"axG",@progbits,_ZN26vtkStaticCellLinksTemplateIxE18ThreadedBuildLinksExxP12vtkCellArray,comdat
> .align 2
> .p2align 4
> .type   
> _ZN3vtk6detail3smp15vtkSMPToolsImplILi1EE3ForINS1_27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EvxxxRT_,
>  @function
>  
> _ZN3vtk6detail3smp15vtkSMPToolsImplILi1EE3ForINS1_27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EvxxxRT_:
> pushq   %r15
> leaq
> _ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx9_M_invokeERKNS_9_Any_dataE(%rip),
>  %rax
> leaq
> _ZN3vtk6detail3smp23ExecuteFunctorSTDThreadINS1_27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EvPvxxx(%rip),
>  %r15
> pushq   %r14
> movq%rax, %xmm1
> pushq   %r13
> pushq   %r12
> movq%rdx, %r12
> pushq   %rbp
> movq%r8, %rbp
> pushq   %rbx
> movq%rcx, %rbx
> subq$40, %rsp
> movlFor_threadNumber(%rip), %esi
> movq.LC0(%rip), %xmm0
> leaq31(%rsp), %r13
> punpcklqdq  %xmm1, %xmm0
> movq%r13, %rdi
> movaps  %xmm0, (%rsp)
> call_ZN3vtk6detail3smp16vtkSMPThreadPoolC1Ei@PLT
> movq(%rsp), %r14
> .p2align 4,,10
> .p2align 3
> @@ -191,9 +191,9 @@ vtkConstrainedSmoothingFilterRequestData
> .size   For_threadNumber, 4
>  For_threadNumber:
> .zero   4
> -   .section.data.rel.ro.local,"aw"
> +   .section
> .data.rel.ro.local._ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE,"awG",@progbits,_ZN26vtkStaticCellLinksTemplateIxE18ThreadedBuildLinksExxP12vtkCellArray,comdat
> .align 8
>  .LC0:
> .quad   
> _ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE
> -   .ident  "GCC: (GNU) 14.0.1 20240127 (experimental)"
> +   .ident  "GCC: (GNU) 14.0.1 20240129 (experimental)"
> .section.note.GNU-stack,"",@progbits
>
> Jakub
>

In this case, these are internal to the same comdat group:

.section 
.text._ZN3vtk6detail3smp15vtkSMPToolsImplILi1EE3ForINS1_27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EvxxxRT_,"axG",@progbits,_ZN26vtkStaticCellLinksTemplateIxE18ThreadedBuildLinksExxP12vtkCellArray,comdat
.align 2
.p2align 4
.type 
_ZN3vtk6detail3smp15vtkSMPToolsImplILi1EE3ForINS1_27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EvxxxRT_,
@function
_ZN3vtk6detail3smp15vtkSMPToolsImplILi1EE3ForINS1_27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EvxxxRT_:
.LFB27:
.cfi_startproc
...
movq .LC0(%rip), %xmm0
...
.section 
.text._ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE,"axG",@progbits,_ZN26vtkStaticCellLinksTemplateIxE18ThreadedBuildLinksExxP12vtkCellArray,comdat
.p2align 4
.type 
_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE,
@function
_ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE:
.LFB34:
.cfi_startproc
xorl %eax, %eax
ret
.cfi_endproc
...
.section 
.data.rel.ro.local._ZN4

Re: [PATCH] Handle function symbol reference in readonly data section

2024-01-29 Thread H.J. Lu
On Mon, Jan 29, 2024 at 8:03 AM Jakub Jelinek  wrote:
>
> On Mon, Jan 29, 2024 at 06:36:47AM -0800, H.J. Lu wrote:
> > TARGET_ASM_SELECT_RTX_SECTION is for constant in RTL.
> > It should have a non-public label reference which can't be used
> > by other TUs.  The same section can contain other constants.
> > If there is a COMDAT issue, linker will catch it.
>
> Let me try to explain on short assembly snippet what I believe your patch is
> doing and what I'm afraid of.  I believe your patch when we need to emit
> a RTL constant foo or foo+1 or foo+2 (where foo is defined in a comdat
> section) instead of emitting using say foo in assembly puts those
> constants into .data.rel.ro.local section determined by the decl that is
> referenced.
> Now, when first_tu.o wins and emits the qux comdat, it will contain
> the .data.rel.ro.local.foo which bar function refers to, but in second_tu.o
> it wants to refer to different offsets from the same function and loses.
>
> I simply believe the constants need to be in section based on what refers
> to those symbols, not the value of those constants, and that is what we used
> to do before your patch (and I'd like to understand what's wrong with what
> GCC emits and why).
>
> first_tu.s:
> 
> .section.text.foo,"axG",@progbits,qux,comdat
> .p2align 4
> .type   foo, @function
> foo:
> xorl%eax, %eax
> ret
> .size   foo, .-foo
> .text
> .p2align 4
> .type   bar, @function
> bar:
> movq.LC0(%rip), %xmm0
> ret
> .size   bar, .-bar
> .section.data.rel.ro.local.foo,"awG",@progbits,qux,comdat
> .align 8
> .LC0:
> .quad   foo
>
> second_tu.s:
> 
> .section.text.foo,"axG",@progbits,qux,comdat
> .p2align 4
> .type   foo, @function
> foo:
> xorl%eax, %eax
> ret
> .size   foo, .-foo
> .text
> .p2align 4
> .type   baz, @function
> baz:
> movq.LC0(%rip), %xmm0
> ret

I don't think this is valid.  We can't reference a non-public
symbol outside of a COMDAT group.  It is OK to reference
foo or foo + 1, but not .LC0.

> .size   baz, .-baz
> .section.data.rel.ro.local.foo,"awG",@progbits,qux,comdat
> .align 8
> .LC0:
> .quad   foo+1
> .text
> .p2align 4
> .type   corge, @function
> corge:
> movq.LC1(%rip), %xmm0
> ret
> .size   corge, .-corge
> .section.data.rel.ro.local.foo,"awG",@progbits,qux,comdat
> .align 8
> .LC1:
> .quad   foo+2
> gcc -shared -o test.so first_tu.s second_tu.s
> `.data.rel.ro.local.foo' referenced in section `.text' of /tmp/cceeUWyH.o: 
> defined in discarded section `.data.rel.ro.local.foo[qux]' of /tmp/cceeUWyH.o
> `.data.rel.ro.local.foo' referenced in section `.text' of /tmp/cceeUWyH.o: 
> defined in discarded section `.data.rel.ro.local.foo[qux]' of /tmp/cceeUWyH.o
> collect2: error: ld returned 1 exit status
>
> Jakub
>


-- 
H.J.


Re: [PATCH] x86: Generate .cfi_undefined for unsaved callee-saved registers

2024-01-29 Thread H.J. Lu
On Mon, Jan 29, 2024 at 2:08 AM Jakub Jelinek  wrote:
>
> On Sat, Jan 27, 2024 at 12:41:24PM -0800, H.J. Lu wrote:
> > When assembler directives for DWARF frame unwind are enabled, generate
> > the .cfi_undefined directive for unsaved callee-saved registers which
> > have been used in the function.
> >
> > gcc/
> >
> >   PR target/38534
> >   * config/i386/i386.cc (ix86_post_cfi_startproc): New.
> >   (TARGET_ASM_POST_CFI_STARTPROC): Likewise.
> >
> > gcc/testsuite/
> >
> >   PR target/38534
> >   * gcc.target/i386/no-callee-saved-19.c: New test.
> >   * gcc.target/i386/no-callee-saved-20.c: Likewise.
> >   * gcc.target/i386/pr38534-7.c: Likewise.
> >   * gcc.target/i386/pr38534-8.c: Likewise.
>
> This only works for -fdwarf2-cfi-asm, but doesn't work for
> -fno-dwarf2-cfi-asm.  I think we need something that will work for both.

-fno-dwarf2-cfi-asm stops generating all CFI directives.

> So, I'd say we want to add support for REG_CFA_UNDEFINED note, emit those
> notes on some frame related insn in the prologue during prologue expansion
> in pro_and_epilogue pass and handle that in dwarf2cfi.cc pass.

It is a good idea.  Here is the patch:

https://patchwork.sourceware.org/project/gcc/list/?series=30314

> One question is where those should be emitted.  Emitting them right
> at the start of the function has an advantage that it can be emitted in
> CIE for all FDEs of noreturn functions (or with the new attribute).  But
> disadvantage is of course that it will make e.g. debugging experience worse
> even in the prologues of functions where the callee saved registers which
> current function actually doesn't save aren't modified yet.
> E.g. for the cases where callee saved registers are saved to memory or
> registers I think dwarf2cfi.cc attempts to optimize and move the .cfi_*
> directives or .eh_frame record later into the function as long as the
> corresponding original register isn't modified yet.  Perhaps that should
> be done also for the undefined case, ideally by using the same dwarf2cfi.cc
> code.  So just perhaps at the start of the function read in the
> REG_CFA_UNDEFINED notes for all the ever modified callee saved registers
> which won't be actually saved and turn that into similar record like for
> the saving into stack or other regs, just noting it is undefined instead
> and have it pushed later as much as possible.

My patch doesn't implement this optimization.

-- 
H.J.


[PATCH] x86: Generate REG_CFA_UNDEFINED for unsaved callee-saved registers

2024-01-29 Thread H.J. Lu
Attach REG_CFA_UNDEFINED notes for unsaved callee-saved registers which
have been used in the function to an instruction in prologue.

gcc/

PR target/38534
* dwarf2cfi.cc (add_cfi_undefined): New.
(dwarf2out_frame_debug_cfa_undefined): Likewise.
(dwarf2out_frame_debug): Handle REG_CFA_UNDEFINED.
* reg-notes.def (REG_CFA_UNDEFINED): New.
* config/i386/i386.cc (ix86_expand_prologue): Attach
REG_CFA_UNDEFINED notes for unsaved callee-saved registers
which have been used in the function to an instruction in
prologue.

gcc/testsuite/

PR target/38534
* gcc.target/i386/no-callee-saved-19.c: New test.
* gcc.target/i386/no-callee-saved-20.c: Likewise.
* gcc.target/i386/pr38534-7.c: Likewise.
* gcc.target/i386/pr38534-8.c: Likewise.
---
 gcc/config/i386/i386.cc   | 20 +++
 gcc/dwarf2cfi.cc  | 55 +++
 gcc/reg-notes.def |  4 ++
 .../gcc.target/i386/no-callee-saved-19.c  | 17 ++
 .../gcc.target/i386/no-callee-saved-20.c  | 12 
 gcc/testsuite/gcc.target/i386/pr38534-7.c | 18 ++
 gcc/testsuite/gcc.target/i386/pr38534-8.c | 13 +
 7 files changed, 139 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/no-callee-saved-19.c
 create mode 100644 gcc/testsuite/gcc.target/i386/no-callee-saved-20.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr38534-8.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b3e7c74846e..6ec87b6a16f 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -9304,6 +9304,26 @@ ix86_expand_prologue (void)
  combined with prologue modifications.  */
   if (TARGET_SEH)
 emit_insn (gen_prologue_use (stack_pointer_rtx));
+
+  if (cfun->machine->call_saved_registers
+  != TYPE_NO_CALLEE_SAVED_REGISTERS)
+return;
+
+  insn = get_insns ();
+  if (!insn)
+return;
+
+  /* Attach REG_CFA_UNDEFINED notes for unsaved callee-saved registers
+ which have been used in the function to an instruction in prologue.
+   */
+  for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+if (df_regs_ever_live_p (i)
+   && !fixed_regs[i]
+   && !call_used_regs[i]
+   && !STACK_REGNO_P (i)
+   && !MMX_REGNO_P (i))
+  add_reg_note (insn, REG_CFA_UNDEFINED,
+   gen_rtx_REG (word_mode, i));
 }
 
 /* Emit code to restore REG using a POP or POPP insn.  */
diff --git a/gcc/dwarf2cfi.cc b/gcc/dwarf2cfi.cc
index 1231b5bb5f0..12862ed1070 100644
--- a/gcc/dwarf2cfi.cc
+++ b/gcc/dwarf2cfi.cc
@@ -517,6 +517,17 @@ add_cfi_restore (unsigned reg)
   add_cfi (cfi);
 }
 
+static void
+add_cfi_undefined (unsigned reg)
+{
+  dw_cfi_ref cfi = new_cfi ();
+
+  cfi->dw_cfi_opc = DW_CFA_undefined;
+  cfi->dw_cfi_oprnd1.dw_cfi_reg_num = reg;
+
+  add_cfi (cfi);
+}
+
 /* Perform ROW->REG_SAVE[COLUMN] = CFI.  CFI may be null, indicating
that the register column is no longer saved.  */
 
@@ -1532,6 +1543,37 @@ dwarf2out_frame_debug_cfa_restore (rtx reg, bool 
emit_cfi)
 }
 }
 
+/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_UNDEFINED
+   note.  */
+
+static void
+dwarf2out_frame_debug_cfa_undefined (rtx reg)
+{
+  gcc_assert (REG_P (reg));
+
+  rtx span = targetm.dwarf_register_span (reg);
+  if (!span)
+{
+  unsigned int regno = dwf_regno (reg);
+  add_cfi_undefined (regno);
+}
+  else
+{
+  /* We have a PARALLEL describing where the contents of REG live.
+Mark the register undefined for each piece of the PARALLEL.  */
+  gcc_assert (GET_CODE (span) == PARALLEL);
+
+  const int par_len = XVECLEN (span, 0);
+  for (int par_index = 0; par_index < par_len; par_index++)
+   {
+ reg = XVECEXP (span, 0, par_index);
+ gcc_assert (REG_P (reg));
+ unsigned int regno = dwf_regno (reg);
+ add_cfi_undefined (regno);
+   }
+}
+}
+
 /* A subroutine of dwarf2out_frame_debug, process a REG_CFA_WINDOW_SAVE.
 
??? Perhaps we should note in the CIE where windows are saved (instead
@@ -2326,6 +2368,19 @@ dwarf2out_frame_debug (rtx_insn *insn)
handled_one = true;
break;
 
+  case REG_CFA_UNDEFINED:
+   n = XEXP (note, 0);
+   if (n == nullptr)
+ {
+   n = PATTERN (insn);
+   if (GET_CODE (n) == PARALLEL)
+ n = XVECEXP (n, 0, 0);
+   n = XEXP (n, 0);
+ }
+   dwarf2out_frame_debug_cfa_undefined (n);
+   handled_one = true;
+   break;
+
   case REG_CFA_SET_VDRAP:
n = XEXP (note, 0);
if (REG_P (n))
diff --git a/gcc/reg-notes.def b/gcc/reg-notes.def
index 5b878fb2a1c..8a78ebb6864 100644
--- a/gcc/reg-notes.def
+++ b/gcc/reg-notes.def
@@ -152,6 +152,10 @@ REG_CFA_NOTE (CFA_EXPRESSION)
the given register.  */
 REG_CFA_NOTE (CFA_VAL_EXPRESSION)
 
+/* Attached to 

Re: [PATCH] Handle function symbol reference in readonly data section

2024-01-29 Thread H.J. Lu
On Mon, Jan 29, 2024 at 3:03 AM Jakub Jelinek  wrote:
>
> On Sat, Jan 27, 2024 at 07:10:55AM -0800, H.J. Lu wrote:
> > For function symbol reference in readonly data section, instead of putting
> > it in .data.rel.ro or .rodata.cst section, call function_rodata_section to
> > get the read-only or relocated read-only data section associated with the
> > function DECL so that the COMDAT section will be used for a COMDAT function
> > symbol.
>
> I have to admit I still don't understand what the linker doesn't like on
> what GCC emits and why references to the public symbols at the start of
> comdat sections are ok in .text but not in .data.rel.ro but are in .data
> or .rodata sections (or what the exact rules are, see also what we emit on
> __attribute__((noinline, noipa)) inline void foo () {}
> void bar () { foo (); } void (*p) () = foo; void (*const q) () = foo; void 
> (*const *r) () = &q;
> ).
> I've always thought that the problematic references are when something
> references non-public symbols in comdat sections, especially not at their
> start, because if linker selects some comdat section(s) from some other
> TU, there is no guarantee e.g. the code is identical (just in valid program
> should behave the same) and if such reference comes from other comdat that
> is kept or from non-comdat sections, the question is what should be
> referenced.
>
> But in this case, I believe we are referencing the function at the start of
> a code comdat section.
>
> Now, in my limited understanding what the patch does is totally wrong
> for multiple reasons.  On the first testcase it changes
> -   .section.data.rel.ro.local,"aw"
> +   .section
> .data.rel.ro.local._ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE,"awG",@progbits,_ZN26vtkStaticCellLinksTemplateIxE18ThreadedBuildLinksExxP12vtkCellArray,comdat
> .align 8
>  .LC0:
> .quad   
> _ZN4blah17_Function_handlerIFvvENS_5_BindIFPFvPvxxxEPN3vtk6detail3smp27vtkSMPTools_FunctorInternalIN12_GLOBAL__N_19CountUsesIxEELb0EEExxx10_M_managerERNS_9_Any_dataERKSI_NS_18_Manager_operationE
> Now, I believe such a .data.rel.ro.local.* section is normally
> used for .data.rel.ro.local constants from the referenced function,
> if we have some relocatable constant needed in that function we
> emit those there.
> If linker picks up the comdat from current TU, it will be all fine,
> sure, but if it picks up the comdat from another TU, the
> .data.rel.ro.local._ZN4blah17_Function_handlerIFvvENS_5* section
> there might not be present or might contain some unrelated stuff.
> Given the handling of (const (plus (symbol_ref) (const_int)), we
> also don't know whether the section holds a reference to the start,
> or to some other offset of it, how many etc.
> And, we reference a non-public symbol (.LC0) from non-comdat section
> to a comdat section.

TARGET_ASM_SELECT_RTX_SECTION is for constant in RTL.
It should have a non-public label reference which can't be used
by other TUs.  The same section can contain other constants.
If there is a COMDAT issue, linker will catch it.

> If I'm wrong on this, please try to explain.
>
> Jakub
>


-- 
H.J.


Re: [PATCH] x86: Save callee-saved registers in noreturn functions for -O0/-Og

2024-01-29 Thread H.J. Lu
On Mon, Jan 29, 2024 at 2:11 AM Jakub Jelinek  wrote:
>
> On Sat, Jan 27, 2024 at 07:00:03AM -0800, H.J. Lu wrote:
> > On Sat, Jan 27, 2024 at 6:09 AM Jakub Jelinek  wrote:
> > >
> > > On Sat, Jan 27, 2024 at 05:52:34AM -0800, H.J. Lu wrote:
> > > > @@ -3391,7 +3392,9 @@ ix86_set_func_type (tree fndecl)
> > > >   function is marked as noreturn in the IR output, which leads the
> > > >   incompatible attribute error in LTO1.  */
> > > >bool has_no_callee_saved_registers
> > > > -= (((TREE_NOTHROW (fndecl) || !flag_exceptions)
> > > > += ((optimize
> > > > + && !optimize_debug
> > >
> > > Shouldn't that be opt_for_fn (fndecl, optimize) and ditto for
> > > optimize_debug?
> > > I mean, aren't the options not restored yet when this function is called
> > > (i.e. remain in whatever state they were in the previous function or
> > > global state)?
> >
> > store_parm_decls is called when parsing a function.  store_parm_decls
> > calls allocate_struct_function which calls
> >
> >   invoke_set_current_function_hook (fndecl);
> >
> > which has
> >
> >  /* Change optimization options if needed.  */
> >   if (optimization_current_node != opts)
> > {
> >   optimization_current_node = opts;
> >   cl_optimization_restore (&global_options, &global_options_set,
> >TREE_OPTIMIZATION (opts));
> > }
> >
> >   targetm.set_current_function (fndecl);
> >
> > which calls ix86_set_current_function after global_options
> > has been updated.   ix86_set_func_type is called from
> > ix86_set_current_function.
>
> Sorry, you're right, I just saw option restore later in 
> ix86_set_current_function
> and missed that it is target option restore only.
>
> > > Also, why check "noreturn" attribute rather than
> > > TREE_THIS_VOLATILE (fndecl)?
> > >
> >
> > The comment above this code says
> >
> >  NB: Don't use TREE_THIS_VOLATILE to check if this is a noreturn
> >  function.  The local-pure-const pass turns an interrupt function
> >  into a noreturn function by setting TREE_THIS_VOLATILE.  Normally
> >  the local-pure-const pass is run after ix86_set_func_type is called.
> >  When the local-pure-const pass is enabled for LTO, the interrupt
> >  function is marked as noreturn in the IR output, which leads the
> >  incompatible attribute error in LTO1.
>
> So in that case, I think it would be best to test
>   TREE_THIS_VOLATILE (fndecl)
>   && lookup_attribute ("noreturn", DECL_ATTRIBUTES (fndecl))
>   && ...
> because if it doesn't have noreturn attribute, it will not have
> TREE_THIS_VOLATILE set and TREE_THIS_VOLATILE is much cheaper to test than
> looking up an attribute.
>

Fixed in the v3 patch:

https://patchwork.sourceware.org/project/gcc/list/?series=30308

Thanks.

-- 
H.J.


  1   2   3   4   5   6   7   8   9   10   >