[PATCH v7] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-02-16 Thread Takayuki 'January June' Suwa via Gcc-patches
In the case of the CALL0 ABI, values that must be retained before and
after function calls are placed in the callee-saved registers (A12
through A15) and referenced later.  However, it is often the case that
the save and the reference each occur only once, and each is a simple
register-register move.  In that case the callee-saved register can be
eliminated (with two exceptions: i. the register saved to/restored
from is the stack pointer, ii. the function needs an additional stack
pointer adjustment to grow the stack).

e.g. in the following example, if there are no other occurrences of
register A14:

;; before
; prologue {
  ...
s32i.n  a14, sp, 16
  ...   ;; no frame pointer needed
;; no additional stack growth
; } prologue
  ...
mov.n   a14, a6 ;; A6 is not SP
  ...
call0   foo
  ...
mov.n   a8, a14 ;; A8 is not SP
  ...
; epilogue {
  ...
l32i.n  a14, sp, 16
  ...
; } epilogue

It can be transformed like this:

;; after
; prologue {
  ...
(no save needed)
  ...
; } prologue
  ...
s32i.n  a6, sp, 16  ;; replaced with A14's slot
  ...
call0   foo
  ...
l32i.n  a8, sp, 16  ;; through SP
  ...
; epilogue {
  ...
(no restoration needed)
  ...
; } epilogue

This patch adds the abovementioned logic to the function prologue/epilogue
RTL expander code.

gcc/ChangeLog:

* config/xtensa/xtensa.cc (machine_function): Add new member
'eliminated_callee_saved_regs'.
(xtensa_can_eliminate_callee_saved_reg_p): New function to
determine whether the register can be eliminated or not.
(xtensa_expand_prologue): Invoke the above function and eliminate
the use of the callee-saved register by accessing its stack slot
directly through the stack pointer (or the frame pointer if
needed).
(xtensa_expand_epilogue): Modify to not emit register restoration
insn from its stack slot if the register is already eliminated.

gcc/testsuite/ChangeLog:

* gcc.target/xtensa/elim_callee_saved.c: New.
---
 gcc/config/xtensa/xtensa.cc   | 134 ++
 .../gcc.target/xtensa/elim_callee_saved.c |  37 +
 2 files changed, 146 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 3e2e22d4cbe..d987f1dfede 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -105,6 +105,7 @@ struct GTY(()) machine_function
   bool epilogue_done;
   bool inhibit_logues_a1_adjusts;
   rtx last_logues_a9_content;
+  bitmap eliminated_callee_saved_regs;
 };
 
 static void xtensa_option_override (void);
@@ -3343,6 +3344,65 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int 
flags)
 cfun->machine->last_logues_a9_content = GEN_INT (offset);
 }
 
+static bool
+xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno,
+rtx_insn **p_insnS,
+rtx_insn **p_insnR)
+{
+  df_ref ref;
+  rtx_insn *insn, *insnS = NULL, *insnR = NULL;
+  rtx pattern;
+
+  if (!optimize || !df || call_used_or_fixed_reg_p (regno)
+  || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM))
+return false;
+
+  for (ref = DF_REG_DEF_CHAIN (regno);
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
+  continue;
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& REG_P (SET_DEST (pattern))
+&& REGNO (SET_DEST (pattern)) == regno
+&& REG_NREGS (SET_DEST (pattern)) == 1
+&& REG_P (SET_SRC (pattern)))
+  {
+   if (insnS)
+ return false;
+   insnS = insn;
+   continue;
+  }
+else
+  return false;
+
+  for (ref = DF_REG_USE_CHAIN (regno);
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
+  continue;
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& REG_P (SET_SRC (pattern))
+&& REGNO (SET_SRC (pattern)) == regno
+&& REG_NREGS (SET_SRC (pattern)) == 1
+&& REG_P (SET_DEST (pattern)))
+  {
+   if (insnR)
+ return false;
+   insnR = insn;
+   continue;
+  }
+else
+  return false;
+
+  if (!insnS || !insnR)
+return false;
+
+  *p_insnS = insnS, *p_insnR = insnR;
+
+  return true;
+}
+
 /* minimum frame = reg save area (4 words) plus static chain (1 word)
and the total number of words must be a multiple of 128 bits.  */
 #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
@@ -3382,6 +3442,7 @@ xtensa_expand_prologue (void)
   df_ref ref;
   bool stack_pointer_needed = frame_pointer_needed

Re: [PATCH] PR tree-optimization/108697 - Create a lazy ssa_cache

2023-02-16 Thread Richard Biener via Gcc-patches
On Thu, Feb 16, 2023 at 3:34 PM Andrew MacLeod  wrote:
>
>
> On 2/16/23 02:55, Richard Biener wrote:
> > On Wed, Feb 15, 2023 at 6:07 PM Andrew MacLeod via Gcc-patches
> >  wrote:
> >> This patch implements the suggestion that we have an alternative
> >> ssa-cache which does not zero memory, and instead uses a bitmap to track
> >> whether a value is currently set or not.  It roughly mimics what
> >> path_range_query was doing internally.
> >>
> >> For sparsely used cases, especially in large programs, this is more
> >> efficient.  I changed path_range_query to use this, and removed its old
> >> bitmap (and a hack or two around PHI calculations), and also utilized
> >> this in the assume_query class.
> >>
> >> Performance wise, the patch doesn't affect VRP (since that still uses
> >> the original version).  Switching to the lazy version caused a slowdown
> >> of 2.5% across VRP.
> >>
> >> There was a noticeable improvement elsewhere: across 230 GCC source
> >> files, threading ran over 12% faster!  Overall compilation improved by
> >> 0.3%.  Not sure it makes much difference in compiler.i, but it shouldn't
> >> hurt.
> >>
> >> bootstraps on x86_64-pc-linux-gnu with no regressions.   OK for trunk?
> >> or do you want to wait for the next release...
> > I see
> >
> > @@ -365,16 +335,8 @@ path_range_query::compute_ranges_in_phis (basic_block 
> > bb)
> >
> > Value_Range r (TREE_TYPE (name));
> > if (range_defined_in_block (r, name, bb))
> > -   {
> > - unsigned v = SSA_NAME_VERSION (name);
> > - set_cache (r, name);
> > - bitmap_set_bit (phi_set, v);
> > - // Pretend we don't have a cache entry for this name until
> > - // we're done with all PHIs.
> > - bitmap_clear_bit (m_has_cache_entry, v);
> > -   }
> > +   m_cache.set_global_range (name, r);
> >   }
> > -  bitmap_ior_into (m_has_cache_entry, phi_set);
> >   }
> >
> >   // Return TRUE if relations may be invalidated after crossing edge E.
> >
> > which I think is not correct - if we have
> >
> >   # _1 = PHI <..., _2>
> >   # _2 = PHI <..., _1>
> >
> > then their effects are supposed to be executed in parallel, that is,
> > both PHI argument _2 and _1 are supposed to see the "old" version.
> > The previous code tried to make sure the range of the new _1 doesn't
> > get seen when processing the argument _1 in the definition of _2.
> >
> > The new version drops this, possibly resulting in wrong-code.
>
> This is dropped because it is actually handled properly in
> range_defined_in_block now.  (which I think Aldy was describing).
>
> It didn't make sense to me why it was handled here like this, so I traced
> through the call chain to find out if it was still actually needed and
> discussed it with Aldy.  I think it was mostly a leftover wart.

Ah, thanks for checking.

> >
> > While I think it's appropriate to sort out compile-time issues like this
> > during stage4, at least the above makes me think it should be deferred
> > to next stage1.
>
> I am happy to defer it since it's a marginal increase anyway.

Sure - thus OK for stage1.

Thanks,
Richard.

>
> Andrew
>
>


Re: [PATCH] -Wdangling-pointer: don't mark SSA lhs sets as stores

2023-02-16 Thread Richard Biener via Gcc-patches
On Fri, Feb 17, 2023 at 8:09 AM Alexandre Oliva via Gcc-patches
 wrote:
>
>
> check_dangling_stores has some weirdnesses that cause its behavior to
> change when the target ABI requires C++ ctors to return this: while
> scanning stmts backwards in e.g. the AS ctor on a target that returns
> this in ctors, the scan first encounters a copy of this to the SSA
> name used to hold the return value.  m_ptr_query.get_ref resolves lhs
> (the return SSA name) to the rhs (the default SSA name for this), does
> not skip it because auto_var_p is false for SSA_NAMEs, and proceeds to
> add it to stores, which seems to prevent later attempts to add stores
> into *this from succeeding, which disables warnings that should have
> triggered.
>
> This is also the case when the backwards search finds unrelated stores
> to other fields of *this before it reaches stores that IMHO should be
> warned about.  The store found first disables checking of other
> stores, as if the store appearing later in the code would necessarily
> overwrite the store that should be warned about.  I've added an
> xfailed variant of the existing test (struct An) that triggers this
> problem, but I'm not sure how to go about fixing it.
>
> Meanwhile, this patch prevents assignments with SSA_NAMEs in the lhs
> from being regarded as stores, which is enough to remove the
> undesirable side effect on -Wdangling-pointer of ABI-mandated ctors'
> returning this.  Another variant of the existing test (struct Al)
> demonstrates the problem regardless of this aspect of the ABI, and
> gets the desired warning with the proposed patch, but not
> without.
>
> Curiously, this fix exposes yet another problem in
> Wdangling-pointer-5.c: it is the return stmt of the unrelated pointer
> p, not the store into possibly-overlapping *vpp2, that caused the
> warning to not be issued for the store in *vpp1.  I'm not sure whether
> we should or should not warn in that case, but this patch adjusts the
> test to reflect the behavior change.
>
> Regstrapped on x86_64-linux-gnu.
> Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

It seems the case should run into

  else if (TREE_CODE (lhs_ref.ref) == SSA_NAME)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (lhs_ref.ref);
  if (!gimple_nop_p (def_stmt))
/* Avoid looking at or before stores into unknown objects.  */
return;

  tree var = SSA_NAME_VAR (lhs_ref.ref);
  if (TREE_CODE (var) == PARM_DECL && DECL_BY_REFERENCE (var))
/* Avoid by-value arguments transformed into by-reference.  */
continue;

and what your patch tried to avoid is running into

  if (stores.add (lhs_ref.ref))
continue;

?  I wonder what the circumstances are that we want the latter to happen if
the former condition is true?

> for  gcc/ChangeLog
>
> * gimple-ssa-warn-access.cc
> (pass_waccess::check_dangling_stores): Skip non-stores.
>
> for  gcc/testsuite/ChangeLog
>
> * g++.dg/warn/Wdangling-pointer.C (warn_init_ref_member): Add
> two new variants, one fixed, one xfailed.
> * c-c++-common/Wdangling-pointer-5.c
> (nowarn_store_arg_store_arg): Add now-expected warnings.
> ---
>  gcc/gimple-ssa-warn-access.cc|3 ++
>  gcc/testsuite/c-c++-common/Wdangling-pointer-5.c |4 ++-
>  gcc/testsuite/g++.dg/warn/Wdangling-pointer.C|   29 
> +-
>  3 files changed, 32 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc
> index 2eab1d59abd05..c0efb3fdb4e52 100644
> --- a/gcc/gimple-ssa-warn-access.cc
> +++ b/gcc/gimple-ssa-warn-access.cc
> @@ -4511,7 +4511,8 @@ pass_waccess::check_dangling_stores (basic_block bb,
>use the escaped locals.  */
> return;
>
> -  if (!is_gimple_assign (stmt) || gimple_clobber_p (stmt))
> +  if (!is_gimple_assign (stmt) || gimple_clobber_p (stmt)
> + || !gimple_store_p (stmt))
> continue;
>
>access_ref lhs_ref;
> diff --git a/gcc/testsuite/c-c++-common/Wdangling-pointer-5.c 
> b/gcc/testsuite/c-c++-common/Wdangling-pointer-5.c
> index 2a165cea76768..cb6da9e86394d 100644
> --- a/gcc/testsuite/c-c++-common/Wdangling-pointer-5.c
> +++ b/gcc/testsuite/c-c++-common/Wdangling-pointer-5.c
> @@ -75,9 +75,9 @@ void nowarn_store_arg_store (void **vpp)
>
>  void* nowarn_store_arg_store_arg (void **vpp1, void **vpp2)
>  {
> -  int x;
> +  int x;  // { dg-message "'x' declared here" }
>void **p = (void**)sink (0);
> -  *vpp1 =  // warn here?
> +  *vpp1 =  // { dg-warning "storing the address of local variable 
> 'x' in '\\*vpp1'" }
>*vpp2 = 0;  // might overwrite *vpp1
>return p;
>  }
> diff --git a/gcc/testsuite/g++.dg/warn/Wdangling-pointer.C 
> b/gcc/testsuite/g++.dg/warn/Wdangling-pointer.C
> index 22c559e4adafe..a94477a647666 100644
> --- 

[PATCH] [PR77760] [libstdc++] encode __time_get_state in tm

2023-02-16 Thread Alexandre Oliva via Gcc-patches


On platforms that fail the ptrtomemfn-cast-to-pfn hack, such as
arm-*-vxworks*, time_get fails with %I and %p because the state is not
preserved across do_get calls.

This patch introduces an alternate hack, that encodes the state in
unused bits of struct tm before calling do_get, extracts them in
do_get, does the processing, and encodes it back, so that get extracts
it.

The finalizer is adjusted for idempotence, because both do_get and get
may call it.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  libstdc++-v3/ChangeLog

PR libstdc++/77760
* include/bits/locale_facets_nonio.h (__time_get_state): Add
_M_state_tm, _M_save_to and _M_restore_from.
* include/bits/locale_facets_nonio.tcc (time_get::get): Drop
do_get-overriding hack.  Use state unconditionally, and encode
it in tm around do_get.
(time_get::do_get): Extract state from tm, and encode it back,
around parsing and finalizing.
* src/c++98/locale_facets.cc
(__time_get_state::_M_finalize_state): Make tm_hour and
tm_year idempotent.
---
 libstdc++-v3/include/bits/locale_facets_nonio.h   |   80 +
 libstdc++-v3/include/bits/locale_facets_nonio.tcc |   43 ++-
 libstdc++-v3/src/c++98/locale_facets.cc   |8 ++
 3 files changed, 93 insertions(+), 38 deletions(-)

diff --git a/libstdc++-v3/include/bits/locale_facets_nonio.h 
b/libstdc++-v3/include/bits/locale_facets_nonio.h
index 372cf0429501d..711bede158427 100644
--- a/libstdc++-v3/include/bits/locale_facets_nonio.h
+++ b/libstdc++-v3/include/bits/locale_facets_nonio.h
@@ -361,6 +361,86 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 void
 _M_finalize_state(tm* __tm);
 
+  private:
+void
+_M_state_tm(tm* __tm, bool __totm)
+{
+  // Check we don't invade the in-range tm bits, even if int is
+  // 16-bits wide.
+#define _M_min_shift_tm_sec 6
+#define _M_min_shift_tm_min 6
+#define _M_min_shift_tm_hour 5
+#define _M_min_shift_tm_mday 5
+#define _M_min_shift_tm_mon 4
+#define _M_min_shift_tm_year 16 // 14, but signed, so avoid it.
+#define _M_min_shift_tm_wday 3
+#define _M_min_shift_tm_yday 9
+#define _M_min_shift_tm_isdst 1
+  // Represent __STF in __WDT bits of __TMF up to the __MSB bit.
+  // In __MSB, 0 stands for the most significant bit of __TMF,
+  // 1 the bit next to it, and so on.
+#define _M_time_get_state_bitfield_inout(__tmf, __msb, __wdt, __stf)   \
+  do   \
+  {\
+const unsigned __shift = (sizeof (__tm->__tmf) * __CHAR_BIT__  \
+ - (__msb) - (__wdt)); \
+static char __attribute__ ((__unused__))   \
+  __check_parms_##__tmf[(__msb) >= 0 && (__wdt) > 0
\
+   && __shift >= (_M_min_shift_##__tmf \
+  + (sizeof (__tm->__tmf)  \
+ * __CHAR_BIT__) - 16) \
+   ? 1 : -1];  \
+const unsigned __mask = ((1 << (__wdt)) - 1) << __shift;   \
+if (!__totm)   \
+  this->__stf = (__tm->__tmf & __mask) >> __shift; \
+__tm->__tmf &= ~__mask;\
+if (__totm)
\
+  __tm->__tmf |= ((unsigned)this->__stf << __shift) & __mask;  \
+}  \
+  while (0)
+
+  _M_time_get_state_bitfield_inout (tm_hour,  0, 1, _M_have_I);
+  _M_time_get_state_bitfield_inout (tm_wday,  0, 1, _M_have_wday);
+  _M_time_get_state_bitfield_inout (tm_yday,  0, 1, _M_have_yday);
+  _M_time_get_state_bitfield_inout (tm_mon,   0, 1, _M_have_mon);
+  _M_time_get_state_bitfield_inout (tm_mday,  0, 1, _M_have_mday);
+  _M_time_get_state_bitfield_inout (tm_yday,  1, 1, _M_have_uweek);
+  _M_time_get_state_bitfield_inout (tm_yday,  2, 1, _M_have_wweek);
+  _M_time_get_state_bitfield_inout (tm_isdst, 0, 1, _M_have_century);
+  _M_time_get_state_bitfield_inout (tm_hour,  1, 1, _M_is_pm);
+  _M_time_get_state_bitfield_inout (tm_isdst, 1, 1, _M_want_century);
+  _M_time_get_state_bitfield_inout (tm_yday,  3, 1, _M_want_xday);
+  // _M_pad1
+  _M_time_get_state_bitfield_inout (tm_wday,  1, 6, _M_week_no);
+  // _M_pad2
+  _M_time_get_state_bitfield_inout (tm_mon,   1, 8, _M_century);
+  // _M_pad3
+
+#undef _M_min_shift_tm_hour
+#undef _M_min_shift_tm_sec
+#undef _M_min_shift_tm_min
+#undef _M_min_shift_tm_hour
+#undef _M_min_shift_tm_mday
+#undef _M_min_shift_tm_mon
+#undef 

[PATCH] [libstdc++] ensure mutex_pool survives _Safe_sequence_base

2023-02-16 Thread Alexandre Oliva via Gcc-patches


On vxworks, after destroying the semaphore used to implement a mutex,
__gthread_mutex_lock fails and __gnu_cxx::__mutex::lock calls
__throw_concurrence_lock_error.  Nothing ensures the mutex_pool
mutexes survive init-once objects containing _Safe_sequence_base.  If
such an object completes construction before mutex_pool
initialization, it will be registered for atexit destruction after the
mutex_pool mutexes, so the _M_detach_all() call in the
_Safe_sequence_base dtor will use already-destructed mutexes, and
basic_string/requirements/citerators_cc fails calling terminate.

This patch fixes this problem by ensuring the mutex pool completes
construction before any _Safe_sequence_base-containing object, so that
the mutex pool survives them all.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  libstdc++-v3/ChangeLog

* include/debug/safe_base.h (_Safe_sequence_base): Ensure
the mutex pool survives *this.
---
 libstdc++-v3/include/debug/safe_base.h |   10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/debug/safe_base.h 
b/libstdc++-v3/include/debug/safe_base.h
index 1dfa9f68b65b5..d4ba404cdac6e 100644
--- a/libstdc++-v3/include/debug/safe_base.h
+++ b/libstdc++-v3/include/debug/safe_base.h
@@ -203,7 +203,15 @@ namespace __gnu_debug
 // Initialize with a version number of 1 and no iterators
 _Safe_sequence_base() _GLIBCXX_NOEXCEPT
 : _M_iterators(0), _M_const_iterators(0), _M_version(1)
-{ }
+{
+  // Make sure the mutex_pool machinery is initialized before any
+  // full object containing a _Safe_sequence_base completes
+  // construction, so that any local static mutexes in the mutex
+  // pool won't be destructed before our destructor runs;
+  // _M_detach_all could fail otherwise, on targets whose mutexes
+  // stop working after being destroyed.
+  (void)this->_M_get_mutex();
+}
 
 #if __cplusplus >= 201103L
 _Safe_sequence_base(const _Safe_sequence_base&) noexcept

-- 
Alexandre Oliva, happy hacker  https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] [arm] [vxworks] xfail fp-double-convert-float-1.c

2023-02-16 Thread Alexandre Oliva via Gcc-patches


Even with vcvt.f32.f64, the FE_UPWARD test rounds down and fails.  I'm
not sure whether this opcode disregards the rounding mode (it looks
like it should take it into account) or it is a qemu bug, but it does
not look like GCC is doing anything wrong, and the test fails, so I'm
marking the fail as expected on arm-*-vxworks*.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

* gcc.dg/torture/fp-double-convert-float-1.c: XFAIL on
arm-*-vxworks*.
---
 .../gcc.dg/torture/fp-double-convert-float-1.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/torture/fp-double-convert-float-1.c 
b/gcc/testsuite/gcc.dg/torture/fp-double-convert-float-1.c
index 1c28a9e101eb7..c3ca69d64bbc6 100644
--- a/gcc/testsuite/gcc.dg/torture/fp-double-convert-float-1.c
+++ b/gcc/testsuite/gcc.dg/torture/fp-double-convert-float-1.c
@@ -1,5 +1,5 @@
 /* PR57245 */
-/* { dg-do run } */
+/* { dg-do run { xfail { arm-*-vxworks* } } } */
 /* { dg-require-effective-target fenv } */
 /* { dg-require-effective-target hard_float } */
 /* { dg-additional-options "-frounding-math" } */

-- 
Alexandre Oliva, happy hacker  https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] [libstdc++] xfail noreplace tests on vxworks

2023-02-16 Thread Alexandre Oliva via Gcc-patches


vxworks ignores O_EXCL in open, so noreplace open succeeds when it is
expected to fail.  xfail the tests.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  libstdc++-v3/ChangeLog

* testsuite/27_io/basic_ofstream/open/char/noreplace.cc: xfail
on vxworks.
* testsuite/27_io/basic_ofstream/open/wchar_t/noreplace.cc:
Likewise.
---
 .../27_io/basic_ofstream/open/char/noreplace.cc|2 +-
 .../27_io/basic_ofstream/open/wchar_t/noreplace.cc |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/testsuite/27_io/basic_ofstream/open/char/noreplace.cc 
b/libstdc++-v3/testsuite/27_io/basic_ofstream/open/char/noreplace.cc
index 56ff2d7cead3c..2e99707df86d0 100644
--- a/libstdc++-v3/testsuite/27_io/basic_ofstream/open/char/noreplace.cc
+++ b/libstdc++-v3/testsuite/27_io/basic_ofstream/open/char/noreplace.cc
@@ -1,4 +1,4 @@
-// { dg-do run }
+// { dg-do run { xfail *-*-vxworks* } }
 
 #include 
 
diff --git 
a/libstdc++-v3/testsuite/27_io/basic_ofstream/open/wchar_t/noreplace.cc 
b/libstdc++-v3/testsuite/27_io/basic_ofstream/open/wchar_t/noreplace.cc
index f0425cdab3d23..ddb7fd691608c 100644
--- a/libstdc++-v3/testsuite/27_io/basic_ofstream/open/wchar_t/noreplace.cc
+++ b/libstdc++-v3/testsuite/27_io/basic_ofstream/open/wchar_t/noreplace.cc
@@ -1,4 +1,4 @@
-// { dg-do run }
+// { dg-do run { xfail *-*-vxworks* } }
 
 #include 
 

-- 
Alexandre Oliva, happy hacker  https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] [PR104882] [arm] require mve hw for mve run test

2023-02-16 Thread Alexandre Oliva via Gcc-patches


The pr104882.c test is an execution test, but arm_v8_1m_mve_ok only
tests for compile-time support.  Add a requirement for mve hardware.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

PR target/104882
* gcc.target/arm/simd/pr104882.c: Require mve hardware.
---
 gcc/testsuite/gcc.target/arm/simd/pr104882.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.target/arm/simd/pr104882.c 
b/gcc/testsuite/gcc.target/arm/simd/pr104882.c
index ae9709af42f22..1ea7a14836f54 100644
--- a/gcc/testsuite/gcc.target/arm/simd/pr104882.c
+++ b/gcc/testsuite/gcc.target/arm/simd/pr104882.c
@@ -1,4 +1,5 @@
 /* { dg-do run } */
+/* { dg-require-effective-target arm_mve_hw } */
 /* { dg-require-effective-target arm_v8_1m_mve_ok } */
 /* { dg-add-options arm_v8_1m_mve } */
 /* { dg-additional-options "-O2" } */

-- 
Alexandre Oliva, happy hacker  https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


Re: [PATCH] simplify-rtx: Fix VOIDmode operand handling in simplify_subreg [PR108805]

2023-02-16 Thread Richard Biener via Gcc-patches
On Thu, 16 Feb 2023, Uros Bizjak wrote:

> simplify_subreg can return VOIDmode const_int operand and will
> cause ICE in simplify_gen_subreg when this operand is passed to it.
> 
> The patch prevents VOIDmode temporary from entering simplify_gen_subreg.
> We can't process const_int operand any further, since outermode
> is not an integer mode here.

But if it's a CONST_INT then we know it's of int_outermode, no? That is,
doesn't simplify_subreg (mode, ...) always return something in 'mode'
and thus we can always pass just 'mode' as third argument to the
following simplify_gen_subreg call?

Richard.

> 2023-02-16  Uroš Bizjak  
> 
> gcc/ChangeLog:
> 
> PR target/108805
> * simplify-rtx.cc (simplify_context::simplify_subreg): Prevent
> VOIDmode const_int result from simplify_subreg from entering
> simplify_gen_subreg.
> 
> gcc/testsuite/ChangeLog:
> 
> PR target/108805
> * gcc.dg/pr108805.c: New test.
> 
> Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
> 
> OK for master and release branches?
> 
> Uros.
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)


[PATCH] [arm] complete vmsr/vmrs blank and case adjustments

2023-02-16 Thread Alexandre Oliva via Gcc-patches


Back in September last year, some of the vmsr and vmrs patterns had an
extraneous blank removed, and the case of register names lowered, but
another instance remained, and so did a few testcases.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/ChangeLog

* config/arm/vfp.md (*thumb2_movsi_vfp): Drop blank after tab
after vmsr and vmrs, and lower the case of P0.

for  gcc/testsuite/ChangeLog

* gcc.target/arm/acle/cde-mve-full-assembly.c: Drop blank
after tab after vmsr, and lower the case of P0.
---
 gcc/config/arm/vfp.md  |4 
 .../gcc.target/arm/acle/cde-mve-full-assembly.c|  264 ++--
 2 files changed, 134 insertions(+), 134 deletions(-)

diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index f34f35e1185e2..60e7ba35d8b25 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -312,9 +312,9 @@ (define_insn "*thumb2_movsi_vfp"
 case 12: case 13:
   return output_move_vfp (operands);
 case 14:
-  return \"vmsr\\t P0, %1\";
+  return \"vmsr\\tp0, %1\";
 case 15:
-  return \"vmrs\\t %0, P0\";
+  return \"vmrs\\t%0, p0\";
 case 16:
   return \"mcr\\tp10, 7, %1, cr1, cr0, 0\\t @SET_FPSCR\";
 case 17:
diff --git a/gcc/testsuite/gcc.target/arm/acle/cde-mve-full-assembly.c 
b/gcc/testsuite/gcc.target/arm/acle/cde-mve-full-assembly.c
index d025c3391fbe5..72f330185944a 100644
--- a/gcc/testsuite/gcc.target/arm/acle/cde-mve-full-assembly.c
+++ b/gcc/testsuite/gcc.target/arm/acle/cde-mve-full-assembly.c
@@ -534,80 +534,80 @@
contain back references).  */
 /*
 ** test_cde_vcx1q_mfloat16x8_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** test_cde_vcx1q_mfloat32x4_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** test_cde_vcx1q_muint8x16_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** test_cde_vcx1q_muint16x8_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** test_cde_vcx1q_muint32x4_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** test_cde_vcx1q_muint64x2_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** test_cde_vcx1q_mint8x16_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** test_cde_vcx1q_mint16x8_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** test_cde_vcx1q_mint32x4_tintint:
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
-** (?:vmov\.i32q0, #0  @ v16qi|vmsr P0, r2 @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
+** (?:vmov\.i32q0, #0  @ v16qi|vmsrp0, r2  @ movhi)
 ** vpst
 ** vcx1t   p0, q0, #32
 ** bx  lr
 */
 /*
 ** 

Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment

2023-02-16 Thread Richard Biener via Gcc-patches
On Thu, 16 Feb 2023, juzhe.zhong wrote:

> Thanks for the great work to fix this issue for rvv.  Hi, Richard.  This is the
> patch to differentiate mask modes of the same bytesize.  It adjusts the precision
> correctly according to the rvv isa.  Would you mind helping us with this patch?
> It is very important for rvv support in gcc.

If adjusting the precision works fine then I suppose the patch looks
reasonable.  I'll defer to Richard S. though since he's the one knowing
the mode stuff better.  I'd have integrated the precision adjustment
with the ADJUST_NITER hook since that is also documented to adjust
the precision btw.

Richard.

> Thanks. 
>  Replied Message 
> From
> incarnation.p@outlook.com
> Date
> 02/16/2023 23:12
> To
> gcc-patches@gcc.gnu.org
> Cc
> juzhe.zh...@rivai.ai,
> kito.ch...@sifive.com,
> rguent...@suse.de,
> pan2...@intel.com
> Subject
> [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment
> From: Pan Li 
> 
>    Fix the bug of the rvv bool mode precision with the adjustment.
>    The bits size of vbool*_t will be adjusted to
>    [1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
>    adjusted mode precision of vbool*_t will help underlying pass to
>    make the right decision for both the correctness and optimization.
> 
>    Given below sample code:
>    void test_1(int8_t * restrict in, int8_t * restrict out)
>    {
>      vbool8_t v2 = *(vbool8_t*)in;
>      vbool16_t v5 = *(vbool16_t*)in;
>      *(vbool16_t*)(out + 200) = v5;
>      *(vbool8_t*)(out + 100) = v2;
>    }
> 
>    Before the precision adjustment:
>    addi    a4,a1,100
>    vsetvli a5,zero,e8,m1,ta,ma
>    addi    a1,a1,200
>    vlm.v   v24,0(a0)
>    vsm.v   v24,0(a4)
>    // Need one vsetvli and vlm.v for correctness here.
>    vsm.v   v24,0(a1)
> 
>    After the precision adjustment:
>    csrr    t0,vlenb
>    slli    t1,t0,1
>    csrr    a3,vlenb
>    sub sp,sp,t1
>    slli    a4,a3,1
>    add a4,a4,sp
>    sub a3,a4,a3
>    vsetvli a5,zero,e8,m1,ta,ma
>    addi    a2,a1,200
>    vlm.v   v24,0(a0)
>    vsm.v   v24,0(a3)
>    addi    a1,a1,100
>    vsetvli a4,zero,e8,mf2,ta,ma
>    csrr    t0,vlenb
>    vlm.v   v25,0(a3)
>    vsm.v   v25,0(a2)
>    slli    t1,t0,1
>    vsetvli a5,zero,e8,m1,ta,ma
>    vsm.v   v24,0(a1)
>    add sp,sp,t1
>    jr  ra
> 
>    However, there may be some optimization opportunities after
>    the mode precision adjustment. They can be taken care of in
>    the RISC-V backend in the underlying separate PR(s).
> 
>    PR 108185
>    PR 108654
> 
> gcc/ChangeLog:
> 
>    * config/riscv/riscv-modes.def (ADJUST_PRECISION):
>    * config/riscv/riscv.cc (riscv_v_adjust_precision):
>    * config/riscv/riscv.h (riscv_v_adjust_precision):
>    * genmodes.cc (ADJUST_PRECISION):
>    (emit_mode_adjustments):
> 
> gcc/testsuite/ChangeLog:
> 
>    * gcc.target/riscv/pr108185-1.c: New test.
>    * gcc.target/riscv/pr108185-2.c: New test.
>    * gcc.target/riscv/pr108185-3.c: New test.
>    * gcc.target/riscv/pr108185-4.c: New test.
>    * gcc.target/riscv/pr108185-5.c: New test.
>    * gcc.target/riscv/pr108185-6.c: New test.
>    * gcc.target/riscv/pr108185-7.c: New test.
>    * gcc.target/riscv/pr108185-8.c: New test.
> 
> Signed-off-by: Pan Li 
> ---
> gcc/config/riscv/riscv-modes.def    |  8 +++
> gcc/config/riscv/riscv.cc   | 12 
> gcc/config/riscv/riscv.h    |  1 +
> gcc/genmodes.cc | 25 ++-
> gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++
> gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 +
> 12 files changed, 598 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-2.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-3.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-4.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-5.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-6.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-7.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-8.c
> 
> diff --git a/gcc/config/riscv/riscv-modes.def
> b/gcc/config/riscv/riscv-modes.def
> index d5305efa8a6..110bddce851 100644
> --- a/gcc/config/riscv/riscv-modes.def
> +++ b/gcc/config/riscv/riscv-modes.def
> @@ -72,6 +72,14 @@ ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks *
> riscv_bytes_per_vector_chunk);
> ADJUST_BYTESIZE (VNx32BI, riscv_vector_chunks *
> 

[PATCH] [arm] adjust expectations for armv8_2-fp16-move-[12].c

2023-02-16 Thread Alexandre Oliva via Gcc-patches


Commit 3a7ba8fd0cda387809e4902328af2473662b6a4a, a patch for
tree-ssa-sink, enabled the removal of basic blocks in ways that
affected the generated code for both of these tests, deviating from
the expectations of the tests.

The simplest case is that of -2, in which the edge unsplitting ends up
enabling a conditional return rather than a conditional branch to a
set-and-return block.  That looks like an improvement to me, but since
the condition in which the branch or the return takes place can be
reasonably reversed (and, with the current code, it is), I've relaxed
the pattern in the test so as to accept reversed and unreversed
conditions applied to return or branch opcodes.

The situation in -1 is a little more elaborate: conditional branches
based on FP compares in test_select_[78] are initially expanded with
CCFPE compare-and-cbranch on G{T,E}, but when ce2 turns those into a
cmove, because now we have a different fallthrough block, the
condition is reversed, and that lands us with a compare-and-cmove
sequence that needs CCFP for UNL{E,T}.  The insn output reverses the
condition and swaps the cmove input operands, so the vcmp and vsel
insns come out the same except for the missing 'e' (for the compare
mode) in vcmp, so, since such reversals could have happened to any of
the tests depending on legitimate basic block layout, I've combined
the vcmp and vcmpe counts.

I see room for improving cmove sequence generation, e.g. trying direct
and reversed conditions and selecting the cheapest one (which would
require CCFP conditions to be modeled as more expensive than CCFPE),
or for some other machine-specific (peephole2?) optimization to turn
CCFP-requiring compare and cmove into CCFPE compare and swapped-inputs
cmove, but I haven't tried that.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

* gcc.target/arm/armv8_2-fp16-move-1.c: Combine vcmp and vcmpe
expected counts into a single pattern.
* gcc.target/arm/armv8_2-fp16-move-2.c: Accept conditional
return and reversed conditions.
---
 gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c |3 +--
 gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-2.c |2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c 
b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c
index 009bb8d1575a4..444c4a3353555 100644
--- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c
+++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c
@@ -196,5 +196,4 @@ test_compare_5 (__fp16 a, __fp16 b)
 /* { dg-final { scan-assembler-not {vcmp\.f16} } }  */
 /* { dg-final { scan-assembler-not {vcmpe\.f16} } }  */
 
-/* { dg-final { scan-assembler-times {vcmp\.f32} 4 } }  */
-/* { dg-final { scan-assembler-times {vcmpe\.f32} 8 } }  */
+/* { dg-final { scan-assembler-times {vcmpe?\.f32} 12 } }  */
diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-2.c 
b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-2.c
index fcb857f29ff15..dff57ac8147c2 100644
--- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-2.c
+++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-2.c
@@ -8,4 +8,4 @@ test_select (__fp16 a, __fp16 b, __fp16 c)
 {
   return (a < b) ? b : c;
 }
-/* { dg-final { scan-assembler "bmi" } } */
+/* { dg-final { scan-assembler "bx?(mi|pl)" } } */

-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] [PR51534] [arm] split out pr51534 test for softfp

2023-02-16 Thread Alexandre Oliva via Gcc-patches


The test uses arm_hard_ok and arm_softfp_ok as if they were mutually
exclusive, but they test whether the corresponding -mfloat-abi= flag
is usable, not whether it is in effect, so it is possible for both to
pass, and then the test comes out with incorrect expectations
whichever the default float-abi is.

Separate the test into hard and softfp variants, and extend the softfp
variant to accept both ARM and Thumb opcodes; it unwarrantedly assumed
the latter.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

PR target/51534
* gcc.target/arm/pr51534.c: Split softfp variant into...
* gcc.target/arm/pr51534s.c: ... this, and support ARM too.
---
 gcc/testsuite/gcc.target/arm/pr51534.c  |9 ++--
 gcc/testsuite/gcc.target/arm/pr51534s.c |   72 +++
 2 files changed, 76 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/pr51534s.c

diff --git a/gcc/testsuite/gcc.target/arm/pr51534.c 
b/gcc/testsuite/gcc.target/arm/pr51534.c
index 5e121f5fb9946..ba21259bee554 100644
--- a/gcc/testsuite/gcc.target/arm/pr51534.c
+++ b/gcc/testsuite/gcc.target/arm/pr51534.c
@@ -1,9 +1,9 @@
 /* Test the vector comparison intrinsics when comparing to immediate zero.
*/
 
-/* { dg-do assemble } */
+/* { dg-do assemble { target { arm_hard_ok } } } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O3" } */
+/* { dg-options "-save-temps -mfloat-abi=hard -O3" } */
 /* { dg-add-options arm_neon } */
 
 #include <arm_neon.h>
@@ -64,9 +64,8 @@ GEN_COND_TESTS(vceq)
 /* { dg-final { scan-assembler-times "vceq\.i8\[   \]+\[qQ\]\[0-9\]+, 
\[qQ\]\[0-9\]+, #0" 4 } } */
 /* { dg-final { scan-assembler-times "vceq\.i16\[  \]+\[qQ\]\[0-9\]+, 
\[qQ\]\[0-9\]+, #0" 4 } } */
 /* { dg-final { scan-assembler-times "vceq\.i32\[  \]+\[qQ\]\[0-9\]+, 
\[qQ\]\[0-9\]+, #0" 4 } } */
-/* { dg-final { scan-assembler-times "vmov\.i32\[  \]+\[dD\]\[0-9\]+, 
#0x" 3 { target { arm_hard_ok } } } } */
-/* { dg-final { scan-assembler-times "vmov\.i32\[  \]+\[qQ\]\[0-9\]+, 
#4294967295" 3 { target { arm_hard_ok } } } } */
-/* { dg-final { scan-assembler-times "mov\[\]+r\[0-9\]+, #-1" 6 { target { 
arm_softfp_ok } } } } */
+/* { dg-final { scan-assembler-times "vmov\.i32\[  \]+\[dD\]\[0-9\]+, 
#0x" 3 } } */
+/* { dg-final { scan-assembler-times "vmov\.i32\[  \]+\[qQ\]\[0-9\]+, 
#4294967295" 3 } } */
 
 /* And ensure we don't have unexpected output too.  */
 /* { dg-final { scan-assembler-not "vc\[gl\]\[te\]\.u\[0-9\]+\[
\]+\[qQdD\]\[0-9\]+, \[qQdD\]\[0-9\]+, #0" } } */
diff --git a/gcc/testsuite/gcc.target/arm/pr51534s.c 
b/gcc/testsuite/gcc.target/arm/pr51534s.c
new file mode 100644
index 0..b1638919c2f75
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr51534s.c
@@ -0,0 +1,72 @@
+/* Test the vector comparison intrinsics when comparing to immediate zero.
+   */
+
+/* { dg-do assemble { target { arm_softfp_ok } } } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -mfloat-abi=softfp -O3" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+
+#define GEN_TEST(T, D, C, R) \
+  R test_##C##_##T (T a) { return C (a, D (0)); }
+
+#define GEN_DOUBLE_TESTS(S, T, C) \
+  GEN_TEST (T, vdup_n_s##S, C##_s##S, u##T) \
+  GEN_TEST (u##T, vdup_n_u##S, C##_u##S, u##T) 
+
+#define GEN_QUAD_TESTS(S, T, C) \
+  GEN_TEST (T, vdupq_n_s##S, C##q_s##S, u##T) \
+  GEN_TEST (u##T, vdupq_n_u##S, C##q_u##S, u##T) 
+
+#define GEN_COND_TESTS(C) \
+  GEN_DOUBLE_TESTS (8, int8x8_t, C) \
+  GEN_DOUBLE_TESTS (16, int16x4_t, C) \
+  GEN_DOUBLE_TESTS (32, int32x2_t, C) \
+  GEN_QUAD_TESTS (8, int8x16_t, C) \
+  GEN_QUAD_TESTS (16, int16x8_t, C) \
+  GEN_QUAD_TESTS (32, int32x4_t, C)
+
+GEN_COND_TESTS(vcgt)
+GEN_COND_TESTS(vcge)
+GEN_COND_TESTS(vclt)
+GEN_COND_TESTS(vcle)
+GEN_COND_TESTS(vceq)
+
+/* Scan for expected outputs.  */
+/* { dg-final { scan-assembler "vcgt\.s8\[ \]+\[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcgt\.s16\[\]+\[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcgt\.s32\[\]+\[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcgt\.s8\[ \]+\[qQ\]\[0-9\]+, 
\[qQ\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcgt\.s16\[\]+\[qQ\]\[0-9\]+, 
\[qQ\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcgt\.s32\[\]+\[qQ\]\[0-9\]+, 
\[qQ\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcge\.s8\[ \]+\[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcge\.s16\[\]+\[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcge\.s32\[\]+\[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcge\.s8\[ \]+\[qQ\]\[0-9\]+, 
\[qQ\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler "vcge\.s16\[

[PATCH] [arm] adjust tests for quotes around +cdecp

2023-02-16 Thread Alexandre Oliva via Gcc-patches


Back when quotes were added around "+cdecp" in the "coproc must be
a constant immediate" error in arm-builtins.cc, tests for that message
lagged behind.  Fixed thusly.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

* gcc.target/arm/acle/cde-errors.c: Adjust messages for quote
around +cdecp.
* gcc.target/arm/acle/cde-mve-error-2.c: Likewise.
---
 gcc/testsuite/gcc.target/arm/acle/cde-errors.c |   52 ++---
 .../gcc.target/arm/acle/cde-mve-error-2.c  |   82 ++--
 2 files changed, 67 insertions(+), 67 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/acle/cde-errors.c 
b/gcc/testsuite/gcc.target/arm/acle/cde-errors.c
index 85a91666cd5ef..f38514848677e 100644
--- a/gcc/testsuite/gcc.target/arm/acle/cde-errors.c
+++ b/gcc/testsuite/gcc.target/arm/acle/cde-errors.c
@@ -47,19 +47,19 @@ uint64_t test_cde (uint32_t n, uint32_t m)
   accum += __arm_cx3da (7, accum, n, m,   0); /* { dg-error 
{coprocessor 7 is not enabled with \+cdecp7} } */
 
   /* `coproc` out of range.  */
-  accum += __arm_cx1   (8,0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx1a  (8, (uint32_t)accum,   0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx2   (8, n, 0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx2a  (8, (uint32_t)accum, n,0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx3   (8, n, m,  0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx3a  (8, (uint32_t)accum, n, m, 0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-
-  accum += __arm_cx1d  (8,0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx1da (8, accum, 0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx2d  (8, n, 0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx2da (8, accum, n,  0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx3d  (8, n, m,  0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
-  accum += __arm_cx3da (8, accum, n, m,   0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with \+cdecp} } */
+  accum += __arm_cx1   (8,0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx1a  (8, (uint32_t)accum,   0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx2   (8, n, 0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx2a  (8, (uint32_t)accum, n,0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx3   (8, n, m,  0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx3a  (8, (uint32_t)accum, n, m, 0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+
+  accum += __arm_cx1d  (8,0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx1da (8, accum, 0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx2d  (8, n, 0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx2da (8, accum, n,  0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx3d  (8, n, m,  0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
+  accum += __arm_cx3da (8, accum, n, m,   0); /* { dg-error {coproc 
must be a constant immediate in range \[0-7\] enabled with .\+cdecp.} } */
 
   /* `imm` out of range.  */
   accum += __arm_cx1   (0,8192); /* { dg-error 
{argument 2 to '__builtin_arm_cx1si' must be a constant immediate in range 
\[0-8191\]} } */
@@ 

RE: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment

2023-02-16 Thread Li, Pan2 via Gcc-patches
Thank you all.

Hi Richard,

Could you please help review the precision-adjustment-related change when 
you are free? I am looking forward to your opinion on this issue from an 
expert's perspective!

Pan

From: juzhe.zhong 
Sent: Thursday, February 16, 2023 11:23 PM
To: incarnation.p@outlook.com
Cc: gcc-patches@gcc.gnu.org; kito.ch...@sifive.com; rguent...@suse.de; Li, Pan2 

Subject: Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment

Thanks for the great work to fix this issue for rvv.
Hi, Richard. This is the patch to differentiate mask modes of the same byte size. 
It adjusts the precision correctly according to the rvv isa. Would you mind helping us 
with this patch? It's very important for rvv support in gcc.

Thanks.
 Replied Message 
From
incarnation.p@outlook.com
Date
02/16/2023 23:12
To
gcc-patches@gcc.gnu.org
Cc
juzhe.zh...@rivai.ai,
kito.ch...@sifive.com,
rguent...@suse.de,
pan2...@intel.com
Subject
[PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment
From: Pan Li <pan2...@intel.com>

   Fix the bug of the rvv bool mode precision with the adjustment.
   The bits size of vbool*_t will be adjusted to
   [1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
   adjusted mode precision of vbool*_t will help underlying pass to
   make the right decision for both the correctness and optimization.

   Given below sample code:
   void test_1(int8_t * restrict in, int8_t * restrict out)
   {
 vbool8_t v2 = *(vbool8_t*)in;
 vbool16_t v5 = *(vbool16_t*)in;
 *(vbool16_t*)(out + 200) = v5;
 *(vbool8_t*)(out + 100) = v2;
   }

   Before the precision adjustment:
   addi    a4,a1,100
   vsetvli a5,zero,e8,m1,ta,ma
   addi    a1,a1,200
   vlm.v   v24,0(a0)
   vsm.v   v24,0(a4)
   // Need one vsetvli and vlm.v for correctness here.
   vsm.v   v24,0(a1)

   After the precision adjustment:
   csrr    t0,vlenb
   slli    t1,t0,1
   csrr    a3,vlenb
   sub sp,sp,t1
   slli    a4,a3,1
   add a4,a4,sp
   sub a3,a4,a3
   vsetvli a5,zero,e8,m1,ta,ma
   addi    a2,a1,200
   vlm.v   v24,0(a0)
   vsm.v   v24,0(a3)
   addi    a1,a1,100
   vsetvli a4,zero,e8,mf2,ta,ma
   csrr    t0,vlenb
   vlm.v   v25,0(a3)
   vsm.v   v25,0(a2)
   slli    t1,t0,1
   vsetvli a5,zero,e8,m1,ta,ma
   vsm.v   v24,0(a1)
   add sp,sp,t1
   jr  ra

   However, there may be some optimization opportunities after
   the mode precision adjustment. They can be taken care of in
   the RISC-V backend in separate PR(s).

   PR 108185
   PR 108654

gcc/ChangeLog:

   * config/riscv/riscv-modes.def (ADJUST_PRECISION):
   * config/riscv/riscv.cc (riscv_v_adjust_precision):
   * config/riscv/riscv.h (riscv_v_adjust_precision):
   * genmodes.cc (ADJUST_PRECISION):
   (emit_mode_adjustments):

gcc/testsuite/ChangeLog:

   * gcc.target/riscv/pr108185-1.c: New test.
   * gcc.target/riscv/pr108185-2.c: New test.
   * gcc.target/riscv/pr108185-3.c: New test.
   * gcc.target/riscv/pr108185-4.c: New test.
   * gcc.target/riscv/pr108185-5.c: New test.
   * gcc.target/riscv/pr108185-6.c: New test.
   * gcc.target/riscv/pr108185-7.c: New test.
   * gcc.target/riscv/pr108185-8.c: New test.

Signed-off-by: Pan Li <pan2...@intel.com>
---
gcc/config/riscv/riscv-modes.def|  8 +++
gcc/config/riscv/riscv.cc   | 12 
gcc/config/riscv/riscv.h|  1 +
gcc/genmodes.cc | 25 ++-
gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++
gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 +
12 files changed, 598 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-7.c
create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-8.c

diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
index d5305efa8a6..110bddce851 100644
--- a/gcc/config/riscv/riscv-modes.def
+++ b/gcc/config/riscv/riscv-modes.def
@@ -72,6 +72,14 @@ ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * 

[PATCH] -Wdangling-pointer: don't mark SSA lhs sets as stores

2023-02-16 Thread Alexandre Oliva via Gcc-patches


check_dangling_stores has some weirdnesses that cause its behavior to
change when the target ABI requires C++ ctors to return this: while
scanning stmts backwards in e.g. the AS ctor on a target that returns
this in ctors, the scan first encounters a copy of this to the SSA
name used to hold the return value.  m_ptr_query.get_ref resolves lhs
(the return SSA name) to the rhs (the default SSA name for this), does
not skip it because auto_var_p is false for SSA_NAMEs, and proceeds to
add it to stores, which seems to prevent later attempts to add stores
into *this from succeeding, which disables warnings that should have
triggered.

This is also the case when the backwards search finds unrelated stores
to other fields of *this before it reaches stores that IMHO should be
warned about.  The store found first disables checking of other
stores, as if the store appearing later in the code would necessarily
overwrite the store that should be warned about.  I've added an
xfailed variant of the existing test (struct An) that triggers this
problem, but I'm not sure how to go about fixing it.

Meanwhile, this patch prevents assignments with SSA_NAMEs in the lhs
from being regarded as stores, which is enough to remove the
undesirable side effect on -Wdangling-pointer of ABI-mandated ctors'
returning this.  I've also added another variant of the existing test
(struct Al) that demonstrates the problem regardless of this aspect of
the ABI, and that gets the desired warning with the proposed patch,
but not without.

Curiously, this fix exposes yet another problem in
Wdangling-pointer-5.c: it is the return stmt of the unrelated pointer
p, not the store into possibly-overlapping *vpp2, that caused the
warning to not be issued for the store in *vpp1.  I'm not sure whether
we should or should not warn in that case, but this patch adjusts the
test to reflect the behavior change.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/ChangeLog

* gimple-ssa-warn-access.cc
(pass_waccess::check_dangling_stores): Skip non-stores.

for  gcc/testsuite/ChangeLog

* g++.dg/warn/Wdangling-pointer.C (warn_init_ref_member): Add
two new variants, one fixed, one xfailed.
* c-c++-common/Wdangling-pointer-5.c
(nowarn_store_arg_store_arg): Add now-expected warnings.
---
 gcc/gimple-ssa-warn-access.cc|3 ++
 gcc/testsuite/c-c++-common/Wdangling-pointer-5.c |4 ++-
 gcc/testsuite/g++.dg/warn/Wdangling-pointer.C|   29 +-
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc
index 2eab1d59abd05..c0efb3fdb4e52 100644
--- a/gcc/gimple-ssa-warn-access.cc
+++ b/gcc/gimple-ssa-warn-access.cc
@@ -4511,7 +4511,8 @@ pass_waccess::check_dangling_stores (basic_block bb,
   use the escaped locals.  */
return;
 
-  if (!is_gimple_assign (stmt) || gimple_clobber_p (stmt))
+  if (!is_gimple_assign (stmt) || gimple_clobber_p (stmt)
+ || !gimple_store_p (stmt))
continue;
 
   access_ref lhs_ref;
diff --git a/gcc/testsuite/c-c++-common/Wdangling-pointer-5.c 
b/gcc/testsuite/c-c++-common/Wdangling-pointer-5.c
index 2a165cea76768..cb6da9e86394d 100644
--- a/gcc/testsuite/c-c++-common/Wdangling-pointer-5.c
+++ b/gcc/testsuite/c-c++-common/Wdangling-pointer-5.c
@@ -75,9 +75,9 @@ void nowarn_store_arg_store (void **vpp)
 
 void* nowarn_store_arg_store_arg (void **vpp1, void **vpp2)
 {
-  int x;
+  int x;  // { dg-message "'x' declared here" }
   void **p = (void**)sink (0);
-  *vpp1 = &x;  // warn here?
+  *vpp1 = &x;  // { dg-warning "storing the address of local variable 
'x' in '\\*vpp1'" }
   *vpp2 = 0;  // might overwrite *vpp1
   return p;
 }
diff --git a/gcc/testsuite/g++.dg/warn/Wdangling-pointer.C 
b/gcc/testsuite/g++.dg/warn/Wdangling-pointer.C
index 22c559e4adafe..a94477a647666 100644
--- a/gcc/testsuite/g++.dg/warn/Wdangling-pointer.C
+++ b/gcc/testsuite/g++.dg/warn/Wdangling-pointer.C
@@ -35,7 +35,34 @@ void warn_init_ref_member ()
 { }
   } ai;
 
-  sink (&as, &ai);
+  struct Al
+  {
+const S &sref;
+Al ():
+  // The temporary S object is destroyed when Al::Al() returns.
+  sref (S ())  // { dg-warning "storing the address" }
+{
+  // Copying this to an SSA_NAME used to disable the warning:
+  Al *ptr = this;
+  asm ("" : "+r" (ptr));
+}
+  } al;
+
+  struct An
+  {
+An *next;
+const S &sref;
+An ():
+  next (0),
+  // The temporary S object is destroyed when An::An() returns.
+  sref (S ())  // { dg-warning "storing the address" "" { xfail *-*-* } }
+{
+  // ??? Writing to another part of *this disables the warning:
+  next = 0;
+}
+  } an;
+
+  sink (&as, &ai, &al, &an);
 }
 
 

-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain 

[PATCH] [arm] disable aes-1742098 mitigation for a72 combine tests

2023-02-16 Thread Alexandre Oliva via Gcc-patches


The expected asm output for aes-fuse-[12].c does not correspond to
that which is generated when -mfix-cortex-a57-aes-1742098 is enabled.
It was introduced after the test, and enabled by default for the
selected processor.  Disabling the option restores the circumstance
that was tested for.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

* gcc.target/arm/aes-fuse-1.c: Add
-mno-fix-cortex-a57-aes-1742098.
* gcc.target/arm/aes-fuse-2.c: Likewise.
---
 gcc/testsuite/gcc.target/arm/aes-fuse-1.c |4 
 gcc/testsuite/gcc.target/arm/aes-fuse-2.c |4 
 2 files changed, 8 insertions(+)

diff --git a/gcc/testsuite/gcc.target/arm/aes-fuse-1.c 
b/gcc/testsuite/gcc.target/arm/aes-fuse-1.c
index 27b08aeef7ba7..6ffb4991cca69 100644
--- a/gcc/testsuite/gcc.target/arm/aes-fuse-1.c
+++ b/gcc/testsuite/gcc.target/arm/aes-fuse-1.c
@@ -2,6 +2,10 @@
 /* { dg-require-effective-target arm_crypto_ok } */
 /* { dg-add-options arm_crypto } */
 /* { dg-additional-options "-mcpu=cortex-a72 -O3 -dp" } */
+/* The mitigation applies to a72 by default, and protects the CRYPTO_AES
+   inputs, such as the explicit xor ops, from being combined like test used to
+   expect.  */
+/* { dg-additional-options "-mno-fix-cortex-a57-aes-1742098" } */
 
 #include 
 
diff --git a/gcc/testsuite/gcc.target/arm/aes-fuse-2.c 
b/gcc/testsuite/gcc.target/arm/aes-fuse-2.c
index 1266a28753169..b72479c0e5726 100644
--- a/gcc/testsuite/gcc.target/arm/aes-fuse-2.c
+++ b/gcc/testsuite/gcc.target/arm/aes-fuse-2.c
@@ -2,6 +2,10 @@
 /* { dg-require-effective-target arm_crypto_ok } */
 /* { dg-add-options arm_crypto } */
 /* { dg-additional-options "-mcpu=cortex-a72 -O3 -dp" } */
+/* The mitigation applies to a72 by default, and protects the CRYPTO_AES
+   inputs, such as the explicit xor ops, from being combined like test used to
+   expect.  */
+/* { dg-additional-options "-mno-fix-cortex-a57-aes-1742098" } */
 
 #include 
 

-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] [vxworks] make wint_t and wchar_t the same distinct type

2023-02-16 Thread Alexandre Oliva via Gcc-patches


We used to define WINT_TYPE to WCHAR_TYPE, so that both wint_t and
wchar_t mapped to the same underlying type, but this caused a glitch
in Wstringop-overflow-6.C: on vxworks, wint_t is typedef'ed to
wchar_t, headers got included in the test that declared functions that
take wint_t parameters, and those conflicted with the builtin
declarations that had wint_t mapped to the underlying integral type.

The problem is that, in C++, wchar_t is a distinct type.  Having
wint_t be a typedef to wchar_t in the headers, but a typedef to
wchar_t's underlying integral type in builtins, makes for mismatches
between the declarations.

This patch defines WINT_TYPE to "wchar_t" for vxworks, and adjusts the
fallout, namely:

- since wchar_t may not have been defined yet when
  c_common_nodes_and_builtins runs, use the node already reserved for
  wchar_t for wint_t when WINT_TYPE is defined to wchar_t.

- for the same reason, when WINT_TYPE is wchar_t and we're not
  compiling C++ where wchar_t is a compiler built-in, define
  __WINT_TYPE__ to WCHAR_TYPE rather than WINT_TYPE, because wchar_t
  may not even be defined in the translation unit.

- recognize and handle wchar_type_node when type_suffix is called for
  wint_type_node.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/ChangeLog

* config/vx-common.h (WINT_TYPE): Alias to "wchar_t".

for  gcc/c-family/ChangeLog

* c-common.cc (c_common_nodes_and_builtins): Take
wchar_type_node for wint_type_node when aliased.
(c_stddef_cpp_builtins): Define __WINT_TYPE__, when aliased to
wchar_t, to the underlying type rather than wchar_t in
non-C++.
* c-cppbuiltin.cc (type_suffix): Handle wchar_type_node.
---
 gcc/c-family/c-common.cc |   16 +---
 gcc/c-family/c-cppbuiltin.cc |2 ++
 gcc/config/vx-common.h   |2 +-
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index ae92cd5adaf5e..a92597c2f544f 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -4576,8 +4576,11 @@ c_common_nodes_and_builtins (void)
   char32_array_type_node
 = build_array_type (char32_type_node, array_domain_type);
 
-  wint_type_node =
-TREE_TYPE (identifier_global_value (get_identifier (WINT_TYPE)));
+  if (strcmp (WINT_TYPE, "wchar_t") == 0)
+wint_type_node = wchar_type_node;
+  else
+wint_type_node =
+  TREE_TYPE (identifier_global_value (get_identifier (WINT_TYPE)));
 
   intmax_type_node =
 TREE_TYPE (identifier_global_value (get_identifier (INTMAX_TYPE)));
@@ -5359,7 +5362,14 @@ c_stddef_cpp_builtins(void)
   builtin_define_with_value ("__SIZE_TYPE__", SIZE_TYPE, 0);
   builtin_define_with_value ("__PTRDIFF_TYPE__", PTRDIFF_TYPE, 0);
   builtin_define_with_value ("__WCHAR_TYPE__", MODIFIED_WCHAR_TYPE, 0);
-  builtin_define_with_value ("__WINT_TYPE__", WINT_TYPE, 0);
+  /* C++ has wchar_t as a builtin type, C doesn't, so if WINT_TYPE
+ maps to wchar_t, define it to the underlying WCHAR_TYPE in C, and
+ to wchar_t in C++, so the desired type equivalence holds.  */
+  if (!c_dialect_cxx ()
+  && strcmp (WINT_TYPE, "wchar_t") == 0)
+builtin_define_with_value ("__WINT_TYPE__", WCHAR_TYPE, 0);
+  else
+builtin_define_with_value ("__WINT_TYPE__", WINT_TYPE, 0);
   builtin_define_with_value ("__INTMAX_TYPE__", INTMAX_TYPE, 0);
   builtin_define_with_value ("__UINTMAX_TYPE__", UINTMAX_TYPE, 0);
   if (flag_char8_t)
diff --git a/gcc/c-family/c-cppbuiltin.cc b/gcc/c-family/c-cppbuiltin.cc
index b333f97fd3237..98f5aef2af95d 100644
--- a/gcc/c-family/c-cppbuiltin.cc
+++ b/gcc/c-family/c-cppbuiltin.cc
@@ -1903,6 +1903,8 @@ type_suffix (tree type)
  systems use it anyway.  */
   || type == char_type_node)
 is_long = 0;
+  else if (type == wchar_type_node)
+return type_suffix (underlying_wchar_type_node);
   else
 gcc_unreachable ();
 
diff --git a/gcc/config/vx-common.h b/gcc/config/vx-common.h
index 83580d0dec288..9733c90fe4c6f 100644
--- a/gcc/config/vx-common.h
+++ b/gcc/config/vx-common.h
@@ -69,7 +69,7 @@ along with GCC; see the file COPYING3.  If not see
 #undef WINT_TYPE_SIZE
 #define WINT_TYPE_SIZE WCHAR_TYPE_SIZE
 #undef WINT_TYPE
-#define WINT_TYPE WCHAR_TYPE
+#define WINT_TYPE "wchar_t"
 
 /* -- Debug and unwind info formats --  */
 

-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] Accept pmf-vbit-in-delta extra warning

2023-02-16 Thread Alexandre Oliva via Gcc-patches


cp_build_binary_op, which issues -Waddress warnings, issues an extra
warning on arm targets that g++.dg/warn/Waddress-5.C does not expect
when comparing a pointer-to-member-function literal with null.

The reason for the extra warning is that, on arm targets,
TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_delta, which
causes a different path to be taken, that extracts the
pointer-to-function and the delta fields (minus the vbit) and compares
each one with zero.  It's when comparing this pointer-to-function with
zero, in a recursive cp_build_binary_op, that another warning is
issued.

I suppose there should be a way to skip the warning in this recursive
call, without disabling other warnings that might be issued there, but
this patch only arranges for the test to tolerate the extra warning.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

* g++.dg/warn/Waddress-5.C: Tolerate extra -Waddress warning.
---
 gcc/testsuite/g++.dg/warn/Waddress-5.C |6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/warn/Waddress-5.C 
b/gcc/testsuite/g++.dg/warn/Waddress-5.C
index b1287b2fac316..1de88076f7767 100644
--- a/gcc/testsuite/g++.dg/warn/Waddress-5.C
+++ b/gcc/testsuite/g++.dg/warn/Waddress-5.C
@@ -23,7 +23,11 @@ void T (bool);
 void warn_memptr_if ()
 {
   // Exercise warnings for addresses of nonstatic member functions.
-  if (&A::f == 0) // { dg-warning "the address '&A::f'" }
+  // On targets with TARGET_PTRMEMFUNC_VBIT_LOCATION ==
+  // ptrmemfunc_vbit_in_delta, cp_build_binary_op recurses to compare
+  // the pfn from the ptrmemfunc with null, so we get two warnings.
+  // This matches both.  ??? Should we disable one of them?
+  if (&A::f == 0) // { dg-warning "A::f" }
 T (0);
 
   if (&A::vf) // { dg-warning "-Waddress" }

-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] Drop need for constant I in ctf test

2023-02-16 Thread Alexandre Oliva via Gcc-patches


Though I is supposed to be a constant expression, this is not the case
on vxworks, but this is not what this debug information format test is
testing for, so use real constants to initialize complex variables.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

* gcc.dg/debug/ctf/ctf-complex-1.c: Do not test whether I is
usable in initializers.
---
 gcc/testsuite/gcc.dg/debug/ctf/ctf-complex-1.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/debug/ctf/ctf-complex-1.c 
b/gcc/testsuite/gcc.dg/debug/ctf/ctf-complex-1.c
index a36dd9b6b90a9..e6c3199f913d7 100644
--- a/gcc/testsuite/gcc.dg/debug/ctf/ctf-complex-1.c
+++ b/gcc/testsuite/gcc.dg/debug/ctf/ctf-complex-1.c
@@ -14,8 +14,8 @@
 
 #include <complex.h>
 
-double complex z1 = I * I;
+double complex z1 = -1;
 
-const long double complex z2 = I * I;
+const long double complex z2 = -1;
 
-float complex z4 = 1+2.11*I;
+float complex z4 = 1;

-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] Skip module_cmi_p and related unsupported module test

2023-02-16 Thread Alexandre Oliva via Gcc-patches


When a multi-source module is found to be unsupported, we fail
module_cmi_p and subsequent sources.  Override proc unsupported to
mark the result in module_do, and test it to skip module_cmi_p and
subsequent related tests.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

* g++.dg/modules/modules.exp: Override unsupported to update
module_do, and test it after dg-test.
---
 gcc/testsuite/g++.dg/modules/modules.exp |   14 ++
 1 file changed, 14 insertions(+)

diff --git a/gcc/testsuite/g++.dg/modules/modules.exp 
b/gcc/testsuite/g++.dg/modules/modules.exp
index 61994b059457b..ba1287427bf05 100644
--- a/gcc/testsuite/g++.dg/modules/modules.exp
+++ b/gcc/testsuite/g++.dg/modules/modules.exp
@@ -315,6 +315,14 @@ proc module-check-requirements { tests } {
 # cleanup any detritus from previous run
 cleanup_module_files [find $DEFAULT_REPO *.gcm]
 
+set module_do {"compile" "P"}
+rename unsupported saved-unsupported
+proc unsupported { args } {
+global module_do
+lset module_do 1 "N"
+return [saved-unsupported $args]
+}
+
 # not grouped tests, sadly tcl doesn't have negated glob
 foreach test [prune [lsort [find $srcdir/$subdir {*.[CH]}]] \
  "$srcdir/$subdir/*_?.\[CH\]"] {
@@ -327,6 +335,9 @@ foreach test [prune [lsort [find $srcdir/$subdir {*.[CH]}]] 
\
set module_cmis {}
verbose "Testing $nshort $std" 1
dg-test $test "$std" $DEFAULT_MODFLAGS
+   if { [lindex $module_do 1] == "N" } {
+   continue
+   }
set testcase [string range $test [string length "$srcdir/"] end]
cleanup_module_files [module_cmi_p $testcase $module_cmis]
}
@@ -372,6 +383,9 @@ foreach src [lsort [find $srcdir/$subdir {*_a.[CHX}]] {
}
}
dg-test -keep-output $test "$std" $DEFAULT_MODFLAGS
+   if { [lindex $module_do 1] == "N" } {
+   break
+   }
set testcase [string range $test [string length "$srcdir/"] 
end]
lappend mod_files [module_cmi_p $testcase $module_cmis]
}

-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PR100127] Test for coroutine header in clang-compatible tests

2023-02-16 Thread Alexandre Oliva via Gcc-patches


The test is compatible with clang as well as gcc, but ISTM that
testing for the __clang__ macro is potentially error-prone, just as
macros that used to be GCC-specific are now defined in compilers that
aim for GCC compatibility.  Use a __has_include feature test instead.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

PR c++/100127
* g++.dg/coroutines/pr100127.C: Test for header rather than
compiler macro.
---
 gcc/testsuite/g++.dg/coroutines/pr100127.C   |2 +-
 gcc/testsuite/g++.dg/coroutines/pr100772-a.C |2 +-
 gcc/testsuite/g++.dg/coroutines/pr100772-b.C |2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/g++.dg/coroutines/pr100127.C 
b/gcc/testsuite/g++.dg/coroutines/pr100127.C
index 374cd710077af..1eaa72ff0acdd 100644
--- a/gcc/testsuite/g++.dg/coroutines/pr100127.C
+++ b/gcc/testsuite/g++.dg/coroutines/pr100127.C
@@ -1,4 +1,4 @@
-#ifdef __clang__
+#if __has_include(<experimental/coroutine>) // for __clang__
 #include <experimental/coroutine>
 namespace std {
   using namespace std::experimental;
diff --git a/gcc/testsuite/g++.dg/coroutines/pr100772-a.C 
b/gcc/testsuite/g++.dg/coroutines/pr100772-a.C
index a325d384fc390..724c377c82e5b 100644
--- a/gcc/testsuite/g++.dg/coroutines/pr100772-a.C
+++ b/gcc/testsuite/g++.dg/coroutines/pr100772-a.C
@@ -1,5 +1,5 @@
 //  { dg-additional-options "-fsyntax-only " }
-#ifdef __clang__
+#if __has_include(<experimental/coroutine>) // for __clang__
 #include <experimental/coroutine>
 namespace std {
   using namespace std::experimental;
diff --git a/gcc/testsuite/g++.dg/coroutines/pr100772-b.C 
b/gcc/testsuite/g++.dg/coroutines/pr100772-b.C
index 6cdf8d1e529e5..4cf31e5f9e0c2 100644
--- a/gcc/testsuite/g++.dg/coroutines/pr100772-b.C
+++ b/gcc/testsuite/g++.dg/coroutines/pr100772-b.C
@@ -1,4 +1,4 @@
-#ifdef __clang__
+#if __has_include(<experimental/coroutine>) // for __clang__
 #include <experimental/coroutine>
 namespace std {
   using namespace std::experimental;

-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[libstdc++] Use __gthread_join in jthread/95989

2023-02-16 Thread Alexandre Oliva via Gcc-patches


Ref: https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570617.html

Bernd Edlinger reported that the 95989.cc
test fails without pthread_join at the end of main, but pthread_join
is no good for a test that doesn't require pthreads.

This patch adds a __gthread_join call instead.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  libstdc++-v3/ChangeLog

* testsuite/30_threads/jthread/95989.cc (main): Call
__gthread_join at the end.
---
 libstdc++-v3/testsuite/30_threads/jthread/95989.cc |1 +
 1 file changed, 1 insertion(+)

diff --git a/libstdc++-v3/testsuite/30_threads/jthread/95989.cc 
b/libstdc++-v3/testsuite/30_threads/jthread/95989.cc
index e98836d094531..407b52748438c 100644
--- a/libstdc++-v3/testsuite/30_threads/jthread/95989.cc
+++ b/libstdc++-v3/testsuite/30_threads/jthread/95989.cc
@@ -52,4 +52,5 @@ main()
   test01();
   test02();
   test03();
+  __gthread_join(0, NULL);
 }

-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] [arm] xfail fp-uint64-convert-double-* on all arm targets

2023-02-16 Thread Alexandre Oliva via Gcc-patches


It wasn't long ago that I xfailed these tests on arm-*-eabi, but the
fail is expected on all other arm targets: even when hard float is
available, conversions between 64-bit integers and double are always
emulated on ARM, and the emulation disregards rounding modes.  So,
bump the xfail to all of arm-*-*.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  gcc/testsuite/ChangeLog

* gcc.dg/torture/fp-uint64-convert-double-1.c: XFAIL on all of
arm-*-*.
* gcc.dg/torture/fp-uint64-convert-double-2.c: Likewise.
---
 .../gcc.dg/torture/fp-uint64-convert-double-1.c|2 +-
 .../gcc.dg/torture/fp-uint64-convert-double-2.c|2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/fp-uint64-convert-double-1.c 
b/gcc/testsuite/gcc.dg/torture/fp-uint64-convert-double-1.c
index 61cfa96374631..8f437e2efb1db 100644
--- a/gcc/testsuite/gcc.dg/torture/fp-uint64-convert-double-1.c
+++ b/gcc/testsuite/gcc.dg/torture/fp-uint64-convert-double-1.c
@@ -1,5 +1,5 @@
 /* PR84407 */
-/* { dg-do run { xfail { arm-*-eabi* } } } */
+/* { dg-do run { xfail { arm-*-* } } } */
 /* { dg-require-effective-target fenv } */
 /* { dg-require-effective-target hard_float } */
 /* { dg-additional-options "-frounding-math -fexcess-precision=standard" } */
diff --git a/gcc/testsuite/gcc.dg/torture/fp-uint64-convert-double-2.c 
b/gcc/testsuite/gcc.dg/torture/fp-uint64-convert-double-2.c
index b32b28a329580..fd3f4cbfb830f 100644
--- a/gcc/testsuite/gcc.dg/torture/fp-uint64-convert-double-2.c
+++ b/gcc/testsuite/gcc.dg/torture/fp-uint64-convert-double-2.c
@@ -1,5 +1,5 @@
 /* PR84407 */
-/* { dg-do run { xfail { arm-*-eabi* } } } */
+/* { dg-do run { xfail { arm-*-* } } } */
 /* { dg-require-effective-target fenv } */
 /* { dg-require-effective-target hard_float } */
 /* { dg-additional-options "-frounding-math" } */


-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[libstdc++] [testsuite] intro/names.cc: undef func on vxw7krn

2023-02-16 Thread Alexandre Oliva via Gcc-patches


The '#define func' added in 2021, to test that system headers don't
violate the user namespace, exposes such a bug in the vxworks sysLib.h
header, so add yet another such annotated workaround.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?

for  libstdc++-v3/ChangeLog

* testsuite/17_intro/names.cc: Undef func on vxworks >= 7 in
kernel mode.
---
 libstdc++-v3/testsuite/17_intro/names.cc |2 ++
 1 file changed, 2 insertions(+)

diff --git a/libstdc++-v3/testsuite/17_intro/names.cc 
b/libstdc++-v3/testsuite/17_intro/names.cc
index d3e0db9bab6b9..c2d67ebe01276 100644
--- a/libstdc++-v3/testsuite/17_intro/names.cc
+++ b/libstdc++-v3/testsuite/17_intro/names.cc
@@ -329,6 +329,8 @@
 #undef d
 #undef e
 #undef f
+// in sysLib.h, func appears as a formal parameter name
+#undef func
 #endif // __RTP__
 
 #endif // VxWorks Major >= 7

-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[arm] [testsuite] asm-flag-4.c: match quotes in expected message

2023-02-16 Thread Alexandre Oliva via Gcc-patches


Quotes were added around the "asm" keyword in the message expected by
the test, so the test needs adjusting.

Regstrapped on x86_64-linux-gnu.
Tested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).
Ok to install?


for  gcc/testsuite/ChangeLog

* gcc.target/arm/asm-flag-4.c: Match quotes around "asm" in
message.
---
 gcc/testsuite/gcc.target/arm/asm-flag-4.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/asm-flag-4.c 
b/gcc/testsuite/gcc.target/arm/asm-flag-4.c
index 75378cc89b092..6841b6ea1e272 100644
--- a/gcc/testsuite/gcc.target/arm/asm-flag-4.c
+++ b/gcc/testsuite/gcc.target/arm/asm-flag-4.c
@@ -11,5 +11,5 @@ void __attribute__((target("arm"))) f(char *out)
 
 void __attribute__((target("thumb"))) g(char *out)
 {
-  asm("" : "=@ccne"(out[0]));  /* { dg-message "asm flags not supported" } */
+  asm("" : "=@ccne"(out[0]));  /* { dg-message ".asm. flags not supported" } */
 }


-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


Re: C++ modules and AAPCS/ARM EABI clash on inline key methods

2023-02-16 Thread Alexandre Oliva via Gcc-patches
On Apr  5, 2022, Alexandre Oliva wrote:

> Would something like this be acceptable/desirable?  It's overreaching,
> in that not all arm platforms are expected to fail, but the result on
> them will be an unexpected pass, which is not quite as bad as the
> unexpected fail we get on most arm variants now.

Ping?
https://gcc.gnu.org/pipermail/gcc-patches/2022-April/592763.html

[PR105224] C++ modules and AAPCS/ARM EABI clash on inline key methods

g++.dg/modules/virt-2_a.C fails on arm-eabi and many other arm targets
that use the AAPCS variant.  ARM is the only target that overrides
TARGET_CXX_KEY_METHOD_MAY_BE_INLINE.  It's not clear to me which way
the clash between AAPCS and C++ Modules design should be resolved, but
currently it favors AAPCS and thus the test fails.

Skipping the test or conditionally dropping the inline keyword breaks
subsequent tests, so I'm XFAILing the expectation that vtable and rtti
symbols are output on arm*-*-*.

Retested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?


for  gcc/testsuite/ChangeLog

PR c++/105224
* g++.dg/modules/virt-2_a.C: XFAIL syms on arm*-*-*.
---
 gcc/testsuite/g++.dg/modules/virt-2_a.C |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/g++.dg/modules/virt-2_a.C 
b/gcc/testsuite/g++.dg/modules/virt-2_a.C
index 580552be5a0d8..b265515e2c7fd 100644
--- a/gcc/testsuite/g++.dg/modules/virt-2_a.C
+++ b/gcc/testsuite/g++.dg/modules/virt-2_a.C
@@ -22,6 +22,6 @@ export int Visit (Visitor *v)
 }
 
 // Emit here
-// { dg-final { scan-assembler {_ZTVW3foo7Visitor:} } }
-// { dg-final { scan-assembler {_ZTIW3foo7Visitor:} } }
-// { dg-final { scan-assembler {_ZTSW3foo7Visitor:} } }
+// { dg-final { scan-assembler {_ZTVW3foo7Visitor:} { xfail arm*-*-* } } }
+// { dg-final { scan-assembler {_ZTIW3foo7Visitor:} { xfail arm*-*-* } } }
+// { dg-final { scan-assembler {_ZTSW3foo7Visitor:} { xfail arm*-*-* } } }


-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


Re: [PING 2] [PATCH] swap: Fix incorrect lane extraction by vec_extract() [PR106770]

2023-02-16 Thread Surya Kumari Jangala via Gcc-patches
Ping. Please review the patch.

On 12/01/23 10:21 pm, Surya Kumari Jangala via Gcc-patches wrote:
> Ping
> 
> On 04/01/23 1:58 pm, Surya Kumari Jangala via Gcc-patches wrote:
>> swap: Fix incorrect lane extraction by vec_extract() [PR106770]
>>
>> In the routine rs6000_analyze_swaps(), special handling of swappable
>> instructions is done even if the webs that contain the swappable
>> instructions are not optimized, i.e., the webs do not contain any
>> permuting load/store instructions along with the associated register
>> swap instructions. Doing special handling in such webs will result in
>> the extracted lane being adjusted unnecessarily for vec_extract.
>>
>> Modifying swappable instructions is also incorrect in webs where
>> loads/stores on quad word aligned addresses are changed to lvx/stvx.
>> Similarly, in webs where swap(load(vector constant)) instructions are
>> replaced with load(swapped vector constant), the swappable
>> instructions should not be modified.
>>
>> 2023-01-04  Surya Kumari Jangala  
>>
>> gcc/
>>  PR rtl-optimization/106770
>>  * rs6000-p8swap.cc (rs6000_analyze_swaps): .
>>
>> gcc/testsuite/
>>  PR rtl-optimization/106770
>>  * gcc.target/powerpc/pr106770.c: New test.
>> ---
>>
>> diff --git a/gcc/config/rs6000/rs6000-p8swap.cc 
>> b/gcc/config/rs6000/rs6000-p8swap.cc
>> index 19fbbfb67dc..7ed39251df9 100644
>> --- a/gcc/config/rs6000/rs6000-p8swap.cc
>> +++ b/gcc/config/rs6000/rs6000-p8swap.cc
>> @@ -179,6 +179,9 @@ class swap_web_entry : public web_entry_base
>>unsigned int special_handling : 4;
>>/* Set if the web represented by this entry cannot be optimized.  */
>>unsigned int web_not_optimizable : 1;
>> +  /* Set if the web represented by this entry has been optimized, ie,
>> + register swaps of permuting loads/stores have been removed.  */
>> +  unsigned int web_is_optimized : 1;
>>/* Set if this insn should be deleted.  */
>>unsigned int will_delete : 1;
>>  };
>> @@ -2627,22 +2630,43 @@ rs6000_analyze_swaps (function *fun)
>>/* For each load and store in an optimizable web (which implies
>>   the loads and stores are permuting), find the associated
>>   register swaps and mark them for removal.  Due to various
>> - optimizations we may mark the same swap more than once.  Also
>> - perform special handling for swappable insns that require it.  */
>> + optimizations we may mark the same swap more than once. Fix up
>> + the non-permuting loads and stores by converting them into
>> + permuting ones.  */
>>for (i = 0; i < e; ++i)
>>  if ((insn_entry[i].is_load || insn_entry[i].is_store)
>>  && insn_entry[i].is_swap)
>>{
>>  swap_web_entry* root_entry
>>= (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
>> -if (!root_entry->web_not_optimizable)
>> +if (!root_entry->web_not_optimizable) {
>>mark_swaps_for_removal (insn_entry, i);
>> +  root_entry->web_is_optimized = true;
>> +}
>>}
>> -else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
>> +else if (insn_entry[i].is_swappable
>> + && (insn_entry[i].special_handling == SH_NOSWAP_LD ||
>> + insn_entry[i].special_handling == SH_NOSWAP_ST))
>> +  {
>> +swap_web_entry* root_entry
>> +  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
>> +if (!root_entry->web_not_optimizable) {
>> +  handle_special_swappables (insn_entry, i);
>> +  root_entry->web_is_optimized = true;
>> +}
>> +  }
>> +
>> +  /* Perform special handling for swappable insns that require it. 
>> + Note that special handling should be done only for those 
>> + swappable insns that are present in webs optimized above.  */
>> +  for (i = 0; i < e; ++i)
>> +if (insn_entry[i].is_swappable && insn_entry[i].special_handling &&
>> +!(insn_entry[i].special_handling == SH_NOSWAP_LD || 
>> +  insn_entry[i].special_handling == SH_NOSWAP_ST))
>>{
>>  swap_web_entry* root_entry
>>= (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
>> -if (!root_entry->web_not_optimizable)
>> +if (root_entry->web_is_optimized)
>>handle_special_swappables (insn_entry, i);
>>}
>>  
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106770.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr106770.c
>> new file mode 100644
>> index 000..84e9aead975
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr106770.c
>> @@ -0,0 +1,20 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target powerpc_p8vector_ok } */
>> +/* { dg-options "-mdejagnu-cpu=power8 -O3 " } */
>> +/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */
>> +
>> +/* Test case to resolve PR106770  */
>> +
>> +#include <altivec.h>
>> +
>> +int cmp2(double a, double b)
>> +{
>> +vector double va = vec_promote(a, 1);
>> +vector double vb = vec_promote(b, 1);
>> +vector long long vlt = (vector 

Re: [PATCH v6] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-02-16 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/02/16 7:18, Max Filippov wrote:
> Hi Suwa-san,

Hi!

> 
> On Thu, Jan 26, 2023 at 7:17 PM Takayuki 'January June' Suwa
>  wrote:
>>
>> In the case of the CALL0 ABI, values that must be retained before and
>> after function calls are placed in the callee-saved registers (A12
>> through A15) and referenced later.  However, it is often the case that
>> the save and the reference are each only once and a simple register-
>> register move (with two exceptions; i. the register saved to/restored
>> from is the stack pointer, ii. the function needs an additional stack
>> pointer adjustment to grow the stack).
>>
>> e.g. in the following example, if there are no other occurrences of
>> register A14:
>>
>> ;; before
>> ; prologue {
>>   ...
>> s32i.n  a14, sp, 16
>>   ...   ;; no frame pointer needed
>> ;; no additional stack growth
>> ; } prologue
>>   ...
>> mov.n   a14, a6 ;; A6 is not SP
>>   ...
>> call0   foo
>>   ...
>> mov.n   a8, a14 ;; A8 is not SP
>>   ...
>> ; epilogue {
>>   ...
>> l32i.n  a14, sp, 16
>>   ...
>> ; } epilogue
>>
>> It can be possible like this:
>>
>> ;; after
>> ; prologue {
>>   ...
>> (no save needed)
>>   ...
>> ; } prologue
>>   ...
>> s32i.n  a6, sp, 16  ;; replaced with A14's slot
>>   ...
>> call0   foo
>>   ...
>> l32i.n  a8, sp, 16  ;; through SP
>>   ...
>> ; epilogue {
>>   ...
>> (no restoration needed)
>>   ...
>> ; } epilogue
>>
>> This patch adds the abovementioned logic to the function prologue/epilogue
>> RTL expander code.
>>
>> gcc/ChangeLog:
>>
>> * config/xtensa/xtensa.cc (machine_function): Add new member
>> 'eliminated_callee_saved_bmp'.
>> (xtensa_can_eliminate_callee_saved_reg_p): New function to
>> determine whether the register can be eliminated or not.
>> (xtensa_expand_prologue): Add invoking the above function and
>> elimination the use of callee-saved register by using its stack
>> slot through the stack pointer (or the frame pointer if needed)
>> directly.
>> (xtensa_expand_prologue): Modify to not emit register restoration
>> insn from its stack slot if the register is already eliminated.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/xtensa/elim_callee_saved.c: New.
>> ---
>>  gcc/config/xtensa/xtensa.cc   | 132 ++
>>  .../gcc.target/xtensa/elim_callee_saved.c |  38 +
>>  2 files changed, 145 insertions(+), 25 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c
> 
> This version passes regression tests, but I still have a couple questions.
> 
>> diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
>> index 3e2e22d4cbe..ff59c933d4d 100644
>> --- a/gcc/config/xtensa/xtensa.cc
>> +++ b/gcc/config/xtensa/xtensa.cc
>> @@ -105,6 +105,7 @@ struct GTY(()) machine_function
>>bool epilogue_done;
>>bool inhibit_logues_a1_adjusts;
>>rtx last_logues_a9_content;
>> +  HOST_WIDE_INT eliminated_callee_saved_bmp;
>>  };
>>
>>  static void xtensa_option_override (void);
>> @@ -3343,6 +3344,66 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, 
>> int flags)
>>  cfun->machine->last_logues_a9_content = GEN_INT (offset);
>>  }
>>
>> +static bool
>> +xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno,
>> +rtx_insn **p_insnS,
>> +rtx_insn **p_insnR)
>> +{
>> +  df_ref ref;
>> +  rtx_insn *insn, *insnS = NULL, *insnR = NULL;
>> +  rtx pattern;
>> +
>> +  if (!optimize || !df || call_used_or_fixed_reg_p (regno))
>> +return false;
>> +
>> +  for (ref = DF_REG_DEF_CHAIN (regno);
>> +   ref; ref = DF_REF_NEXT_REG (ref))
>> +if (DF_REF_CLASS (ref) != DF_REF_REGULAR
>> +   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
>> +  continue;
>> +else if (GET_CODE (pattern = PATTERN (insn)) == SET
>> +&& REG_P (SET_DEST (pattern))
>> +&& REGNO (SET_DEST (pattern)) == regno
>> +&& REG_NREGS (SET_DEST (pattern)) == 1
>> +&& REG_P (SET_SRC (pattern))
>> +&& REGNO (SET_SRC (pattern)) != A1_REG)
> 
> Do I understand correctly that the check for A1 here and below is
> for the case when regno is a hard frame pointer and the function
> needs the frame pointer? If so, wouldn't it be better to check
> for it explicitly in the beginning?

I see.  But I can't be sure that the body of the function never saves and 
restores the stack pointer to another register if the function doesn't need the 
frame pointer.
Therefore, I think that the validity depends on the regtest.

> 
>> +  {
>> +   if (insnS)
>> + return false;
>> +   insnS = insn;
>> +   continue;
>> +  }
>> +else
>> +  

[PATCHES, Committed] As obvious

2023-02-16 Thread Jerry DeLisle via Gcc-patches
Committed as obvious:

commit 061b13ed014ba0b6891800a5c7f852bf58e4d856
Author: Jerry DeLisle 
Date:   Thu Feb 16 18:13:56 2023 -0800

 Fortran Tests: Allow passing on mingw.

 gcc/testsuite/ChangeLog:

 * gfortran.dg/bind_c_array_params_2.f90: Add *-*-ming* to
dg-final.

and

commit f978585c2939691176ad8d3fa9c2e4e91ed18bf4 (HEAD -> master,
origin/master, origin/HEAD)
Author: Jerry DeLisle 
Date:   Thu Feb 16 19:29:44 2023 -0800

 Fortran test: Modify test cases to pass on mingw.

 gcc/testsuite/ChangeLog:

 * gfortran.dg/ISO_Fortran_binding_14.f90: Change example
function to
 CLOCK which is available on mingw as well as other platforms.
 * gfortran.dg/pr96486.f90: Change variable to PATH likewise.




Re: [PATCH] rs6000: Fix vector parity support [PR108699]

2023-02-16 Thread Kewen.Lin via Gcc-patches
Hi Segher,

Thanks for the comments!

on 2023/2/16 23:10, Segher Boessenkool wrote:
> Hi!
> 
> On Thu, Feb 16, 2023 at 08:06:02PM +0800, Kewen.Lin wrote:
>> on 2023/2/16 19:14, Segher Boessenkool wrote:
>>> On Thu, Feb 16, 2023 at 05:23:40PM +0800, Kewen.Lin wrote:
>>>> This patch is to fix the handling with one more pre-insn
>>>> vpopcntb.  It also fixes an oversight having V8HI in VEC_IP,
>>>> replaces VParity with VEC_IP, and adjusts the existing
>>>> UNSPEC_PARITY to a more meaningful name UNSPEC_PARITYB.
>>>
>>> Please don't do that.  UNSPEC_PARITYB is worse than UNSPEC_PARITY,
>>> even more so for the prtyw etc. instructions.
>>
>> I thought the scalar insns prty[wd] also operate on byte
>> (especially on the least significant bit in each byte),
>> PARITYB(yte) seems better ...
> 
> The scalar instruction does not include a "b" in the mnemonic, and it
> says nothing "byte" or "bit" in the instruction name either.  The
> existing name is simpler, less confusing, simply better.
> 
>>> You might want to express the vector parity insns separately, but then
>>> *do that*, don't rename the normal stuff as well, and use a more obvious
>>> name like UNSPEC_VPARITY please.
>>
>> I'll update for vector only.  Maybe it's better with UNSPEC_VPARITY*B*?
>> since the mnemonic has "b"(yte).
> 
> No, you are right that the semantics are pretty much the same.  Please
> just keep UNSPEC_PARITY everywhere.

OK, since it has UNSPEC, I would hope the reader can realize it's
different from RTL opcode parity and mainly operating on byte.  :)

> 
>>>> const vsll __builtin_altivec_vprtybd (vsll);
>>>> -VPRTYBD parityv2di2 {}
>>>> +VPRTYBD p9v_paritybv2di2 {}
>>>
>>> Why this?  Please keep the simpler names if at all possible.
>>
>> The bif would like to map with the vector parity byte insns
>> directly, the parity2 can't work here any more.
> 
> Ah, because it cannot use the expander here, it has to be a define_insn?

No, the above statement seems to have caused some misunderstanding, let me clarify:
first, the built-in functions __builtin_altivec_vprtyb[wdq] are required to be
mapped to hardware insns vprtyb[wdq] directly, as the function names show.
Before this patch, the standard pattern name parity<mode>2 expanded to those
insns directly (wrongly), so it was fine to use those expanders here.  After
this patch, those expanders get fixed to get parity for each vector element
(vpopcntb + vprtyb*), so they are no longer valid for expanding these
built-in functions (not a 1-1 map any more), so this patch fixes it with
the correct name which maps to vprtyb*.

> Why is that?
> 
>> The name is updated from previous *p9v_parity2 (becoming
>> to a named define_insn), I noticed there are some names with
>> p8v_, p9v_, meant to keep it consistent with the context.
>> You want this to be simplified as parity*b*v2di2?
> 
> Without the "b".  But that would be better then, yes.  This is a great
> example why p9v_ in the name is not good: most users do not care at all
> what ISA version this insn first appeared in.

The name without "b" is the standard pattern name, whose semantics don't align
with what these insns provide, and we already have the matching expander with
it ("parity<mode>2"), so we can't use the name here :(.  As you felt a name
with "b" is better than "p9v_*", I'll go with "parityb" then.  :)

>>> Later patches can do all other things (also, not do this expand for
>>> TImode at all, ho hum).
>>
>> OK, I guess all the others are for next stage1. :)
> 
> Yes exactly.  And one (small, self-contained) thing per patch please.

Got it, thanks again!

BR,
Kewen


Re: Re: [PATCH V2 0/5] RISC-V: Implement Scalar Cryptography Extension

2023-02-16 Thread shihua
OK, I will send another one which removes riscv_scalar_crypto.h and updates 
the testcases with __builtin_riscv_XX


 -原始邮件-
 发件人: "Kito Cheng" 
 发送时间: 2023-02-16 21:28:34 (星期四)
 收件人: "Liao Shihua" 
 抄送: gcc-patches@gcc.gnu.org, jia...@iscas.ac.cn, m...@iki.fi, 
pal...@dabbelt.com, shiyul...@iscas.ac.cn, ben.marsh...@pqshield.com, 
christoph.muell...@vrull.eu
 主题: Re: [PATCH V2 0/5] RISC-V: Implement Scalar Cryptography Extension
 
 Hi Shihua:
 
 Thanks for your patches! This patch set is generally in good shape,
 but I would prefer to remove riscv_scalar_crypto.h at this moment
 since it's NOT standardized yet.
 
 Do you mind sending a new version of this patch set which does not
 include that and also update the testcases?
 
 
 
 On Thu, Feb 16, 2023 at 3:52 PM Liao Shihua  wrote:
 
  This series adds basic support for the Scalar Cryptography extensions:
  * Zbkb
  * Zbkc
  * Zbkx
  * Zknd
  * Zkne
  * Zknh
  * Zksed
  * Zksh
 
  The implementation follows the version Scalar Cryptography v1.0.0 of 
the specification,
  and the intrinsic of Scalar Cryptography extensions follows 
riscv-c-api
  which can be found here:
  https://github.com/riscv/riscv-crypto/releases/tag/v1.0.0-scalar
  https://github.com/riscv-non-isa/riscv-c-api-doc/pull/31
 
  It works by Wu Siyu and Liao Shihua .
 
  Liao Shihua (5):
Add prototypes for RISC-V Crypto built-in functions
Implement ZBKB, ZBKC and ZBKX extensions
Implement ZKND and ZKNE extensions
Implement ZKNH extensions
Implement ZKSH and ZKSED extensions
 
   gcc/config.gcc|   2 +-
   gcc/config/riscv/bitmanip.md  |  20 +-
   gcc/config/riscv/constraints.md   |   8 +
   gcc/config/riscv/crypto.md| 435 
++
   gcc/config/riscv/riscv-builtins.cc|  26 ++
   gcc/config/riscv/riscv-crypto.def |  94 
   gcc/config/riscv/riscv-ftypes.def |  10 +
   gcc/config/riscv/riscv.md |   4 +-
   gcc/config/riscv/riscv_scalar_crypto.h| 218 +
   gcc/testsuite/gcc.target/riscv/zbkb32.c   |  36 ++
   gcc/testsuite/gcc.target/riscv/zbkb64.c   |  28 ++
   gcc/testsuite/gcc.target/riscv/zbkc32.c   |  17 +
   gcc/testsuite/gcc.target/riscv/zbkc64.c   |  17 +
   gcc/testsuite/gcc.target/riscv/zbkx32.c   |  18 +
   gcc/testsuite/gcc.target/riscv/zbkx64.c   |  18 +
   gcc/testsuite/gcc.target/riscv/zknd32.c   |  18 +
   gcc/testsuite/gcc.target/riscv/zknd64.c   |  36 ++
   gcc/testsuite/gcc.target/riscv/zkne32.c   |  18 +
   gcc/testsuite/gcc.target/riscv/zkne64.c   |  30 ++
   gcc/testsuite/gcc.target/riscv/zknh-sha256.c  |  29 ++
   .../gcc.target/riscv/zknh-sha512-32.c |  43 ++
   .../gcc.target/riscv/zknh-sha512-64.c |  31 ++
   gcc/testsuite/gcc.target/riscv/zksed.c|  20 +
   gcc/testsuite/gcc.target/riscv/zksh.c |  19 +
   24 files changed, 1183 insertions(+), 12 deletions(-)
   create mode 100644 gcc/config/riscv/crypto.md
   create mode 100644 gcc/config/riscv/riscv-crypto.def
   create mode 100644 gcc/config/riscv/riscv_scalar_crypto.h
   create mode 100644 gcc/testsuite/gcc.target/riscv/zbkb32.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zbkb64.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zbkc32.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zbkc64.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zbkx32.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zbkx64.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zknd32.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zknd64.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zkne32.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zkne64.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha256.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-32.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-64.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zksed.c
   create mode 100644 gcc/testsuite/gcc.target/riscv/zksh.c
 
  --
  2.38.1.windows.1
 


Re: [PATCH] LoongArch: Fix multiarch tuple canonization

2023-02-16 Thread Lulu Cheng

Hi,

在 2023/2/15 下午6:42, WANG Xuerui 写道:

Hi,

On 2023/2/13 18:38, Xi Ruoyao wrote:

Multiarch tuple will be coded in file or directory names in
multiarch-aware distros, so one ABI should have only one multiarch
tuple.  For example, "--target=loongarch64-linux-gnu --with-abi=lp64s"
and "--target=loongarch64-linux-gnusf" should both set multiarch tuple
to "loongarch64-linux-gnusf".  Before this commit,
"--target=loongarch64-linux-gnu --with-abi=lp64s --disable-multilib"
will produce wrong result (loongarch64-linux-gnu).

A recent LoongArch psABI revision mandates "loongarch64-linux-gnu" to be
used for -mabi=lp64d (instead of "loongarch64-linux-gnuf64") for some
non-technical reason [1].  Note that we cannot make
"loongarch64-linux-gnuf64" an alias for "loongarch64-linux-gnu" because
to implement such an alias, we must create thousands of symlinks in the
distro and doing so would be completely impractical.  This commit also
aligns GCC with the revision.

Tested by building cross compilers with --enable-multiarch and multiple
combinations of --target=loongarch64-linux-gnu*, --with-abi=lp64{s,f,d},
and --{enable,disable}-multilib; and run "xgcc --print-multiarch" then
manually verify the result with eyesight.

Ok for trunk and backport to releases/gcc-12?

[1]: https://github.com/loongson/LoongArch-Documentation/pull/80

gcc/ChangeLog:

* config.gcc (triplet_abi): Set its value based on $with_abi,
instead of $target.
(la_canonical_triplet): Set it after $triplet_abi is set
correctly.
* config/loongarch/t-linux (MULTILIB_OSDIRNAMES): Make the
multiarch tuple for lp64d "loongarch64-linux-gnu" (without
"f64" suffix).
---
  gcc/config.gcc   | 14 +++---
  gcc/config/loongarch/t-linux |  2 +-
  2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 067720ac795..c070e6ecd2e 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4889,20 +4889,16 @@ case "${target}" in
  case ${target} in
  loongarch64-*-*-*f64)
  abi_pattern="lp64d"
-    triplet_abi="f64"
  ;;
  loongarch64-*-*-*f32)
  abi_pattern="lp64f"
-    triplet_abi="f32"
  ;;
  loongarch64-*-*-*sf)
  abi_pattern="lp64s"
-    triplet_abi="sf"
  ;;
  loongarch64-*-*-*)
  abi_pattern="lp64[dfs]"
  abi_default="lp64d"
-    triplet_abi=""
  ;;
  *)
  echo "Unsupported target ${target}." 1>&2
@@ -4923,9 +4919,6 @@ case "${target}" in
    ;;
  esac
  - la_canonical_triplet="loongarch64-${triplet_os}${triplet_abi}"
-
-
  # Perform initial sanity checks on --with-* options.
  case ${with_arch} in
  "" | loongarch64 | la464) ;; # OK, append here.
@@ -4996,6 +4989,13 @@ case "${target}" in
  ;;
  esac
  +    case ${with_abi} in
+  "lp64d") triplet_abi="";;
+  "lp64f") triplet_abi="f32";;
+  "lp64s") triplet_abi="sf";;
+    esac
+ la_canonical_triplet="loongarch64-${triplet_os}${triplet_abi}"
+
  # Set default value for with_abiext (internal)
  case ${with_abiext} in
  "")
diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux
index 131c45fdced..e40da179203 100644
--- a/gcc/config/loongarch/t-linux
+++ b/gcc/config/loongarch/t-linux
@@ -40,7 +40,7 @@ ifeq ($(filter LA_DISABLE_MULTILIB,$(tm_defines)),)
    MULTILIB_OSDIRNAMES = \
    mabi.lp64d=../lib64$\
-  $(call if_multiarch,:loongarch64-linux-gnuf64)
+  $(call if_multiarch,:loongarch64-linux-gnu)
    MULTILIB_OSDIRNAMES += \
    mabi.lp64f=../lib64/f32$\


Thanks for the quick patch; however Revy told me offline yesterday 
that this might conflict with things on the Debian side once this gets 
merged. He may have more details to share.


Adding him to CC -- you could keep him CC-ed on future changes that 
may impact distro packaging.


Thank you for your feedback.

This modification plan is determined by the operating system group. If 
there is any problem, you can describe it clearly.


If there is no problem, we will combine this patch.

Thanks!



[PATCH] rs6000: Enhance lowpart/highpart DI->SF by mtvsrws/mtvsrd

2023-02-16 Thread Jiufu Guo via Gcc-patches
Hi,

Compared with the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611823.html
This patch does not define a new insn for mtvsrws, but uses the existing one.

As mentioned in PR108338, on p9, we could use mtvsrws to implement
the bitcast from SI#0 to SF (or lowpart DI to SF).

For code:
  *(long long*)buff = di;
  float f = *(float*)(buff);

We generate "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" instead of
"mtvsrws 1,3 ; xscvspdpn 1,1".

This patch updates this, and also enhances the bitcast from highpart
DI to SF.

Bootstrap and regtests pass on ppc64{,le}.
Is this ok for trunk?

BR,
Jeff (Jiufu)


PR target/108338

gcc/ChangeLog:

* config/rs6000/predicates.md (lowpart_subreg_operator): New
define_predicate.
* config/rs6000/rs6000.md (any_rshift): New code_iterator.
(movsf_from_si): Update to generate mtvsrws.
(movsf_from_si2): Rename to...
(movsf_from_si2_<code>): ... this.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr108338.c: New test.

---
 gcc/config/rs6000/predicates.md |  5 +++
 gcc/config/rs6000/rs6000.md | 34 +++--
 gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +
 3 files changed, 70 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 52c65534e51..e57c9d99c6b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -2064,3 +2064,8 @@ (define_predicate "macho_pic_address"
   else
 return false;
 })
+
+(define_predicate "lowpart_subreg_operator"
+  (and (match_code "subreg")
+   (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op)))
+   == SUBREG_BYTE (op)")))
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4a7812fa592..74b1c9cee6a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -8200,13 +8200,24 @@ (define_insn_and_split "movsf_from_si"
 {
   rtx op0 = operands[0];
   rtx op1 = operands[1];
-  rtx op2 = operands[2];
-  rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
 
-  /* Move SF value to upper 32-bits for xscvspdpn.  */
-  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
-  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
-  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+  if (TARGET_P9_VECTOR)
+{
+  rtx op0_v = gen_rtx_REG (V4SImode, REGNO (op0));
+  emit_insn (gen_vsx_splat_v4si (op0_v, op1));
+  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+}
+  else
+{
+  rtx op2 = operands[2];
+  rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
+
+  /* Move SF value to upper 32-bits for xscvspdpn.  */
+  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
+  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
+  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+}
+
   DONE;
 }
   [(set_attr "length"
@@ -8219,18 +8230,19 @@ (define_insn_and_split "movsf_from_si"
"*,  *, p9v,   p8v,   *, *,
 p8v,p8v,   p8v,   *")])
 
+(define_code_iterator any_rshift [ashiftrt lshiftrt])
+
 ;; For extracting high part element from DImode register like:
 ;; {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
 ;; split it before reload with "and mask" to avoid generating shift right
 ;; 32 bit then shift left 32 bit.
-(define_insn_and_split "movsf_from_si2"
+(define_insn_and_split "movsf_from_si2_<code>"
   [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
(unspec:SF
-[(subreg:SI
-  (ashiftrt:DI
+[(match_operator:SI 3 "lowpart_subreg_operator"
+  [(any_rshift:DI
(match_operand:DI 1 "input_operand" "r")
-   (const_int 32))
-  0)]
+   (const_int 32))])]
 UNSPEC_SF_FROM_SI))
   (clobber (match_scratch:DI 2 "=r"))]
   "TARGET_NO_SF_SUBREG"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c 
b/gcc/testsuite/gcc.target/powerpc/pr108338.c
new file mode 100644
index 000..2438dc13f41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c
@@ -0,0 +1,42 @@
+// { dg-do run }
+// { dg-options "-O2 -save-temps" }
+
+float __attribute__ ((noipa)) sf_from_di_off0 (long long l)
+{
+  char buff[16];
+  *(long long*)buff = l;
+  float f = *(float*)(buff);
+  return f;
+}
+
+float  __attribute__ ((noipa)) sf_from_di_off4 (long long l)
+{
+  char buff[16];
+  *(long long*)buff = l;
+  float f = *(float*)(buff + 4);
+  return f; 
+}
+
+/* Under lp64, 'l' is in one DI reg, then check sub DI to SF. */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && 
has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && 
has_arch_pwr8 } } } } */
+
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { 
has_arch_pwr8 && { 

[wwwdocs] testing: Tweak the link to upstream FTensor (was: Anyone using FTensor to test GCC (or otherwise)?)

2023-02-16 Thread Gerald Pfeifer
On Tue, 14 Feb 2023, NightStrike wrote:
>> Alas http://www.wlandry.net/Projects/FTensor has been down for a while,
>> and there does not appear to be a new location?
> https://wlandry.net/Projects/FTensor/ works

Ah, indeed. Thank you! Somehow that must have been the one combination I 
did not try.

I pushed the little patch below.

Gerald

commit b74309c36e59105ef0d8e0d91a85a5bfa884e175
Author: Gerald Pfeifer 
Date:   Fri Feb 17 02:19:19 2023 +0100

Tweak the link to upstream FTensor.

diff --git a/htdocs/testing/testing-ftensor.html 
b/htdocs/testing/testing-ftensor.html
index 2e67b4d8..7b1f4675 100644
--- a/htdocs/testing/testing-ftensor.html
+++ b/htdocs/testing/testing-ftensor.html
@@ -11,7 +11,7 @@
 FTensor build and test guide
 
 This page is a guide to running the testing and timing programs for the
-<a href="http://www.wlandry.net/Projects/FTensor">FTensor</a>
+<a href="https://wlandry.net/Projects/FTensor">FTensor</a>
 tensor class library as part of GCC integration testing.
 
 Resource usage


[pushed] doc: Reword how to get possible values of a parameter (was: Document all param values and remove defaults (PR middle-end/86078))

2023-02-16 Thread Gerald Pfeifer
On Mon, 24 Sep 2018, Martin Liška wrote:
> As mentioned in the PR we miss defaults for quite some param option.
> I agreed with Richi that easiest way how to fix that would be to remove
> that from documentation and use rather --help=param. It's done in the 
> patch.

And here is a little follow-up patch after that, ahem, little while...

Pushed.

Gerald


gcc/ChangeLog:

* doc/invoke.texi (Optimize Options): Reword the explanation
getting minimal, maximal and default values of a parameter.
---
 gcc/doc/invoke.texi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6def1fd631e..7b308cd3c31 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -14678,8 +14678,8 @@ The names of specific parameters, and the meaning of 
the values, are
 tied to the internals of the compiler, and are subject to change
 without notice in future releases.
 
-In order to get minimal, maximal and default value of a parameter,
-one can use @option{--help=param -Q} options.
+In order to get the minimal, maximal and default values of a parameter,
+use the @option{--help=param -Q} options.
 
 In each case, the @var{value} is an integer.  The following choices
 of @var{name} are recognized for all targets:
-- 
2.39.1


[pushed] analyzer: respect some conditions from bit masks [PR108806]

2023-02-16 Thread David Malcolm via Gcc-patches
PR analyzer/108806 reports false +ves seen from -fanalyzer on code like this
in qemu-7.2.0's hw/intc/omap_intc.c:

  [...snip...]
  struct omap_intr_handler_bank_s* bank = NULL;
  if ((offset & 0xf80) == 0x80) {
[...set "bank" to non-NULL...]
  }
  switch (offset) {
[...snip various cases that don't deref "bank"...]
case 0x80:
  return bank->inputs;
case 0x84:
  return bank->mask;
[...etc...]
   }

where the analyzer falsely complains about execution paths in which
"(offset & 0xf80) == 0x80" was false (leaving "bank" as NULL), but then
in which "switch (offset)" goes to a case for which
"(offset & 0xf80) == 0x80" is true and dereferences NULL "bank", i.e.
paths in which "(offset & 0xf80) == 0x80" is both true *and* false.

This patch adds enough logic to constraint_manager for -fanalyzer to
reject such execution paths as impossible, fixing the false +ves.

Integration testing shows this eliminates 20 probable false positives:

Comparison: 9.08% -> 9.34% GOOD: 66 BAD: 661 -> 641 (-20)

where the affected warnings/projects are:

  -Wanalyzer-null-dereference: 0.00% GOOD: 0 BAD: 279 -> 269 (-10)
qemu-7.2.0: 175 -> 165 (-10)

  -Wanalyzer-use-of-uninitialized-value: 0.00% GOOD: 0 BAD: 153 -> 143 (-10)
 coreutils-9.1:  18 ->  14 (-4)
qemu-7.2.0:  54 ->  48 (-6)

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r13-6101-g4d3b7be281e73e.

gcc/analyzer/ChangeLog:
PR analyzer/108806
* constraint-manager.cc (bounded_range::dump_to_pp): Use
bounded_range::singleton_p.
(constraint_manager::add_bounded_ranges): Handle singleton ranges
by adding an EQ_EXPR constraint.
(constraint_manager::impossible_derived_conditions_p): New.
(constraint_manager::eval_condition): Reject EQ_EXPR when it would
imply impossible derived conditions.
(selftest::test_bits): New.
(selftest::run_constraint_manager_tests): Run it.
* constraint-manager.h (bounded_range::singleton_p): New.
(constraint_manager::impossible_derived_conditions_p): New decl.
* region-model.cc (region_model::get_rvalue_1): Handle
BIT_AND_EXPR, BIT_IOR_EXPR, and BIT_XOR_EXPR.

gcc/testsuite/ChangeLog:
PR analyzer/108806
* gcc.dg/analyzer/null-deref-pr108806-qemu.c: New test.
* gcc.dg/analyzer/pr103217.c: Add -Wno-analyzer-too-complex.
* gcc.dg/analyzer/switch.c (test_bitmask_1): New.
(test_bitmask_2): New.
* gcc.dg/analyzer/uninit-pr108806-qemu.c: New test.

Signed-off-by: David Malcolm 
---
 gcc/analyzer/constraint-manager.cc| 166 +-
 gcc/analyzer/constraint-manager.h |   7 +
 gcc/analyzer/region-model.cc  |   3 +
 .../analyzer/null-deref-pr108806-qemu.c   | 105 +++
 gcc/testsuite/gcc.dg/analyzer/pr103217.c  |   2 +
 gcc/testsuite/gcc.dg/analyzer/switch.c|  76 
 .../gcc.dg/analyzer/uninit-pr108806-qemu.c| 108 
 7 files changed, 466 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/null-deref-pr108806-qemu.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/uninit-pr108806-qemu.c

diff --git a/gcc/analyzer/constraint-manager.cc 
b/gcc/analyzer/constraint-manager.cc
index 5a859c6c0f7..2c9c435527e 100644
--- a/gcc/analyzer/constraint-manager.cc
+++ b/gcc/analyzer/constraint-manager.cc
@@ -421,7 +421,7 @@ dump_cst (pretty_printer *pp, tree cst, bool show_types)
 void
 bounded_range::dump_to_pp (pretty_printer *pp, bool show_types) const
 {
-  if (tree_int_cst_equal (m_lower, m_upper))
+  if (singleton_p ())
 dump_cst (pp, m_lower, show_types);
   else
 {
@@ -2118,6 +2118,17 @@ bool
 constraint_manager::add_bounded_ranges (const svalue *sval,
const bounded_ranges *ranges)
 {
+  /* If RANGES is just a singleton, convert this to adding the constraint:
+ "SVAL == {the singleton}".  */
+  if (ranges->get_count () == 1
+  && ranges->get_range (0).singleton_p ())
+{
+  tree range_cst = ranges->get_range (0).m_lower;
+  const svalue *range_sval
+   = m_mgr->get_or_create_constant_svalue (range_cst);
+  return add_constraint (sval, EQ_EXPR, range_sval);
+}
+
   sval = sval->unwrap_any_unmergeable ();
 
   /* Nothing can be known about unknown/poisoned values.  */
@@ -2466,6 +2477,66 @@ constraint_manager::eval_condition (equiv_class_id 
lhs_ec,
   return tristate::unknown ();
 }
 
+/* Return true iff "LHS == RHS" is known to be impossible due to
+   derived conditions.
+
+   Look for an EC containing an EC_VAL of the form (LHS OP CST).
+   If found, see if (LHS OP CST) == EC_VAL is false.
+   If so, we know this condition is false.
+
+   For example, if we already know that
+ (X & CST_MASK) == Y
+   and we're evaluating X == Z, we can test to see if
+ (Z & CST_MASK) == EC_VAL
+   and thus if:
+ (Z & CST_MASK) == Y
+   and 

[og12] Attempt to register OpenMP pinned memory using a device instead of 'mlock' (was: [PATCH] libgomp, openmp: pinned memory)

2023-02-16 Thread Thomas Schwinge
Hi!

On 2023-02-16T16:17:32+, "Stubbs, Andrew via Gcc-patches" 
 wrote:
>> On 2022-06-09T11:38:22+0200, I wrote:
>> > [...]
>> > *register* your standard 'malloc'ed etc. memory via 'cuMemHostRegister',
>> > :
>> > "Page-locks the memory range specified [...] and maps it for the
>> > device(s) [...].  This memory range also is added to the same tracking
>> > mechanism as cuMemHostAlloc to automatically accelerate [...]"?  (No
>> > manual 'mlock'ing involved in that case, too; presumably again using this
>> > interface likely circumvents any "annoying" 'ulimit' limitations?)
>> >
>> > Such a *register* abstraction can then be implemented by all the libgomp
>> > offloading plugins: they just call the respective
>> > CUDA/HSA/etc. functions to register such (existing, 'malloc'ed, etc.)
>> > memory.
>> >
>> > ..., but maybe I'm missing some crucial "detail" here?
>>
>> Indeed this does appear to work; see attached
>> "[WIP] Attempt to register OpenMP pinned memory using a device instead of
>> 'mlock'".
>> Any comments (aside from the TODOs that I'm still working on)?

With those TODOs resolved, I've now pushed to devel/omp/gcc-12
commit a5a4800e92773da7126c00a9c79b172494d58ab5
"Attempt to register OpenMP pinned memory using a device instead of 'mlock'",
see attached.


> The mmap implementation was not optimized for a lot of small allocations, and 
> I can't see that issue changing here

That's correct, 'mmap' remains.  Under the hood, 'cuMemHostRegister' must
surely also be doing some 'mlock'-like thing, so I figured it's best to
feed page-boundary memory regions to it, which 'mmap' gets us.

> so I don't know if this can be used for mlockall replacement.
>
> I had assumed that using the Cuda allocator would fix that limitation.

From what I've read (but no first-hand experiments), there's non-trivial
overhead with 'cuMemHostRegister' (just like with 'mlock'), so routing
all small allocations individually through it probably isn't a good idea
either.  Therefore, I suppose, we'll indeed want to use some local
allocator if we wish this "optimized for a lot of small allocations".

And, getting rid of 'mlockall' is yet another topic.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From a5a4800e92773da7126c00a9c79b172494d58ab5 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Thu, 16 Feb 2023 15:57:37 +0100
Subject: [PATCH] Attempt to register OpenMP pinned memory using a device
 instead of 'mlock'

Implemented for nvptx offloading via 'cuMemHostRegister'.  This means: (a) not
running into 'mlock' limitations, and (b) the device is aware of this and may
optimize host <-> device memory transfers.

This re-works og12 commit ab7520b3b4cd9fdabfd63652badde478955bd3b5
"libgomp: pinned memory".

	include/
	* cuda/cuda.h (cuMemHostRegister, cuMemHostUnregister): New.
	libgomp/
	* config/linux/allocator.c (linux_memspace_alloc)
	(linux_memspace_free, linux_memspace_realloc): Attempt to register
	OpenMP pinned memory using a device instead of 'mlock'.
	* libgomp-plugin.h (GOMP_OFFLOAD_register_page_locked)
	(GOMP_OFFLOAD_unregister_page_locked): New.
	* libgomp.h (gomp_register_page_locked)
	(gomp_unregister_page_locked): New
	(struct gomp_device_descr): Add 'register_page_locked_func',
	'unregister_page_locked_func'.
	* plugin/cuda-lib.def (cuMemHostRegister_v2, cuMemHostRegister)
	(cuMemHostUnregister): New.
	* plugin/plugin-nvptx.c (GOMP_OFFLOAD_register_page_locked)
	(GOMP_OFFLOAD_unregister_page_locked): New.
	* target.c (gomp_register_page_locked)
	(gomp_unregister_page_locked): New.
	(gomp_load_plugin_for_device): Handle 'register_page_locked',
	'unregister_page_locked'.
	* testsuite/libgomp.c/alloc-pinned-1.c: Adjust.
	* testsuite/libgomp.c/alloc-pinned-2.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-3.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-4.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-5.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-6.c: Likewise.
---
 include/ChangeLog.omp|   4 +
 include/cuda/cuda.h  |   3 +
 libgomp/ChangeLog.omp|  24 
 libgomp/config/linux/allocator.c |  74 +-
 libgomp/libgomp-plugin.h |   2 +
 libgomp/libgomp.h|   4 +
 libgomp/plugin/cuda-lib.def  |   3 +
 libgomp/plugin/plugin-nvptx.c|  33 +
 libgomp/target.c | 137 +++
 libgomp/testsuite/libgomp.c/alloc-pinned-1.c |  25 
 libgomp/testsuite/libgomp.c/alloc-pinned-2.c |  25 
 libgomp/testsuite/libgomp.c/alloc-pinned-3.c |  43 +-
 

Re: [og12] In 'libgomp/allocator.c:omp_realloc', route 'free' through 'MEMSPACE_FREE' (was: [PATCH] libgomp, OpenMP, nvptx: Low-latency memory allocator)

2023-02-16 Thread Thomas Schwinge
Hi!

On 2023-02-14T15:11:14+, Andrew Stubbs  wrote:
> On 14/02/2023 12:54, Thomas Schwinge wrote:
>> On 2022-01-13T11:13:51+, Andrew Stubbs  wrote:
>>> Updated patch: this version fixes some missed cases of malloc in the
>>> realloc implementation.
>>
>> Right, and as it seems I've run into another issue: a stray 'free'.
>>
>>> --- a/libgomp/allocator.c
>>> +++ b/libgomp/allocator.c
>>
>> Re 'omp_realloc':
>>
>>> @@ -660,9 +709,10 @@ retry:
>>> gomp_mutex_unlock (&allocator_data->lock);
>>>   #endif
>>> if (prev_size)
>>> - new_ptr = realloc (data->ptr, new_size);
>>> + new_ptr = MEMSPACE_REALLOC (allocator_data->memspace, data->ptr,
>>> + data->size, new_size);
>>> else
>>> - new_ptr = malloc (new_size);
>>> + new_ptr = MEMSPACE_ALLOC (allocator_data->memspace, new_size);
>>> if (new_ptr == NULL)
>>>{
>>>   #ifdef HAVE_SYNC_BUILTINS
>>> @@ -690,7 +740,11 @@ retry:
>>>   && (free_allocator_data == NULL
>>>   || free_allocator_data->pool_size == ~(uintptr_t) 0))
>>>   {
>>> -  new_ptr = realloc (data->ptr, new_size);
>>> +  omp_memspace_handle_t memspace __attribute__((unused))
>>> + = (allocator_data
>>> +? allocator_data->memspace
>>> +: predefined_alloc_mapping[allocator]);
>>> +  new_ptr = MEMSPACE_REALLOC (memspace, data->ptr, data->size, 
>>> new_size);
>>> if (new_ptr == NULL)
>>>goto fail;
>>> ret = (char *) new_ptr + sizeof (struct omp_mem_header);
>>> @@ -701,7 +755,11 @@ retry:
>>>   }
>>> else
>>>   {
>>> -  new_ptr = malloc (new_size);
>>> +  omp_memspace_handle_t memspace __attribute__((unused))
>>> + = (allocator_data
>>> +? allocator_data->memspace
>>> +: predefined_alloc_mapping[allocator]);
>>> +  new_ptr = MEMSPACE_ALLOC (memspace, new_size);
>>> if (new_ptr == NULL)
>>>goto fail;
>>>   }
>>> @@ -735,32 +793,35 @@ retry:
>> |free (data->ptr);
>>> return ret;
>>
>> I run into a SIGSEGV if a non-'malloc'-based allocation is 'free'd here.
>>
>> The attached
>> "In 'libgomp/allocator.c:omp_realloc', route 'free' through 'MEMSPACE_FREE'"
>> appears to resolve my issue, but not yet regression-tested.

No issues in testing.

>> Does that
>> look correct to you?
>
> That looks correct.

Thanks.  I've pushed to devel/omp/gcc-12 branch
commit 3a2c07395b0a565955a7b86f0eba866937e15989
"In 'libgomp/allocator.c:omp_realloc', route 'free' through 'MEMSPACE_FREE'",
see attached.

> The only remaining use of "free" should be the one
> referring to the allocator object itself (i.e. the destructor).

ACK.

>> Or, instead of invoking 'MEMSPACE_FREE', should we scrap the
>> 'used_pool_size' bookkeeping here, and just invoke 'omp_free' instead?
>>
>>  --- libgomp/allocator.c
>>  +++ libgomp/allocator.c
>>  @@ -842,19 +842,7 @@ retry:
>> if (old_size - old_alignment < size)
>>   size = old_size - old_alignment;
>> memcpy (ret, ptr, size);
>>  -  if (__builtin_expect (free_allocator_data
>>  -   && free_allocator_data->pool_size < ~(uintptr_t) 0, 
>> 0))
>>  -{
>>  -#ifdef HAVE_SYNC_BUILTINS
>>  -  __atomic_add_fetch (&free_allocator_data->used_pool_size, 
>> -data->size,
>>  - MEMMODEL_RELAXED);
>>  -#else
>>  -  gomp_mutex_lock (&free_allocator_data->lock);
>>  -  free_allocator_data->used_pool_size -= data->size;
>>  -  gomp_mutex_unlock (&free_allocator_data->lock);
>>  -#endif
>>  -}
>>  -  free (data->ptr);
>>  +  ialias_call (omp_free) (ptr, free_allocator);
>> return ret;
>>
>> (I've not yet analyzed whether that's completely equivalent.)
>
> The used_pool_size code comes from upstream, so if you want to go beyond
> the mechanical substitution of "free" then you're adding a new patch
> (rather than tweaking an old one). I'll leave that for others to comment on.

And I'll leave that for another day, and/or another person.  ;-)


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 3a2c07395b0a565955a7b86f0eba866937e15989 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Tue, 14 Feb 2023 13:35:03 +0100
Subject: [PATCH] In 'libgomp/allocator.c:omp_realloc', route 'free' through
 'MEMSPACE_FREE'

... to not run into a SIGSEGV if a non-'malloc'-based allocation is 'free'd
here.

Fix-up for og12 commit c5d1d7651297a273321154a5fe1b01eba9dcf604
"libgomp, nvptx: low-latency memory allocator".

	libgomp/
	* allocator.c (omp_realloc): Route 'free' through 'MEMSPACE_FREE'.
---
 libgomp/ChangeLog.omp |  2 ++
 libgomp/allocator.c   | 12 +++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git 

[og12] Clarify/verify OpenMP 'omp_calloc' zero-initialization for pinned memory (was: [PATCH] libgomp, openmp: pinned memory)

2023-02-16 Thread Thomas Schwinge
Hi!

On 2022-01-13T13:53:03+, Andrew Stubbs  wrote:
> Pinned memory is allocated via mmap

> --- /dev/null
> +++ b/libgomp/config/linux/allocator.c

> +static void *
> +linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin)
> +{
> +  if (pin)
> +return linux_memspace_alloc (memspace, size, pin);
> +[...]

This confused me for a moment, why we don't have to manually
zero-initialize here.  I've pushed to devel/omp/gcc-12 branch
commit 57b8f0600262566cd4f1ab12bf1bdafb29dbdc34
"Clarify/verify OpenMP 'omp_calloc' zero-initialization for pinned memory",
see attached.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 57b8f0600262566cd4f1ab12bf1bdafb29dbdc34 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Wed, 15 Feb 2023 10:23:03 +0100
Subject: [PATCH] Clarify/verify OpenMP 'omp_calloc' zero-initialization for
 pinned memory

Clarification for og12 commit ab7520b3b4cd9fdabfd63652badde478955bd3b5
"libgomp: pinned memory".  No functional change.

	libgomp/
	* config/linux/allocator.c (linux_memspace_alloc)
	(linux_memspace_calloc): Clarify zero-initialization for pinned
	memory.
	* testsuite/libgomp.c/alloc-pinned-1.c: Verify zero-initialization
	for pinned memory.
	* testsuite/libgomp.c/alloc-pinned-2.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-3.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-4.c: Likewise.
	* testsuite/libgomp.c/alloc-pinned-5.c: Likewise.
---
 libgomp/ChangeLog.omp| 10 ++
 libgomp/config/linux/allocator.c |  2 ++
 libgomp/testsuite/libgomp.c/alloc-pinned-1.c | 10 ++
 libgomp/testsuite/libgomp.c/alloc-pinned-2.c | 10 ++
 libgomp/testsuite/libgomp.c/alloc-pinned-3.c |  9 +
 libgomp/testsuite/libgomp.c/alloc-pinned-4.c |  9 +
 libgomp/testsuite/libgomp.c/alloc-pinned-5.c | 10 ++
 7 files changed, 60 insertions(+)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index 1c4b1833c0b..530f5c6acf6 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,5 +1,15 @@
 2023-02-16  Thomas Schwinge  
 
+	* config/linux/allocator.c (linux_memspace_alloc)
+	(linux_memspace_calloc): Clarify zero-initialization for pinned
+	memory.
+	* testsuite/libgomp.c/alloc-pinned-1.c: Verify zero-initialization
+	for pinned memory.
+	* testsuite/libgomp.c/alloc-pinned-2.c: Likewise.
+	* testsuite/libgomp.c/alloc-pinned-3.c: Likewise.
+	* testsuite/libgomp.c/alloc-pinned-4.c: Likewise.
+	* testsuite/libgomp.c/alloc-pinned-5.c: Likewise.
+
 	* config/linux/allocator.c (linux_memspace_calloc): Elide
 	(innocuous) duplicate 'if' condition.
 	* config/nvptx/allocator.c (nvptx_memspace_free): Explicitly
diff --git a/libgomp/config/linux/allocator.c b/libgomp/config/linux/allocator.c
index 8a9171c36df..f278e5cdf14 100644
--- a/libgomp/config/linux/allocator.c
+++ b/libgomp/config/linux/allocator.c
@@ -65,6 +65,7 @@ linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin)
 }
   else if (pin)
 {
+  /* 'mmap' zero-initializes, which 'linux_memspace_calloc' relies on.  */
   void *addr = mmap (NULL, size, PROT_READ | PROT_WRITE,
 			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   if (addr == MAP_FAILED)
@@ -96,6 +97,7 @@ linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin)
   return ret;
 }
   else if (pin)
+/* If PINned, 'linux_memspace_alloc' 'mmap's, which zero-initializes.  */
 return linux_memspace_alloc (memspace, size, pin);
   else
 return calloc (1, size);
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-1.c b/libgomp/testsuite/libgomp.c/alloc-pinned-1.c
index 79792b16d83..fb7ac8b0080 100644
--- a/libgomp/testsuite/libgomp.c/alloc-pinned-1.c
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-1.c
@@ -54,6 +54,14 @@ get_pinned_mem ()
 }
 #endif
 
+static void
+verify0 (char *p, size_t s)
+{
+  for (size_t i = 0; i < s; ++i)
+if (p[i] != 0)
+  abort ();
+}
+
 #include 
 
 int
@@ -91,5 +99,7 @@ main ()
   if (get_pinned_mem () <= amount2)
 abort ();
 
+  verify0 (p, SIZE);
+
   return 0;
 }
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-2.c b/libgomp/testsuite/libgomp.c/alloc-pinned-2.c
index 228c656b715..651b89fb42f 100644
--- a/libgomp/testsuite/libgomp.c/alloc-pinned-2.c
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-2.c
@@ -54,6 +54,14 @@ get_pinned_mem ()
 }
 #endif
 
+static void
+verify0 (char *p, size_t s)
+{
+  for (size_t i = 0; i < s; ++i)
+if (p[i] != 0)
+  abort ();
+}
+
 #include 
 
 int
@@ -97,5 +105,7 @@ main ()
   if (get_pinned_mem () <= amount2)
 abort ();
 
+  verify0 (p, SIZE);
+
   return 0;
 }
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-3.c b/libgomp/testsuite/libgomp.c/alloc-pinned-3.c
index 

[og12] Miscellaneous clean-up re OpenMP 'ompx_unified_shared_mem_space', 'ompx_host_mem_space' (was: [PATCH 3/5] openmp, nvptx: ompx_unified_shared_mem_alloc)

2023-02-16 Thread Thomas Schwinge
Hi!

On 2023-02-10T15:31:47+, Andrew Stubbs  wrote:
> On 10/02/2023 14:21, Thomas Schwinge wrote:
>> Is the correct fix the following [...]
>
> Yes, [...]

>>> --- a/libgomp/config/nvptx/allocator.c
>>> +++ b/libgomp/config/nvptx/allocator.c
>>> @@ -125,6 +125,8 @@ nvptx_memspace_alloc (omp_memspace_handle_t memspace, 
>>> size_t size)
>>> __atomic_store_n (&__nvptx_lowlat_heap_root, root.raw, 
>>> MEMMODEL_RELEASE);
>>> return result;
>>>   }
>>> +  else if (memspace == ompx_host_mem_space)
>>> +return NULL;
>>> else
>>>   return malloc (size);
>>>   }
>>> @@ -145,6 +147,8 @@ nvptx_memspace_calloc (omp_memspace_handle_t memspace, 
>>> size_t size)
>>>
>>> return result;
>>>   }
>>> +  else if (memspace == ompx_host_mem_space)
>>> +return NULL;
>>> else
>>>   return calloc (1, size);
>>>   }
>>> @@ -354,6 +358,8 @@ nvptx_memspace_realloc (omp_memspace_handle_t memspace, 
>>> void *addr,
>>>}
>>> return result;
>>>   }
>>> +  else if (memspace == ompx_host_mem_space)
>>> +return NULL;
>>> else
>>>   return realloc (addr, size);
>>>   }
>>
>> (I'd have added an explicit no-op (or, 'abort'?) to
>> 'nvptx_memspace_free', but that's maybe just me...)  ;-\
>
> Why? The host memspace is just the regular heap, which can be a thing on
> any device. It's an extension though so we can define it either way.

My point was: for nvptx libgomp, all 'ompx_host_mem_space' allocator
functions (cited above) 'return NULL', and it's a cheap check to verify
that in 'nvptx_memspace_free'.

>>> --- a/libgomp/libgomp.h
>>> +++ b/libgomp/libgomp.h
>>
>>> +extern void * gomp_usm_alloc (size_t size, int device_num);
>>> +extern void gomp_usm_free (void *device_ptr, int device_num);
>>> +extern bool gomp_is_usm_ptr (void *ptr);
>>
>> 'gomp_is_usm_ptr' isn't defined/used anywhere; I'll remove it.
>
> I think I started that and then decided against. Thanks.

These three combined, I've pushed to devel/omp/gcc-12 branch
commit 23f52e49368d7b26a1b1a72d6bb903d31666e961
"Miscellaneous clean-up re OpenMP 'ompx_unified_shared_mem_space', 
'ompx_host_mem_space'",
see attached.


>>> --- a/libgomp/target.c
>>> +++ b/libgomp/target.c
>>
>>> @@ -3740,6 +3807,9 @@ gomp_load_plugin_for_device (struct gomp_device_descr 
>>> *device,
>>> DLSYM (unload_image);
>>> DLSYM (alloc);
>>> DLSYM (free);
>>> +  DLSYM_OPT (usm_alloc, usm_alloc);
>>> +  DLSYM_OPT (usm_free, usm_free);
>>> +  DLSYM_OPT (is_usm_ptr, is_usm_ptr);
>>> DLSYM (dev2host);
>>> DLSYM (host2dev);
>>
>> As a sanity check, shouldn't we check that either none or all three of
>> those are defined, like in the 'if (cuda && cuda != 4) { [error] }' check
>> a bit further down?
>
> This is only going to happen when somebody writes a new plugin, and then
> they'll discover very quickly that there are issues. I've wasted more
> time writing this sentence than it's worth already. :)

Eh.  ;-) OK, outvoted.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 23f52e49368d7b26a1b1a72d6bb903d31666e961 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Tue, 14 Feb 2023 17:10:57 +0100
Subject: [PATCH] Miscellaneous clean-up re OpenMP
 'ompx_unified_shared_mem_space', 'ompx_host_mem_space'

Clean-up for og12 commit 84914e197d91a67b3d27db0e4c69a433462983a5
"openmp, nvptx: ompx_unified_shared_mem_alloc".  No functional change.

	libgomp/
	* config/linux/allocator.c (linux_memspace_calloc): Elide
	(innocuous) duplicate 'if' condition.
	* config/nvptx/allocator.c (nvptx_memspace_free): Explicitly
	handle 'memspace == ompx_host_mem_space'.
	* libgomp.h (gomp_is_usm_ptr): Remove.
---
 libgomp/ChangeLog.omp| 6 ++
 libgomp/config/linux/allocator.c | 3 +--
 libgomp/config/nvptx/allocator.c | 4 
 libgomp/libgomp.h| 1 -
 4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index b667c72b8ca..1c4b1833c0b 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,5 +1,11 @@
 2023-02-16  Thomas Schwinge  
 
+	* config/linux/allocator.c (linux_memspace_calloc): Elide
+	(innocuous) duplicate 'if' condition.
+	* config/nvptx/allocator.c (nvptx_memspace_free): Explicitly
+	handle 'memspace == ompx_host_mem_space'.
+	* libgomp.h (gomp_is_usm_ptr): Remove.
+
 	* basic-allocator.c (BASIC_ALLOC_YIELD): instead of '#deine',
 	'#define' it.
 
diff --git a/libgomp/config/linux/allocator.c b/libgomp/config/linux/allocator.c
index 07af3a2821a..8a9171c36df 100644
--- a/libgomp/config/linux/allocator.c
+++ b/libgomp/config/linux/allocator.c
@@ -95,8 +95,7 @@ linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin)
   memset (ret, 0, size);
   return ret;
 }
- 

[og12] Un-break nvptx libgomp build (was: [OG12][committed] amdgcn: OpenMP low-latency allocator)

2023-02-16 Thread Thomas Schwinge
Hi!

On 2023-02-16T18:06:41+, Andrew Stubbs  wrote:
> 1. 230216-basic-allocator.patch
>
> Separate the allocator from NVPTX so the code can be shared.

Yay!

> nvptx, libgomp: Move the low-latency allocator code
>
> There shouldn't be a functionality change; this is just so AMD can share
> the code.

I've quickly observed one "functionality" change:

> --- /dev/null
> +++ b/libgomp/basic-allocator.c

> +#ifndef BASIC_ALLOC_YIELD
> +#deine BASIC_ALLOC_YIELD
> +#endif

In file included from [...]/libgomp/config/nvptx/allocator.c:49:
[...]/libgomp/config/nvptx/../../basic-allocator.c:52:2: error: invalid 
preprocessing directive #deine; did you mean #define?
   52 | #deine BASIC_ALLOC_YIELD
  |  ^
  |  define

Yes, indeed.

I've pushed to devel/omp/gcc-12 branch
commit 6cc0e7bebf1b3ad6aacf75419e7f06942409f90c
"Un-break nvptx libgomp build", see attached.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 6cc0e7bebf1b3ad6aacf75419e7f06942409f90c Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Thu, 16 Feb 2023 21:59:55 +0100
Subject: [PATCH] Un-break nvptx libgomp build

In file included from [...]/libgomp/config/nvptx/allocator.c:49:
[...]/libgomp/config/nvptx/../../basic-allocator.c:52:2: error: invalid preprocessing directive #deine; did you mean #define?
   52 | #deine BASIC_ALLOC_YIELD
  |  ^
  |  define

Yes, indeed.

Fix-up for og12 commit 9583738a62a33a276b2aad980a27e77097f95924
"nvptx, libgomp: Move the low-latency allocator code".

	libgomp/
	* basic-allocator.c (BASIC_ALLOC_YIELD): instead of '#deine',
	'#define' it.
---
 libgomp/ChangeLog.omp | 3 +++
 libgomp/basic-allocator.c | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index ecc14b4f537..b667c72b8ca 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,5 +1,8 @@
 2023-02-16  Thomas Schwinge  
 
+	* basic-allocator.c (BASIC_ALLOC_YIELD): instead of '#deine',
+	'#define' it.
+
 	* testsuite/libgomp.c/usm-1.c: Re-enable non-GCN offloading
 	compilation.
 	* testsuite/libgomp.c/usm-2.c: Likewise.
diff --git a/libgomp/basic-allocator.c b/libgomp/basic-allocator.c
index 94b99a89e0b..b4b9e4ba13a 100644
--- a/libgomp/basic-allocator.c
+++ b/libgomp/basic-allocator.c
@@ -49,7 +49,7 @@
 #endif
 
 #ifndef BASIC_ALLOC_YIELD
-#deine BASIC_ALLOC_YIELD
+#define BASIC_ALLOC_YIELD
 #endif
 
 #define ALIGN(VAR) (((VAR) + 7) & ~7)/* 8-byte granularity.  */
-- 
2.25.1



[og12] 'libgomp.c/usm-{1,2,3,4}.c': Re-enable non-GCN offloading compilation (was: [OG12 commit] amdgcn, libgomp: USM allocation update)

2023-02-16 Thread Thomas Schwinge
Hi!

On 2022-10-24T17:26:44+0100, Andrew Stubbs  wrote:
> I've committed this patch to the devel/omp/gcc-12 branch.

> --- a/libgomp/testsuite/libgomp.c/usm-1.c
> +++ b/libgomp/testsuite/libgomp.c/usm-1.c

> --- a/libgomp/testsuite/libgomp.c/usm-2.c
> +++ b/libgomp/testsuite/libgomp.c/usm-2.c

> --- a/libgomp/testsuite/libgomp.c/usm-3.c
> +++ b/libgomp/testsuite/libgomp.c/usm-3.c

> --- a/libgomp/testsuite/libgomp.c/usm-4.c
> +++ b/libgomp/testsuite/libgomp.c/usm-4.c

> @@ -1,5 +1,6 @@
>  /* { dg-do run } */
>  /* { dg-require-effective-target omp_usm } */
> +/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target 
> offload_target_amdgcn } } */

I've pushed to devel/omp/gcc-12 branch
commit b4d4603df3fed290ccf721899be6bc69f037fe2b
"'libgomp.c/usm-{1,2,3,4}.c': Re-enable non-GCN offloading compilation",
see attached.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From b4d4603df3fed290ccf721899be6bc69f037fe2b Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Tue, 14 Feb 2023 18:57:04 +0100
Subject: [PATCH] 'libgomp.c/usm-{1,2,3,4}.c': Re-enable non-GCN offloading
 compilation

Change '-foffload=amdgcn-amdhsa=[...]' to
'-foffload-options=amdgcn-amdhsa=[...]', so that non-GCN offloading compilation
doesn't get disabled.

Fix-up for og12 commit 6ec2c29dbbc19e7d2a8f991a5848e10c65c7c74c
"amdgcn, libgomp: USM allocation update".

	libgomp/
	* testsuite/libgomp.c/usm-1.c: Re-enable non-GCN offloading
	compilation.
	* testsuite/libgomp.c/usm-2.c: Likewise.
	* testsuite/libgomp.c/usm-3.c: Likewise.
	* testsuite/libgomp.c/usm-4.c: Likewise.
---
 libgomp/ChangeLog.omp   | 8 
 libgomp/testsuite/libgomp.c/usm-1.c | 2 +-
 libgomp/testsuite/libgomp.c/usm-2.c | 2 +-
 libgomp/testsuite/libgomp.c/usm-3.c | 2 +-
 libgomp/testsuite/libgomp.c/usm-4.c | 2 +-
 5 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index 2a20516cd09..ecc14b4f537 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,11 @@
+2023-02-16  Thomas Schwinge  
+
+	* testsuite/libgomp.c/usm-1.c: Re-enable non-GCN offloading
+	compilation.
+	* testsuite/libgomp.c/usm-2.c: Likewise.
+	* testsuite/libgomp.c/usm-3.c: Likewise.
+	* testsuite/libgomp.c/usm-4.c: Likewise.
+
 2023-02-16  Tobias Burnus  
 
 	Backported from master:
diff --git a/libgomp/testsuite/libgomp.c/usm-1.c b/libgomp/testsuite/libgomp.c/usm-1.c
index f7bf897b839..35f37de7542 100644
--- a/libgomp/testsuite/libgomp.c/usm-1.c
+++ b/libgomp/testsuite/libgomp.c/usm-1.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
-/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn } } */
 
 #include 
 #include 
diff --git a/libgomp/testsuite/libgomp.c/usm-2.c b/libgomp/testsuite/libgomp.c/usm-2.c
index 3f52adbd7e1..783075edb54 100644
--- a/libgomp/testsuite/libgomp.c/usm-2.c
+++ b/libgomp/testsuite/libgomp.c/usm-2.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
-/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn } } */
 
 #include 
 #include 
diff --git a/libgomp/testsuite/libgomp.c/usm-3.c b/libgomp/testsuite/libgomp.c/usm-3.c
index 225cba5fe58..733f0f34090 100644
--- a/libgomp/testsuite/libgomp.c/usm-3.c
+++ b/libgomp/testsuite/libgomp.c/usm-3.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
-/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn } } */
 
 #include 
 #include 
diff --git a/libgomp/testsuite/libgomp.c/usm-4.c b/libgomp/testsuite/libgomp.c/usm-4.c
index d4addfc587a..5bf99df3b24 100644
--- a/libgomp/testsuite/libgomp.c/usm-4.c
+++ b/libgomp/testsuite/libgomp.c/usm-4.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
-/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn } } */
 
 #include 
 #include 
-- 
2.25.1



[PATCH] testsuite: Tweak gcc.dg/attr-aligned.c for CRIS

2023-02-16 Thread Hans-Peter Nilsson via Gcc-patches
Asking for the lines outside the "#if __CRIS__" part.
Ok to commit?

-- >8 --
tm.texi says for BIGGEST_ALIGNMENT (from which
__BIGGEST_ALIGNMENT__ is derived): "Biggest alignment that
any data type can require on this machine, in bits."

That is, using that value might be too strict for alignment
of *functions* and CRIS requires at least 16-bit alignment
for functions.  But, one purpose of the test is to test that
alignment can be set to a large but valid value, so pick
512, which has some use as a historically required alignment
for certain I/O descriptors.

* gcc.dg/attr-aligned.c: Adjust comment for ALIGN_MAX_STATIC.
(ALIGN_MAX_STATIC): Set to 512 for CRIS.
---
 gcc/testsuite/gcc.dg/attr-aligned.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/attr-aligned.c 
b/gcc/testsuite/gcc.dg/attr-aligned.c
index 887bdd0f3799..4f0c885dc812 100644
--- a/gcc/testsuite/gcc.dg/attr-aligned.c
+++ b/gcc/testsuite/gcc.dg/attr-aligned.c
@@ -18,6 +18,10 @@
 # else
 #   define ALIGN_MAX_STATIC  ALIGN_MAX_HARD
 # endif
+#elif __CRIS__
+/* __BIGGEST_ALIGNMENT__ doesn't cover functions (16 bits for CRIS). */
+#  define ALIGN_MAX_STATIC  512
+#  define ALIGN_TOO_BIG_OFILE   (ALIGN_MAX_HARD << 1)
 #elif pdp11
 #  define ALIGN_MAX_STATIC  2
 /* Work around a pdp11 ICE (see PR target/87821).  */
@@ -29,7 +33,9 @@
 /* Is this processor- or operating-system specific?  */
 #  define ALIGN_MAX_STATIC  ALIGN_MAX_HARD
 #else
-   /* Guaranteed to be accepted regardless of the target.  */
+   /* Guaranteed to be accepted regardless of the target for objects.
+  This might not be true for alignment of functions though, so
+  may need to be set to a target-specific value above.  */
 #  define ALIGN_MAX_STATIC  __BIGGEST_ALIGNMENT__
/* Guaranteed to be rejected regardless of the target.  */
 #  define ALIGN_TOO_BIG_OFILE   (ALIGN_MAX_HARD << 1)
-- 
2.30.2



[OG12][committed] amdgcn: OpenMP low-latency allocator

2023-02-16 Thread Andrew Stubbs

These patches implement an LDS memory allocator for OpenMP on AMD.

1. 230216-basic-allocator.patch

Separate the allocator from NVPTX so the code can be shared.

2. 230216-amd-low-lat.patch

Allocate the memory, adjust the default address space, and hook up the 
allocator.


They will need to be integrated with the rest of the memory management 
patch-stack when I repost that for mainline.


Andrew

nvptx, libgomp: Move the low-latency allocator code

There shouldn't be a functionality change; this is just so AMD can share
the code.

The new basic-allocator.c is designed to be included so it can be used as a
template multiple times and inlined.

libgomp/ChangeLog:

* config/nvptx/allocator.c (BASIC_ALLOC_PREFIX): New define, and
include basic-allocator.c.
(__nvptx_lowlat_heap_root): Remove.
(heapdesc): Remove.
(nvptx_memspace_alloc): Move implementation to basic-allocator.c.
(nvptx_memspace_calloc): Likewise.
(nvptx_memspace_free): Likewise.
(nvptx_memspace_realloc): Likewise.
* config/nvptx/team.c (__nvptx_lowlat_heap_root): Remove.
(gomp_nvptx_main): Call __nvptx_lowlat_init.
* basic-allocator.c: New file.

diff --git a/libgomp/basic-allocator.c b/libgomp/basic-allocator.c
new file mode 100644
index 000..94b99a89e0b
--- /dev/null
+++ b/libgomp/basic-allocator.c
@@ -0,0 +1,380 @@
+/* Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This is a basic "malloc" implementation intended for use with small,
+   low-latency memories.
+
+   To use this template, define BASIC_ALLOC_PREFIX, and then #include the
+   source file.  The other configuration macros are optional.
+
+   The root heap descriptor is stored in the first bytes of the heap, and each
+   free chunk contains a similar descriptor for the next free chunk in the
+   chain.
+
+   The descriptor is two values: offset and size, which describe the
+   location of a chunk of memory available for allocation. The offset is
+   relative to the base of the heap.  The special offset value 0xffffffff
+   indicates that the heap (free chain) is locked.  The offset and size are
+   32-bit values so the base alignment can be 8-bytes.
+
+   Memory is allocated to the first free chunk that fits.  The free chain
+   is always stored in order of the offset to assist coalescing adjacent
+   chunks.  */
+
+#include "libgomp.h"
+
+#ifndef BASIC_ALLOC_PREFIX
+#error "BASIC_ALLOC_PREFIX not defined."
+#endif
+
+#ifndef BASIC_ALLOC_YIELD
+#deine BASIC_ALLOC_YIELD
+#endif
+
+#define ALIGN(VAR) (((VAR) + 7) & ~7)/* 8-byte granularity.  */
+
+#define fn1(prefix, name) prefix ## _ ## name
+#define fn(prefix, name) fn1 (prefix, name)
+#define basic_alloc_init fn(BASIC_ALLOC_PREFIX,init)
+#define basic_alloc_alloc fn(BASIC_ALLOC_PREFIX,alloc)
+#define basic_alloc_calloc fn(BASIC_ALLOC_PREFIX,calloc)
+#define basic_alloc_free fn(BASIC_ALLOC_PREFIX,free)
+#define basic_alloc_realloc fn(BASIC_ALLOC_PREFIX,realloc)
+
+typedef struct {
+  uint32_t offset;
+  uint32_t size;
+} heapdesc;
+
+void
+basic_alloc_init (char *heap, size_t limit)
+{
+  if (heap == NULL)
+return;
+
+  /* Initialize the head of the free chain.  */
+  heapdesc *root = (heapdesc*)heap;
+  root->offset = ALIGN(1);
+  root->size = limit - root->offset;
+
+  /* And terminate the chain.  */
+  heapdesc *next = (heapdesc*)(heap + root->offset);
+  next->offset = 0;
+  next->size = 0;
+}
+
+static void *
+basic_alloc_alloc (char *heap, size_t size)
+{
+  if (heap == NULL)
+return NULL;
+
+  /* Memory is allocated in N-byte granularity.  */
+  size = ALIGN (size);
+
+  /* Acquire a lock on the low-latency heap.  */
+  heapdesc root, *root_ptr = (heapdesc*)heap;
+  do
+{
+  root.offset = __atomic_exchange_n (_ptr->offset, 0x, 
+MEMMODEL_ACQUIRE);
+  if (root.offset != 0x)
+   {
+ root.size = root_ptr->size;
+ 

[PATCH] simplify-rtx: Fix VOIDmode operand handling in simplify_subreg [PR108805]

2023-02-16 Thread Uros Bizjak via Gcc-patches
simplify_subreg can return VOIDmode const_int operand and will
cause ICE in simplify_gen_subreg when this operand is passed to it.

The patch prevents VOIDmode temporary from entering simplify_gen_subreg.
We can't process const_int operand any further, since outermode
is not an integer mode here.

2023-02-16  Uroš Bizjak  

gcc/ChangeLog:

PR target/108805
* simplify-rtx.cc (simplify_context::simplify_subreg): Prevent
VOIDmode const_int result from simplify_subreg from entering
simplify_gen_subreg.

gcc/testsuite/ChangeLog:

PR target/108805
* gcc.dg/pr108805.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

OK for master and release branches?

Uros.
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 0a1dd88b0a8..281bc418df0 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -7664,7 +7664,7 @@ simplify_context::simplify_subreg (machine_mode 
outermode, rtx op,
0).exists (_outermode))
 {
   rtx tem = simplify_subreg (int_outermode, op, innermode, byte);
-  if (tem)
+  if (tem && GET_MODE (tem) != VOIDmode)
return simplify_gen_subreg (outermode, tem, GET_MODE (tem), 0);
 }
 
diff --git a/gcc/testsuite/gcc.dg/pr108805.c b/gcc/testsuite/gcc.dg/pr108805.c
new file mode 100644
index 000..280d3f5c377
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr108805.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target longlong64 } } */
+/* { dg-options "-O" } */
+/* { dg-additional-options "-msse2" { target x86_64-*-* i?86-*-* } } */
+
+typedef __INT8_TYPE__ __attribute__((__vector_size__ (4))) U;
+typedef __INT32_TYPE__ __attribute__((__vector_size__ (4))) V;
+typedef __UINT64_TYPE__ __attribute__((__vector_size__ (8))) W;
+
+int i;
+U h;
+W g;
+
+U
+foo (void)
+{
+  W w = i != g;
+  V v = __builtin_convertvector (i | w >> 2, V);
+  U u = (U) v[0] + h;
+  return u;
+}


[pushed] testsuite, objective-c: Cater for Windows intptr type.

2023-02-16 Thread Iain Sandoe via Gcc-patches
Tested by nightstrike on Windows, and on x86_64 Linux and Darwin and
i686-darwin, pushed to trunk,
thanks
Iain

--- 8< ---

This adjusts the diagnostic output matches to cater for the differences
in intptr types on Windows.

Patch from 'nightstrike'.

Signed-off-by: Iain Sandoe 

gcc/testsuite/ChangeLog:

* objc.dg/proto-lossage-4.m: Amend diagnostic matches for Windows.
---
 gcc/testsuite/objc.dg/proto-lossage-4.m | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/objc.dg/proto-lossage-4.m 
b/gcc/testsuite/objc.dg/proto-lossage-4.m
index 9d1def5f9de..9b2367a568a 100644
--- a/gcc/testsuite/objc.dg/proto-lossage-4.m
+++ b/gcc/testsuite/objc.dg/proto-lossage-4.m
@@ -28,13 +28,13 @@ long foo(void) {
   receiver += [receiver anotherValue]; /* { dg-warning "invalid receiver type 
.intptr_t." } */
 
   receiver += [(Obj *)receiver someValue]; /* { dg-warning ".Obj. may not 
respond to .\\-someValue." } */
-/* { dg-warning "assignment to 'intptr_t' {aka '(long )?int'} from 'id' makes 
integer from pointer without a cast" "" { target *-*-* } .-1 } */
+/* { dg-warning "assignment to 'intptr_t' {aka '(long )*int'} from 'id' makes 
integer from pointer without a cast" "" { target *-*-* } .-1 } */
 
   receiver += [(Obj *)receiver anotherValue];
   receiver += [(Obj  *)receiver someValue];
   receiver += [(Obj  *)receiver anotherValue];
   receiver += [objrcvr someValue]; /* { dg-warning ".Obj. may not respond to 
.\\-someValue." } */
-/* { dg-warning "assignment to 'intptr_t' {aka '(long )?int'} from 'id' makes 
integer from pointer without a cast" "" { target *-*-* } .-1 } */
+/* { dg-warning "assignment to 'intptr_t' {aka '(long )*int'} from 'id' makes 
integer from pointer without a cast" "" { target *-*-* } .-1 } */
 
   receiver += [objrcvr anotherValue];
   receiver += [(Obj  *)objrcvr someValue];
@@ -42,7 +42,7 @@ long foo(void) {
   receiver += [objrcvr2 someValue];
   receiver += [objrcvr2 anotherValue];
   receiver += [(Obj *)objrcvr2 someValue]; /* { dg-warning ".Obj. may not 
respond to .\\-someValue." } */
-/* { dg-warning "assignment to 'intptr_t' {aka '(long )?int'} from 'id' makes 
integer from pointer without a cast" "" { target *-*-* } .-1 } */
+/* { dg-warning "assignment to 'intptr_t' {aka '(long )*int'} from 'id' makes 
integer from pointer without a cast" "" { target *-*-* } .-1 } */
 
   receiver += [(Obj *)objrcvr2 anotherValue];
 
-- 
2.37.1 (Apple Git-137.1)



RE: Attempt to register OpenMP pinned memory using a device instead of 'mlock' (was: [PATCH] libgomp, openmp: pinned memory)

2023-02-16 Thread Stubbs, Andrew via Gcc-patches
> -Original Message-
> From: Thomas Schwinge 
> Sent: 16 February 2023 15:33
> To: Andrew Stubbs ; Jakub Jelinek ;
> Tobias Burnus ; gcc-patches@gcc.gnu.org
> Subject: Attempt to register OpenMP pinned memory using a device instead of
> 'mlock' (was: [PATCH] libgomp, openmp: pinned memory)
> 
> Hi!
> 
> On 2022-06-09T11:38:22+0200, I wrote:
> > On 2022-06-07T13:28:33+0100, Andrew Stubbs  wrote:
> >> On 07/06/2022 13:10, Jakub Jelinek wrote:
> >>> On Tue, Jun 07, 2022 at 12:05:40PM +0100, Andrew Stubbs wrote:
>  Following some feedback from users of the OG11 branch I think I need to
>  withdraw this patch, for now.
> 
>  The memory pinned via the mlock call does not give the expected
> performance
>  boost. I had not expected that it would do much in my test setup, given
> that
>  the machine has a lot of RAM and my benchmarks are small, but others
> have
>  tried more and on varying machines and architectures.
> >>>
> >>> I don't understand why there should be any expected performance boost
> (at
> >>> least not unless the machine starts swapping out pages),
> >>> { omp_atk_pinned, true } is solely about the requirement that the memory
> >>> can't be swapped out.
> >>
> >> It seems like it takes a faster path through the NVidia drivers. This is
> >> a black box, for me, but that seems like a plausible explanation. The
> >> results are different on x86_64 and powerpc hosts (such as the Summit
> >> supercomputer).
> >
> > For example, it's documented that 'cuMemHostAlloc',
> >
> > <https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g572ca4011bfcb25034888a14d4e035b9>,
> > "Allocates page-locked host memory".  The crucial thing, though, what
> > makes this different from 'malloc' plus 'mlock' is, that "The driver
> > tracks the virtual memory ranges allocated with this function and
> > automatically accelerates calls to functions such as cuMemcpyHtoD().
> > Since the memory can be accessed directly by the device, it can be read
> > or written with much higher bandwidth than pageable memory obtained with
> > functions such as malloc()".
> >
> > Similar, for example, for 'cuMemAllocHost',
> >
> > <https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1gdd8311286d2c2691605362c689bc64e0>.
> >
> > This, to me, would explain why "the mlock call does not give the expected
> > performance boost", in comparison with 'cuMemAllocHost'/'cuMemHostAlloc';
> > with 'mlock' you're missing the "tracks the virtual memory ranges"
> > aspect.
> >
> > Also, by means of the Nvidia Driver allocating the memory, I suppose
> > using this interface likely circumvents any "annoying" 'ulimit'
> > limitations?  I get this impression, because documentation continues
> > stating that "Allocating excessive amounts of memory with
> > cuMemAllocHost() may degrade system performance, since it reduces the
> > amount of memory available to the system for paging.  As a result, this
> > function is best used sparingly to allocate staging areas for data
> > exchange between host and device".
> >
>  It seems that it isn't enough for the memory to be pinned, it has to be
>  pinned using the Cuda API to get the performance boost.
> >>>
> >>> For performance boost of what kind of code?
> >>> I don't understand how Cuda API could be useful (or can be used at all)
> if
> >>> offloading to NVPTX isn't involved.  The fact that somebody asks for
> host
> >>> memory allocation with omp_atk_pinned set to true doesn't mean it will
> be
> >>> in any way related to NVPTX offloading (unless it is in NVPTX target
> region
> >>> obviously, but then mlock isn't available, so sure, if there is
> something
> >>> CUDA can provide for that case, nice).
> >>
> >> This is specifically for NVPTX offload, of course, but then that's what
> >> our customer is paying for.
> >>
> >> The expectation, from users, is that memory pinning will give the
> >> benefits specific to the active device. We can certainly make that
> >> happen when there is only one (flavour of) offload device present. I had
> >> hoped it could be one way for all, but it looks like not.
> >
> > Aren't there CUDA Driver interfaces for that?  That is:
> 

[PATCH] gcc: Remove size limit of PCH for *-*-mingw32 hosts

2023-02-16 Thread LIU Hao via Gcc-patches


--
Best regards,
LIU Hao
From a4d5e161fbaa5b9994077ffb474e2b55c6c3b3cb Mon Sep 17 00:00:00 2001
From: LIU Hao 
Date: Tue, 10 May 2022 13:19:07 +0800
Subject: [PATCH] gcc: Remove size limit of PCH for *-*-mingw32 hosts

PCHs can now be relocated, so the size limit makes no sense any more.

This patch was submitted to MSYS2 9 months ago for GCC 12. No issue has been
reported so far.

Reference: 
https://github.com/msys2/MINGW-packages/blob/717d5a5a09e2370e3bd7e12b393a26dbfbe48921/mingw-w64-gcc/0010-Fix-using-large-PCH.patch
Signed-off-by: LIU Hao 

gcc/ChangeLog:

PR pch/14940
* gcc/config/i386/host-mingw32.cc (mingw32_gt_pch_get_address):
Remove the size limit `pch_VA_max_size`
---
 gcc/config/i386/host-mingw32.cc | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/host-mingw32.cc b/gcc/config/i386/host-mingw32.cc
index aeee956ed11..acff6138d63 100644
--- a/gcc/config/i386/host-mingw32.cc
+++ b/gcc/config/i386/host-mingw32.cc
@@ -44,9 +44,6 @@ static size_t mingw32_gt_pch_alloc_granularity (void);
 
 static inline void w32_error(const char*, const char*, int, const char*);
 
-/* FIXME: Is this big enough?  */
-static const size_t pch_VA_max_size  = 128 * 1024 * 1024;
-
 /* Granularity for reserving address space.  */
 static size_t va_granularity = 0x1;
 
@@ -88,9 +85,6 @@ static void *
 mingw32_gt_pch_get_address (size_t size, int)
 {
   void* res;
-  size = (size + va_granularity - 1) & ~(va_granularity - 1);
-  if (size > pch_VA_max_size)
-return NULL;
 
   /* FIXME: We let system determine base by setting first arg to NULL.
  Allocating at top of available address space avoids unnecessary
@@ -100,7 +94,7 @@ mingw32_gt_pch_get_address (size_t size, int)
  If we allocate at bottom we need to reserve the address as early
  as possible and at the same point in each invocation. */
  
-  res = VirtualAlloc (NULL, pch_VA_max_size,
+  res = VirtualAlloc (NULL, size,
  MEM_RESERVE | MEM_TOP_DOWN,
  PAGE_NOACCESS);
   if (!res)
@@ -150,7 +144,7 @@ mingw32_gt_pch_use_address (void *, size_t size, int 
fd,
 
   /* Offset must be also be a multiple of allocation granularity for
  this to work.  We can't change the offset. */ 
-  if ((offset & (va_granularity - 1)) != 0 || size > pch_VA_max_size)
+  if ((offset & (va_granularity - 1)) != 0)
 return -1;
 
 
-- 
2.39.2



OpenPGP_signature
Description: OpenPGP digital signature


[PATCH][ARM] MVE: Implementing auto-vectorized array * scalar instructions

2023-02-16 Thread Victor L. Do Nascimento via Gcc-patches
Hi all,

The back-end pattern for mapping the auto-vectorized representation of
vector * scalar to the machine instruction VMUL was missing, and
multiple instructions were needed to reproduce this behavior as a
result of a failed RTL pattern match in the combine pass.

RTL patterns were introduced to reproduce the behavior of the
intrinsics vmulq_n_ and vmulq_n_f.

In the case of literal constants, an intermediate instruction was
added to the initial RTL expansion to ensure a general-purpose register
was allocated to store the constant, which could then be extracted
from the constant vector.

For the function

void test_vmulimm_s32x4 (int32_t * __restrict__ dest, int32_t *a) 
{ 
  int i;
  for (i=0; i<4; i++) { 
dest[i] = a[i] * 5; 
  }
}


The GIMPLE -> RTL expansion is modified to produce:
(set (reg:SI 119)
 (const_int 5 [0x5]))
(set (reg:V4SI 118)
 (mult:V4SI (vec_duplicate:V4SI (reg:SI 119))
(reg:V4SI 117)))

instead of:
(set (reg:V4SI 119)
 (const_vector:V4SI [
(const_int 5 [0x5]) repeated x4
  ]))
(set (reg:V4SI 118)
 (mult:V4SI (reg:V4SI 117)
(reg:V4SI 119)))

The end assembly for the above function introduces the emission of the 
following insn:
vmul.i32 q3, q3, r3

as opposed to:
vmul.i32 q3, q3, q2

All tests in gcc.target/arm/simd/mve-vmul-scalar-1.c now pass.

Added new RTL templates, amended unit test and checked for regressions on 
arm-none-eabi.

Thanks,
Victor

gcc:
* gcc/config/arm/arm.cc (neon_vdup_constant): static keyword
removed.
* gcc/config/arm/arm-protos.h (neon_vdup_constant): prototype
added.
* gcc/config/arm/mve.md (@mve_vmulq_n_2): New.
* gcc/config/arm/predicates.md (reg_or_mve_replicated_const_operand):
New.
* gcc/config/arm/vec-common.md (mul3): Modify to use
`reg_or_mve_replicated_const_operand'.

testsuite:
* gcc.target/arm/simd/mve-vmul-scalar-1.c: Corrected typo,
xfails removed. 
---
 gcc/config/arm/arm-protos.h|  1 +
 gcc/config/arm/arm.cc  |  2 +-
 gcc/config/arm/mve.md  | 11 +++
 gcc/config/arm/predicates.md   |  8 
 gcc/config/arm/vec-common.md   | 14 --
 .../gcc.target/arm/simd/mve-vmul-scalar-1.c| 13 ++---
 6 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index aea472bfbb9..4cf9fb00e01 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -199,6 +199,7 @@ extern rtx arm_load_tp (rtx);
 extern bool arm_coproc_builtin_available (enum unspecv);
 extern bool arm_coproc_ldc_stc_legitimate_address (rtx);
 extern rtx arm_stack_protect_tls_canary_mem (bool);
+extern rtx neon_vdup_constant (rtx, bool);
 
 
 #if defined TREE_CODE
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index efc48349dd3..7d9d265b0a7 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -13301,7 +13301,7 @@ neon_pairwise_reduce (rtx op0, rtx op1, machine_mode 
mode,
If this is the case, and GENERATE is set, we also generate
instructions to do this and return an RTX to assign to the register.  */
 
-static rtx
+rtx
 neon_vdup_constant (rtx vals, bool generate)
 {
   machine_mode mode = GET_MODE (vals);
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 555ad1b66c8..806c24e33aa 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1376,6 +1376,17 @@
   [(set_attr "type" "mve_move")
 ])
 
+(define_insn "@mve_vmulq_n_2"
+  [
+   (set (match_operand:MVE_VLD_ST 0 "s_register_operand" "=w")
+   (mult:MVE_VLD_ST (vec_duplicate:MVE_VLD_ST (match_operand: 1 
"s_register_operand" "r"))
+  (match_operand:MVE_VLD_ST 2 
"s_register_operand" "w")))
+  ]
+  "TARGET_HAVE_MVE"
+  "vmul.%#\t%q0, %q2, %r1"
+  [(set_attr "type" "mve_move")
+])
+
 ;;
 ;; [vmulq_u, vmulq_s])
 ;;
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 3139750c606..31eadfa2d3b 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -113,6 +113,14 @@
   && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL));
 })
 
+(define_predicate "reg_or_mve_replicated_const_operand"
+  (if_then_else (and (match_test "TARGET_HAVE_MVE")
+(match_code "const_vector")
+(match_test "const_vec_duplicate_p (op)"))
+   (match_operand 0 "immediate_operand")
+   (match_operand 0 "s_register_operand"))
+)
+
 (define_predicate "neon_inv_logic_op2"
   (ior (match_operand 0 "imm_for_neon_inv_logic_operand")
(match_operand 0 "s_register_operand")))
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index f06df4db636..17b67c214b4 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ 

Attempt to register OpenMP pinned memory using a device instead of 'mlock' (was: [PATCH] libgomp, openmp: pinned memory)

2023-02-16 Thread Thomas Schwinge
Hi!

On 2022-06-09T11:38:22+0200, I wrote:
> On 2022-06-07T13:28:33+0100, Andrew Stubbs  wrote:
>> On 07/06/2022 13:10, Jakub Jelinek wrote:
>>> On Tue, Jun 07, 2022 at 12:05:40PM +0100, Andrew Stubbs wrote:
 Following some feedback from users of the OG11 branch I think I need to
 withdraw this patch, for now.

 The memory pinned via the mlock call does not give the expected performance
 boost. I had not expected that it would do much in my test setup, given 
 that
 the machine has a lot of RAM and my benchmarks are small, but others have
 tried more and on varying machines and architectures.
>>>
>>> I don't understand why there should be any expected performance boost (at
>>> least not unless the machine starts swapping out pages),
>>> { omp_atk_pinned, true } is solely about the requirement that the memory
>>> can't be swapped out.
>>
>> It seems like it takes a faster path through the NVidia drivers. This is
>> a black box, for me, but that seems like a plausible explanation. The
>> results are different on x86_64 and powerpc hosts (such as the Summit
>> supercomputer).
>
> For example, it's documented that 'cuMemHostAlloc',
> ,
> "Allocates page-locked host memory".  The crucial thing, though, what
> makes this different from 'malloc' plus 'mlock' is, that "The driver
> tracks the virtual memory ranges allocated with this function and
> automatically accelerates calls to functions such as cuMemcpyHtoD().
> Since the memory can be accessed directly by the device, it can be read
> or written with much higher bandwidth than pageable memory obtained with
> functions such as malloc()".
>
> Similar, for example, for 'cuMemAllocHost',
> .
>
> This, to me, would explain why "the mlock call does not give the expected
> performance boost", in comparison with 'cuMemAllocHost'/'cuMemHostAlloc';
> with 'mlock' you're missing the "tracks the virtual memory ranges"
> aspect.
>
> Also, by means of the Nvidia Driver allocating the memory, I suppose
> using this interface likely circumvents any "annoying" 'ulimit'
> limitations?  I get this impression, because documentation continues
> stating that "Allocating excessive amounts of memory with
> cuMemAllocHost() may degrade system performance, since it reduces the
> amount of memory available to the system for paging.  As a result, this
> function is best used sparingly to allocate staging areas for data
> exchange between host and device".
>
 It seems that it isn't enough for the memory to be pinned, it has to be
 pinned using the Cuda API to get the performance boost.
>>>
>>> For performance boost of what kind of code?
>>> I don't understand how Cuda API could be useful (or can be used at all) if
>>> offloading to NVPTX isn't involved.  The fact that somebody asks for host
>>> memory allocation with omp_atk_pinned set to true doesn't mean it will be
>>> in any way related to NVPTX offloading (unless it is in NVPTX target region
>>> obviously, but then mlock isn't available, so sure, if there is something
>>> CUDA can provide for that case, nice).
>>
>> This is specifically for NVPTX offload, of course, but then that's what
>> our customer is paying for.
>>
>> The expectation, from users, is that memory pinning will give the
>> benefits specific to the active device. We can certainly make that
>> happen when there is only one (flavour of) offload device present. I had
>> hoped it could be one way for all, but it looks like not.
>
> Aren't there CUDA Driver interfaces for that?  That is:
>
 I had not done this
 because it was difficult to resolve the code abstraction
 difficulties and anyway the implementation was supposed to be device
 independent, but it seems we need a specific pinning mechanism for each
 device.
>
> If not directly *allocating and registering* such memory via
> 'cuMemAllocHost'/'cuMemHostAlloc', you should still be able to only
> *register* your standard 'malloc'ed etc. memory via 'cuMemHostRegister',
> :
> "Page-locks the memory range specified [...] and maps it for the
> device(s) [...].  This memory range also is added to the same tracking
> mechanism as cuMemHostAlloc to automatically accelerate [...]"?  (No
> manual 'mlock'ing involved in that case, too; presumably again using this
> interface likely circumvents any "annoying" 'ulimit' limitations?)
>
> Such a *register* abstraction can then be implemented by all the libgomp
> offloading plugins: they just call the respective
> CUDA/HSA/etc. functions to register such (existing, 'malloc'ed, etc.)
> memory.
>
> ..., but maybe I'm missing some crucial "detail" here?

Indeed this 

Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

2023-02-16 Thread 盼 李 via Gcc-patches
Hi all,

Thank you for your patience. Just file another PR like "Bugfix for rvv bool 
mode precision adjustment" for the mode precision adjustment only. Feel free to 
comment if any questions or concerns.

Pan

From: 盼 李 
Sent: Wednesday, February 15, 2023 23:57
To: Richard Biener 
Cc: Andrew Stubbs ; juzhe.zh...@rivai.ai 
; gcc-patches ; kito.cheng 
; richard.sandif...@arm.com 
Subject: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

After some investigation, the mode precision adjusting can help to tell the 
difference from the VxN1BI to VxN64BI, besides the existing mode_size. Thus I 
would like to prepare the patch for the precision adjustment only first.

Unfortunately, there is one selftest failure right now when I try to adjust the 
precision of VxN*BI and I am still working on it. Of course, will keep you all 
posted.

VxN1BI  adjust precision => 1
VxN2BI  adjust precision => 2
VxN4BI  adjust precision => 4
VxN8BI  adjust precision => 8
VxN16BI  adjust precision => 16
VxN32BI  adjust precision => 32
VxN64BI  adjust precision => 64

Pan

From: Richard Biener 
Sent: Monday, February 13, 2023 23:47
To: 盼 李 
Cc: Andrew Stubbs ; juzhe.zh...@rivai.ai 
; gcc-patches ; kito.cheng 
; richard.sandif...@arm.com 
Subject: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types

On Mon, 13 Feb 2023, 盼 李 wrote:

> Thanks all for your help and comments.
>
> Let me share more information about this patch. Especially for the 
> tree-ssa-sccvn.cc part.
>
> Assume we have the below test code for this issue.
>
> void
> test_1(int8_t * restrict in, int8_t * restrict out) {
> vbool8_t v2 = *(vbool8_t*)in;
> vbool16_t v5 = *(vbool16_t*)in;
>
> *(vbool8_t*)(out + 100) = v2;
> *(vbool16_t*)(out + 200) = v5;
> }
>
> Without the tree-ssa-sccvn.cc file code change.
> 
> void test_1 (int8_t * restrict in, int8_t * restrict out)
> {
>   vbool8_t v2;
>   __rvv_bool16_t _1;
>
>[local count: 1073741824]:
>   v2_4 = MEM[(vbool8_t *)in_3(D)];
>   _1 = VIEW_CONVERT_EXPR<__rvv_bool16_t>(v2_4);  // insert during 039.fre1
>   MEM[(vbool8_t *)out_5(D) + 100B] = v2_4;
>   MEM[(vbool16_t *)out_5(D) + 200B] = _1;
>   return;
> }
>
> With the tree-ssa-sccvn.cc file code change.
> 
> void test_1 (int8_t * restrict in, int8_t * restrict out)
> {
>   vbool16_t v5;
>   vbool8_t v2;
>
>[local count: 1073741824]:
>   v2_3 = MEM[(vbool8_t *)in_2(D)];
>   v5_4 = MEM[(vbool16_t *)in_2(D)];
>   MEM[(vbool8_t *)out_5(D) + 100B] = v2_3;
>   MEM[(vbool16_t *)out_5(D) + 200B] = v5_4;
>   return;
> }
>
> Thus, I figured out the a-main.c.039t.fre1 pass results in this CONVERT being 
> inserted.
> With some debugging, I located the difference that comes from the
> expressions_equal_p. If GET_MODE_SIZE(mode) is the same between the VxN8Bimode
> and VxN4Bimode, the expressions_equal_p will compare the same address of a 
> tree, aka
> POLY_INT_CST [8, 8].
>
> visit_reference_op_load
> |- vn_reference_lookup
> |- vn_reference_lookup_2
>  |- find_slot_with_hash
>  |- vn_reference_hasher::equal
>  |- expressions_equal_p
>
> Meanwhile, we also double-checked that set the different MODE_SIZE of both the
> VxN8Bimode and VxN4Bimode (for example, [8, 1] and [4,1] for test only) are 
> able
> to resolve this issue. But they should be [1, 1] according to the ISA 
> semantics.
>
> Thus, we try to set other MODE_XXX but it seems not working at all. For 
> example:
>
> VNx4BIMode NUNITS [0x4, 0x4]
> VNx8BIMode NUNITS [0x8, 0x8]
>
> Finally, I found the TARGET_MODES_TIEABLE_P and inject it into the function
> visit_reference_op_load to resolve this issue.
>
> I will continue to try other ways besides the tree-ssa-sccvn.cc if this may 
> not be
> the right place for this issue.

There are other places like alias analysis which will be not happy
if the mode size/precision do not match reality.  So no, I don't think
modes_tieable is the correct thing to check here.  Instead the existing
check seems to be to the point but the modes are not set up correctly
to carry the info of one having padding at the end and the other not.

Richard.

> Thank again and will keep you posted.
>
> Pan
>
>
>
> 
> From: Andrew Stubbs 
> Sent: Monday, February 13, 2023 19:00
> To: Richard Biener ; juzhe.zh...@rivai.ai 
> 
> Cc: Pan Li ; gcc-patches 
> ; kito.cheng ; 
> richard.sandif...@arm.com 
> Subject: Re: [PATCH] RISC-V: Bugfix for mode tieable of the rvv bool types
>
> I presume I've been CC'd on this conversation because weird vector
> architecture problems have happened to me before. :)
>
> However, I'm not sure I can help much because AMD GCN does not use
> BImode vectors at all. This is partly because loading boolean 

[PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment

2023-02-16 Thread incarnation.p.lee--- via Gcc-patches
From: Pan Li 

Fix the bug of the rvv bool mode precision with the adjustment.
The bits size of vbool*_t will be adjusted to
[1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
adjusted mode precision of vbool*_t will help the underlying passes to
make the right decision for both correctness and optimization.

Given below sample code:
void test_1(int8_t * restrict in, int8_t * restrict out)
{
  vbool8_t v2 = *(vbool8_t*)in;
  vbool16_t v5 = *(vbool16_t*)in;
  *(vbool16_t*)(out + 200) = v5;
  *(vbool8_t*)(out + 100) = v2;
}

Before the precision adjustment:
addia4,a1,100
vsetvli a5,zero,e8,m1,ta,ma
addia1,a1,200
vlm.v   v24,0(a0)
vsm.v   v24,0(a4)
// Need one vsetvli and vlm.v for correctness here.
vsm.v   v24,0(a1)

After the precision adjustment:
csrrt0,vlenb
sllit1,t0,1
csrra3,vlenb
sub sp,sp,t1
sllia4,a3,1
add a4,a4,sp
sub a3,a4,a3
vsetvli a5,zero,e8,m1,ta,ma
addia2,a1,200
vlm.v   v24,0(a0)
vsm.v   v24,0(a3)
addia1,a1,100
vsetvli a4,zero,e8,mf2,ta,ma
csrrt0,vlenb
vlm.v   v25,0(a3)
vsm.v   v25,0(a2)
sllit1,t0,1
vsetvli a5,zero,e8,m1,ta,ma
vsm.v   v24,0(a1)
add sp,sp,t1
jr  ra

However, there may be some optimization opportunities after
the mode precision adjustment. They can be taken care of in
the RISC-V backend in separate follow-up PR(s).

PR 108185
PR 108654

gcc/ChangeLog:

* config/riscv/riscv-modes.def (ADJUST_PRECISION):
* config/riscv/riscv.cc (riscv_v_adjust_precision):
* config/riscv/riscv.h (riscv_v_adjust_precision):
* genmodes.cc (ADJUST_PRECISION):
(emit_mode_adjustments):

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr108185-1.c: New test.
* gcc.target/riscv/pr108185-2.c: New test.
* gcc.target/riscv/pr108185-3.c: New test.
* gcc.target/riscv/pr108185-4.c: New test.
* gcc.target/riscv/pr108185-5.c: New test.
* gcc.target/riscv/pr108185-6.c: New test.
* gcc.target/riscv/pr108185-7.c: New test.
* gcc.target/riscv/pr108185-8.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-modes.def|  8 +++
 gcc/config/riscv/riscv.cc   | 12 
 gcc/config/riscv/riscv.h|  1 +
 gcc/genmodes.cc | 25 ++-
 gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++
 gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++
 gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++
 gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++
 gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++
 gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++
 gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++
 gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 +
 12 files changed, 598 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-8.c

diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
index d5305efa8a6..110bddce851 100644
--- a/gcc/config/riscv/riscv-modes.def
+++ b/gcc/config/riscv/riscv-modes.def
@@ -72,6 +72,14 @@ ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * 
riscv_bytes_per_vector_chunk);
 ADJUST_BYTESIZE (VNx32BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
 ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 8));
 
+ADJUST_PRECISION (VNx1BI, riscv_v_adjust_precision (VNx1BImode, 1));
+ADJUST_PRECISION (VNx2BI, riscv_v_adjust_precision (VNx2BImode, 2));
+ADJUST_PRECISION (VNx4BI, riscv_v_adjust_precision (VNx4BImode, 4));
+ADJUST_PRECISION (VNx8BI, riscv_v_adjust_precision (VNx8BImode, 8));
+ADJUST_PRECISION (VNx16BI, riscv_v_adjust_precision (VNx16BImode, 16));
+ADJUST_PRECISION (VNx32BI, riscv_v_adjust_precision (VNx32BImode, 32));
+ADJUST_PRECISION (VNx64BI, riscv_v_adjust_precision (VNx64BImode, 64));
+
 /*
| Mode| MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 |
| | LMUL| SEW/LMUL| LMUL| SEW/LMUL|
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 

Re: [PATCH] rs6000: Fix vector parity support [PR108699]

2023-02-16 Thread Segher Boessenkool
Hi!

On Thu, Feb 16, 2023 at 08:06:02PM +0800, Kewen.Lin wrote:
> on 2023/2/16 19:14, Segher Boessenkool wrote:
> > On Thu, Feb 16, 2023 at 05:23:40PM +0800, Kewen.Lin wrote:
> >> This patch is to fix the handling with one more pre-insn
> >> vpopcntb.  It also fixes an oversight having V8HI in VEC_IP,
> >> replaces VParity with VEC_IP, and adjusts the existing
> >> UNSPEC_PARITY to a more meaningful name UNSPEC_PARITYB.
> > 
> > Please don't do that.  UNSPEC_PARITYB is worse than UNSPEC_PARITY,
> > even more so for the prtyw etc. instructions.
> 
> I thought the scalar insns prty[wd] also operate on byte
> (especially on the least significant bit in each byte),
> PARITYB(yte) seems better ...

The scalar instruction does not include a "b" in the mnemonic, and it
says nothing "byte" or "bit" in the instruction name either.  The
existing name is simpler, less confusing, simply better.

> > You might want to express the vector parity insns separately, but then
> > *do that*, don't rename the normal stuff as well, and use a more obvious
> > name like UNSPEC_VPARITY please.
> 
> I'll update for vector only.  Maybe it's better with UNSPEC_VPARITY*B*?
> since the mnemonic has "b"(yte).

No, you are right that the semantics are pretty much the same.  Please
just keep UNSPEC_PARITY everywhere.

> >>const vsll __builtin_altivec_vprtybd (vsll);
> >> -VPRTYBD parityv2di2 {}
> >> +VPRTYBD p9v_paritybv2di2 {}
> > 
> > Why this?  Please keep the simpler names if at all possible.
> 
> The bif would like to map with the vector parity byte insns
> directly, the parity2 can't work here any more.

Ah, because it cannot use the expander here, it has to be a define_insn?
Why is that?

> The name is updated from previous *p9v_parity2 (becoming
> to a named define_insn), I noticed there are some names with
> p8v_, p9v_, meant to keep it consistent with the context.
> You want this to be simplified as parity*b*v2di2?

Without the "b".  But that would be better then, yes.  This is a great
example why p9v_ in the name is not good: most users do not care at all
what ISA version this insn first appeared in.

> > It is completely non-obvious what a "paritybsi2" is.  There is no such
> > thing as a "parityb", not for normal people anyway.  It is very
> > important that names give a hint of what they stand for.
> > 
> > The _cmpb of the existing name indicates that a cmpb insn is generated
> > here as well.  Has that changed?
> > 
> 
> I got the same understanding initially, but as you may have noticed
> there isn't a cmpb, it seems just to be different from the name
> parity2 so put the condition as one suffix.

Yeah.  Something for a future improvement.

> >> -(define_insn "parity2_cmpb"
> >> +(define_insn "parityb2"
> >>[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
> >> -  (unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] 
> >> UNSPEC_PARITY))]
> >> +  (unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")]
> >> +  UNSPEC_PARITYB))]
> >>"TARGET_CMPB && TARGET_POPCNTB"
> >>"prty %0,%1"
> >>[(set_attr "type" "popcnt")])
> > 
> > Hrm, the original name was not so good apparently.  Still, please don't
> > change multiple independent things in one patch, it makes the patch hard
> > to read and understand and very hard to spot mistakes in.
> 
> Got it, good point.

And we are in stage 4 so you really really do not want something that
may be a mistake, that may cause any problems :-)

> > So first do a patch that is essentially just this?
> 
> OK, will update and test it again.

Thanks!

> > Later patches can do all other things (also, not do this expand for
> > TImode at all, ho hum).
> 
> OK, I guess all the others are for next stage1. :)

Yes exactly.  And one (small, self-contained) thing per patch please.

Thanks again,


Segher


[committed] libstdc++: Replace non-ascii character in test

2023-02-16 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8 --

This ensures the test will work with -fexec-charset=ascii.

libstdc++-v3/ChangeLog:

* testsuite/std/format/arguments/lwg3810.cc: Replace UTF-8
ellipsis character.
---
 libstdc++-v3/testsuite/std/format/arguments/lwg3810.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/testsuite/std/format/arguments/lwg3810.cc 
b/libstdc++-v3/testsuite/std/format/arguments/lwg3810.cc
index c1be229040f..60587a93d2c 100644
--- a/libstdc++-v3/testsuite/std/format/arguments/lwg3810.cc
+++ b/libstdc++-v3/testsuite/std/format/arguments/lwg3810.cc
@@ -20,6 +20,6 @@ test_ctad()
   using std::make_format_args;
   using SomeContext = std::wformat_context;
 
-  // foo(make_format_args(…)); // won't work
+  // foo(make_format_args(...)); // won't work
   foo(basic_format_args(make_format_args(1, 2, 3))); // should 
work
 }
-- 
2.39.2



'libgomp.fortran/target-nowait-array-section.f90' (was: [committed] libgomp: Fix comment typo)

2023-02-16 Thread Thomas Schwinge
Hi!

On 2023-02-16T12:20:17+0100, Jakub Jelinek via Gcc-patches 
 wrote:
> I saw
> FAIL: libgomp.fortran/target-nowait-array-section.f90   -O  execution test
> in my last x86_64-linux bootstrap.  From quick skimming, it might be just
> unreliable test, which assumes that asynchronous execution wouldn't produce
> ordered sequence, but can't it happen even with asynchronous execution?

Yes, that does match my findings reported a while ago, see:
.


Grüße
 Thomas
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[committed] libstdc++: Implement <experimental/synchronized_value> (P0290)

2023-02-16 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8 --

This was approved for the Concurrency TS v2 in Issaquah.

Although the TS is based on C++20, this enables the new header for C++17
as well. This will make it available to more users, and I hope that will
get more feedback on the feature.

libstdc++-v3/ChangeLog:

* include/Makefile.am: Add new header.
* include/Makefile.in: Regenerate.
* include/experimental/synchronized_value: New file.
* testsuite/experimental/synchronized_value.cc: New test.
---
 libstdc++-v3/include/Makefile.am  |   1 +
 libstdc++-v3/include/Makefile.in  |   1 +
 .../include/experimental/synchronized_value   | 100 ++
 .../experimental/synchronized_value.cc|  42 
 4 files changed, 144 insertions(+)
 create mode 100644 libstdc++-v3/include/experimental/synchronized_value
 create mode 100644 libstdc++-v3/testsuite/experimental/synchronized_value.cc

diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
index 5b501272830..a880e8ee227 100644
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -798,6 +798,7 @@ experimental_headers = \
${experimental_srcdir}/source_location \
${experimental_srcdir}/string \
${experimental_srcdir}/string_view \
+   ${experimental_srcdir}/synchronized_value \
${experimental_srcdir}/system_error \
${experimental_srcdir}/timer \
${experimental_srcdir}/tuple \
diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in
index 36e35e13806..0ff875b280b 100644
--- a/libstdc++-v3/include/Makefile.in
+++ b/libstdc++-v3/include/Makefile.in
@@ -1144,6 +1144,7 @@ experimental_headers = \
${experimental_srcdir}/source_location \
${experimental_srcdir}/string \
${experimental_srcdir}/string_view \
+   ${experimental_srcdir}/synchronized_value \
${experimental_srcdir}/system_error \
${experimental_srcdir}/timer \
${experimental_srcdir}/tuple \
diff --git a/libstdc++-v3/include/experimental/synchronized_value 
b/libstdc++-v3/include/experimental/synchronized_value
new file mode 100644
index 000..9a91da912ca
--- /dev/null
+++ b/libstdc++-v3/include/experimental/synchronized_value
@@ -0,0 +1,100 @@
+//  -*- C++ -*-
+
+// Copyright The GNU Toolchain Authors.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+// .
+
+/** @file include/experimental/synchronized_value
+ *  This is a TS C++ Library header.
+ *  @ingroup libfund-ts
+ */
+
+#ifndef _GLIBCXX_EXPERIMENTAL_SYNCVAL
+#define _GLIBCXX_EXPERIMENTAL_SYNCVAL 1
+
+#pragma GCC system_header
+
+#include  // for std::mutex
+
+#if __cplusplus >= 201703L
+#include 
+#include 
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+namespace experimental::inline concurrency_v2
+{
+#define __cpp_lib_concurrency_v2_synchronized_value 202302
+
+  template
+class synchronized_value
+{
+  // TODO: Use partial specialization after PR c++/71954 is fixed.
+  template
+   static inline constexpr bool __is_self
+ = sizeof...(_Args) == 1
+ && (is_same_v<__remove_cvref_t<_Args>, synchronized_value> && 
...);
+
+#if ! __cpp_concepts
+  template
+   using __not_self = bool_constant>;
+#endif
+
+public:
+  synchronized_value(const synchronized_value&) = delete;
+  synchronized_value& operator=(const synchronized_value&) = delete;
+
+#if __cpp_concepts
+  template
+   requires (!__is_self<_Args...>) && is_constructible_v<_Tp, _Args...>
+#else
+  template>,
+  typename = _Require>>
+#endif
+   synchronized_value(_Args&&... __args)
+   noexcept(is_nothrow_constructible_v<_Tp, _Args...>)
+   : _M_val(std::forward<_Args>(__args)...)
+   { }
+
+  template
+   friend invoke_result_t<_Fn, _Up&, _Types&...>
+   apply(_Fn&&, synchronized_value<_Up>&, synchronized_value<_Types>&...);
+
+private:
+  mutex _M_mut;
+ 

[committed] libstdc++: Implement P2255R2 dangling checks for std::pair

2023-02-16 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux. Pushed to trunk.

-- >8 --

This uses the new __reference_constructs_from_temporary built-in to
identify when a std::pair constructor will bind a reference to a
temporary that goes out of scope at the end of the constructor.  For
example, std::pair<const long&, int> p(1, 2); will call the
pair<const long&, int>::pair(U1&&, U2&&) constructor with U1=int and U2=int. In the
constructor body a temporary long will be created and the p.first member
will bind to that temporary. When the constructor returns, the reference
is immediately dangling. P2255 requires the constructor to be deleted to
prevent this bug.

Although P2255 was approved for C++23, it fixes a longstanding LWG issue
in older standards, and it turns silent runtime undefined behaviour into
a compilation error. Because of that, the dangling checks are applied
all the way back to C++98.  However, if these changes cause too much
code to be rejected (e.g. in cases where the dangling reference is never
used after the constructor returns) then we can consider removing them
for C++20 and older standards.

The affected constructors are deleted for C++20 and later, when concepts
are available to simplify the constructor constraints. For C++17 and
earlier the overload sets are complicated and awkward to maintain, so
the dangling checks are done in static assertions in the constructor
bodies, instead of being SFINAE-friendly constraints. The pre-C++17
assertions are only enabled for Debug Mode, to avoid introducing a
breaking change in Stage 4. We should consider enabling them by default
in Stage 1 for GCC 14.

libstdc++-v3/ChangeLog:

* include/bits/stl_pair.h (pair) [C++20]: Add non-dangling
constraints to constructors and add deleted overloads for the
dangling cases, as per P2255R2.
(pair) [!C++20 && _GLIBCXX_DEBUG]: Add static assertions to
make dangling cases ill-formed.
* testsuite/20_util/pair/dangling_ref.cc: New test.
---
 libstdc++-v3/include/bits/stl_pair.h  | 112 +++---
 .../testsuite/20_util/pair/dangling_ref.cc|  67 +++
 2 files changed, 164 insertions(+), 15 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/20_util/pair/dangling_ref.cc

diff --git a/libstdc++-v3/include/bits/stl_pair.h 
b/libstdc++-v3/include/bits/stl_pair.h
index d0c73410526..3f1624f40b4 100644
--- a/libstdc++-v3/include/bits/stl_pair.h
+++ b/libstdc++-v3/include/bits/stl_pair.h
@@ -281,6 +281,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return is_convertible_v<_U2, _T2>;
  return false;
}
+
+  // True if construction from _U1 and _U2 would create a dangling ref.
+  template
+   static constexpr bool
+   _S_dangles()
+   {
+#if __has_builtin(__reference_constructs_from_temporary)
+ if constexpr (__reference_constructs_from_temporary(_T1, _U1&&))
+   return true;
+ else
+   return __reference_constructs_from_temporary(_T2, _U2&&);
+#else
+ return false;
+#endif
+   }
   /// @endcond
 
 public:
@@ -295,25 +310,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   /// Constructor accepting two values of arbitrary types
   template
-   requires (_S_constructible<_U1, _U2>())
+   requires (_S_constructible<_U1, _U2>()) && (!_S_dangles<_U1, _U2>())
constexpr explicit(!_S_convertible<_U1, _U2>())
pair(_U1&& __x, _U2&& __y)
noexcept(_S_nothrow_constructible<_U1, _U2>())
: first(std::forward<_U1>(__x)), second(std::forward<_U2>(__y))
{ }
 
+  template
+   requires (_S_constructible<_U1, _U2>()) && (_S_dangles<_U1, _U2>())
+   constexpr explicit(!_S_convertible<_U1, _U2>())
+   pair(_U1&&, _U2&&) = delete;
+
   /// Converting constructor from a const `pair` lvalue
   template
requires (_S_constructible())
+ && (!_S_dangles<_U1, _U2>())
constexpr explicit(!_S_convertible())
pair(const pair<_U1, _U2>& __p)
noexcept(_S_nothrow_constructible())
: first(__p.first), second(__p.second)
{ }
 
+  template
+   requires (_S_constructible())
+ && (_S_dangles())
+   constexpr explicit(!_S_convertible())
+   pair(const pair<_U1, _U2>&) = delete;
+
   /// Converting constructor from a non-const `pair` rvalue
   template
-   requires (_S_constructible<_U1, _U2>())
+   requires (_S_constructible<_U1, _U2>()) && (!_S_dangles<_U1, _U2>())
constexpr explicit(!_S_convertible<_U1, _U2>())
pair(pair<_U1, _U2>&& __p)
noexcept(_S_nothrow_constructible<_U1, _U2>())
@@ -321,25 +348,42 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  second(std::forward<_U2>(__p.second))
{ }
 
+  template
+   requires (_S_constructible<_U1, _U2>()) && (_S_dangles<_U1, _U2>())
+   constexpr explicit(!_S_convertible<_U1, _U2>())
+   pair(pair<_U1, _U2>&&) = delete;
+
 #if __cplusplus > 202002L
   /// Converting constructor from a non-const `pair` 

[committed] libstdc++: Fix name of <experimental/optional> in comment

2023-02-16 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux. Pushed to trunk.

-- >8 --

libstdc++-v3/ChangeLog:

* include/experimental/optional: Fix header name in comment.
---
 libstdc++-v3/include/experimental/optional | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/experimental/optional 
b/libstdc++-v3/include/experimental/optional
index 3aae4563037..4f6c103a639 100644
--- a/libstdc++-v3/include/experimental/optional
+++ b/libstdc++-v3/include/experimental/optional
@@ -1,4 +1,4 @@
-// <optional> -*- C++ -*-
+// <experimental/optional> -*- C++ -*-
 
 // Copyright (C) 2013-2023 Free Software Foundation, Inc.
 //
-- 
2.39.2



[committed] libstdc++: Enable CTAD for std::basic_format_args (LWG 3810)

2023-02-16 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux. Pushed to trunk.

-- >8 --

This was just approved in Issaquah.

libstdc++-v3/ChangeLog:

* include/std/format (__format::_Arg_store): New class template.
(basic_format_args): Remove nested type _Store and add deduction
guide from _Arg_store.
(basic_format_arg, make_format_args): Adjust.
* testsuite/std/format/arguments/lwg3810.cc: New test.
---
 libstdc++-v3/include/std/format   | 101 ++
 .../testsuite/std/format/arguments/lwg3810.cc |  25 +
 2 files changed, 82 insertions(+), 44 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/std/format/arguments/lwg3810.cc

diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index 1cce4ebd45c..b1e627048de 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -2757,6 +2757,10 @@ namespace __format
}
   };
 
+  // [format.arg.store], class template format-arg-store
+  template
+class _Arg_store;
+
 } // namespace __format
 /// @endcond
 
@@ -2833,6 +2837,9 @@ namespace __format
   template
friend class basic_format_args;
 
+  template
+   friend class __format::_Arg_store;
+
   static_assert(is_trivially_copyable_v<__format::_Arg_value<_Context>>);
 
   __format::_Arg_value<_Context> _M_val;
@@ -3150,11 +3157,11 @@ namespace __format
 
   static_assert( __format::_Arg_max_ <= (1 << _S_packed_type_bits) );
 
-  // [format.arg.store], class template format-arg-store
-  // XXX: Should this be defined outside the class, so basic_format_args
-  // can use CTAD with a _Store argument?
   template
-   class _Store;
+   using _Store = __format::_Arg_store<_Context, _Args...>;
+
+  template
+   friend class __format::_Arg_store;
 
   using uint64_t = __UINT64_TYPE__;
   using _Format_arg = basic_format_arg<_Context>;
@@ -3215,52 +3222,60 @@ namespace __format
   }
 };
 
+  // _GLIBCXX_RESOLVE_LIB_DEFECTS
+  // 3810. CTAD for std::basic_format_args
+  template
+basic_format_args(__format::_Arg_store<_Context, _Args...>)
+  -> basic_format_args<_Context>;
+
+  template
+auto
+make_format_args(_Args&&... __fmt_args) noexcept;
+
   // An array of type-erased formatting arguments.
-  template
-template
-  class basic_format_args<_Context>::_Store
-  {
-   friend class basic_format_args;
+  template
+class __format::_Arg_store
+{
+  friend std::basic_format_args<_Context>;
 
-   template
- friend auto
- make_format_args(_Argz&&...) noexcept;
+  template
+   friend auto
+   std::make_format_args(_Argz&&...) noexcept;
 
-   // For a sufficiently small number of arguments we only store values.
-   // basic_format_args can get the types from the _Args pack.
-   static constexpr bool _S_values_only
- = sizeof...(_Args) <= _S_max_packed_args;
+  // For a sufficiently small number of arguments we only store values.
+  // basic_format_args can get the types from the _Args pack.
+  static constexpr bool _S_values_only
+   = sizeof...(_Args) <= basic_format_args<_Context>::_S_max_packed_args;
 
-   using _Element_t
- = __conditional_t<_S_values_only,
-   __format::_Arg_value<_Context>,
-   basic_format_arg<_Context>>;
+  using _Element_t
+   = __conditional_t<_S_values_only,
+ __format::_Arg_value<_Context>,
+ basic_format_arg<_Context>>;
 
-   _Element_t _M_args[sizeof...(_Args)];
+  _Element_t _M_args[sizeof...(_Args)];
 
-   template
- static _Element_t
- _S_make_elt(_Tp& __v)
- {
-   basic_format_arg<_Context> __arg(__v);
-   if constexpr (_S_values_only)
- return __arg._M_val;
-   else
- return __arg;
- }
+  template
+   static _Element_t
+   _S_make_elt(_Tp& __v)
+   {
+ basic_format_arg<_Context> __arg(__v);
+ if constexpr (_S_values_only)
+   return __arg._M_val;
+ else
+   return __arg;
+   }
 
-   template
- requires (sizeof...(_Tp) == sizeof...(_Args))
- [[__gnu__::__always_inline__]]
- _Store(_Tp&... __a) noexcept
- : _M_args{_S_make_elt(__a)...}
- { }
-  };
+  template
+   requires (sizeof...(_Tp) == sizeof...(_Args))
+   [[__gnu__::__always_inline__]]
+   _Arg_store(_Tp&... __a) noexcept
+   : _M_args{_S_make_elt(__a)...}
+   { }
+};
 
   template
-template requires (sizeof...(_Args) == 0)
-  class basic_format_args<_Context>::_Store<_Args...>
-  { };
+class __format::_Arg_store<_Context>
+{ };
 
   template
 template
@@ -3300,10 +3315,8 @@ namespace __format
 inline auto
 make_format_args(_Args&&... __fmt_args) noexcept
 {
-  using 

Re: [PING 2] [PATCH 0/3] RISC-V: optimize stack manipulation in save-restore

2023-02-16 Thread Jeff Law via Gcc-patches




On 2/16/23 00:17, Fei Gao wrote:

ping.
We are in stage4 of our development cycle -- meaning that the focus is 
on regression bugfixing, not new features, optimizations and the like.


This patch is in the queue and will be looked at once we move back into 
stage1 development for gcc-14.


jeff


Re: [PATCH] PR tree-optimization/108697 - Create a lazy ssa_cache

2023-02-16 Thread Andrew MacLeod via Gcc-patches



On 2/16/23 02:55, Richard Biener wrote:

On Wed, Feb 15, 2023 at 6:07 PM Andrew MacLeod via Gcc-patches
 wrote:

This patch implements the suggestion that we have an alternative
ssa-cache which does not zero memory, and instead uses a bitmap to track
whether a value is currently set or not.  It roughly mimics what
path_range_query was doing internally.

For sparsely used cases, especially in large programs, this is more
efficient.  I changed path_range_query to use this, and removed its old
bitmap (and a hack or two around PHI calculations), and also utilized
this in the assume_query class.

Performance wise, the patch doesn't affect VRP (since that still uses
the original version).  Switching to the lazy version caused a slowdown
of 2.5% across VRP.

There was a noticeable improvement elsewhere: across 230 GCC source
files, threading ran over 12% faster!  Overall compilation improved by
0.3%.  Not sure it makes much difference in compiler.i, but it shouldn't
hurt.

bootstraps on x86_64-pc-linux-gnu with no regressions.   OK for trunk?
or do you want to wait for the next release...

I see

@@ -365,16 +335,8 @@ path_range_query::compute_ranges_in_phis (basic_block bb)

Value_Range r (TREE_TYPE (name));
if (range_defined_in_block (r, name, bb))
-   {
- unsigned v = SSA_NAME_VERSION (name);
- set_cache (r, name);
- bitmap_set_bit (phi_set, v);
- // Pretend we don't have a cache entry for this name until
- // we're done with all PHIs.
- bitmap_clear_bit (m_has_cache_entry, v);
-   }
+   m_cache.set_global_range (name, r);
  }
-  bitmap_ior_into (m_has_cache_entry, phi_set);
  }

  // Return TRUE if relations may be invalidated after crossing edge E.

which I think is not correct - if we have

  # _1 = PHI <..., _2>
  # _2 = PHI <..., _1>

then their effects are supposed to be executed in parallel, that is,
both PHI argument _2 and _1 are supposed to see the "old" version.
The previous code tried to make sure the range of the new _1 doesn't
get seen when processing the argument _1 in the definition of _2.

The new version drops this, possibly resulting in wrong-code.


This is dropped because it is actually handled properly in 
range_defined_in_block now.  (which I think Aldy was describing).


It didn't make sense to me why it was handled here like this, so I traced 
through the call chain to find out if it was still actually needed and 
discussed it with Aldy.  I think it was mostly a leftover wart.




While I think it's appropriate to sort out compile-time issues like this
during stage4 at least the above makes me think it should be deferred
to next stage1.


I am happy to defer it since it's a marginal increase anyway.

Andrew




Re: [PATCH] tree-ssa-dse: Fix up handling of lhs of internal calls [PR108657]

2023-02-16 Thread Jakub Jelinek via Gcc-patches
On Thu, Feb 16, 2023 at 02:21:04PM +0000, Richard Biener wrote:
> > That said, while this patch fixes the regression by allowing DSE of
> > IFN_DEFERRED_INIT again, I think we probably have some latent bug in FRE
> > where without this patch it seems to be fre5 that sees one unconditional
> > c = 1; store, one conditional c = 0; store and in the last bb before return
> > another c = 1; store and decides that the last store is redundant, which is
> > not the case, the first two stores are redundant or if they can't be
> > removed, none of them is.  Richard, could you please have a look?
> 
> That's before this patch only?  I'll have a look.

Yes.

Jakub



[PATCH] constraint: fix relaxed memory and repeated constraint handling

2023-02-16 Thread Victor L. Do Nascimento via Gcc-patches
The function `constrain_operands' lacked the logic to consider relaxed
memory constraints when "traditional" memory constraints were not
satisfied, creating potential issues as observed during the reload
compilation pass.

In addition, it was observed that while `constrain_operands' chooses
to disregard constraints when more than one alternative is provided,
e.g. "m,r" using CONSTRAINT__UNKNOWN, it has no checks in place to
determine whether the multiple constraints in a given string are in
fact repetitions of the same constraint and should thus in fact be
treated as a single constraint, as ought to be the case for something
like "m,m".

Both of these issues are dealt with here, thus ensuring that we get
appropriate pattern matching.

Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK to install?

Victor

gcc/
* lra-constraints.cc (constraint_unique): New.
(process_address_1): Apply constraint_unique test.
* recog.cc (constrain_operands): Allow relaxed memory
constraints.
---
 gcc/lra-constraints.cc | 43 +++---
 gcc/recog.cc   |  3 ++-
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index dbfaf0485..c9c1653c0 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -3448,6 +3448,45 @@ skip_constraint_modifiers (const char *str)
   }
 }
 
+/*  Takes a string of 0 or more comma-separated constraints and the
+constraint_num corresponding to the first constraint.  When more
+than one constraint present, evaluate whether they all correspond
+to a single, repeated constraint (e.g. "r,r") or whether we have
+more than one distinct constraints (e.g. "r,m").  */
+static bool
+constraint_unique (const char *cstr, enum constraint_num ca)
+{
+   enum constraint_num cb;
+   for (;;)
+ {
+   /* Skip past current constraint and any whitespace which may
+ precede the end-of-line or separator characters.  */
+   cstr = skip_constraint_modifiers (cstr
++ CONSTRAINT_LEN (cstr[0], cstr));
+   /* If end of string reached and no disagreement found, we have
+ uniqueness.  */
+   if (*cstr == '\0')
+return true;
+   /* skip_constraint_modifiers does not handle commas, handle
+ case manually.  */
+   if (*cstr == ',')
+cstr++;
+   /* Get next constraint.  */
+   cstr =  skip_constraint_modifiers (cstr);
+   cb = lookup_constraint ((*cstr == '\0' || *cstr == ',') ? "X" : cstr);
+
+   /* If mismatch found, break out of loop.  */
+   if (cb != ca)
+return false;
+
+   /* If *cstr == '\0', we don't want to reach the
+ skip_constraint_modifiers statement again as that will
+ advance the pointer past the end of the string.  */
+   if (*cstr == '\0')
+return true;
+ }
+}
+
 /* Major function to make reloads for an address in operand NOP or
check its correctness (If CHECK_ONLY_P is true). The supported
cases are:
@@ -3507,9 +3546,7 @@ process_address_1 (int nop, bool check_only_p,
  operand has one address constraint, probably all others constraints are
  address ones.  */
   if (constraint[0] != '\0' && get_constraint_type (cn) != CT_ADDRESS
-  && *skip_constraint_modifiers (constraint
-+ CONSTRAINT_LEN (constraint[0],
-  constraint)) != '\0')
+  && !constraint_unique (constraint, cn))
 cn = CONSTRAINT__UNKNOWN;
   if (insn_extra_address_constraint (cn)
   /* When we find an asm operand with an address constraint that
diff --git a/gcc/recog.cc b/gcc/recog.cc
index 200cf4214..3ddeab59d 100644
--- a/gcc/recog.cc
+++ b/gcc/recog.cc
@@ -3234,7 +3234,8 @@ constrain_operands (int strict, alternative_mask 
alternatives)
  else if (constraint_satisfied_p (op, cn))
win = 1;
 
- else if (insn_extra_memory_constraint (cn)
+ else if ((insn_extra_memory_constraint (cn)
+   || insn_extra_relaxed_memory_constraint (cn))
   /* Every memory operand can be reloaded to fit.  */
   && ((strict < 0 && MEM_P (op))
   /* Before reload, accept what reload can turn
-- 
2.36.1




Re: [PATCH] tree-ssa-dse: Fix up handling of lhs of internal calls [PR108657]

2023-02-16 Thread Richard Biener via Gcc-patches
On Thu, 16 Feb 2023, Jakub Jelinek wrote:

> Hi!
> 
> The r13-1778 PR106378 tree-ssa-dse change didn't just add special support
> for IFN_LEN_STORE and IFN_MASK_STORE internal function calls as I believe
> was intended, but given that the function was
> if (is builtin) { ... }
> else if (lhs present and non-SSA_NAME) { ... }
> return false;
> and it added a new
> else if (is internal builtin) { ... }
> in between the two, the last if used to be done before on all stmts
> with non-SSA_NAME lhs except for calls to builtin functions, but newly
> isn't done also for calls to internal functions.  In the testcase
> the important internal function is .DEFERRED_INIT, which often has
> non-SSA_NAME lhs, and the change resulted in them no longer being DSEd,
> so a block with nothing in it left but var = .DEFERRED_INIT () and
> var = {CLOBBER} was unrolled several times.
> 
> The following patch does the lhs handling for all stmts with non-SSA_NAME lhs
> unless initialize_ao_ref_for_dse handled those specially already and
> returned (which is the case for various mem* builtins which don't have
> such lhs, for some cases of calloc which again is fine, and since r13-1778
> also for IFN_LEN_STORE call and some IFN_MASK_STORE calls).
> As IFN_MASK_STORE doesn't have a lhs, the break for the !may_def_ok case
> doesn't seem to change anything, and because we've handled internal fns
> that way in the past, I think it is the right thing to do that again.
> That said, if it is inappropriate for some new ifn, I guess it could
> be added to the switch and just return false; for it instead of break;.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

> That said, while this patch fixes the regression by allowing DSE of
> IFN_DEFERRED_INIT again, I think we probably have some latent bug in FRE
> where without this patch it seems to be fre5 that sees one unconditional
> c = 1; store, one conditional c = 0; store and in the last bb before return
> another c = 1; store and decides that the last store is redundant, which is
> not the case, the first two stores are redundant or if they can't be
> removed, none of them is.  Richard, could you please have a look?

That's before this patch only?  I'll have a look.

Thanks,
Richard.

> 2023-02-15  Jakub Jelinek  
> 
>   PR tree-optimization/108657
>   * tree-ssa-dse.cc (initialize_ao_ref_for_dse): If lhs of stmt
>   exists and is not a SSA_NAME, call ao_ref_init even if the stmt
>   is a call to internal or builtin function.
> 
>   * gcc.dg/pr108657.c: New test.
> 
> --- gcc/tree-ssa-dse.cc.jj2023-01-11 10:29:08.651161134 +0100
> +++ gcc/tree-ssa-dse.cc   2023-02-15 20:03:33.647684713 +0100
> @@ -177,7 +177,7 @@ initialize_ao_ref_for_dse (gimple *stmt,
>   default:;
>   }
>  }
> -  else if (tree lhs = gimple_get_lhs (stmt))
> +  if (tree lhs = gimple_get_lhs (stmt))
>  {
>if (TREE_CODE (lhs) != SSA_NAME)
>   {
> --- gcc/testsuite/gcc.dg/pr108657.c.jj2023-02-15 20:11:22.038804168 
> +0100
> +++ gcc/testsuite/gcc.dg/pr108657.c   2023-02-15 20:10:37.992451199 +0100
> @@ -0,0 +1,31 @@
> +/* PR tree-optimization/108657 */
> +/* { dg-do run } */
> +/* { dg-options "-O3 -ftrivial-auto-var-init=zero" } */
> +
> +int c, e, f;
> +static int *d = &c;
> +
> +__attribute__((noipa)) void
> +foo (void)
> +{
> +  if (c != 1)
> +__builtin_abort ();
> +}
> +
> +int
> +main ()
> +{
> +  for (c = 1; c >= 0; c--)
> +{
> +  e = 0;
> +  for (int j = 0; j <= 2; j++)
> + {
> +   short k[1];
> +   if (e)
> + break;
> +   e ^= f;
> + }
> +}
> +  *d = 1;
> +  foo ();
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)


Re: [PATCH 4/7] libstdc++: Use a PCH to speed up check-simd

2023-02-16 Thread Jonathan Wakely via Gcc-patches
On Wed, 15 Feb 2023 at 20:54, Matthias Kretz via Libstdc++
 wrote:
>
>
> Signed-off-by: Matthias Kretz 
>
> libstdc++-v3/ChangeLog:
>
> * testsuite/experimental/simd/generate_makefile.sh: Generate and
> pre-compile pch.h, which includes all headers that do not depend
> on command-line macros.
> * testsuite/experimental/simd/tests/bits/conversions.h: Add
> include guard.
> (genHalfBits): Simplify.
> * testsuite/experimental/simd/tests/bits/make_vec.h: Add include
> guard.
> (make_alternating_mask): Moved from mask_loadstore.
> * testsuite/experimental/simd/tests/bits/mathreference.h: Add
> include guard.
> * testsuite/experimental/simd/tests/bits/test_values.h: Ditto.
> * testsuite/experimental/simd/tests/mask_loadstore.cc
> (make_mask, make_alternating_mask): Removed.
> * testsuite/experimental/simd/tests/mask_reductions.cc: Ditto.
> * testsuite/experimental/simd/tests/operators.cc (genHalfBits):
> Removed.
> * testsuite/experimental/simd/tests/abs.cc: Only include
> bits/main.h.
> * testsuite/experimental/simd/tests/algorithms.cc: Ditto.
> * testsuite/experimental/simd/tests/broadcast.cc: Ditto.
> * testsuite/experimental/simd/tests/casts.cc: Ditto.
> * testsuite/experimental/simd/tests/fpclassify.cc: Ditto.
> * testsuite/experimental/simd/tests/frexp.cc: Ditto.
> * testsuite/experimental/simd/tests/generator.cc: Ditto.
> * testsuite/experimental/simd/tests/hypot3_fma.cc: Ditto.
> * testsuite/experimental/simd/tests/integer_operators.cc: Ditto.
> * testsuite/experimental/simd/tests/ldexp_scalbn_scalbln_modf.cc:
> Ditto.
> * testsuite/experimental/simd/tests/loadstore.cc: Ditto.
> * testsuite/experimental/simd/tests/logarithm.cc: Ditto.
> * testsuite/experimental/simd/tests/mask_broadcast.cc: Ditto.
> * testsuite/experimental/simd/tests/mask_implicit_cvt.cc: Ditto.
> * testsuite/experimental/simd/tests/mask_operator_cvt.cc: Ditto.
> * testsuite/experimental/simd/tests/mask_operators.cc: Ditto.
> * testsuite/experimental/simd/tests/math_1arg.cc: Ditto.
> * testsuite/experimental/simd/tests/math_2arg.cc: Ditto.
> * testsuite/experimental/simd/tests/operator_cvt.cc: Ditto.
> * testsuite/experimental/simd/tests/reductions.cc: Ditto.
> * testsuite/experimental/simd/tests/remqo.cc: Ditto.
> * testsuite/experimental/simd/tests/sincos.cc: Ditto.
> * testsuite/experimental/simd/tests/split_concat.cc: Ditto.
> * testsuite/experimental/simd/tests/trigonometric.cc: Ditto.
> * testsuite/experimental/simd/tests/trunc_ceil_floor.cc: Ditto.
> * testsuite/experimental/simd/tests/where.cc: Ditto.

OK for trunk (for now, probably fine to backport at some point too).



Re: [PATCH 2/7] libstdc++: Annotate most lambdas with always_inline

2023-02-16 Thread Jonathan Wakely via Gcc-patches
On Wed, 15 Feb 2023 at 20:50, Matthias Kretz via Libstdc++
 wrote:
>
>
>
> All of the annotated lambdas are simply a necessary means for
> implementing these functions and should never result in an actual
> function call. Many of these lambdas would go away if C++ had better
> language support for packs.

(I hope we'll get p1061 for C++26.)

OK for trunk.

N.B. I noticed some pre-existing cases of a non-reserved name "Parts"
there, which should be fixed (separately from this patch though).



Re: [PATCH V2 0/5] RISC-V: Implement Scalar Cryptography Extension

2023-02-16 Thread Kito Cheng via Gcc-patches
Hi Markku-Juhani:

Scalar crypto is ratified I know, but the scalar crypto intrinsic part
isn't standardized - and even the APIs listed in riscv_scalar_crypto.h
are not documented anywhere yet.
So what I mean is I am happy to accept this patch except the
riscv_scalar_crypto.h header.

Once the API has been documented and merged I am happy to accept that part too.

On Thu, Feb 16, 2023 at 9:52 PM Markku-Juhani Olavi Saarinen
 wrote:
>
> On Thu, Feb 16, 2023, 13:29 Kito Cheng  wrote:
>
> > Hi Shihua:
> >
> > Thanks for your patches! This patch set is generally in good shape,
> > but I would prefer to remove riscv_scalar_crypto.h at this moment
> > since it's NOT standardized yet.
> >
>
> Hi Kito,
>
> I'm not sure if you're referring to the scalar crypto extensions (which
> were fully ratified in November 2021 by RVI, are included  in several
> profiles, and available in commercial silicon IP), or this particular
> header file. Note that the single header is perhaps the most convenient way
> to access these subextensions that make up the scalar crypto extension 1.0.
>
> Cheers,
> Markku
>
> >
> > Do you mind sending a new version of this patch set which does not
> > include that and also update the testcases?
> >
> >
> >
> > On Thu, Feb 16, 2023 at 3:52 PM Liao Shihua  wrote:
> > >
> > > This series adds basic support for the Scalar Cryptography extensions:
> > > * Zbkb
> > > * Zbkc
> > > * Zbkx
> > > * Zknd
> > > * Zkne
> > > * Zknh
> > > * Zksed
> > > * Zksh
> > >
> > > The implementation follows the version Scalar Cryptography v1.0.0 of the
> > specification,
> > > and the intrinsic of Scalar Cryptography extensions follows riscv-c-api
> > > which can be found here:
> > > https://github.com/riscv/riscv-crypto/releases/tag/v1.0.0-scalar
> > > https://github.com/riscv-non-isa/riscv-c-api-doc/pull/31
> > >
> > > It works by Wu Siyu and Liao Shihua .
> > >
> > > Liao Shihua (5):
> > >   Add prototypes for RISC-V Crypto built-in functions
> > >   Implement ZBKB, ZBKC and ZBKX extensions
> > >   Implement ZKND and ZKNE extensions
> > >   Implement ZKNH extensions
> > >   Implement ZKSH and ZKSED extensions
> > >
> > >  gcc/config.gcc|   2 +-
> > >  gcc/config/riscv/bitmanip.md  |  20 +-
> > >  gcc/config/riscv/constraints.md   |   8 +
> > >  gcc/config/riscv/crypto.md| 435 ++
> > >  gcc/config/riscv/riscv-builtins.cc|  26 ++
> > >  gcc/config/riscv/riscv-crypto.def |  94 
> > >  gcc/config/riscv/riscv-ftypes.def |  10 +
> > >  gcc/config/riscv/riscv.md |   4 +-
> > >  gcc/config/riscv/riscv_scalar_crypto.h| 218 +
> > >  gcc/testsuite/gcc.target/riscv/zbkb32.c   |  36 ++
> > >  gcc/testsuite/gcc.target/riscv/zbkb64.c   |  28 ++
> > >  gcc/testsuite/gcc.target/riscv/zbkc32.c   |  17 +
> > >  gcc/testsuite/gcc.target/riscv/zbkc64.c   |  17 +
> > >  gcc/testsuite/gcc.target/riscv/zbkx32.c   |  18 +
> > >  gcc/testsuite/gcc.target/riscv/zbkx64.c   |  18 +
> > >  gcc/testsuite/gcc.target/riscv/zknd32.c   |  18 +
> > >  gcc/testsuite/gcc.target/riscv/zknd64.c   |  36 ++
> > >  gcc/testsuite/gcc.target/riscv/zkne32.c   |  18 +
> > >  gcc/testsuite/gcc.target/riscv/zkne64.c   |  30 ++
> > >  gcc/testsuite/gcc.target/riscv/zknh-sha256.c  |  29 ++
> > >  .../gcc.target/riscv/zknh-sha512-32.c |  43 ++
> > >  .../gcc.target/riscv/zknh-sha512-64.c |  31 ++
> > >  gcc/testsuite/gcc.target/riscv/zksed.c|  20 +
> > >  gcc/testsuite/gcc.target/riscv/zksh.c |  19 +
> > >  24 files changed, 1183 insertions(+), 12 deletions(-)
> > >  create mode 100644 gcc/config/riscv/crypto.md
> > >  create mode 100644 gcc/config/riscv/riscv-crypto.def
> > >  create mode 100644 gcc/config/riscv/riscv_scalar_crypto.h
> > >  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkb32.c
> > >  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkb64.c
> > >  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkc32.c
> > >  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkc64.c
> > >  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkx32.c
> > >  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkx64.c
> > >  create mode 100644 gcc/testsuite/gcc.target/riscv/zknd32.c
> > >  create mode 100644 gcc/testsuite/gcc.target/riscv/zknd64.c
> > >  create mode 100644 gcc/testsuite/gcc.target/riscv/zkne32.c
> > >  create mode 100644 gcc/testsuite/gcc.target/riscv/zkne64.c
> > >  create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha256.c
> > >  create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-32.c
> > >  create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-64.c
> > >  create mode 100644 gcc/testsuite/gcc.target/riscv/zksed.c
> > >  create mode 100644 gcc/testsuite/gcc.target/riscv/zksh.c
> > >
> > > --
> > > 2.38.1.windows.1
> > >
> >


Re: [PATCH V2 0/5] RISC-V: Implement Scalar Cryptography Extension

2023-02-16 Thread Markku-Juhani Olavi Saarinen
On Thu, Feb 16, 2023, 13:29 Kito Cheng  wrote:

> Hi Shihua:
>
> Thanks for your patches! This patch set is generally in good shape,
> but I would prefer to remove riscv_scalar_crypto.h at this moment
> since it's NOT standardized yet.
>

Hi Kito,

I'm not sure if you're referring to the scalar crypto extensions (which
were fully ratified in November 2021 by RVI, are included  in several
profiles, and available in commercial silicon IP), or this particular
header file. Note that the single header is perhaps the most convenient way
to access these subextensions that make up the scalar crypto extension 1.0.

Cheers,
Markku

>
> Do you mind sending a new version of this patch set which does not
> include that and also update the testcases?
>
>
>
> On Thu, Feb 16, 2023 at 3:52 PM Liao Shihua  wrote:
> >
> > This series adds basic support for the Scalar Cryptography extensions:
> > * Zbkb
> > * Zbkc
> > * Zbkx
> > * Zknd
> > * Zkne
> > * Zknh
> > * Zksed
> > * Zksh
> >
> > The implementation follows the version Scalar Cryptography v1.0.0 of the
> specification,
> > and the intrinsic of Scalar Cryptography extensions follows riscv-c-api
> > which can be found here:
> > https://github.com/riscv/riscv-crypto/releases/tag/v1.0.0-scalar
> > https://github.com/riscv-non-isa/riscv-c-api-doc/pull/31
> >
> > It works by Wu Siyu and Liao Shihua .
> >
> > Liao Shihua (5):
> >   Add prototypes for RISC-V Crypto built-in functions
> >   Implement ZBKB, ZBKC and ZBKX extensions
> >   Implement ZKND and ZKNE extensions
> >   Implement ZKNH extensions
> >   Implement ZKSH and ZKSED extensions
> >
> >  gcc/config.gcc|   2 +-
> >  gcc/config/riscv/bitmanip.md  |  20 +-
> >  gcc/config/riscv/constraints.md   |   8 +
> >  gcc/config/riscv/crypto.md| 435 ++
> >  gcc/config/riscv/riscv-builtins.cc|  26 ++
> >  gcc/config/riscv/riscv-crypto.def |  94 
> >  gcc/config/riscv/riscv-ftypes.def |  10 +
> >  gcc/config/riscv/riscv.md |   4 +-
> >  gcc/config/riscv/riscv_scalar_crypto.h| 218 +
> >  gcc/testsuite/gcc.target/riscv/zbkb32.c   |  36 ++
> >  gcc/testsuite/gcc.target/riscv/zbkb64.c   |  28 ++
> >  gcc/testsuite/gcc.target/riscv/zbkc32.c   |  17 +
> >  gcc/testsuite/gcc.target/riscv/zbkc64.c   |  17 +
> >  gcc/testsuite/gcc.target/riscv/zbkx32.c   |  18 +
> >  gcc/testsuite/gcc.target/riscv/zbkx64.c   |  18 +
> >  gcc/testsuite/gcc.target/riscv/zknd32.c   |  18 +
> >  gcc/testsuite/gcc.target/riscv/zknd64.c   |  36 ++
> >  gcc/testsuite/gcc.target/riscv/zkne32.c   |  18 +
> >  gcc/testsuite/gcc.target/riscv/zkne64.c   |  30 ++
> >  gcc/testsuite/gcc.target/riscv/zknh-sha256.c  |  29 ++
> >  .../gcc.target/riscv/zknh-sha512-32.c |  43 ++
> >  .../gcc.target/riscv/zknh-sha512-64.c |  31 ++
> >  gcc/testsuite/gcc.target/riscv/zksed.c|  20 +
> >  gcc/testsuite/gcc.target/riscv/zksh.c |  19 +
> >  24 files changed, 1183 insertions(+), 12 deletions(-)
> >  create mode 100644 gcc/config/riscv/crypto.md
> >  create mode 100644 gcc/config/riscv/riscv-crypto.def
> >  create mode 100644 gcc/config/riscv/riscv_scalar_crypto.h
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkb32.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkb64.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkc32.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkc64.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkx32.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkx64.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zknd32.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zknd64.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zkne32.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zkne64.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha256.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-32.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-64.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zksed.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/zksh.c
> >
> > --
> > 2.38.1.windows.1
> >
>


Re: [PATCH] don't declare header-defined functions both static and inline

2023-02-16 Thread Jakub Jelinek via Gcc-patches
On Thu, Feb 16, 2023 at 08:37:34AM -0500, Patrick Palka wrote:
> I can confirm that this patch only modifies headers that reside in
> $prefix/lib/gcc/x86_64-pc-linux-gnu/13.0.1/plugin/include/
> (with --enable-languages=c,c++,fortran,objc,obj-c++ --enable-jit make install)
> 
> Good point, I was able to scrape the functions modified by this patch
> with the below shell script which outputs the name of each function that
> has at least two overloads/definitions (possibly in different headers),
> followed by the headers in which it's defined:
> 
> I manually verified that none of these function definitions conflict
> with each other.

> Bootstrapping with objc enabled revealed a minor manual change was
> needed in gcc/objc/objc-act.cc to avoid a redeclaration mismatch error.
> 
> -- >8 --
> 
> Subject: [PATCH] don't declare header-defined functions both static and inline
> 
> Many functions defined in our headers are declared 'static inline' which
> is a C idiom that predates GCC's move to C++ as the implementation
> language.  But in C++ the inline keyword is more than just a compiler
> hint, and is sufficient to give the function the intended semantics.
> In fact declaring a function both static and inline is a pessimization
> since static effectively disables the desired definition merging
> behavior enabled by inline, and is also a source of (harmless) ODR
> violations when a static inline function gets called from a non-static
> inline one (such as tree_operand_check calling tree_operand_length).
> 
> This patch mechanically fixes the vast majority of occurrences of this
> anti-pattern throughout the compiler's headers via the command line
> 
>   echo gcc/*.h gcc/*/*.h | xargs sed -i 's/^static inline/inline/g'
> 
> The patch also manually removes the redundant declarations of is_ivar
> and lookup_category in gcc/objc/objc-act.cc which would otherwise
> conflict with those in objc-act.h (due to the difference in staticness).
> 
> Besides fixing some ODR violations, this speeds up stage1 cc1plus by
> about 2% and reduces the size of its text segment by 1.5MB.

Thanks for doing the extra work, LGTM for trunk then.

> gcc/ChangeLog:
> 
>   * addresses.h: Mechanically drop 'static' from 'static inline'
>   functions via s/^static inline/inline/g.
>   * asan.h: Likewise.
>   * attribs.h: Likewise.
>   * basic-block.h: Likewise.
>   * bitmap.h: Likewise.
>   * cfghooks.h: Likewise.
>   * cfgloop.h: Likewise.
>   * cgraph.h: Likewise.
>   * cselib.h: Likewise.
>   * data-streamer.h: Likewise.
>   * debug.h: Likewise.
>   * df.h: Likewise.
>   * diagnostic.h: Likewise.
>   * dominance.h: Likewise.
>   * dumpfile.h: Likewise.
>   * emit-rtl.h: Likewise.
>   * except.h: Likewise.
>   * expmed.h: Likewise.
>   * expr.h: Likewise.
>   * fixed-value.h: Likewise.
>   * gengtype.h: Likewise.
>   * gimple-expr.h: Likewise.
>   * gimple-iterator.h: Likewise.
>   * gimple-predict.h: Likewise.
>   * gimple-range-fold.h: Likewise.
>   * gimple-ssa.h: Likewise.
>   * gimple.h: Likewise.
>   * graphite.h: Likewise.
>   * hard-reg-set.h: Likewise.
>   * hash-map.h: Likewise.
>   * hash-set.h: Likewise.
>   * hash-table.h: Likewise.
>   * hwint.h: Likewise.
>   * input.h: Likewise.
>   * insn-addr.h: Likewise.
>   * internal-fn.h: Likewise.
>   * ipa-fnsummary.h: Likewise.
>   * ipa-icf-gimple.h: Likewise.
>   * ipa-inline.h: Likewise.
>   * ipa-modref.h: Likewise.
>   * ipa-prop.h: Likewise.
>   * ira-int.h: Likewise.
>   * ira.h: Likewise.
>   * lra-int.h: Likewise.
>   * lra.h: Likewise.
>   * lto-streamer.h: Likewise.
>   * memmodel.h: Likewise.
>   * omp-general.h: Likewise.
>   * optabs-query.h: Likewise.
>   * optabs.h: Likewise.
>   * plugin.h: Likewise.
>   * pretty-print.h: Likewise.
>   * range.h: Likewise.
>   * read-md.h: Likewise.
>   * recog.h: Likewise.
>   * regs.h: Likewise.
>   * rtl-iter.h: Likewise.
>   * rtl.h: Likewise.
>   * sbitmap.h: Likewise.
>   * sched-int.h: Likewise.
>   * sel-sched-ir.h: Likewise.
>   * sese.h: Likewise.
>   * sparseset.h: Likewise.
>   * ssa-iterators.h: Likewise.
>   * system.h: Likewise.
>   * target-globals.h: Likewise.
>   * target.h: Likewise.
>   * timevar.h: Likewise.
>   * tree-chrec.h: Likewise.
>   * tree-data-ref.h: Likewise.
>   * tree-iterator.h: Likewise.
>   * tree-outof-ssa.h: Likewise.
>   * tree-phinodes.h: Likewise.
>   * tree-scalar-evolution.h: Likewise.
>   * tree-sra.h: Likewise.
>   * tree-ssa-alias.h: Likewise.
>   * tree-ssa-live.h: Likewise.
>   * tree-ssa-loop-manip.h: Likewise.
>   * tree-ssa-loop.h: Likewise.
>   * tree-ssa-operands.h: Likewise.
>   * tree-ssa-propagate.h: Likewise.
>   * tree-ssa-sccvn.h: Likewise.
>

[PATCH] don't declare header-defined functions both static and inline, pt 2

2023-02-16 Thread Patrick Palka via Gcc-patches
This fixes some header-defined functions that are undesirably declared
static and weren't caught by the "^static inline" pattern used in the
previous patch.

gcc/ChangeLog:

* hash-table.h (gt_pch_nx): Remove static.
* lra-int.h (lra_change_class): Likewise.
* recog.h (which_op_alt): Likewise.
* sel-sched-ir.h (sel_bb_empty_or_nop_p): Replace static with
inline.
---
 gcc/hash-table.h   | 2 +-
 gcc/lra-int.h  | 2 +-
 gcc/recog.h| 2 +-
 gcc/sel-sched-ir.h | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/hash-table.h b/gcc/hash-table.h
index 3f87ec06f37..c0c6e1cd83d 100644
--- a/gcc/hash-table.h
+++ b/gcc/hash-table.h
@@ -1275,7 +1275,7 @@ hashtab_entry_note_pointers (void *obj, void *h, 
gt_pointer_operator op,
 }
 
 template
-static void
+void
 gt_pch_nx (hash_table *h)
 {
   h->check_complete_insertion ();
diff --git a/gcc/lra-int.h b/gcc/lra-int.h
index 73f8eb004b0..a400a0f85e2 100644
--- a/gcc/lra-int.h
+++ b/gcc/lra-int.h
@@ -428,7 +428,7 @@ lra_get_regno_hard_regno (int regno)
 
 /* Change class of pseudo REGNO to NEW_CLASS.  Print info about it
using TITLE.  Output a new line if NL_P.  */
-static void inline
+inline void
 lra_change_class (int regno, enum reg_class new_class,
  const char *title, bool nl_p)
 {
diff --git a/gcc/recog.h b/gcc/recog.h
index 764fa90afde..539a27c3edf 100644
--- a/gcc/recog.h
+++ b/gcc/recog.h
@@ -382,7 +382,7 @@ extern const operand_alternative *recog_op_alt;
on operand OP of the current instruction alternative (which_alternative).
Only valid after calling preprocess_constraints and constrain_operands.  */
 
-inline static const operand_alternative *
+inline const operand_alternative *
 which_op_alt ()
 {
   gcc_checking_assert (IN_RANGE (which_alternative, 0,
diff --git a/gcc/sel-sched-ir.h b/gcc/sel-sched-ir.h
index 7034a1ab06c..0e87134c6db 100644
--- a/gcc/sel-sched-ir.h
+++ b/gcc/sel-sched-ir.h
@@ -1096,7 +1096,7 @@ get_loop_exit_edges_unique_dests (const class loop *loop)
   return edges;
 }
 
-static bool
+inline bool
 sel_bb_empty_or_nop_p (basic_block bb)
 {
   insn_t first = sel_bb_head (bb), last;
-- 
2.39.2.422.gc867e4fa18



Re: [PATCH V2 0/5] RISC-V: Implement Scalar Cryptography Extension

2023-02-16 Thread Kito Cheng via Gcc-patches
Hi Shihua:

Thanks for your patches! This patch set is generally in good shape,
but I would prefer to remove riscv_scalar_crypto.h at this moment
since it's NOT standardized yet.

Do you mind sending a new version of this patch set which does not
include that and also update the testcases?



On Thu, Feb 16, 2023 at 3:52 PM Liao Shihua  wrote:
>
> This series adds basic support for the Scalar Cryptography extensions:
> * Zbkb
> * Zbkc
> * Zbkx
> * Zknd
> * Zkne
> * Zknh
> * Zksed
> * Zksh
>
> The implementation follows the version Scalar Cryptography v1.0.0 of the 
> specification,
> and the intrinsic of Scalar Cryptography extensions follows riscv-c-api
> which can be found here:
> https://github.com/riscv/riscv-crypto/releases/tag/v1.0.0-scalar
> https://github.com/riscv-non-isa/riscv-c-api-doc/pull/31
>
> It works by Wu Siyu and Liao Shihua .
>
> Liao Shihua (5):
>   Add prototypes for RISC-V Crypto built-in functions
>   Implement ZBKB, ZBKC and ZBKX extensions
>   Implement ZKND and ZKNE extensions
>   Implement ZKNH extensions
>   Implement ZKSH and ZKSED extensions
>
>  gcc/config.gcc|   2 +-
>  gcc/config/riscv/bitmanip.md  |  20 +-
>  gcc/config/riscv/constraints.md   |   8 +
>  gcc/config/riscv/crypto.md| 435 ++
>  gcc/config/riscv/riscv-builtins.cc|  26 ++
>  gcc/config/riscv/riscv-crypto.def |  94 
>  gcc/config/riscv/riscv-ftypes.def |  10 +
>  gcc/config/riscv/riscv.md |   4 +-
>  gcc/config/riscv/riscv_scalar_crypto.h| 218 +
>  gcc/testsuite/gcc.target/riscv/zbkb32.c   |  36 ++
>  gcc/testsuite/gcc.target/riscv/zbkb64.c   |  28 ++
>  gcc/testsuite/gcc.target/riscv/zbkc32.c   |  17 +
>  gcc/testsuite/gcc.target/riscv/zbkc64.c   |  17 +
>  gcc/testsuite/gcc.target/riscv/zbkx32.c   |  18 +
>  gcc/testsuite/gcc.target/riscv/zbkx64.c   |  18 +
>  gcc/testsuite/gcc.target/riscv/zknd32.c   |  18 +
>  gcc/testsuite/gcc.target/riscv/zknd64.c   |  36 ++
>  gcc/testsuite/gcc.target/riscv/zkne32.c   |  18 +
>  gcc/testsuite/gcc.target/riscv/zkne64.c   |  30 ++
>  gcc/testsuite/gcc.target/riscv/zknh-sha256.c  |  29 ++
>  .../gcc.target/riscv/zknh-sha512-32.c |  43 ++
>  .../gcc.target/riscv/zknh-sha512-64.c |  31 ++
>  gcc/testsuite/gcc.target/riscv/zksed.c|  20 +
>  gcc/testsuite/gcc.target/riscv/zksh.c |  19 +
>  24 files changed, 1183 insertions(+), 12 deletions(-)
>  create mode 100644 gcc/config/riscv/crypto.md
>  create mode 100644 gcc/config/riscv/riscv-crypto.def
>  create mode 100644 gcc/config/riscv/riscv_scalar_crypto.h
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkb32.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkb64.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkc32.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkc64.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkx32.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zbkx64.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zknd32.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zknd64.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zkne32.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zkne64.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha256.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-32.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-64.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zksed.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zksh.c
>
> --
> 2.38.1.windows.1
>


[committed] libstdc++: Fix uses of non-reserved names in headers

2023-02-16 Thread Maciej Cencora via Gcc-patches
Hi,

instead of uglifying all the libstdc++ code wouldn't it be simpler to
just ignore all non-reserved macro expansions (+ some special ones
like assert) inside system headers on compiler level?

Regards,
Maciej


Re: Re: [PATCH] RISC-V: Add vm* mask C api tests

2023-02-16 Thread Kito Cheng via Gcc-patches
TL;DR: I think most parts of the tests could be produced by a generator
instead of adding those test cases directly; we are going to stop adding
all the API testing testcases.


RISC-V Vector intrinsics are not implemented through the *.def file way;
they use the same approach as SVE's intrinsics,
created and registered by C files. The reason is that RISC-V vector has a huge
set of intrinsic functions:
about ~80k functions for different combinations.

As a result we have a huge testcase set, so let me break that down.

There are several kinds for those testcase:
1. vsetvli insertion pass testing, which is a highly customized mode
switching pass
2. Code gen test: testing our move pattern and generated code has
satisfied the RISC-V vector ISA constraint.
3. Intrinsic API testing: test the C intrinsic has right interface and
generated expected instruction

---

The first part has 375 testcases in `testsuite/gcc.target/riscv/rvv/vsetvl`
which is important and ~16M
but one potential issue is that is highly code gen sensitive, we've
added many long scan-assembly in the test file,
it's not ideal and we plan to implement a builtin verifier inside GCC
instead of lots of long scan-assembly,
This is planned for this year, but will happen after GCC 13 release.

---

Second part is also important, and only 300~400 files, so I think this
part should just keep as it is.

---

The last part is the most huge part in the testcases (~3000 files so
far), and I think we should consider removing this part from the GCC
testsuite,
since we have a standard one[1] from the RISC-V international.

So my thought is we stop putting further intrinsic API testing now,
and use the external one,
and evaluate the effort and benefit of implementing a test generator
inside GCC in future (after GCC 13 release).

[1] 
https://github.com/riscv-non-isa/rvv-intrinsic-doc/blob/master/auto-generated/api-testing/


On Thu, Feb 16, 2023 at 6:32 PM juzhe.zh...@rivai.ai
 wrote:
>
> Well, I think the best solution:
> 1. Remove all intrinsic tests that I already committed.
> 2. Then, embed test-generator for this intrinsic unit-test.
> 3. Call  test-generator during regression and test them.
> 4. Remove the testcases generated by the test-generator after regression.
>
> Not sure whether you agree with me.
>
> The test-generator I used is generating the testcase by reading the 
> rvv-intrinsic document directly and generate the testcases.
> That means I need to commit test-generator and rvv-intrinsic document both.
> I don't think my test-generator is good to commit.
>
> I believe Kito has the mature and better test-generator (much better than 
> mine) to commit since rvv-intrinsic doc is their work.
>
> As long as we can make kito's test-generator embedded into GCC regression, 
> this issue will be fixed. And I believe we can fix it soon.
>
> So...Let's wait for kito.
>
>
> juzhe.zh...@rivai.ai
>
> From: Jakub Jelinek
> Date: 2023-02-16 18:20
> To: juzhe.zh...@rivai.ai
> CC: gcc-patches; kito.cheng; jeffreyalaw
> Subject: Re: Re: [PATCH] RISC-V: Add vm* mask C api tests
> On Thu, Feb 16, 2023 at 05:53:48PM +0800, juzhe.zh...@rivai.ai wrote:
> > Thanks for reporting this. I think maybe we can reduce the tests to 1/3.
> > For example:
> > We have:
> > * gcc.target/riscv/rvv/base/vmand_mm-1.c: New test.
> > * gcc.target/riscv/rvv/base/vmand_mm-2.c: New test.
> > * gcc.target/riscv/rvv/base/vmand_mm-3.c: New test.
> >
> > Maybe we can reduce it into one test:
> > vmand_mm.c only.
> >
> > I will improve and reduce all intrinsic tests like this soon (I have almost
> > finished all the intrinsics this week; I will do this next week).
> >
> > RVV intrinsics are really huge, this is the document:
> > https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/master/auto-generated
> >
> > The testcases come directly from LLVM (we just add assembler checks to
> > the tests); they also have this number of testcases and they just recently
> > changed them:
> > https://reviews.llvm.org/D142697
> > https://reviews.llvm.org/D142644
> >
> > Take a look at the changing LLVM patch; I agree with you, the LLVM
> > patch is quite huge and not easy to maintain.
>
> Yeah, LLVM does this all the time, their unit-tests where they embed e.g.
> matchers for IL in huge tests.
>
> I just think the way they are doing this is a very bad idea.
> If say one writes some C/C++ test, compile it, some helper program
> adds the IL into comments in the test then again any time you want to
> adjust something in the compiler that affects those tests, you need to
> regenerate them.  Is the generator included somewhere, or does every
> user write his own tooling to do that?  Anyway, if the solution is
> regenerate the IL, the test lost quite lot of its meaning, because
> when changing thousands of tests and regenerating the IL for all of them,
> one can hardly expect to carefully examine the changes to all those tests
> whether everything was intended.
>
> In GCC we have far fewer such unit-tests and big parts of the 

[committed] libstdc++: Make names_pstl.cc require et tbb_backend

2023-02-16 Thread Jonathan Wakely via Gcc-patches
Sigh ... this test still isn't right, because it fails on systems
without a bug-fix for TBB 2020.3. We could just add -fpermissive to the
test options, but let's just require tbb_backend (which isn't matched on
systems with the buggy headers). Separately, I'm going to backport the
TBB fix to Fedora 37 where I had the test FAIL.

Tested x86_64-linux (Fedora 37 this time). Pushed to trunk.

-- >8 --

The https://github.com/oneapi-src/oneTBB/pull/833 fix is needed for TBB
headers to avoid an error with GCC 13. The new names_pstl.cc test needs
to check effective target tbb_backend so that it's UNSUPPORTED on
systems without fixed headers.

libstdc++-v3/ChangeLog:

* testsuite/17_intro/names_pstl.cc: Require et tbb_backend.
---
 libstdc++-v3/testsuite/17_intro/names_pstl.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/testsuite/17_intro/names_pstl.cc 
b/libstdc++-v3/testsuite/17_intro/names_pstl.cc
index 42ce098550c..019e4774ff2 100644
--- a/libstdc++-v3/testsuite/17_intro/names_pstl.cc
+++ b/libstdc++-v3/testsuite/17_intro/names_pstl.cc
@@ -1,10 +1,12 @@
 // { dg-do compile { target c++17 } }
-// { dg-additional-options "-DTBB_SUPPRESS_DEPRECATED_MESSAGES=1" { target 
tbb_backend } }
+// { dg-require-effective-target tbb_backend }
+// { dg-add-options no_pch }
 
 // The TBB headers use non-reserved names (because they're not part of the
 // implementation) so we need to include them before the macro definitions
 // in names.cc:
 #if __has_include()
+# define TBB_SUPPRESS_DEPRECATED_MESSAGES 1
 # include 
 #endif
 // Now we can define the macros to poison uses of non-reserved names:
-- 
2.39.1



Re: [PATCH] rs6000: Fix vector parity support [PR108699]

2023-02-16 Thread Kewen.Lin via Gcc-patches
Hi Segher,

Thanks for the review comments!

on 2023/2/16 19:14, Segher Boessenkool wrote:
> Hi!
> 
> On Thu, Feb 16, 2023 at 05:23:40PM +0800, Kewen.Lin wrote:
>> This patch is to fix the handling with one more pre-insn
>> vpopcntb.  It also fixes an oversight having V8HI in VEC_IP,
>> replaces VParity with VEC_IP, and adjusts the existing
>> UNSPEC_PARITY to a more meaningful name UNSPEC_PARITYB.
> 
> Please don't do that.  UNSPEC_PARITYB is worse than UNSPEC_PARITY,
> even more so for the prtyw etc. instructions.

I thought the scalar insns prty[wd] also operate on byte
(especially on the least significant bit in each byte),
PARITYB(yte) seems better ...

> 
> You might want to express the vector parity insns separately, but then
> *do that*, don't rename the normal stuff as well, and use a more obvious
> name like UNSPEC_VPARITY please.

I'll update for vector only.  Maybe it's better with UNSPEC_VPARITY*B*?
since the mnemonic has "b"(yte).

> 
>>const vsll __builtin_altivec_vprtybd (vsll);
>> -VPRTYBD parityv2di2 {}
>> +VPRTYBD p9v_paritybv2di2 {}
> 
> Why this?  Please keep the simpler names if at all possible.

The bif would like to map with the vector parity byte insns
directly, the parity2 can't work here any more.

The name is updated from previous *p9v_parity2 (becoming
to a named define_insn), I noticed there are some names with
p8v_, p9v_, meant to keep it consistent with the context.
You want this to be simplified as parity*b*v2di2?

> 
>>  {
>>emit_insn (gen_popcntbsi2 (tmp, src));
>> -  emit_insn (gen_paritysi2_cmpb (dst, tmp));
>> +  emit_insn (gen_paritybsi2 (dst, tmp));
>>  }
> 
> It is completely non-obvious what a "paritybsi2" is.  There is no such
> thing as a "parityb", not for normal people anyway.  It is very
> important that names give a hint of what they stand for.
> 
> The _cmpb of the existing name indicates that a cmpb insn is generated
> here as well.  Has that changed?
> 

I got the same understanding initially, but as you may have noticed
there isn't a cmpb, it seems just to be different from the name
parity2 so put the condition as one suffix.


>> -(define_insn "parity2_cmpb"
>> +(define_insn "parityb2"
>>[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
>> -(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] 
>> UNSPEC_PARITY))]
>> +(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")]
>> +UNSPEC_PARITYB))]
>>"TARGET_CMPB && TARGET_POPCNTB"
>>"prty %0,%1"
>>[(set_attr "type" "popcnt")])
> 
> Hrm, the original name was not so good apparently.  Still, please don't
> change multiple independent things in one patch, it makes the patch hard
> to read and understand and very hard to spot mistakes in.

Got it, good point.

> 
>> @@ -1226,7 +1225,16 @@ (define_expand "popcount2"
>>  (define_expand "parity2"
>>[(set (match_operand:VEC_IP 0 "register_operand")
>>  (parity:VEC_IP (match_operand:VEC_IP 1 "register_operand")))]
>> -  "TARGET_P9_VECTOR")
>> +  "TARGET_P9_VECTOR"
>> +{
>> +  rtx op1 = gen_lowpart (V16QImode, operands[1]);
>> +  rtx res = gen_reg_rtx (V16QImode);
>> +  emit_insn (gen_popcountv16qi2 (res, op1));
>> +  emit_insn (gen_p9v_parityb2 (operands[0],
>> +gen_lowpart (mode, res)));
>> +
>> +  DONE;
>> +})
> 
> So first do a patch that is essentially just this?

OK, will update and test it again.

> 
> Later patches can do all other things (also, not do this expand for
> TImode at all, ho hum).

OK, I guess all the others are for next stage1. :)

BR,
Kewen


Re: [committed] libstdc++: Fix uses of non-reserved names in headers

2023-02-16 Thread Jonathan Wakely via Gcc-patches
On Thu, 16 Feb 2023 at 11:45, Jakub Jelinek  wrote:
>
> On Thu, Feb 16, 2023 at 11:47:54AM +0100, Jakub Jelinek via Gcc-patches wrote:
> > On Thu, Feb 16, 2023 at 10:30:30AM +, Jonathan Wakely via Gcc-patches 
> > wrote:
> > > Tested powerpc64le-linux. Pushed to trunk.
> > >
> > > These should be backported too.
> > >
> > > -- >8 --
> > >
> > > The non-reserved names 'val' and 'dest' were being used in our headers
> > > but haven't been added to the 17_intro/names.cc test. That's because
> > > they are used by  and 
> > > respectively on glibc-based systems.
> >
> > So, can't we for such problematic names add hacks, like some directory
> > which the test adds as -isystem before the standard ones and contains
>
> Or do it the way you just did for PSTL, by including all the non-libstdc++
> headers used in libstdc++ headers before defining all the macros for the
> non-reserved names, hopefully the system headers use include guards and
> won't be included again.


Yes, I tried that last week, and it didn't work. There was a
diagnostic about __GLIBCXX__ being redefined, because of the {
dg-add-options no_pch } directive.

I can try again, or we can just use the existing solution of doing
#undef val #undef dest for glibc and/or linux targets, so that we
still test those names on other targets.



Re: [committed] libstdc++: Fix uses of non-reserved names in headers

2023-02-16 Thread Jonathan Wakely via Gcc-patches
On Thu, 16 Feb 2023 at 10:48, Jakub Jelinek  wrote:
>
> On Thu, Feb 16, 2023 at 10:30:30AM +, Jonathan Wakely via Gcc-patches 
> wrote:
> > Tested powerpc64le-linux. Pushed to trunk.
> >
> > These should be backported too.
> >
> > -- >8 --
> >
> > The non-reserved names 'val' and 'dest' were being used in our headers
> > but haven't been added to the 17_intro/names.cc test. That's because
> > they are used by  and 
> > respectively on glibc-based systems.
>
> So, can't we for such problematic names add hacks, like some directory
> which the test adds as -isystem before the standard ones and contains
> some header wrappers which temporarily
> #undef val
> #undef dest
> (or whatever other name), then
> #include_next ...
> and then define them again?
> Doesn't need to be for all targets of course, but just something to cover
> at least the most common ones.
>
> Or perhaps even do it differently, add 2 headers, one which defines
> all those #define whatever (, one that #undef whatever them all,
> and add wrappers in a -isystem directory for all non-gcc owned headers
> used by the libstdc++ headers, which would include this #undef header
> first and #define at the end.  That way we wouldn't test non-reserved
> names in say libc headers, just in libstdc++ headers.

We could, but it seems like a lot of work just for "val" and "dest",
when there are much simpler solutions :-)



[committed] libstdc++: Fix non-reserved names in

2023-02-16 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux. Pushed to trunk.

-- >8 --

libstdc++-v3/ChangeLog:

* include/ext/throw_allocator.h: Use reserved names for
parameters.
---
 libstdc++-v3/include/ext/throw_allocator.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/include/ext/throw_allocator.h 
b/libstdc++-v3/include/ext/throw_allocator.h
index 4c5565bcc2e..0dbf00176dc 100644
--- a/libstdc++-v3/include/ext/throw_allocator.h
+++ b/libstdc++-v3/include/ext/throw_allocator.h
@@ -851,13 +851,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return std::__addressof(__x); }
 
   _GLIBCXX_NODISCARD pointer
-  allocate(size_type __n, const void* hint = 0)
+  allocate(size_type __n, const void* __hint = 0)
   {
if (__n > this->max_size())
  std::__throw_bad_alloc();
 
throw_conditionally();
-   pointer const a = traits::allocate(_M_allocator, __n, hint);
+   pointer const a = traits::allocate(_M_allocator, __n, __hint);
insert(a, sizeof(value_type) * __n);
return a;
   }
@@ -880,8 +880,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
 #else
   void
-  construct(pointer __p, const value_type& val)
-  { return _M_allocator.construct(__p, val); }
+  construct(pointer __p, const value_type& __val)
+  { return _M_allocator.construct(__p, __val); }
 
   void
   destroy(pointer __p)
-- 
2.39.1



[committed] libstdc++: Add missing space after effective-target name in test

2023-02-16 Thread Jonathan Wakely via Gcc-patches
I only noticed this when sending the mail for the previous commit.

Tested x86_64-linux and pushed to trunk.

-- >8 --

libstdc++-v3/ChangeLog:

* testsuite/17_intro/names_pstl.cc: Add space after effective
target name.
---
 libstdc++-v3/testsuite/17_intro/names_pstl.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/testsuite/17_intro/names_pstl.cc 
b/libstdc++-v3/testsuite/17_intro/names_pstl.cc
index e5f3ca91aa2..42ce098550c 100644
--- a/libstdc++-v3/testsuite/17_intro/names_pstl.cc
+++ b/libstdc++-v3/testsuite/17_intro/names_pstl.cc
@@ -1,5 +1,5 @@
 // { dg-do compile { target c++17 } }
-// { dg-additional-options "-DTBB_SUPPRESS_DEPRECATED_MESSAGES=1" { target 
tbb_backend} }
+// { dg-additional-options "-DTBB_SUPPRESS_DEPRECATED_MESSAGES=1" { target 
tbb_backend } }
 
 // The TBB headers use non-reserved names (because they're not part of the
 // implementation) so we need to include them before the macro definitions
-- 
2.39.1



Re: [committed] libstdc++: Fix uses of non-reserved names in headers

2023-02-16 Thread Jakub Jelinek via Gcc-patches
On Thu, Feb 16, 2023 at 11:47:54AM +0100, Jakub Jelinek via Gcc-patches wrote:
> On Thu, Feb 16, 2023 at 10:30:30AM +, Jonathan Wakely via Gcc-patches 
> wrote:
> > Tested powerpc64le-linux. Pushed to trunk.
> > 
> > These should be backported too.
> > 
> > -- >8 --
> > 
> > The non-reserved names 'val' and 'dest' were being used in our headers
> > but haven't been added to the 17_intro/names.cc test. That's because
> > they are used by  and 
> > respectively on glibc-based systems.
> 
> So, can't we for such problematic names add hacks, like some directory
> which the test adds as -isystem before the standard ones and contains

Or do it the way you just did for PSTL, by including all the non-libstdc++
headers used in libstdc++ headers before defining all the macros for the
non-reserved names, hopefully the system headers use include guards and
won't be included again.

Jakub



[committed] libstdc++: Fix non-reserved names in PSTL headers

2023-02-16 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8 --

libstdc++-v3/ChangeLog:

* include/pstl/algorithm_fwd.h (__pattern_search_n)
(__brick_unique_copy, __brick_adjacent_find)
(__brick_generate_n, __pattern_generate_n): Use reserved names
for parameters.
* include/pstl/algorithm_impl.h (__brick_unique_copy)
(__pattern_reverse, __brick_generate_n): Likewise.
* include/pstl/execution_impl.h (__prefer_unsequenced_tag)
(__prefer_parallel_tag): Likewise.
* include/pstl/glue_algorithm_impl.h (transform): Likewise.
* include/pstl/glue_numeric_defs.h (adjacent_difference):
Likewise.
* include/pstl/numeric_impl.h (__brick_adjacent_difference):
Likewise.
* include/pstl/parallel_backend_tbb.h (__merge_func): Likewise.
* include/pstl/unseq_backend_simd.h (_Combiner)
(__simd_min_element, __simd_minmax_element): Likewise.
* testsuite/17_intro/names_pstl.cc: New test.
---
 libstdc++-v3/include/pstl/algorithm_fwd.h | 36 +--
 libstdc++-v3/include/pstl/algorithm_impl.h| 28 +++
 libstdc++-v3/include/pstl/execution_impl.h|  8 ++---
 .../include/pstl/glue_algorithm_impl.h|  2 +-
 libstdc++-v3/include/pstl/glue_numeric_defs.h |  2 +-
 libstdc++-v3/include/pstl/numeric_impl.h  |  4 +--
 .../include/pstl/parallel_backend_tbb.h   |  4 +--
 .../include/pstl/unseq_backend_simd.h | 10 +++---
 libstdc++-v3/testsuite/17_intro/names_pstl.cc | 16 +
 9 files changed, 63 insertions(+), 47 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/17_intro/names_pstl.cc

diff --git a/libstdc++-v3/include/pstl/algorithm_fwd.h 
b/libstdc++-v3/include/pstl/algorithm_fwd.h
index 814356b38fb..e85125c4dc7 100644
--- a/libstdc++-v3/include/pstl/algorithm_fwd.h
+++ b/libstdc++-v3/include/pstl/algorithm_fwd.h
@@ -364,17 +364,17 @@ __brick_search_n(_ForwardIterator, _ForwardIterator, 
_Size, const _Tp&, _BinaryP
  /*vector=*/std::true_type) noexcept;
 
 template 
+  class _IsVector>
 _ForwardIterator
 __pattern_search_n(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, 
_Size, const _Tp&, _BinaryPredicate,
-   IsVector,
+  _IsVector,
/*is_parallel=*/std::false_type) noexcept;
 
 template 
+  class _IsVector>
 _RandomAccessIterator
 __pattern_search_n(_ExecutionPolicy&&, _RandomAccessIterator, 
_RandomAccessIterator, _Size, const _Tp&,
-   _BinaryPredicate, IsVector,
+   _BinaryPredicate, _IsVector,
/*is_parallel=*/std::true_type) noexcept;
 
 //
@@ -528,8 +528,8 @@ __pattern_unique(_ExecutionPolicy&&, _ForwardIterator, 
_ForwardIterator, _Binary
 // unique_copy
 //
 
-template 
-OutputIterator __brick_unique_copy(_ForwardIterator, _ForwardIterator, 
OutputIterator, _BinaryPredicate,
+template 
+  _OutputIterator __brick_unique_copy(_ForwardIterator, _ForwardIterator, 
_OutputIterator, _BinaryPredicate,
/*vector=*/std::false_type) noexcept;
 
 template 
@@ -823,12 +823,12 @@ __pattern_partial_sort_copy(_ExecutionPolicy&&, 
_ForwardIterator, _ForwardIterat
 template 
 _ForwardIterator
 __brick_adjacent_find(_ForwardIterator, _ForwardIterator, _BinaryPredicate,
-  /* IsVector = */ std::true_type, bool) noexcept;
+  /* _IsVector = */ std::true_type, bool) noexcept;
 
 template 
 _ForwardIterator
 __brick_adjacent_find(_ForwardIterator, _ForwardIterator, _BinaryPredicate,
-  /* IsVector = */ std::false_type, bool) noexcept;
+  /* _IsVector = */ std::false_type, bool) noexcept;
 
 template 
 _ForwardIterator
@@ -920,22 +920,22 @@ _ForwardIterator
 __pattern_generate(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, 
_Generator,
/*is_parallel=*/std::true_type, _IsVector);
 
-template 
-OutputIterator __brick_generate_n(OutputIterator, Size, _Generator,
+template 
+  _OutputIterator __brick_generate_n(_OutputIterator, _Size, _Generator,
   /* is_vector = */ std::true_type) noexcept;
 
-template 
-OutputIterator __brick_generate_n(OutputIterator, Size, _Generator,
+template 
+  _OutputIterator __brick_generate_n(_OutputIterator, _Size, _Generator,
   /* is_vector = */ std::false_type) noexcept;
 
-template 
-OutputIterator
-__pattern_generate_n(_ExecutionPolicy&&, OutputIterator, Size, _Generator,
+template 
+  _OutputIterator
+__pattern_generate_n(_ExecutionPolicy&&, _OutputIterator, _Size, _Generator,
  /*is_parallel=*/std::false_type, _IsVector) noexcept;
 
-template 
-OutputIterator
-__pattern_generate_n(_ExecutionPolicy&&, OutputIterator, Size, 

[committed] libgomp: Fix up some typos in libgomp.texi

2023-02-16 Thread Jakub Jelinek via Gcc-patches
Hi!

I decided to check for repeated the the in libgomp and noticed
there are several occurrences of a typo theads rather than threads
in libgomp.texi.

Tested on x86_64-linux, committed to trunk.

2023-02-16  Jakub Jelinek  

* libgomp.texi: Fix typos - theads -> threads.

--- libgomp/libgomp.texi.jj 2023-02-04 06:19:06.984211708 +0100
+++ libgomp/libgomp.texi2023-02-16 12:05:59.676732507 +0100
@@ -524,7 +524,7 @@ linkage, and do not throw exceptions.
 * omp_get_num_procs::   Number of processors online
 * omp_get_num_teams::   Number of teams
 * omp_get_num_threads:: Size of the active team
-* omp_get_proc_bind::   Whether theads may be moved between CPUs
+* omp_get_proc_bind::   Whether threads may be moved between CPUs
 * omp_get_schedule::Obtain the runtime scheduling method
 * omp_get_supported_active_levels:: Maximum number of active regions supported
 * omp_get_team_num::Get team number
@@ -1031,7 +1031,7 @@ one thread per CPU online is used.
 
 
 @node omp_get_proc_bind
-@section @code{omp_get_proc_bind} -- Whether theads may be moved between CPUs
+@section @code{omp_get_proc_bind} -- Whether threads may be moved between CPUs
 @table @asis
 @item @emph{Description}:
 This functions returns the currently active thread affinity policy, which is
@@ -1945,8 +1945,8 @@ beginning with @env{GOMP_} are GNU exten
 * OMP_NESTED::  Nested parallel regions
 * OMP_NUM_TEAMS::   Specifies the number of teams to use by teams 
region
 * OMP_NUM_THREADS:: Specifies the number of threads to use
-* OMP_PROC_BIND::   Whether theads may be moved between CPUs
-* OMP_PLACES::  Specifies on which CPUs the theads should be placed
+* OMP_PROC_BIND::   Whether threads may be moved between CPUs
+* OMP_PLACES::  Specifies on which CPUs the threads should be 
placed
 * OMP_STACKSIZE::   Set default thread stack size
 * OMP_SCHEDULE::How threads are scheduled
 * OMP_TARGET_OFFLOAD::  Controls offloading behaviour
@@ -2144,12 +2144,12 @@ nesting by default.  If undefined one th
 
 
 @node OMP_PROC_BIND
-@section @env{OMP_PROC_BIND} -- Whether theads may be moved between CPUs
+@section @env{OMP_PROC_BIND} -- Whether threads may be moved between CPUs
 @cindex Environment Variable
 @table @asis
 @item @emph{Description}:
 Specifies whether threads may be moved between processors.  If set to
-@code{TRUE}, OpenMP theads should not be moved; if set to @code{FALSE}
+@code{TRUE}, OpenMP threads should not be moved; if set to @code{FALSE}
 they may be moved.  Alternatively, a comma separated list with the
 values @code{PRIMARY}, @code{MASTER}, @code{CLOSE} and @code{SPREAD} can
 be used to specify the thread affinity policy for the corresponding nesting
@@ -2174,7 +2174,7 @@ When undefined, @env{OMP_PROC_BIND} defa
 
 
 @node OMP_PLACES
-@section @env{OMP_PLACES} -- Specifies on which CPUs the theads should be 
placed
+@section @env{OMP_PLACES} -- Specifies on which CPUs the threads should be 
placed
 @cindex Environment Variable
 @table @asis
 @item @emph{Description}:

Jakub



[committed] libgomp: Fix comment typo

2023-02-16 Thread Jakub Jelinek via Gcc-patches
Hi!

I saw
FAIL: libgomp.fortran/target-nowait-array-section.f90   -O  execution test
in my last x86_64-linux bootstrap.  From quick skimming, it might be just
unreliable test, which assumes that asynchronous execution wouldn't produce
ordered sequence, but can't it happen even with asynchronous execution?

That said, while skimming the test, I've noticed a comment typo and
this patch fixes that up.

Tested on x86_64-linux, committed to trunk.

2023-02-16  Jakub Jelinek  

* testsuite/libgomp.fortran/target-nowait-array-section.f90: Fix
comment typo and improve its wording.

--- libgomp/testsuite/libgomp.fortran/target-nowait-array-section.f90.jj
2022-05-16 09:46:02.329060126 +0200
+++ libgomp/testsuite/libgomp.fortran/target-nowait-array-section.f90   
2023-02-16 12:04:11.227347228 +0100
@@ -1,4 +1,4 @@
-! Runs the the target region asynchrolously and checks for it
+! Run the target region asynchronously and check it
 !
 ! Note that  map(alloc: work(:, i)) + nowait  should be safe
 ! given that a nondescriptor array is used. However, it still

Jakub



Re: [PATCH] rs6000: Fix vector parity support [PR108699]

2023-02-16 Thread Segher Boessenkool
Hi!

On Thu, Feb 16, 2023 at 05:23:40PM +0800, Kewen.Lin wrote:
> This patch is to fix the handling with one more pre-insn
> vpopcntb.  It also fixes an oversight having V8HI in VEC_IP,
> replaces VParity with VEC_IP, and adjusts the existing
> UNSPEC_PARITY to a more meaningful name UNSPEC_PARITYB.

Please don't do that.  UNSPEC_PARITYB is worse than UNSPEC_PARITY,
even more so for the prtyw etc. instructions.

You might want to express the vector parity insns separately, but then
*do that*, don't rename the normal stuff as well, and use a more obvious
name like UNSPEC_VPARITY please.

>const vsll __builtin_altivec_vprtybd (vsll);
> -VPRTYBD parityv2di2 {}
> +VPRTYBD p9v_paritybv2di2 {}

Why this?  Please keep the simpler names if at all possible.

>   {
> emit_insn (gen_popcntbsi2 (tmp, src));
> -   emit_insn (gen_paritysi2_cmpb (dst, tmp));
> +   emit_insn (gen_paritybsi2 (dst, tmp));
>   }

It is completely non-obvious what a "paritybsi2" is.  There is no such
thing as a "parityb", not for normal people anyway.  It is very
important that names give a hint of what they stand for.

The _cmpb of the existing name indicates that a cmpb insn is generated
here as well.  Has that changed?

> -(define_insn "parity2_cmpb"
> +(define_insn "parityb2"
>[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
> - (unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] 
> UNSPEC_PARITY))]
> + (unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")]
> + UNSPEC_PARITYB))]
>"TARGET_CMPB && TARGET_POPCNTB"
>"prty %0,%1"
>[(set_attr "type" "popcnt")])

Hrm, the original name was not so good apparently.  Still, please don't
change multiple independent things in one patch, it makes the patch hard
to read and understand and very hard to spot mistakes in.

> @@ -1226,7 +1225,16 @@ (define_expand "popcount2"
>  (define_expand "parity2"
>[(set (match_operand:VEC_IP 0 "register_operand")
>   (parity:VEC_IP (match_operand:VEC_IP 1 "register_operand")))]
> -  "TARGET_P9_VECTOR")
> +  "TARGET_P9_VECTOR"
> +{
> +  rtx op1 = gen_lowpart (V16QImode, operands[1]);
> +  rtx res = gen_reg_rtx (V16QImode);
> +  emit_insn (gen_popcountv16qi2 (res, op1));
> +  emit_insn (gen_p9v_parityb2 (operands[0],
> + gen_lowpart (mode, res)));
> +
> +  DONE;
> +})

So first do a patch that is essentially just this?

Later patches can do all other things (also, not do this expand for
TImode at all, ho hum).


Segher


Re: [committed] libstdc++: Fix uses of non-reserved names in headers

2023-02-16 Thread Jakub Jelinek via Gcc-patches
On Thu, Feb 16, 2023 at 10:30:30AM +, Jonathan Wakely via Gcc-patches wrote:
> Tested powerpc64le-linux. Pushed to trunk.
> 
> These should be backported too.
> 
> -- >8 --
> 
> The non-reserved names 'val' and 'dest' were being used in our headers
> but haven't been added to the 17_intro/names.cc test. That's because
> they are used by  and 
> respectively on glibc-based systems.

So, can't we for such problematic names add hacks, like some directory
which the test adds as -isystem before the standard ones and contains
some header wrappers which temporarily
#undef val
#undef dest
(or whatever other name), then
#include_next ...
and then define them again?
Doesn't need to be for all targets of course, but just something to cover
at least the most common ones.

Or perhaps even do it differently, add 2 headers, one which defines
all those #define whatever (, one that #undef whatever them all,
and add wrappers in a -isystem directory for all non-gcc owned headers
used by the libstdc++ headers, which would include this #undef header
first and #define at the end.  That way we wouldn't test non-reserved
names in say libc headers, just in libstdc++ headers.

What both of these break though is if libstdc++ headers try to use
__has_include etc. on them, because the added wrapper will mean they will
show as existing.

Jakub



Re: Re: [PATCH] RISC-V: Add vm* mask C api tests

2023-02-16 Thread juzhe.zh...@rivai.ai
Well, I think the best solution:
1. Remove all the intrinsic tests that I have already committed.
2. Then, embed test-generator for this intrinsic unit-test.
3. Call  test-generator during regression and test them.
4. Remove the testcases generated by the test-generator after regression.

Not sure whether you agree with me.

The test-generator I used generates the testcases by reading the 
rvv-intrinsic document directly.
That means I need to commit test-generator and rvv-intrinsic document both.
I don't think my test-generator is good to commit.

I believe Kito has the mature and better test-generator (much better than mine) 
to commit since rvv-intrinsic doc is their work.

As long as we can make kito's test-generator embedded into GCC regression, this 
issue will be fixed. And I believe we can fix it soon.

So...Let's wait for kito.


juzhe.zh...@rivai.ai
 
From: Jakub Jelinek
Date: 2023-02-16 18:20
To: juzhe.zh...@rivai.ai
CC: gcc-patches; kito.cheng; jeffreyalaw
Subject: Re: Re: [PATCH] RISC-V: Add vm* mask C api tests
On Thu, Feb 16, 2023 at 05:53:48PM +0800, juzhe.zh...@rivai.ai wrote:
> Thanks for reporting this. I think may be we can make reduce tests into 1/3.
> For example:
> We have:
> * gcc.target/riscv/rvv/base/vmand_mm-1.c: New test.
> * gcc.target/riscv/rvv/base/vmand_mm-2.c: New test.
> * gcc.target/riscv/rvv/base/vmand_mm-3.c: New test.
> 
> Maybe we can reduce it into one test:
> vmand_mm.c only.
> 
> I will improve and reduce all intrinsic tests like this soon (I almost done 
> all intrinsic in this week, next week I will do this soon).
> 
> RVV intrinsics are really huge, this is the document:
> https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/master/auto-generated 
> 
> The testcases are directly come from LLVM (We just add assembler check into 
> the test), they also have this amount of testcases and the just recently 
> change them:
> https://reviews.llvm.org/D142697 
> https://reviews.llvm.org/D142644 
> 
> Take a look at the changing LLVM patch, I am aggree with you ,the LLVM patch 
> is quite huge and not easy to maintain.
 
Yeah, LLVM does this all the time, their unit-tests where they embed e.g.
matchers for IL in huge tests.
 
I just think the way they are doing this is a very bad idea.
If say one writes some C/C++ test, compile it, some helper program
adds the IL into comments in the test then again any time you want to
adjust something in the compiler that affects those tests, you need to
regenerate them.  Is the generator included somewhere, or does every
user write his own tooling to do that?  Anyway, if the solution is
regenerate the IL, the test lost quite lot of its meaning, because
when changing thousands of tests and regenerating the IL for all of them,
one can hardly expect to carefully examine the changes to all those tests
whether everything was intended.
 
In GCC we have far fewer such unit-tests and big parts of the testsuite
are testing everything from parsing through assembly through linking through
runtime.  In my experience over the years, many such tests can discover even
bugs completely unrelated to the original reason why a test has been added.
 
If they have some generator in LLVM for these riscv tests, even worse,
there is another step for LLVM generator regenerates them on the LLVM side
and somebody needs to reimport them into GCC and regenerate the
scan-assembler regexps.
 
riscv already uses what various other GCC backends use for builtins and
intrinsics, various *.def files from which the actual support is created.
So, can't we use the same files + something on top of that to have the
testsuite coverage, or if it should be independent from it, at least
have something similar which would describe intrinsic that should be tested,
iterate over such and such types for which arguments and how to come up with
the expected emitted code.
So, rather than reducing the tests into 1/3, try to reduce them to one
line per intrinsic or something of that scale.
 
Jakub
 
 


[committed] libstdc++: Fix uses of non-reserved names in headers

2023-02-16 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux. Pushed to trunk.

These should be backported too.

-- >8 --

The non-reserved names 'val' and 'dest' were being used in our headers
but haven't been added to the 17_intro/names.cc test. That's because
they are used by  and 
respectively on glibc-based systems.

libstdc++-v3/ChangeLog:

* include/bits/fs_ops.h (create_directory): Use reserved name
for parameter.
* include/bits/ranges_algo.h (__contains_subrange_fn):
Likewise.
* include/bits/regex_automaton.h (_State_base::_M_print):
Likewise.
* include/bits/regex_automaton.tcc(_State_base::_M_print):
Likewise.
* include/bits/regex_scanner.tcc(_Scanner::_M_print): Likewise.
* include/experimental/bits/fs_ops.h (create_directory):
Likewise.
* include/std/mutex (timed_mutex::_M_clocklock): Likewise.
(recursive_timed_mutex:_M_clocklock): Likewise.
* include/std/tuple (basic_common_reference): Likewise.
* libsupc++/cxxabi_init_exception.h
(__cxa_init_primary_exception): Likewise.
* testsuite/17_intro/names.cc: Add checks.
---
 libstdc++-v3/include/bits/fs_ops.h|  4 +-
 libstdc++-v3/include/bits/ranges_algo.h   |  6 +-
 libstdc++-v3/include/bits/regex_automaton.h   |  2 +-
 libstdc++-v3/include/bits/regex_automaton.tcc | 18 +++---
 libstdc++-v3/include/bits/regex_scanner.tcc   | 60 +--
 .../include/experimental/bits/fs_ops.h|  4 +-
 libstdc++-v3/include/std/mutex|  8 +--
 libstdc++-v3/include/std/tuple|  8 +--
 .../libsupc++/cxxabi_init_exception.h |  5 +-
 libstdc++-v3/testsuite/17_intro/names.cc  | 21 ++-
 10 files changed, 77 insertions(+), 59 deletions(-)

diff --git a/libstdc++-v3/include/bits/fs_ops.h 
b/libstdc++-v3/include/bits/fs_ops.h
index 74b8ad50210..e0b87c9fa00 100644
--- a/libstdc++-v3/include/bits/fs_ops.h
+++ b/libstdc++-v3/include/bits/fs_ops.h
@@ -90,8 +90,8 @@ namespace filesystem
   bool create_directory(const path& __p);
   bool create_directory(const path& __p, error_code& __ec) noexcept;
 
-  bool create_directory(const path& __p, const path& attributes);
-  bool create_directory(const path& __p, const path& attributes,
+  bool create_directory(const path& __p, const path& __attributes);
+  bool create_directory(const path& __p, const path& __attributes,
error_code& __ec) noexcept;
 
   void create_directory_symlink(const path& __to, const path& __new_symlink);
diff --git a/libstdc++-v3/include/bits/ranges_algo.h 
b/libstdc++-v3/include/bits/ranges_algo.h
index 1685e1d85bb..5d039bd1cd4 100644
--- a/libstdc++-v3/include/bits/ranges_algo.h
+++ b/libstdc++-v3/include/bits/ranges_algo.h
@@ -3490,11 +3490,11 @@ namespace ranges
 template _Sent1,
 forward_iterator _Iter2, sentinel_for<_Iter2> _Sent2,
 typename _Pred = ranges::equal_to,
-typename Proj1 = identity, typename Proj2 = identity>
-  requires indirectly_comparable<_Iter1, _Iter2, _Pred, Proj1, Proj2>
+typename _Proj1 = identity, typename _Proj2 = identity>
+  requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2>
   constexpr bool
   operator()(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 
__last2,
-_Pred __pred = {}, Proj1 __proj1 = {}, Proj2 __proj2 = {}) 
const
+_Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) 
const
   {
return __first2 == __last2
  || !ranges::search(__first1, __last1, __first2, __last2,
diff --git a/libstdc++-v3/include/bits/regex_automaton.h 
b/libstdc++-v3/include/bits/regex_automaton.h
index c2e4b512ed5..ef336d5883b 100644
--- a/libstdc++-v3/include/bits/regex_automaton.h
+++ b/libstdc++-v3/include/bits/regex_automaton.h
@@ -110,7 +110,7 @@ namespace __detail
 
 #ifdef _GLIBCXX_DEBUG
 std::ostream&
-_M_print(std::ostream& ostr) const;
+_M_print(std::ostream& __ostr) const;
 
 // Prints graphviz dot commands for state.
 std::ostream&
diff --git a/libstdc++-v3/include/bits/regex_automaton.tcc 
b/libstdc++-v3/include/bits/regex_automaton.tcc
index 546605da31f..f25f7b33263 100644
--- a/libstdc++-v3/include/bits/regex_automaton.tcc
+++ b/libstdc++-v3/include/bits/regex_automaton.tcc
@@ -36,34 +36,34 @@ namespace __detail
 {
 #ifdef _GLIBCXX_DEBUG
   inline std::ostream&
-  _State_base::_M_print(std::ostream& ostr) const
+  _State_base::_M_print(std::ostream& __ostr) const
   {
 switch (_M_opcode)
 {
   case _S_opcode_alternative:
   case _S_opcode_repeat:
-   ostr << "alt next=" << _M_next << " alt=" << _M_alt;
+   __ostr << "alt next=" << _M_next << " alt=" << _M_alt;
break;
   case _S_opcode_subexpr_begin:
-   ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr;
+   __ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr;
break;
   

Re: Re: [PATCH] RISC-V: Add vm* mask C api tests

2023-02-16 Thread Jakub Jelinek via Gcc-patches
On Thu, Feb 16, 2023 at 05:53:48PM +0800, juzhe.zh...@rivai.ai wrote:
> Thanks for reporting this. I think may be we can make reduce tests into 1/3.
> For example:
> We have:
> * gcc.target/riscv/rvv/base/vmand_mm-1.c: New test.
> * gcc.target/riscv/rvv/base/vmand_mm-2.c: New test.
> * gcc.target/riscv/rvv/base/vmand_mm-3.c: New test.
> 
> Maybe we can reduce it into one test:
> vmand_mm.c only.
> 
> I will improve and reduce all intrinsic tests like this soon (I almost done 
> all intrinsic in this week, next week I will do this soon).
> 
> RVV intrinsics are really huge, this is the document:
> https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/master/auto-generated 
> 
> The testcases are directly come from LLVM (We just add assembler check into 
> the test), they also have this amount of testcases and the just recently 
> change them:
> https://reviews.llvm.org/D142697 
> https://reviews.llvm.org/D142644 
> 
> Take a look at the changing LLVM patch, I am aggree with you ,the LLVM patch 
> is quite huge and not easy to maintain.

Yeah, LLVM does this all the time, their unit-tests where they embed e.g.
matchers for IL in huge tests.

I just think the way they are doing this is a very bad idea.
If say one writes some C/C++ test, compile it, some helper program
adds the IL into comments in the test then again any time you want to
adjust something in the compiler that affects those tests, you need to
regenerate them.  Is the generator included somewhere, or does every
user write his own tooling to do that?  Anyway, if the solution is
regenerate the IL, the test lost quite a lot of its meaning, because
when changing thousands of tests and regenerating the IL for all of them,
one can hardly expect to carefully examine the changes to all those tests
whether everything was intended.

In GCC we have far fewer such unit-tests and big parts of the testsuite
are testing everything from parsing through assembly through linking through
runtime.  In my experience over the years, many such tests can discover even
bugs completely unrelated to the original reason why a test has been added.

If they have some generator in LLVM for these riscv tests, even worse,
there is another step for LLVM generator regenerates them on the LLVM side
and somebody needs to reimport them into GCC and regenerate the
scan-assembler regexps.

riscv already uses what various other GCC backends use for builtins and
intrinsics, various *.def files from which the actual support is created.
So, can't we use the same files + something on top of that to have the
testsuite coverage, or if it should be independent from it, at least
have something similar which would describe intrinsic that should be tested,
iterate over such and such types for which arguments and how to come up with
the expected emitted code.
So, rather than reducing the tests into 1/3, try to reduce them to one
line per intrinsic or something of that scale.

Jakub



Re: Re: [PATCH] RISC-V: Add vm* mask C api tests

2023-02-16 Thread juzhe.zh...@rivai.ai
Thanks for reporting this. I think maybe we can reduce the tests to 1/3.
For example:
We have:
* gcc.target/riscv/rvv/base/vmand_mm-1.c: New test.
* gcc.target/riscv/rvv/base/vmand_mm-2.c: New test.
* gcc.target/riscv/rvv/base/vmand_mm-3.c: New test.

Maybe we can reduce it into one test:
vmand_mm.c only.

I will improve and reduce all intrinsic tests like this soon (I have almost finished 
all intrinsics this week; I will do this next week).

RVV intrinsics are really huge, this is the document:
https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/master/auto-generated 

The testcases come directly from LLVM (we just add assembler checks to the 
tests); they also have this number of testcases, and they just recently changed 
them:
https://reviews.llvm.org/D142697 
https://reviews.llvm.org/D142644 

Take a look at the LLVM patch that changes them; I agree with you, the LLVM patch is 
quite huge and not easy to maintain.

So... I think I can reduce the tests to 1/3 of them next. But it's 
still very big (you can take a look at LLVM).
Let's see whether kito has more comments about it.



juzhe.zh...@rivai.ai
 
From: Jakub Jelinek
Date: 2023-02-16 17:38
To: juzhe.zhong
CC: gcc-patches; kito.cheng; Jeff Law
Subject: Re: [PATCH] RISC-V: Add vm* mask C api tests
Hi!
 
I see in the past few weeks you've added huge amounts of these tests
du -shc *.target/riscv/*/
34M gcc.target/riscv/rvv/
28M g++.target/riscv/rvv/
61M total
and new are coming (nothing at all at this year's start).
This is far larger than tests of any other architecture
(i386 has 35M total, aarch64 31M total, arm 17M total, powerpc 12M total,
everything else is even much smaller) but for the other architectures it has
been decades of testsuite coverage for features added over the years.
Rather than looking purely at size, I'm more worried about the content
of the tests.  Usually target testsuites include runtime tests whether
particular intrinsics etc. behave correctly at runtime, plus some compile
tests that they can be compiled with occasional scan-assembler* to mention
a particular instruction appears, but in these cases the scan-assembler*
covers the entire (albeit small) functions, which makes it IMHO a
maintenance nightmare whenever one wants to change something important
in the compiler.  Take e.g. the recent Andreas Schwab's change to make
-fasynchronous-unwind-tables the default on riscv, even that change required
quite a few changes.  My worry is that with these kind of tests changes like
that will become much harder and some people will simply decide not to do
such changes because having to adjust tens of thousands of tests even with
some scripting would be a nightmare.  Can't we do better than this?
 
E.g. what is the difference between gcc.target/riscv/rvv/ and
g++.target/riscv/rvv/ tests?  Are the  APIs so different
between C and C++ that it needs to be tested twice?  Even if so,
we have the concept of c-c++-common tests, we could add c-c++-common.target
and make riscv.exp handle it similarly to how e.g. C and C++ dg.exp handles
those.  How do you create these tests?  If you use some generator for them,
wouldn't it be better to include the generator in the testsuite and generate
them on the fly?  We already have a precedent for that, e.g. the
gcc/testsuite/g*.dg/compat/struct-layout-1.exp testsuite has a generator
program written in C that creates tests on the fly.  Now, using something
like that would have 2 advantages, it would be much easier for maintenance,
if you do some global change in the compiler that affects those tests, just
adjust a few spots in the generator instead of tweaking currently 6000 tests
and counting.  Even if you aren't using a generator to write these tests
(that would be a lot of work then!), a question is if it couldn't be done by
one, have say some file like gcc has *.def files all around to describe what
you want to test and something that generates those.
 
Just wanted to chime in before we have 10 times more of such tests and it
will be too late to adjust...
 
On Thu, Feb 16, 2023 at 11:36:19AM +0800, juzhe.zh...@rivai.ai wrote:
> From: Ju-Zhe Zhong 
> 
> gcc/testsuite/ChangeLog:
> 
> * gcc.target/riscv/rvv/base/vmand_mm-1.c: New test.
> * gcc.target/riscv/rvv/base/vmand_mm-2.c: New test.
> * gcc.target/riscv/rvv/base/vmand_mm-3.c: New test.
> * gcc.target/riscv/rvv/base/vmandn_mm-1.c: New test.
> * gcc.target/riscv/rvv/base/vmandn_mm-2.c: New test.
> * gcc.target/riscv/rvv/base/vmandn_mm-3.c: New test.
> * gcc.target/riscv/rvv/base/vmclr_m_m-1.c: New test.
> * gcc.target/riscv/rvv/base/vmclr_m_m-2.c: New test.
> * gcc.target/riscv/rvv/base/vmclr_m_m-3.c: New test.
> * gcc.target/riscv/rvv/base/vmmv_m_m-1.c: New test.
> * gcc.target/riscv/rvv/base/vmmv_m_m-2.c: New test.
> * gcc.target/riscv/rvv/base/vmmv_m_m-3.c: New test.
> * gcc.target/riscv/rvv/base/vmnand_mm-1.c: New test.
>  

Re: [PATCH] RISC-V: Add vm* mask C api tests

2023-02-16 Thread Jakub Jelinek via Gcc-patches
Hi!

I see in the past few weeks you've added huge amounts of these tests
du -shc *.target/riscv/*/
34M gcc.target/riscv/rvv/
28M g++.target/riscv/rvv/
61M total
and new are coming (nothing at all at this year's start).
This is far larger than tests of any other architecture
(i386 has 35M total, aarch64 31M total, arm 17M total, powerpc 12M total,
everything else is even much smaller) but for the other architectures it has
been decades of testsuite coverage for features added over the years.
Rather than looking purely at size, I'm more worried about the content
of the tests.  Usually target testsuites include runtime tests whether
particular intrinsics etc. behave correctly at runtime, plus some compile
tests that they can be compiled with occasional scan-assembler* to mention
a particular instruction appears, but in these cases the scan-assembler*
covers the entire (albeit small) functions, which makes it IMHO a
maintenance nightmare whenever one wants to change something important
in the compiler.  Take e.g. the recent Andreas Schwab's change to make
-fasynchronous-unwind-tables the default on riscv, even that change required
quite a few changes.  My worry is that with these kind of tests changes like
that will become much harder and some people will simply decide not to do
such changes because having to adjust tens of thousands of tests even with
some scripting would be a nightmare.  Can't we do better than this?

E.g. what is the difference between gcc.target/riscv/rvv/ and
g++.target/riscv/rvv/ tests?  Are the  APIs so different
between C and C++ that it needs to be tested twice?  Even if so,
we have the concept of c-c++-common tests, we could add c-c++-common.target
and make riscv.exp handle it similarly to how e.g. C and C++ dg.exp handles
those.  How do you create these tests?  If you use some generator for them,
wouldn't it be better to include the generator in the testsuite and generate
them on the fly?  We already have a precedent for that, e.g. the
gcc/testsuite/g*.dg/compat/struct-layout-1.exp testsuite has a generator
program written in C that creates tests on the fly.  Now, using something
like that would have 2 advantages, it would be much easier for maintenance,
if you do some global change in the compiler that affects those tests, just
adjust a few spots in the generator instead of tweaking currently 6000 tests
and counting.  Even if you aren't using a generator to write these tests
(that would be a lot of work then!), a question is if it couldn't be done by
one, have say some file like gcc has *.def files all around to describe what
you want to test and something that generates those.

Just wanted to chime in before we have 10 times more of such tests and it
will be too late to adjust...

On Thu, Feb 16, 2023 at 11:36:19AM +0800, juzhe.zh...@rivai.ai wrote:
> From: Ju-Zhe Zhong 
> 
> gcc/testsuite/ChangeLog:
> 
> * gcc.target/riscv/rvv/base/vmand_mm-1.c: New test.
> * gcc.target/riscv/rvv/base/vmand_mm-2.c: New test.
> * gcc.target/riscv/rvv/base/vmand_mm-3.c: New test.
> * gcc.target/riscv/rvv/base/vmandn_mm-1.c: New test.
> * gcc.target/riscv/rvv/base/vmandn_mm-2.c: New test.
> * gcc.target/riscv/rvv/base/vmandn_mm-3.c: New test.
> * gcc.target/riscv/rvv/base/vmclr_m_m-1.c: New test.
> * gcc.target/riscv/rvv/base/vmclr_m_m-2.c: New test.
> * gcc.target/riscv/rvv/base/vmclr_m_m-3.c: New test.
> * gcc.target/riscv/rvv/base/vmmv_m_m-1.c: New test.
> * gcc.target/riscv/rvv/base/vmmv_m_m-2.c: New test.
> * gcc.target/riscv/rvv/base/vmmv_m_m-3.c: New test.
> * gcc.target/riscv/rvv/base/vmnand_mm-1.c: New test.
> * gcc.target/riscv/rvv/base/vmnand_mm-2.c: New test.
> * gcc.target/riscv/rvv/base/vmnand_mm-3.c: New test.
> * gcc.target/riscv/rvv/base/vmnor_mm-1.c: New test.
> * gcc.target/riscv/rvv/base/vmnor_mm-2.c: New test.
> * gcc.target/riscv/rvv/base/vmnor_mm-3.c: New test.
> * gcc.target/riscv/rvv/base/vmnot_m_m-1.c: New test.
> * gcc.target/riscv/rvv/base/vmnot_m_m-2.c: New test.
> * gcc.target/riscv/rvv/base/vmnot_m_m-3.c: New test.
> * gcc.target/riscv/rvv/base/vmor_mm-1.c: New test.
> * gcc.target/riscv/rvv/base/vmor_mm-2.c: New test.
> * gcc.target/riscv/rvv/base/vmor_mm-3.c: New test.
> * gcc.target/riscv/rvv/base/vmorn_mm-1.c: New test.
> * gcc.target/riscv/rvv/base/vmorn_mm-2.c: New test.
> * gcc.target/riscv/rvv/base/vmorn_mm-3.c: New test.
> * gcc.target/riscv/rvv/base/vmsbf_m_m-1.c: New test.
> * gcc.target/riscv/rvv/base/vmsbf_m_m-2.c: New test.
> * gcc.target/riscv/rvv/base/vmsbf_m_m-3.c: New test.
> * gcc.target/riscv/rvv/base/vmsbf_m_mu-1.c: New test.
> * gcc.target/riscv/rvv/base/vmsbf_m_mu-2.c: New test.
> * gcc.target/riscv/rvv/base/vmsbf_m_mu-3.c: New test.
> * 

Re: [PATCH] PR tree-optimization/108697 - Create a lazy ssa_cache

2023-02-16 Thread Aldy Hernandez via Gcc-patches




On 2/16/23 08:55, Richard Biener wrote:

On Wed, Feb 15, 2023 at 6:07 PM Andrew MacLeod via Gcc-patches
 wrote:


This patch implements the suggestion that we have an alternative
ssa-cache which does not zero memory, and instead uses a bitmap to track
whether a value is currently set or not.  It roughly mimics what
path_range_query was doing internally.

For sparsely used cases, especially in large programs, this is more
efficient.  I changed path_range_query to use this, and removed its old
bitmap (and a hack or two around PHI calculations), and also utilized
this in the assume_query class.

Performance wise, the patch doesn't affect VRP (since that still uses
the original version).  Switching to the lazy version caused a slowdown
of 2.5% across VRP.

There was a noticeable improvement elsewhere: across 230 GCC source
files, threading ran over 12% faster!  Overall compilation improved by
0.3%.  Not sure it makes much difference in compiler.i, but it shouldn't
hurt.

bootstraps on x86_64-pc-linux-gnu with no regressions.   OK for trunk?
or do you want to wait for the next release...


I see

@@ -365,16 +335,8 @@ path_range_query::compute_ranges_in_phis (basic_block bb)

Value_Range r (TREE_TYPE (name));
if (range_defined_in_block (r, name, bb))
-   {
- unsigned v = SSA_NAME_VERSION (name);
- set_cache (r, name);
- bitmap_set_bit (phi_set, v);
- // Pretend we don't have a cache entry for this name until
- // we're done with all PHIs.
- bitmap_clear_bit (m_has_cache_entry, v);
-   }
+   m_cache.set_global_range (name, r);
  }
-  bitmap_ior_into (m_has_cache_entry, phi_set);
  }

  // Return TRUE if relations may be invalidated after crossing edge E.

which I think is not correct - if we have

  # _1 = PHI <..., _2>
  # _2 = PHI <..., _1>

then their effects are supposed to be executed in parallel, that is,
both PHI argument _2 and _1 are supposed to see the "old" version.
The previous code tried to make sure the range of the new _1 doesn't
get seen when processing the argument _1 in the definition of _2.


Yes, the effects should appear in parallel, but ssa_range_in_phi() which 
is the only thing range_defined_in_block does for PHIs, is guaranteed to 
not do any additional cache lookups.  The comment there should be 
adjusted to make this clear:


// Since PHIs are calculated in parallel at the beginning of the
// block, we must be careful to never save anything to the cache here.
// It is the caller's responsibility to adjust the cache.  Also,
// calculating the PHI's range must not trigger additional lookups.

We should instead say:

"we must be careful to never set or access the cache here"...

This was the original intent, but a subtle access to the cache crept in 
here:


  // Try to fold the phi exclusively with global or cached values.
  // This will get things like PHI <5(99), 6(88)>.  We do this by
  // calling range_of_expr with no context.
  unsigned nargs = gimple_phi_num_args (phi);
  Value_Range arg_range (TREE_TYPE (name));
  r.set_undefined ();
  for (size_t i = 0; i < nargs; ++i)
{
  tree arg = gimple_phi_arg_def (phi, i);
  if (range_of_expr (arg_range, arg, /*stmt=*/NULL))

This range_of_expr call will indeed access the cache incorrectly, but 
Andrew fixed that here:


@@ -264,7 +236,7 @@ path_range_query::ssa_range_in_phi (vrange &r, gphi *phi)

   for (size_t i = 0; i < nargs; ++i)
{
  tree arg = gimple_phi_arg_def (phi, i);
- if (range_of_expr (arg_range, arg, /*stmt=*/NULL))
+ if (m_ranger.range_of_expr (arg_range, arg, /*stmt=*/NULL))
r.union_ (arg_range);
  else
{

...thus ensuring that function never uses the cache.  All the lookups 
are done with the global ranger at either the path entry or globally as 
above (with stmt=NULL).


I believe the switch from range_of_expr to m_ranger.range_of_expr is 
safe, as the original code was added to handle silly things like PHI 
<5(99), 6(88)> which shouldn't need path aware ranges.


As you've found out, the update to the cache in this case was not 
obvious at all.  Perhaps it should also be commented:


"It is safe to set the cache here, as range_defined_in_block for PHIs 
(ssa_range_in_phi) is guaranteed not to do any cache lookups."




The new version drops this, possibly resulting in wrong-code.

While I think it's appropriate to sort out compile-time issues like this
during stage4, at least the above makes me think it should be deferred
to next stage1.


I defer to the release managers as to whether this is safe in light of 
my explanation above :).


Aldy



[PATCH] rs6000: Fix vector parity support [PR108699]

2023-02-16 Thread Kewen.Lin via Gcc-patches
Hi,

The failures on the original failed case builtin-bitops-1.c
and the associated test case pr108699.c here show that the
current support of parity vector mode is wrong on Power.
The hardware insns vprtyb[wdq] operate on the least significant
bit of each byte per element, which doesn't match what the RTL
opcode parity needs, but the current implementation wrongly
expands it with them.

This patch is to fix the handling with one more pre-insn
vpopcntb.  It also fixes an oversight having V8HI in VEC_IP,
replaces VParity with VEC_IP, and adjusts the existing
UNSPEC_PARITY to a more meaningful name UNSPEC_PARITYB.

I also noticed that we can make use of vpopcnt[bhwd] on
Power8 (AND with 1 on each element), but it's next stage1
content, I plan to support it with one subsequent patch
and make this patch focus on bug fixing.

Bootstrapped and regtested on powerpc64-linux-gnu P{7,8,9}
and powerpc64le-linux-gnu P10.

Is it ok for trunk?

BR,
Kewen
-
PR target/108699

gcc/ChangeLog:

* config/rs6000/altivec.md (*p9v_parity2): Rename to ...
(p9v_parityb2): ... this.  Adjust pattern with UNSPEC_PARITYB,
and replace mode_iterator VParity with VEC_IP.
(mode_iterator VParity): Remove.
* config/rs6000/rs6000-builtins.def (VPRTYBD): Replace parityv2di2 with
p9v_paritybv2di2.
(VPRTYBW): Replace parityv4si2 with p9v_paritybv4si2.
(VPRTYBQ): Replace parityv1ti2 with p9v_paritybv1ti2.
* config/rs6000/rs6000.cc (rs6000_emit_parity): Replace
gen_paritysi2_cmpb with gen_paritybsi2, and replace gen_paritydi2_cmpb
with gen_paritybdi2
* config/rs6000/rs6000.md (parity2_cmpb): Rename to ...
(parityb2): ... this.
(UNSPEC_PARITY): Rename to ...
(UNSPEC_PARITYB): ... this.
* config/rs6000/vector.md (mode_iterator VEC_IP): Remove V8HI.
(parity2 with VEC_IP): Expand with popcountv16qi2 and the
corresponding vector parity byte p9v_parityb2.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/p9-vparity.c: Add scan-assembler-not for vpopcntb
to distinguish parity byte from parity.
* gcc.target/powerpc/pr108699.c: New test.
---
 gcc/config/rs6000/altivec.md  | 15 +++
 gcc/config/rs6000/rs6000-builtins.def |  6 +--
 gcc/config/rs6000/rs6000.cc   |  4 +-
 gcc/config/rs6000/rs6000.md   |  7 ++--
 gcc/config/rs6000/vector.md   | 14 +--
 gcc/testsuite/gcc.target/powerpc/p9-vparity.c |  1 +
 gcc/testsuite/gcc.target/powerpc/pr108699.c   | 42 +++
 7 files changed, 68 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108699.c

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 30606b8ab21..87053aa69b5 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -215,13 +215,6 @@ (define_mode_iterator VM2 [V4SI
 ;; versus floating point
 (define_mode_attr VS_sxwsp [(V4SI "sxw") (V4SF "sp")])

-;; Specific iterator for parity which does not have a byte/half-word form, but
-;; does have a quad word form
-(define_mode_iterator VParity [V4SI
-  V2DI
-  V1TI
-  TI])
-
 (define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")])
 (define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")])
 (define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)")
@@ -4195,9 +4188,11 @@ (define_insn "*p8v_popcount2"
   [(set_attr "type" "vecsimple")])

 ;; Vector parity
-(define_insn "*p9v_parity2"
-  [(set (match_operand:VParity 0 "register_operand" "=v")
-(parity:VParity (match_operand:VParity 1 "register_operand" "v")))]
+(define_insn "p9v_parityb2"
+  [(set (match_operand:VEC_IP 0 "register_operand" "=v")
+(unspec:VEC_IP
+  [(match_operand:VEC_IP 1 "register_operand" "v")]
+  UNSPEC_PARITYB))]
   "TARGET_P9_VECTOR"
   "vprtyb %0,%1"
   [(set_attr "type" "vecsimple")])
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index e0d9f5adc97..182e3fc5bdc 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2666,13 +2666,13 @@
 VMSUMUDM altivec_vmsumudm {}

   const vsll __builtin_altivec_vprtybd (vsll);
-VPRTYBD parityv2di2 {}
+VPRTYBD p9v_paritybv2di2 {}

   const vsq __builtin_altivec_vprtybq (vsq);
-VPRTYBQ parityv1ti2 {}
+VPRTYBQ p9v_paritybv1ti2 {}

   const vsi __builtin_altivec_vprtybw (vsi);
-VPRTYBW parityv4si2 {}
+VPRTYBW p9v_paritybv4si2 {}

   const vsll __builtin_altivec_vrldmi (vsll, vsll, vsll);
 VRLDMI altivec_vrldmi {}
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 16ca3a31757..bfa1060e55a 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -22973,12 +22973,12 @@ 

[PATCH] tree-ssa-dse: Fix up handling of lhs of internal calls [PR108657]

2023-02-16 Thread Jakub Jelinek via Gcc-patches
Hi!

The r13-1778 PR106378 tree-ssa-dse change didn't just add special support
for IFN_LEN_STORE and IFN_MASK_STORE internal function calls as I believe
was intended, but given that the function was
if (is builtin) { ... }
else if (lhs present and non-SSA_NAME) { ... }
return false;
and it added a new
else if (is internal builtin) { ... }
in between the two, the last if used to be done before on all stmts
with non-SSA_NAME lhs except for calls to builtin functions, but now
is no longer done for calls to internal functions either.  In the testcase
the important internal function is .DEFERRED_INIT, which often has
non-SSA_NAME lhs, and the change resulted in them no longer being DSEd,
so a block with nothing in it left but var = .DEFERRED_INIT () and
var = {CLOBBER} was unrolled several times.

The following patch does the lhs handling for all stmts with non-SSA_NAME lhs
unless initialize_ao_ref_for_dse handled those specially already and
returned (which is the case for various mem* builtins which don't have
such lhs, for some cases of calloc which again is fine, and since r13-1778
also for IFN_LEN_STORE call and some IFN_MASK_STORE calls).
As IFN_MASK_STORE doesn't have a lhs, the break for the !may_def_ok case
doesn't seem to change anything, and because we've handled internal fns
that way in the past, I think it is the right thing to do that again.
That said, if it is inappropriate for some new ifn, I guess it could
be added to the switch and just return false; for it instead of break;.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

That said, while this patch fixes the regression by allowing DSE of
IFN_DEFERRED_INIT again, I think we probably have some latent bug in FRE
where without this patch it seems to be fre5 that sees one unconditional
c = 1; store, one conditional c = 0; store and in the last bb before return
another c = 1; store and decides that the last store is redundant, which is
not the case, the first two stores are redundant or if they can't be
removed, none of them is.  Richard, could you please have a look?

2023-02-15  Jakub Jelinek  

PR tree-optimization/108657
* tree-ssa-dse.cc (initialize_ao_ref_for_dse): If lhs of stmt
exists and is not a SSA_NAME, call ao_ref_init even if the stmt
is a call to internal or builtin function.

* gcc.dg/pr108657.c: New test.

--- gcc/tree-ssa-dse.cc.jj  2023-01-11 10:29:08.651161134 +0100
+++ gcc/tree-ssa-dse.cc 2023-02-15 20:03:33.647684713 +0100
@@ -177,7 +177,7 @@ initialize_ao_ref_for_dse (gimple *stmt,
default:;
}
 }
-  else if (tree lhs = gimple_get_lhs (stmt))
+  if (tree lhs = gimple_get_lhs (stmt))
 {
   if (TREE_CODE (lhs) != SSA_NAME)
{
--- gcc/testsuite/gcc.dg/pr108657.c.jj  2023-02-15 20:11:22.038804168 +0100
+++ gcc/testsuite/gcc.dg/pr108657.c 2023-02-15 20:10:37.992451199 +0100
@@ -0,0 +1,31 @@
+/* PR tree-optimization/108657 */
+/* { dg-do run } */
+/* { dg-options "-O3 -ftrivial-auto-var-init=zero" } */
+
+int c, e, f;
+static int *d = &c;
+
+__attribute__((noipa)) void
+foo (void)
+{
+  if (c != 1)
+__builtin_abort ();
+}
+
+int
+main ()
+{
+  for (c = 1; c >= 0; c--)
+{
+  e = 0;
+  for (int j = 0; j <= 2; j++)
+   {
+ short k[1];
+ if (e)
+   break;
+ e ^= f;
+   }
+}
+  *d = 1;
+  foo ();
+}

Jakub



Re: [PATCH] reassoc: Fix up (ab) handling in eliminate_redundant_comparison [PR108783]

2023-02-16 Thread Richard Biener via Gcc-patches
On Thu, 16 Feb 2023, Jakub Jelinek wrote:

> Hi!
> 
> The following testcase ICEs because eliminate_redundant_comparison sees
> redundant comparisons in &&/|| where the comparison has (ab) SSA_NAME,
> maybe_fold_{and,or}_comparisons optimizes them into a single comparison
> and build_and_add_sum emits a new comparison close to the definition
> operands, which in this case is before a returns_twice call (which is
> invalid).  Generally reassoc just punts on (ab) SSA_NAMEs, declares them
> non-reassociable etc., so the second half of this patch does that.
> 
> Though we can do better in this case; the function has special code
> when maybe_fold_{and,or}_comparisons returns INTEGER_CST (false/true)
> or when what it returns is the same as curr->op (the first of the
> comparisons we are considering) - in that case we just remove the
> second one and keep the first one.  The reason it doesn't match is that
> curr->op is a SSA_NAME whose SSA_NAME_DEF_STMT is checked to be a
> comparison, in this case _42 = a_1(ab) != 0 and the other comparison
> is also like that.  maybe_fold_{and,or}_comparisons looks through the
> definitions though and so returns a_1(ab) != 0 as tree.
> So the first part of the patch checks whether that returned comparison
> isn't the same as the curr->op comparison and if yes, it just overrides
> t back to curr->op so that its SSA_NAME is reused.  In that case we can
> handle even (ab) in {,new}op{1,2} because we don't create a new comparison
> of that, just keep using the existing one.  And t can't be (ab) because
> otherwise it wouldn't be considered a reassociable operand.
> 
> The (ab) checks are needed say when we have a_1(ab) == 42 || a_1(ab) > 42
> kind of comparisons where maybe_fold_{and,or}_comparisons returns a new
> comparison not existing in the IL yet.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2023-02-15  Jakub Jelinek  
> 
>   PR tree-optimization/108783
>   * tree-ssa-reassoc.cc (eliminate_redundant_comparison): If lcode
>   is equal to TREE_CODE (t), op1 to newop1 and op2 to newop2, set
>   t to curr->op.  Otherwise, punt if either newop1 or newop2 are
>   SSA_NAME_OCCURS_IN_ABNORMAL_PHI SSA_NAMEs.
> 
>   * gcc.c-torture/compile/pr108783.c: New test.
> 
> --- gcc/tree-ssa-reassoc.cc.jj2023-01-12 21:04:08.726238049 +0100
> +++ gcc/tree-ssa-reassoc.cc   2023-02-15 13:28:04.987278895 +0100
> @@ -2272,6 +2272,15 @@ eliminate_redundant_comparison (enum tre
> STRIP_USELESS_TYPE_CONVERSION (newop2);
> if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
>   continue;
> +   if (lcode == TREE_CODE (t)
> +   && operand_equal_p (op1, newop1, 0)
> +   && operand_equal_p (op2, newop2, 0))
> + t = curr->op;
> +   else if ((TREE_CODE (newop1) == SSA_NAME
> + && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1))
> +|| (TREE_CODE (newop2) == SSA_NAME
> +&& SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2)))
> + continue;
>   }
>  
>if (dump_file && (dump_flags & TDF_DETAILS))
> --- gcc/testsuite/gcc.c-torture/compile/pr108783.c.jj 2023-02-15 
> 12:42:46.244340524 +0100
> +++ gcc/testsuite/gcc.c-torture/compile/pr108783.c2023-02-15 
> 13:24:47.515187118 +0100
> @@ -0,0 +1,42 @@
> +/* PR tree-optimization/108783 */
> +
> +__attribute__((returns_twice)) int baz (int, int);
> +
> +int
> +bar (int x)
> +{
> +  return x;
> +}
> +
> +int
> +foo (int x, int y)
> +{
> +  int a;
> +
> +  a = bar (x);
> +  baz (x, y);
> +
> +  return y && a && a;
> +}
> +
> +int
> +qux (int x, int y)
> +{
> +  int a;
> +
> +  a = bar (x);
> +  baz (x, y);
> +
> +  return y && a != 42 && a >= 42;
> +}
> +
> +int
> +corge (int x, int y)
> +{
> +  int a;
> +
> +  a = bar (x);
> +  baz (x, y);
> +
> +  return y || a == 42 || a > 42;
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)


[PATCH] reassoc: Fix up (ab) handling in eliminate_redundant_comparison [PR108783]

2023-02-16 Thread Jakub Jelinek via Gcc-patches
Hi!

The following testcase ICEs because eliminate_redundant_comparison sees
redundant comparisons in &&/|| where the comparison has (ab) SSA_NAME,
maybe_fold_{and,or}_comparisons optimizes them into a single comparison
and build_and_add_sum emits a new comparison close to the definition
operands, which in this case is before a returns_twice call (which is
invalid).  Generally reassoc just punts on (ab) SSA_NAMEs, declares them
non-reassociable etc., so the second half of this patch does that.

Though we can do better in this case; the function has special code
when maybe_fold_{and,or}_comparisons returns INTEGER_CST (false/true)
or when what it returns is the same as curr->op (the first of the
comparisons we are considering) - in that case we just remove the
second one and keep the first one.  The reason it doesn't match is that
curr->op is a SSA_NAME whose SSA_NAME_DEF_STMT is checked to be a
comparison, in this case _42 = a_1(ab) != 0 and the other comparison
is also like that.  maybe_fold_{and,or}_comparisons looks through the
definitions though and so returns a_1(ab) != 0 as tree.
So the first part of the patch checks whether that returned comparison
isn't the same as the curr->op comparison and if yes, it just overrides
t back to curr->op so that its SSA_NAME is reused.  In that case we can
handle even (ab) in {,new}op{1,2} because we don't create a new comparison
of that, just keep using the existing one.  And t can't be (ab) because
otherwise it wouldn't be considered a reassociable operand.

The (ab) checks are needed say when we have a_1(ab) == 42 || a_1(ab) > 42
kind of comparisons where maybe_fold_{and,or}_comparisons returns a new
comparison not existing in the IL yet.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-02-15  Jakub Jelinek  

PR tree-optimization/108783
* tree-ssa-reassoc.cc (eliminate_redundant_comparison): If lcode
is equal to TREE_CODE (t), op1 to newop1 and op2 to newop2, set
t to curr->op.  Otherwise, punt if either newop1 or newop2 are
SSA_NAME_OCCURS_IN_ABNORMAL_PHI SSA_NAMEs.

* gcc.c-torture/compile/pr108783.c: New test.

--- gcc/tree-ssa-reassoc.cc.jj  2023-01-12 21:04:08.726238049 +0100
+++ gcc/tree-ssa-reassoc.cc 2023-02-15 13:28:04.987278895 +0100
@@ -2272,6 +2272,15 @@ eliminate_redundant_comparison (enum tre
  STRIP_USELESS_TYPE_CONVERSION (newop2);
  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
continue;
+ if (lcode == TREE_CODE (t)
+ && operand_equal_p (op1, newop1, 0)
+ && operand_equal_p (op2, newop2, 0))
+   t = curr->op;
+ else if ((TREE_CODE (newop1) == SSA_NAME
+   && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1))
+  || (TREE_CODE (newop2) == SSA_NAME
+  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2)))
+   continue;
}
 
   if (dump_file && (dump_flags & TDF_DETAILS))
--- gcc/testsuite/gcc.c-torture/compile/pr108783.c.jj   2023-02-15 
12:42:46.244340524 +0100
+++ gcc/testsuite/gcc.c-torture/compile/pr108783.c  2023-02-15 
13:24:47.515187118 +0100
@@ -0,0 +1,42 @@
+/* PR tree-optimization/108783 */
+
+__attribute__((returns_twice)) int baz (int, int);
+
+int
+bar (int x)
+{
+  return x;
+}
+
+int
+foo (int x, int y)
+{
+  int a;
+
+  a = bar (x);
+  baz (x, y);
+
+  return y && a && a;
+}
+
+int
+qux (int x, int y)
+{
+  int a;
+
+  a = bar (x);
+  baz (x, y);
+
+  return y && a != 42 && a >= 42;
+}
+
+int
+corge (int x, int y)
+{
+  int a;
+
+  a = bar (x);
+  baz (x, y);
+
+  return y || a == 42 || a > 42;
+}

Jakub



Re: [PATCH] Fix PR target/90458

2023-02-16 Thread Eric Botcazou via Gcc-patches
> This fixes dg.exp/stack-check-2.c, -7, 8, and -16.c, which is great!

Try the attached patch.

-- 
Eric Botcazou
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 227e3004077..d4f036a3f1e 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -11655,6 +11655,11 @@ proc check_effective_target_autoincdec { } {
 # 
 proc check_effective_target_supports_stack_clash_protection { } {
 
+# Stack probing is done unconditionally out-of-line on Windows
+if { [istarget *-*-cygwin*] || [istarget *-*-mingw*] } {
+	return 0
+}
+
 if { [istarget x86_64-*-*] || [istarget i?86-*-*] 
 	  || [istarget powerpc*-*-*] || [istarget rs6000*-*-*]
 	  || [istarget aarch64*-**] || [istarget s390*-*-*]