[committed] d: Merge upstream dmd, druntime 5f7552bb28, phobos 67a47cf39.

2023-03-16 Thread Iain Buclaw via Gcc-patches
Hi,

This patch merges the D front-end and run-time library with upstream dmd
5f7552bb28, and standard library with phobos 67a47cf39.

Synchronizing the latest bug fixes in the upcoming v2.103.0 release.

D front-end changes:

- Import dmd v2.103.0-rc.1.

D runtime changes:

- Import druntime v2.103.0-rc.1.

Phobos changes:

- Import phobos v2.103.0-rc.1.

gcc/d/ChangeLog:

* dmd/MERGE: Merge upstream dmd 5f7552bb28.
* dmd/VERSION: Bump version to v2.103.0-rc.1.

Bootstrapped and regression tested on x86_64-linux-gnu/-m32/-mx32,
committed to mainline.

Regards,
Iain.

---
libphobos/ChangeLog:

* libdruntime/MERGE: Merge upstream druntime 5f7552bb28.
* src/MERGE: Merge upstream phobos 67a47cf39.
---
 gcc/d/dmd/MERGE   |   2 +-
 gcc/d/dmd/VERSION |   2 +-
 gcc/d/dmd/dinterpret.d|  12 ++-
 gcc/d/dmd/dsymbol.d   |  21 +++-
 gcc/d/dmd/expressionsem.d | 102 +-
 gcc/d/dmd/typesem.d   |   1 +
 gcc/d/dmd/typinf.d|   5 +-
 gcc/testsuite/gdc.test/compilable/test16213.d |   8 ++
 gcc/testsuite/gdc.test/compilable/test17351.d |   9 ++
 gcc/testsuite/gdc.test/compilable/test19295.d |  10 ++
 .../gdc.test/compilable/testcorrectthis.d |  37 +++
 .../gdc.test/fail_compilation/fail23760.d |  27 +
 .../gdc.test/fail_compilation/fail61.d|   2 +-
 .../gdc.test/fail_compilation/fail_circular.d |  15 +--
 .../gdc.test/fail_compilation/ice19295.d  |  18 
 .../gdc.test/fail_compilation/ice23781.d  |  10 ++
 .../gdc.test/fail_compilation/ice9439.d   |   4 +-
 libphobos/libdruntime/MERGE   |   2 +-
 libphobos/src/MERGE   |   2 +-
 libphobos/src/std/math/exponential.d  |  30 --
 libphobos/src/std/traits.d|  27 -
 21 files changed, 295 insertions(+), 51 deletions(-)
 create mode 100644 gcc/testsuite/gdc.test/compilable/test16213.d
 create mode 100644 gcc/testsuite/gdc.test/compilable/test19295.d
 create mode 100644 gcc/testsuite/gdc.test/compilable/testcorrectthis.d
 create mode 100644 gcc/testsuite/gdc.test/fail_compilation/fail23760.d
 delete mode 100644 gcc/testsuite/gdc.test/fail_compilation/ice19295.d
 create mode 100644 gcc/testsuite/gdc.test/fail_compilation/ice23781.d

diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE
index 269eebfc483..986925e8bdc 100644
--- a/gcc/d/dmd/MERGE
+++ b/gcc/d/dmd/MERGE
@@ -1,4 +1,4 @@
-4ca4140e584c055a8a9bc727e56a97ebcecd61e0
+5f7552bb2829b75d5e36cc767a476e1ab35147b7
 
 The first line of this file holds the git revision number of the last
 merge done from the dlang/dmd repository.
diff --git a/gcc/d/dmd/VERSION b/gcc/d/dmd/VERSION
index 8b24f92dab7..da496a2ceeb 100644
--- a/gcc/d/dmd/VERSION
+++ b/gcc/d/dmd/VERSION
@@ -1 +1 @@
-v2.103.0-beta.1
+v2.103.0-rc.1
diff --git a/gcc/d/dmd/dinterpret.d b/gcc/d/dmd/dinterpret.d
index 9073b0db2f8..e6ef704be86 100644
--- a/gcc/d/dmd/dinterpret.d
+++ b/gcc/d/dmd/dinterpret.d
@@ -2036,7 +2036,7 @@ public:
 }
 auto er = interpret(e.e1, istate, CTFEGoal.LValue);
 if (auto ve = er.isVarExp())
-if (ve.var == istate.fd.vthis)
+if (istate && ve.var == istate.fd.vthis)
 er = interpret(er, istate);
 
 if (exceptionOrCant(er))
@@ -2117,6 +2117,16 @@ public:
 return CTFEExp.cantexp;
 assert(e.type);
 
+// There's a terrible hack in `dmd.dsymbolsem` that special 
case
+// a struct with all zeros to an 
`ExpInitializer(BlitExp(IntegerExp(0)))`
+// There's matching code for it in e2ir (toElem's 
visitAssignExp),
+// so we need the same hack here.
+// This does not trigger for global as they get a normal 
initializer.
+if (auto ts = e.type.isTypeStruct())
+if (auto ae = e.isBlitExp())
+if (ae.e2.op == EXP.int64)
+e = ts.defaultInitLiteral(loc);
+
 if (e.op == EXP.construct || e.op == EXP.blit)
 {
 AssignExp ae = cast(AssignExp)e;
diff --git a/gcc/d/dmd/dsymbol.d b/gcc/d/dmd/dsymbol.d
index aa478f2fea2..e7ce93ee067 100644
--- a/gcc/d/dmd/dsymbol.d
+++ b/gcc/d/dmd/dsymbol.d
@@ -2162,10 +2162,23 @@ extern (C++) final class ArrayScopeSymbol : ScopeDsymbol
  * or a variable (in which case an expression is created in
  * toir.c).
  */
-auto e = new VoidInitializer(Loc.initial);
-e.type = Type.tsize_t;
-v = new VarDeclaration(loc, Type.tsize_t, Id.dollar, e);
-v.storage_class |= STC.temp | STC.ctfe; // it's never a true 
static variable
+
+// 

[committed] Docs: Fix formatting issues in BPF built-ins documentation

2023-03-16 Thread Sandra Loosemore
This section of the GCC manual had some issues with lines in the example 
overflowing into the right margin of the PDF-format document, but as I 
looked at it more closely I also saw that it was full of missing or 
incorrect Texinfo markup, too.  I've cleaned it up thusly.


-Sandracommit 7ffbc74c8c202a16a5e987134f03c2359c531f0e
Author: Sandra Loosemore 
Date:   Thu Mar 16 21:07:18 2023 +

Docs: Fix formatting issues in BPF built-ins documentation.

gcc/ChangeLog:
* doc/extend.texi (BPF Built-in Functions): Fix numerous markup
issues.  Add more line breaks to example so it doesn't overflow
the margins.

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 39d45df8d89..8ecd9611201 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -15715,23 +15715,23 @@ void __builtin_bfin_ssync (void);
 
 The following built-in functions are available for eBPF targets.
 
-@deftypefn {Built-in Function} unsigned long long __builtin_bpf_load_byte (unsigned long long @var{offset})
+@deftypefn {Built-in Function} {unsigned long long} __builtin_bpf_load_byte (unsigned long long @var{offset})
 Load a byte from the @code{struct sk_buff} packet data pointed by the register @code{%r6} and return it.
 @end deftypefn
 
-@deftypefn {Built-in Function} unsigned long long __builtin_bpf_load_half (unsigned long long @var{offset})
-Load 16-bits from the @code{struct sk_buff} packet data pointed by the register @code{%r6} and return it.
+@deftypefn {Built-in Function} {unsigned long long} __builtin_bpf_load_half (unsigned long long @var{offset})
+Load 16 bits from the @code{struct sk_buff} packet data pointed by the register @code{%r6} and return it.
 @end deftypefn
 
-@deftypefn {Built-in Function} unsigned long long __builtin_bpf_load_word (unsigned long long @var{offset})
-Load 32-bits from the @code{struct sk_buff} packet data pointed by the register @code{%r6} and return it.
+@deftypefn {Built-in Function} {unsigned long long} __builtin_bpf_load_word (unsigned long long @var{offset})
+Load 32 bits from the @code{struct sk_buff} packet data pointed by the register @code{%r6} and return it.
 @end deftypefn
 
-@deftypefn {Built-in Function} void * __builtin_preserve_access_index (@var{expr})
+@deftypefn {Built-in Function} {void *} __builtin_preserve_access_index (@var{expr})
 BPF Compile Once-Run Everywhere (CO-RE) support. Instruct GCC to generate CO-RE relocation records for any accesses to aggregate data structures (struct, union, array types) in @var{expr}. This builtin is otherwise transparent, the return value is whatever @var{expr} evaluates to. It is also overloaded: @var{expr} may be of any type (not necessarily a pointer), the return type is the same. Has no effect if @code{-mco-re} is not in effect (either specified or implied).
 @end deftypefn
 
-@deftypefn {Built-in Function} unsigned int __builtin_preserve_field_info (@var{expr}, unsigned int @var{kind})
+@deftypefn {Built-in Function} {unsigned int} __builtin_preserve_field_info (@var{expr}, unsigned int @var{kind})
 BPF Compile Once-Run Everywhere (CO-RE) support. This builtin is used to
 extract information to aid in struct/union relocations.  @var{expr} is
 an access to a field of a struct or union. Depending on @var{kind}, different
@@ -15739,15 +15739,15 @@ information is returned to the program. A CO-RE relocation for the access in
 @var{expr} with kind @var{kind} is recorded if @code{-mco-re} is in effect.
 
 The following values are supported for @var{kind}:
-@table @var
+@table @code
 @item FIELD_BYTE_OFFSET = 0
 The returned value is the offset, in bytes, of the field from the
-beginning of the containing structure. For bitfields, the byte offset
+beginning of the containing structure. For bit-fields, this is the byte offset
 of the containing word.
 
 @item FIELD_BYTE_SIZE = 1
-The returned value is the size, in bytes, of the field. For bitfields,
-the size in bytes of the containing word.
+The returned value is the size, in bytes, of the field. For bit-fields,
+this is the size in bytes of the containing word.
 
 @item FIELD_EXISTENCE = 2
 The returned value is 1 if the field exists, 0 otherwise. Always 1 at
@@ -15759,25 +15759,26 @@ The returned value is 1 if the field is signed, 0 otherwise.
 @item FIELD_LSHIFT_U64 = 4
 @itemx FIELD_RSHIFT_U64 = 5
 The returned value is the number of bits of left- or right-shifting
-respectively needed in order to recover the original value of the field,
-after it has been loaded by a read of FIELD_BYTE_SIZE bytes into an
-unsigned 64-bit value. Primarily useful for reading bitfield values
-from structures which may change between kernel versions.
+(respectively) needed in order to recover the original value of the field,
+after it has been loaded by a read of @code{FIELD_BYTE_SIZE} bytes into an
+unsigned 64-bit value. Primarily useful for reading bit-field values
+from structures that may change between kernel versions.
 
 @end table
 
 Note that the 

[committed] Docs: Fix some too-long lines

2023-03-16 Thread Sandra Loosemore
I noticed when looking at other things last week that there were a whole 
bunch of too-long lines overflowing into the right margin in the PDF 
version of the GCC manual.  This patch fixes some of them.  There are 
still a whole bunch of especially bad ones in the diagnostic message 
formatting examples that I haven't found a satisfactory way to fix yet 
:-( but at least this patch is an incremental improvement.


-Sandracommit 35a80d19b69df59f52800f34bac1df3cb0293735
Author: Sandra Loosemore 
Date:   Thu Mar 16 21:05:53 2023 +

Docs: Fix some too-long lines in Texinfo manual.

gcc/ChangeLog:
* doc/extend.texi (Common Function Attributes) : Fix bad
line breaks in examples.
: Fix bad line breaks in running text, also copy-edit
for consistency.
(Extended Asm) : Fix @multitable width.
* doc/invoke.texi (Option Summary) : Fix misplaced
@gol.
(C++ Dialect Options) <-fcontracts>: Add line break in example.
<-Wctad-maybe-unsupported>: Likewise.
<-Winvalid-constexpr>: Likewise.
(Warning Options) <-Wdangling-pointer>: Likewise.
<-Winterference-size>: Likewise.
<-Wvla-parameter>: Likewise.
(Static Analyzer Options): Fix bad line breaks in running text,
plus add some missing markup.
(Optimize Options) : Fix more bad line
breaks in running text.

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index fd3745c5608..39d45df8d89 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -2611,8 +2611,11 @@ the @code{puts} function, or the second and third arguments to
 the @code{memcpy} function.
 
 @smallexample
-__attribute__ ((access (read_only, 1))) int puts (const char*);
-__attribute__ ((access (read_only, 2, 3))) void* memcpy (void*, const void*, size_t);
+__attribute__ ((access (read_only, 1)))
+int puts (const char*);
+
+__attribute__ ((access (read_only, 2, 3)))
+void* memcpy (void*, const void*, size_t);
 @end smallexample
 
 The @code{read_write} access mode applies to arguments of pointer types
@@ -2624,7 +2627,8 @@ of the use of the @code{read_write} access mode is the first argument to
 the @code{strcat} function.
 
 @smallexample
-__attribute__ ((access (read_write, 1), access (read_only, 2))) char* strcat (char*, const char*);
+__attribute__ ((access (read_write, 1), access (read_only, 2)))
+char* strcat (char*, const char*);
 @end smallexample
 
 The @code{write_only} access mode applies to arguments of pointer types
@@ -2636,8 +2640,11 @@ the @code{strcpy} function, or the first two arguments to the @code{fgets}
 function.
 
 @smallexample
-__attribute__ ((access (write_only, 1), access (read_only, 2))) char* strcpy (char*, const char*);
-__attribute__ ((access (write_only, 1, 2), access (read_write, 3))) int fgets (char*, int, FILE*);
+__attribute__ ((access (write_only, 1), access (read_only, 2)))
+char* strcpy (char*, const char*);
+
+__attribute__ ((access (write_only, 1, 2), access (read_write, 3)))
+int fgets (char*, int, FILE*);
 @end smallexample
 
 The access mode @code{none} specifies that the pointer to which it applies
@@ -3444,22 +3451,23 @@ deallocation pairs marked with the @code{malloc}.  In particular:
 @itemize @bullet
 
 @item
-The analyzer will emit a @option{-Wanalyzer-mismatching-deallocation}
+The analyzer emits a @option{-Wanalyzer-mismatching-deallocation}
 diagnostic if there is an execution path in which the result of an
 allocation call is passed to a different deallocator.
 
 @item
-The analyzer will emit a @option{-Wanalyzer-double-free}
+The analyzer emits a @option{-Wanalyzer-double-free}
 diagnostic if there is an execution path in which a value is passed
 more than once to a deallocation call.
 
 @item
-The analyzer will consider the possibility that an allocation function
-could fail and return NULL.  It will emit
-@option{-Wanalyzer-possible-null-dereference} and
-@option{-Wanalyzer-possible-null-argument} diagnostics if there are
+The analyzer considers the possibility that an allocation function
+could fail and return null.  If there are
 execution paths in which an unchecked result of an allocation call is
-dereferenced or passed to a function requiring a non-null argument.
+dereferenced or passed to a function requiring a non-null argument,
+it emits
+@option{-Wanalyzer-possible-null-dereference} and
+@option{-Wanalyzer-possible-null-argument} diagnostics.
 If the allocator always returns non-null, use
 @code{__attribute__ ((returns_nonnull))} to suppress these warnings.
 For example:
@@ -3469,26 +3477,26 @@ char *xstrdup (const char *)
 @end smallexample
 
 @item
-The analyzer will emit a @option{-Wanalyzer-use-after-free}
+The analyzer emits a @option{-Wanalyzer-use-after-free}
 diagnostic if there is an execution path in which the memory passed
 by pointer to a deallocation call is used after the deallocation.
 
 @item
-The analyzer 

[PATCH V5] Use reg mode to move sub blocks for parameters and returns

2023-03-16 Thread Jiufu Guo via Gcc-patches
Hi,

When assigning a parameter to a variable, or assigning a variable to
return value with struct type, and the parameter/return is passed
through registers.
For this kind of case, it would be better to use the nature mode of
the registers to move the content for the assignment.

As the example code (like code in PR65421):

typedef struct SA {double a[3];} A;
A ret_arg_pt (A *a) {return *a;} // on ppc64le, expect only 3 lfd(s)
A ret_arg (A a) {return a;} // just empty fun body
void st_arg (A a, A *p) {*p = a;} //only 3 stfd(s)

Comparing with previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609394.html
This version refine code to eliminated reductant code in  the sub
routine "move_sub_blocks".

Bootstrap and regtest pass on ppc64{,le}.
Is this ok for trunk?

BR,
Jeff (Jiufu)

PR target/65421

gcc/ChangeLog:

* cfgexpand.cc (expand_used_vars): Update to mark DECL_USEDBY_RETURN_P
for returns.
* expr.cc (move_sub_blocks): New function.
(expand_assignment): Update assignment code about returns/parameters.
* function.cc (assign_parm_setup_block): Update to mark
DECL_REGS_TO_STACK_P for parameter.
* tree-core.h (struct tree_decl_common): Add comment.
* tree.h (DECL_USEDBY_RETURN_P): New define.
(DECL_REGS_TO_STACK_P): New define.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr65421-1.c: New test.
* gcc.target/powerpc/pr65421.c: New test.

---
 gcc/cfgexpand.cc | 14 +
 gcc/expr.cc  | 61 
 gcc/function.cc  |  3 +
 gcc/tree-core.h  |  4 +-
 gcc/tree.h   |  9 +++
 gcc/testsuite/gcc.target/powerpc/pr65421-1.c |  6 ++
 gcc/testsuite/gcc.target/powerpc/pr65421.c   | 33 +++
 7 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421.c

diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index 1a1b26b1c6c..eda4d85d140 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -2158,6 +2158,20 @@ expand_used_vars (bitmap forced_stack_vars)
 frame_phase = off ? align - off : 0;
   }
 
+  /* Collect VARs on returns.  */
+  if (DECL_RESULT (current_function_decl))
+{
+  edge_iterator ei;
+  edge e;
+  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
+   if (greturn *ret = safe_dyn_cast (last_stmt (e->src)))
+ {
+   tree val = gimple_return_retval (ret);
+   if (val && VAR_P (val))
+ DECL_USEDBY_RETURN_P (val) = 1;
+ }
+}
+
   /* Set TREE_USED on all variables in the local_decls.  */
   FOR_EACH_LOCAL_DECL (cfun, i, var)
 TREE_USED (var) = 1;
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 15be1c8db99..97a7be9542e 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -5559,6 +5559,41 @@ mem_ref_refers_to_non_mem_p (tree ref)
   return non_mem_decl_p (base);
 }
 
+/* Sub routine of expand_assignment, invoked when assigning from a
+   parameter or assigning to a return val on struct type which may
+   be passed through registers.  The mode of register is used to
+   move the content for the assignment.
+
+   This routine generates code for expression FROM which is BLKmode,
+   and move the generated content to TO_RTX by su-blocks in SUB_MODE.  */
+
+static void
+move_sub_blocks (rtx to_rtx, tree from, machine_mode sub_mode)
+{
+  gcc_assert (MEM_P (to_rtx));
+
+  HOST_WIDE_INT size = MEM_SIZE (to_rtx).to_constant ();
+  HOST_WIDE_INT sub_size = GET_MODE_SIZE (sub_mode).to_constant ();
+  HOST_WIDE_INT len = size / sub_size;
+  gcc_assert (size % sub_size == 0);
+
+  push_temp_slots ();
+
+  rtx from_rtx = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), 
EXPAND_NORMAL);
+  for (int i = 0; i < len; i++)
+{
+  rtx temp = gen_reg_rtx (sub_mode);
+  rtx src = adjust_address (from_rtx, sub_mode, sub_size * i);
+  rtx dest = adjust_address (to_rtx, sub_mode, sub_size * i);
+  emit_move_insn (temp, src);
+  emit_move_insn (dest, temp);
+}
+
+  preserve_temp_slots (to_rtx);
+  pop_temp_slots ();
+  return;
+}
+
 /* Expand an assignment that stores the value of FROM into TO.  If NONTEMPORAL
is true, try generating a nontemporal store.  */
 
@@ -6045,6 +6080,32 @@ expand_assignment (tree to, tree from, bool nontemporal)
   return;
 }
 
+  /* If it is assigning from a struct param which may be passed via registers,
+ it would be better to use the register's mode to move sub-blocks for the
+ assignment.  */
+  if (TREE_CODE (from) == PARM_DECL && mode == BLKmode
+  && DECL_REGS_TO_STACK_P (from))
+{
+  rtx parm = DECL_INCOMING_RTL (from);
+  machine_mode sub_mode
+   = REG_P (parm) ? word_mode : GET_MODE (XEXP (XVECEXP (parm, 0, 0), 0));
+  move_sub_blocks (to_rtx, from, sub_mode);
+  

Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Surya Kumari Jangala via Gcc-patches
The issue of suboptimal code exists even for integer return value and not just 
bool return value. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784#c9 
So the patch would need to take care of integer return values too.

On 16/03/23 10:50 am, Ajit Agarwal via Gcc-patches wrote:
> Hello All:
> 
> 
> This patch eliminates unnecessary zero extension instruction from power 
> generated assembly.
> Bootstrapped and regtested on powerpc64-linux-gnu.
> 
> Thanks & Regards
> Ajit
> 
>   rs6000: suboptimal code for returning bool value on target ppc.
> 
>   New pass to eliminate unnecessary zero extension. This pass
>   is registered after cse rtl pass.
> 
>   2023-03-16  Ajit Kumar Agarwal  
> 
> gcc/ChangeLog:
> 
>   * config/rs6000/rs6000-passes.def: Registered zero elimination
>   pass.
>   * config/rs6000/rs6000-zext-elim.cc: Add new pass.
>   * config.gcc: Add new executable.
>   * config/rs6000/rs6000-protos.h: Add new prototype for zero
>   elimination pass.
>   * config/rs6000/rs6000.cc: Add new prototype for zero
>   elimination pass.
>   * config/rs6000/t-rs6000: Add new rule.
>   * expr.cc: Modified gcc assert.
>   * explow.cc: Modified gcc assert.
>   * optabs.cc: Modified gcc assert.
> ---
>  gcc/config.gcc|   4 +-
>  gcc/config/rs6000/rs6000-passes.def   |   2 +
>  gcc/config/rs6000/rs6000-protos.h |   1 +
>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++
>  gcc/config/rs6000/rs6000.cc   |   2 +
>  gcc/config/rs6000/t-rs6000|   5 +
>  gcc/explow.cc |   3 +-
>  gcc/expr.cc   |   4 +-
>  gcc/optabs.cc |   3 +-
>  9 files changed, 379 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
> 
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index da3a6d3ba1f..e8ac9d882f0 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -503,7 +503,7 @@ or1k*-*-*)
>   ;;
>  powerpc*-*-*)
>   cpu_type=rs6000
> - extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> + extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
> rs6000-logue.o"
>   extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>   extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
>   extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
> @@ -538,7 +538,7 @@ riscv*)
>   ;;
>  rs6000*-*-*)
>   extra_options="${extra_options} g.opt fused-madd.opt 
> rs6000/rs6000-tables.opt"
> - extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> + extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
> rs6000-logue.o"
>   extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>   target_gtfiles="$target_gtfiles 
> \$(srcdir)/config/rs6000/rs6000-logue.cc 
> \$(srcdir)/config/rs6000/rs6000-call.cc"
>   target_gtfiles="$target_gtfiles 
> \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
> diff --git a/gcc/config/rs6000/rs6000-passes.def 
> b/gcc/config/rs6000/rs6000-passes.def
> index ca899d5f7af..d7500feddf1 100644
> --- a/gcc/config/rs6000/rs6000-passes.def
> +++ b/gcc/config/rs6000/rs6000-passes.def
> @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
>   The power8 does not have instructions that automaticaly do the byte 
> swaps
>   for loads and stores.  */
>INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
> +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
> +
>  
>/* Pass to do the PCREL_OPT optimization that combines the load of an
>   external symbol's address along with a single load or store using that
> diff --git a/gcc/config/rs6000/rs6000-protos.h 
> b/gcc/config/rs6000/rs6000-protos.h
> index 1a4fc1df668..f6cf2d673d4 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -340,6 +340,7 @@ namespace gcc { class context; }
>  class rtl_opt_pass;
>  
>  extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
> +extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
>  extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
>  extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
>  extern bool rs6000_quadword_masked_address_p (const_rtx exp);
> diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc 
> b/gcc/config/rs6000/rs6000-zext-elim.cc
> new file mode 100644
> index 000..777c7a5a387
> --- /dev/null
> +++ b/gcc/config/rs6000/rs6000-zext-elim.cc
> @@ -0,0 +1,361 @@
> +/* Subroutine to eliminate redundant zero extend for power architecture.
> +   Copyright (C) 1991-2023 Free Software Foundation, Inc.
> +
> +   This file is part of GCC.
> +
> +   GCC is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published
> +   by the Free Software Foundation; either version 3, or (at your
> +   option) any later version.

[PATCH V3] extract DF/SF/SI/HI/QI subreg from parameter word on stack

2023-03-16 Thread Jiufu Guo via Gcc-patches
Hi,

This patch is fixing an issue about parameter accessing if the
parameter is struct type and passed through integer registers, and
there is floating member is accessed. Like below code:

typedef struct DF {double a[4]; long l; } DF;
double foo_df (DF arg){return arg.a[3];}

On ppc64le, with trunk gcc, "std 6,-24(1) ; lfd 1,-24(1)" is
generated.  While instruction "mtvsrd 1, 6" would be enough for
this case.

This patch updates the behavior when loading floating members of a
parameter: if that floating member is stored via integer register,
then loading it as integer mode first, and converting it to floating
mode.

Compare with previous patch:
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609396.html
This version supports the non-zero stack offset for parameter
argument.

I also tried to enhance CSE/DSE for this issue.  But because the
limitations (e.g. CSE does not like new pseudo, DSE is not good
at cross-blocks), some cases (as this patch) can not be handled.

Bootstrap and regtest passes on ppc64{,le}.
Is this ok for trunk?  Thanks for comments!


BR,
Jeff (Jiufu)


PR target/108073

gcc/ChangeLog:

* expr.cc (extract_subreg_from_loading_word): New function.
(expand_expr_real_1): Call extract_subreg_from_loading_word.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/pr102024.C: Updated.
* gcc.target/powerpc/pr108073.c: New test.

---
 gcc/expr.cc | 85 +
 gcc/testsuite/g++.target/powerpc/pr102024.C |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr108073.c | 29 +++
 3 files changed, 115 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108073.c

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 3917fc24c8c..57bc29c5678 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -10711,6 +10711,77 @@ stmt_is_replaceable_p (gimple *stmt)
   return false;
 }
 
+/* Return the content of the memory slot SOURCE as MODE.
+   SOURCE is based on BASE. BASE is a memory block that is stored via words.
+
+   To get the content from SOURCE:
+   first load the word from the memory which covers the SOURCE slot first;
+   next return the word's subreg which offsets to SOURCE slot;
+   then convert to MODE as necessary.  */
+
+static rtx
+extract_subreg_from_loading_word (machine_mode mode, rtx source, rtx base, 
poly_uint64 bytepos)
+{
+  rtx src_base = XEXP (source, 0);
+  poly_uint64 offset = MEM_OFFSET (source);
+
+  if (GET_CODE (src_base) == PLUS && CONSTANT_P (XEXP (src_base, 1)))
+{
+  offset += INTVAL (XEXP (src_base, 1));
+  src_base = XEXP (src_base, 0);
+}
+
+  base = XEXP (base, 0);
+  if (GET_CODE (base) == PLUS && CONSTANT_P (XEXP (base, 1)))
+{
+  poly_uint64 offset_on_base = INTVAL (XEXP (base, 1));
+  base = XEXP (base, 0);
+  offset -= offset_on_base;
+}
+
+  if (!rtx_equal_p (base, src_base) || !known_ge (offset, bytepos))
+return NULL_RTX;
+
+  /* Subreg(DI,n) -> DF/SF/SI/HI/QI */
+  poly_uint64 word_size = GET_MODE_SIZE (word_mode);
+  poly_uint64 mode_size = GET_MODE_SIZE (mode);
+  poly_uint64 byte_off;
+  unsigned int start;
+  machine_mode int_mode;
+  if (known_ge (word_size, mode_size) && multiple_p (word_size, mode_size)
+  && int_mode_for_mode (mode).exists (_mode)
+  && can_div_trunc_p (offset, word_size, , _off)
+  && multiple_p (byte_off, mode_size))
+{
+  rtx word_mem = copy_rtx (source);
+  PUT_MODE (word_mem, word_mode);
+  word_mem = adjust_address (word_mem, word_mode, -byte_off);
+
+  rtx word_reg = gen_reg_rtx (word_mode);
+  emit_move_insn (word_reg, word_mem);
+
+  poly_uint64 low_off = subreg_lowpart_offset (int_mode, word_mode);
+  if (!known_eq (byte_off, low_off))
+   {
+ poly_uint64 shift_bytes = known_gt (byte_off, low_off)
+ ? byte_off - low_off
+ : low_off - byte_off;
+ word_reg = expand_shift (RSHIFT_EXPR, word_mode, word_reg,
+  shift_bytes * BITS_PER_UNIT, word_reg, 0);
+   }
+
+  rtx int_subreg = gen_lowpart (int_mode, word_reg);
+  if (mode == int_mode)
+   return int_subreg;
+
+  rtx int_mode_reg = gen_reg_rtx (int_mode);
+  emit_move_insn (int_mode_reg, int_subreg);
+  return gen_lowpart (mode, int_mode_reg);
+}
+
+  return NULL_RTX;
+}
+
 rtx
 expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
enum expand_modifier modifier, rtx *alt_rtl,
@@ -11892,6 +11963,20 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode 
tmode,
&& modifier != EXPAND_WRITE)
  op0 = flip_storage_order (mode1, op0);
 
+   /* Accessing sub-field of struct parameter which passed via integer
+  registers.  */
+   if (mode == mode1 && TREE_CODE (tem) == PARM_DECL
+   && DECL_INCOMING_RTL (tem) && REG_P (DECL_INCOMING_RTL (tem))
+   && GET_MODE 

Re: [PATCH-1, rs6000] Put constant into pseudo at expand when it needs two insns [PR86106]

2023-03-16 Thread HAO CHEN GUI via Gcc-patches
Hi Richard,

在 2023/3/16 18:36, Richard Biener 写道:
> On Thu, Mar 16, 2023 at 10:04 AM HAO CHEN GUI  wrote:
>>
>> Hi Richard,
>>
>> 在 2023/3/16 15:57, Richard Biener 写道:
>>> So this is one way around the lack of CSE/PRE of constant operands.  I'd
>>> argue that a better spot for this _might_ be LRA (split the constant out if
>>> there's a free register available), postreload-[g]cse (CSE the constants) 
>>> and
>>> then maybe cprop_hardreg to combine back single-use constants?
>>>
>>> I'm not sure if careful constraints massaging like adding magic letters to
>>> alternatives with constants to pessimize them for LRA, making them
>>> more expensive than spilling the constant to a register but avoid
>>> secondary reloads with spilling a register to the stack to make room
>>> for the constant, is possible - but in theory a special constraint modifier
>>> for this purpose could be invented.
>>
>> Thanks so much for your advice.
>>
>> cse/gcse doesn't take cost of constant set (the def insn of the constant) 
>> into
>> consideration. So it won't replace the register with a constant as it costs 1
>> insn with the register and costs 2 insn with the constant.
> 
> I think it does (and should) cost the constant set (IIRC we had some
> improvements
> there, or at least proposed, during this stage1).  But sure - this is why your
> "trick" works.
> 
It's doable if post-reload gsc costs the constant set. I will draft a patch to
test it.

>> Finally, the single-
>> use constants can't be back to 2 insn.
> 
> And that's because of the issue you point out above?
No. my original concern is the constant can't be back. If post-reload gsc doen't
cost the constant set, the insn with a register always cost less than two insns
with immediates. Commonly the constant set itself costs 2 insn also.
> 
>> Not sure if I understand it correctly.
>> Looking forward to your advice.
> 
> My main point is that CSEing constants has impacts on register pressure
> and thus should probably be done after or within register allocation.  RTL
> expansion itself is probably a bad time to pro-actively split out constants
> even more if, as you say, nothing puts them back.
> 
I agree. Thank a lot.
> Richard.
> 
>> Thanks
>> Gui Haochen

Gui Haochen


[pushed] wwwdocs: readings: Switch publibfp.dhe.ibm.com to https

2023-03-16 Thread Gerald Pfeifer
Pushed.

Gerald
---
 htdocs/readings.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/htdocs/readings.html b/htdocs/readings.html
index 3cdc47a9..6813b84f 100644
--- a/htdocs/readings.html
+++ b/htdocs/readings.html
@@ -310,8 +310,8 @@ names.
 
  z/Architecture (S/390)
   Manufacturer: IBM
-  http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf;>z/Architecture 
Principles of Operation
-  http://publibfp.dhe.ibm.com/epubs/pdf/dz9ar008.pdf;>ESA/390 
Principles of Operation
+  https://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf;>z/Architecture 
Principles of Operation
+  https://publibfp.dhe.ibm.com/epubs/pdf/dz9ar008.pdf;>ESA/390 
Principles of Operation
   https://refspecs.linuxbase.org/ELF/zSeries/lzsabi0_zSeries.html;>Linux 
for z Systems ABI
   https://refspecs.linuxbase.org/ELF/zSeries/lzsabi0_s390.html;>Linux for 
S/390 ABI
  
-- 
2.39.2


Re: [wwwdocs] AVR entry in readings.htmls

2023-03-16 Thread Gerald Pfeifer
On Sat, 10 Mar 2018, Segher Boessenkool wrote:
>> It appears this link at atmel.com has been taken down without
>> what appears a replacement, so I applied the patch below.
> Atmel was bought by Microchip some two years ago...  Maybe
> https://www.microchip.com/design-centers/8-bit/microchip-avr-mcus

On Sun, 11 Mar 2018, Denis Chertykov wrote:
> http://www.microchip.com/design-centers/8-bit/microchip-avr-mcus

Thank you, Segher and Denis! I somehow failed to add that link back
then, done now per the patch below.

(In the meantime the address has changed again, and I am using that
updated address.)

Pushed.

Gerald


commit 94c654022d3d6d439f21418c74d13e4e303d44ab
Author: Gerald Pfeifer 
Date:   Fri Mar 17 00:32:07 2023 +0100

readings: Add new link for AVR documentation

diff --git a/htdocs/readings.html b/htdocs/readings.html
index 27f42d3d..3cdc47a9 100644
--- a/htdocs/readings.html
+++ b/htdocs/readings.html
@@ -90,6 +90,7 @@ names.
 
  AVR
   Manufacturer: Atmel
+  https://www.microchip.com/en-us/products/microcontrollers-and-microprocessors/8-bit-mcus/avr-mcus;>AVR
 documentation
  
 
  Blackfin


[pushed] wwwdocs: onlinedocs: Use the proper name of the Modula-2 manual

2023-03-16 Thread Gerald Pfeifer
With this the Modula-2 manual -- quite impressive, but the way -- is now 
finally reachable from our /onlinedocs page.

Gerald

---
 htdocs/onlinedocs/index.html | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/htdocs/onlinedocs/index.html b/htdocs/onlinedocs/index.html
index 27a8a505..da5f8398 100644
--- a/htdocs/onlinedocs/index.html
+++ b/htdocs/onlinedocs/index.html
@@ -1647,11 +1647,11 @@ existing release.
href="https://gcc.gnu.org/onlinedocs/gdc.ps.gz;>PostScript or https://gcc.gnu.org/onlinedocs/gdc-html.tar.gz;>an
HTML tarball)
-https://gcc.gnu.org/onlinedocs/m2/;>GNU M2 Manual (https://gcc.gnu.org/onlinedocs/m2.pdf;>also in
+https://gcc.gnu.org/onlinedocs/gm2/;>GNU M2 Manual (https://gcc.gnu.org/onlinedocs/gm2.pdf;>also in
PDF or https://gcc.gnu.org/onlinedocs/m2.ps.gz;>PostScript or https://gcc.gnu.org/onlinedocs/m2-html.tar.gz;>an
+   href="https://gcc.gnu.org/onlinedocs/gm2.ps.gz;>PostScript or https://gcc.gnu.org/onlinedocs/gm2-html.tar.gz;>an
HTML tarball)
 https://gcc.gnu.org/onlinedocs/libgomp/;>GNU Offloading and
Multi Processing Runtime Library Manual (

Re: [wwwdocs] document modula-2 in gcc-13/changes.html (and index.html)

2023-03-16 Thread Gerald Pfeifer
On Thu, 16 Mar 2023, Gaius Mulley wrote:
>> Does maintainer-scripts/update_web_docs_git require an update to cover 
>> Modula-2 and actually build the manual we are now linking to
> Apologies I was going to ask about these links.  I've updated the m2
> subtree with target documentation independent sections.  Attached is a
> proposed patch for maintainer-scripts/update_web_docs_git feel free to
> apply or adapt in any way.

I sorted the special casing alphabetically and pushed the updated patch
below.

Then I updated the script on gcc.gnu.org and did a testrun. 

Indeed Modula 2 manuals are now available below
  https://gcc.gnu.org/onlinedocs/ 
just the links from that page are off (referring to m2 instead of gm2). 
I'll fix that next.

Gerald


commit fa4d0ab533cc2bc9cb6f512b3d4bd0bbc01ee797
Author: Gaius Mulley 
Date:   Fri Mar 17 00:08:20 2023 +0100

maintainer-scripts: Add Modula-2 manual to update_web_docs_git

maintainer-scripts/ChangeLog:

* update_web_docs_git (MANUALS): Add gm2.
Add include path for gm2 manual.

diff --git a/maintainer-scripts/update_web_docs_git 
b/maintainer-scripts/update_web_docs_git
index 1c6a993cafd..4bb4897bf35 100755
--- a/maintainer-scripts/update_web_docs_git
+++ b/maintainer-scripts/update_web_docs_git
@@ -24,6 +24,7 @@ MANUALS="cpp
   gdc
   gfortran
   gfc-internals
+  gm2
   gnat_ugn
   gnat-style
   gnat_rm
@@ -167,7 +168,10 @@ for file in $MANUALS; do
   filename=`find . -name ${file}.texi`
   if [ "${filename}" ]; then
 includes="-I ${includedir} -I `dirname ${filename}`"
-if [ "$file" = "gnat_ugn" ]; then
+if [ "$file" = "gm2" ]; then
+  includes="$includes -I gcc/gcc/m2/target-independent"
+  includes="$includes -I gcc/gcc/m2/target-independent/m2"
+elif [ "$file" = "gnat_ugn" ]; then
   includes="$includes -I gcc/gcc/ada -I gcc/gcc/ada/doc/gnat_ugn"
 fi
 makeinfo --html --css-ref $CSS $includes -o ${file} ${filename}


[PATCH] correct function attribute typo

2023-03-16 Thread Jonny Grant
Hello
There's a typo in the common function attribute docs, "nonnul" which this patch 
corrects.

https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes

gcc/ChangeLog
2023-03-16  Jonny Grant  
* doc/extend.texi: correct function attribute typo

---
 gcc/doc/extend.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index e0c5357291b..91dfdef4461 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -3657,7 +3657,7 @@ marked with nonnull is compared with null, and
 @option{-Wnonnull-compare} option is enabled, a warning is issued.
 @xref{Warning Options}.
 @item The compiler may also perform optimizations based on the
-knowledge that @code{nonnul} parameters cannot be null.  This can
+knowledge that @code{nonnull} parameters cannot be null.  This can
 currently not be disabled other than by removing the nonnull
 attribute.
 @end itemize


[pushed] c++: __func__ and local class DMI [PR105809]

2023-03-16 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

As in 108242, we need to instantiate in the context of the enclosing
function, not after it's gone.

PR c++/105809

gcc/cp/ChangeLog:

* init.cc (get_nsdmi): Split out...
(maybe_instantiate_nsdmi_init): ...this function.
* cp-tree.h: Declare it.
* pt.cc (tsubst_expr): Use it.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/constexpr-__func__3.C: New test.
---
 gcc/cp/cp-tree.h  |  1 +
 gcc/cp/init.cc| 27 ---
 gcc/cp/pt.cc  |  6 +
 .../g++.dg/cpp0x/constexpr-__func__3.C| 15 +++
 4 files changed, 40 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-__func__3.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index dfc1c845768..b74c18b03ad 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7086,6 +7086,7 @@ extern bool is_copy_initialization(tree);
 extern tree build_zero_init(tree, tree, bool);
 extern tree build_value_init   (tree, tsubst_flags_t);
 extern tree build_value_init_noctor(tree, tsubst_flags_t);
+extern tree maybe_instantiate_nsdmi_init   (tree, tsubst_flags_t);
 extern tree get_nsdmi  (tree, bool, tsubst_flags_t);
 extern tree build_offset_ref   (tree, tree, bool,
 tsubst_flags_t);
diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc
index 1b7d3d8fe3e..90302372340 100644
--- a/gcc/cp/init.cc
+++ b/gcc/cp/init.cc
@@ -561,18 +561,16 @@ perform_target_ctor (tree init)
   return init;
 }
 
-/* Return the non-static data initializer for FIELD_DECL MEMBER.  */
+/* Instantiate the default member initializer of MEMBER, if needed.
+   Only get_nsdmi should use the return value of this function.  */
 
 static GTY((cache)) decl_tree_cache_map *nsdmi_inst;
 
 tree
-get_nsdmi (tree member, bool in_ctor, tsubst_flags_t complain)
+maybe_instantiate_nsdmi_init (tree member, tsubst_flags_t complain)
 {
-  tree init;
-  tree save_ccp = current_class_ptr;
-  tree save_ccr = current_class_ref;
-  
-  if (DECL_LANG_SPECIFIC (member) && DECL_TEMPLATE_INFO (member))
+  tree init = DECL_INITIAL (member);
+  if (init && DECL_LANG_SPECIFIC (member) && DECL_TEMPLATE_INFO (member))
 {
   init = DECL_INITIAL (DECL_TI_TEMPLATE (member));
   location_t expr_loc
@@ -642,8 +640,19 @@ get_nsdmi (tree member, bool in_ctor, tsubst_flags_t 
complain)
  input_location = sloc;
}
 }
-  else
-init = DECL_INITIAL (member);
+
+  return init;
+}
+
+/* Return the non-static data initializer for FIELD_DECL MEMBER.  */
+
+tree
+get_nsdmi (tree member, bool in_ctor, tsubst_flags_t complain)
+{
+  tree save_ccp = current_class_ptr;
+  tree save_ccr = current_class_ref;
+
+  tree init = maybe_instantiate_nsdmi_init (member, complain);
 
   if (init && TREE_CODE (init) == DEFERRED_PARSE)
 {
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 1072882e1a8..056b8c7abad 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -19353,6 +19353,8 @@ tsubst_expr (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
  /* Closures are handled by the LAMBDA_EXPR.  */
  gcc_assert (!LAMBDA_TYPE_P (TREE_TYPE (t)));
  complete_type (tmp);
+ tree save_ccp = current_class_ptr;
+ tree save_ccr = current_class_ref;
  for (tree fld = TYPE_FIELDS (tmp); fld; fld = DECL_CHAIN (fld))
if ((VAR_P (fld)
 || (TREE_CODE (fld) == FUNCTION_DECL
@@ -19360,6 +19362,10 @@ tsubst_expr (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
&& DECL_TEMPLATE_INSTANTIATION (fld))
  instantiate_decl (fld, /*defer_ok=*/false,
/*expl_inst_class=*/false);
+   else if (TREE_CODE (fld) == FIELD_DECL)
+ maybe_instantiate_nsdmi_init (fld, tf_warning_or_error);
+ current_class_ptr = save_ccp;
+ current_class_ref = save_ccr;
}
   break;
 
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-__func__3.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-__func__3.C
new file mode 100644
index 000..365a4e00c5f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-__func__3.C
@@ -0,0 +1,15 @@
+// PR c++/105809
+// { dg-do compile { target c++11 } }
+
+template void hh() {  ss t; }
+
+template
+int f(void)
+{
+constexpr char const* cc = __func__;
+struct j{  char const* kk=cc; };
+hh();
+return 0;
+}
+
+int t = f<1>();

base-commit: b323f52ccf966800297b0520b9e1d4b3951db525
-- 
2.31.1



[pushed] c++: generic lambda, local class, __func__ [PR108242]

2023-03-16 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

Here we are trying to do name lookup in a deferred instantiation of t() and
failing to find __func__.  tsubst_expr already tries to instantiate members
of local classes, but was failing with the partial instantiation of generic
lambdas.

PR c++/108242

gcc/cp/ChangeLog:

* pt.cc (tsubst_expr) [TAG_DEFN]: Handle partial instantiation.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/lambda-generic-func2.C: New test.
---
 gcc/cp/pt.cc   |  5 -
 .../g++.dg/cpp1y/lambda-generic-func2.C| 18 ++
 2 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-func2.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index ddbd73371b9..1072882e1a8 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -19341,7 +19341,10 @@ tsubst_expr (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
 
 case TAG_DEFN:
   tmp = tsubst (TREE_TYPE (t), args, complain, NULL_TREE);
-  if (CLASS_TYPE_P (tmp))
+  if (dependent_type_p (tmp))
+   /* This is a partial instantiation, try again when full.  */
+   add_stmt (build_min (TAG_DEFN, tmp));
+  else if (CLASS_TYPE_P (tmp))
{
  /* Local classes are not independent templates; they are
 instantiated along with their containing function.  And this
diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-func2.C 
b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-func2.C
new file mode 100644
index 000..ed541c7812f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-func2.C
@@ -0,0 +1,18 @@
+// PR c++/108242
+// { dg-do compile { target c++14 } }
+
+template
+void my_fun()
+{
+[&](auto) {
+static constexpr char const* fun_name = __func__;
+struct t
+{
+t() { fun_name; };
+} t1;
+}(12);
+}
+
+int main() {
+my_fun<1>();
+}

base-commit: 1cc8814098bb46f9fca58a0b831fbf9a8574bdc9
-- 
2.31.1



[pushed] c++: ::enumerator [PR101869]

2023-03-16 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

We don't want to call build_offset_ref with an enum.

PR c++/101869

gcc/cp/ChangeLog:

* semantics.cc (finish_qualified_id_expr): Don't try to build a
pointer-to-member if the scope is an enumeration.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/enum43.C: New test.
---
 gcc/cp/semantics.cc |  3 ++-
 gcc/testsuite/g++.dg/cpp0x/enum43.C | 11 +++
 2 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/enum43.C

diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 57dd7b66da8..87c2e8a7111 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -2386,7 +2386,8 @@ finish_qualified_id_expr (tree qualifying_class,
 
   /* If EXPR occurs as the operand of '&', use special handling that
  permits a pointer-to-member.  */
-  if (address_p && done)
+  if (address_p && done
+  && TREE_CODE (qualifying_class) != ENUMERAL_TYPE)
 {
   if (TREE_CODE (expr) == SCOPE_REF)
expr = TREE_OPERAND (expr, 1);
diff --git a/gcc/testsuite/g++.dg/cpp0x/enum43.C 
b/gcc/testsuite/g++.dg/cpp0x/enum43.C
new file mode 100644
index 000..b2cd9797a06
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/enum43.C
@@ -0,0 +1,11 @@
+// PR c++/101869
+
+enum E { A };
+E operator & (E e)
+{
+  return e;
+}
+E f(void)
+{
+return ::A;   // { dg-error "not a class" "" { target c++98_only } }
+}

base-commit: e6ccae0ac0d53cfa9099d62fada050ee87d4d0ad
-- 
2.31.1



[V5][PATCH 2/2] Update documentation to clarify a GCC extension

2023-03-16 Thread Qing Zhao via Gcc-patches
on a structure with a C99 flexible array member being nested in
another structure. (PR77650)

"GCC extension accepts a structure containing an ISO C99 "flexible array
member", or a union containing such a structure (possibly recursively)
to be a member of a structure.

 There are two situations:

   * A structure or a union with a C99 flexible array member is the last
 field of another structure, for example:

  struct flex  { int length; char data[]; };
  union union_flex { int others; struct flex f; };

  struct out_flex_struct { int m; struct flex flex_data; };
  struct out_flex_union { int n; union union_flex flex_data; };

 In the above, both 'out_flex_struct.flex_data.data[]' and
 'out_flex_union.flex_data.f.data[]' are considered as flexible
 arrays too.

   * A structure or a union with a C99 flexible array member is the
 middle field of another structure, for example:

  struct flex  { int length; char data[]; };

  struct mid_flex { int m; struct flex flex_data; int n; };

 In the above, 'mid_flex.flex_data.data[]' has undefined behavior.
 Compilers do not handle such case consistently, Any code relying on
 such case should be modified to ensure that flexible array members
 only end up at the ends of structures.

 Please use warning option '-Wgnu-variable-sized-type-not-at-end' to
 identify all such cases in the source code and modify them.  This
 extension will be deprecated from gcc in the next release.
"

gcc/c-family/ChangeLog:

* c.opt: New option -Wgnu-variable-sized-type-not-at-end.

gcc/c/ChangeLog:

* c-decl.cc (finish_struct): Issue warnings for new option.

gcc/ChangeLog:

* doc/extend.texi: Document GCC extension on a structure containing
a flexible array member to be a member of another structure.

gcc/testsuite/ChangeLog:

* gcc.dg/variable-sized-type-flex-array.c: New test.
---
 gcc/c-family/c.opt|  5 +++
 gcc/c/c-decl.cc   |  8 
 gcc/doc/extend.texi   | 45 ++-
 .../gcc.dg/variable-sized-type-flex-array.c   | 31 +
 4 files changed, 88 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/variable-sized-type-flex-array.c

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index cddeece..660ac07f3d4 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -737,6 +737,11 @@ Wformat-truncation=
 C ObjC C++ LTO ObjC++ Joined RejectNegative UInteger Var(warn_format_trunc) 
Warning LangEnabledBy(C ObjC C++ LTO ObjC++,Wformat=, warn_format >= 1, 0) 
IntegerRange(0, 2)
 Warn about calls to snprintf and similar functions that truncate output.
 
+Wgnu-variable-sized-type-not-at-end
+C C++ Var(warn_variable_sized_type_not_at_end) Warning
+Warn about structures or unions with C99 flexible array members are not
+at the end of a structure.
+
 Wif-not-aligned
 C ObjC C++ ObjC++ Var(warn_if_not_aligned) Init(1) Warning
 Warn when the field in a struct is not aligned.
diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 14c54809b9d..1632043a8ff 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -9269,6 +9269,14 @@ finish_struct (location_t loc, tree t, tree fieldlist, 
tree attributes,
TYPE_INCLUDE_FLEXARRAY (t)
  = is_last_field && TYPE_INCLUDE_FLEXARRAY (TREE_TYPE (x));
 
+  if (warn_variable_sized_type_not_at_end
+ && !is_last_field
+ && RECORD_OR_UNION_TYPE_P (TREE_TYPE (x))
+ && TYPE_INCLUDE_FLEXARRAY (TREE_TYPE (x)))
+   warning_at (DECL_SOURCE_LOCATION (x),
+   OPT_Wgnu_variable_sized_type_not_at_end,
+   "variable sized type not at the end of a struct");
+
   if (DECL_NAME (x)
  || RECORD_OR_UNION_TYPE_P (TREE_TYPE (x)))
saw_named_field = true;
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index fd3745c5608..0928b962a60 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -1748,7 +1748,50 @@ Flexible array members may only appear as the last 
member of a
 A structure containing a flexible array member, or a union containing
 such a structure (possibly recursively), may not be a member of a
 structure or an element of an array.  (However, these uses are
-permitted by GCC as extensions.)
+permitted by GCC as extensions, see details below.)
+@end itemize
+
+GCC extension accepts a structure containing an ISO C99 @dfn{flexible array
+member}, or a union containing such a structure (possibly recursively)
+to be a member of a structure.
+
+There are two situations:
+
+@itemize @bullet
+@item
+A structure or a union with a C99 flexible array member is the last field
+of another structure, for example:
+
+@smallexample
+struct flex  @{ int length; char data[]; @};
+union union_flex @{ int others; struct flex f; @};
+
+struct out_flex_struct @{ int m; struct flex flex_data; @};
+struct out_flex_union @{ int n; 

[V5][PATCH 1/2] Handle component_ref to a structre/union field including flexible array member [PR101832]

2023-03-16 Thread Qing Zhao via Gcc-patches
GCC extension accepts the case when a struct with a flexible array member
is embedded into another struct or union (possibly recursively).
__builtin_object_size should treat such struct as flexible size per
-fstrict-flex-arrays.

gcc/c/ChangeLog:

PR tree-optimization/101832
* c-decl.cc (finish_struct): Set TYPE_INCLUDE_FLEXARRAY for
struct/union type.

gcc/lto/ChangeLog:

PR tree-optimization/101832
* lto-common.cc (compare_tree_sccs_1): Compare bit
TYPE_NO_NAMED_ARGS_STDARG_P or TYPE_INCLUDE_FLEXARRAY properly
for its corresponding type.

gcc/ChangeLog:

PR tree-optimization/101832
* print-tree.cc (print_node): Print new bit type_include_flexarray.
* tree-core.h (struct tree_type_common): Use bit no_named_args_stdarg_p
as type_include_flexarray for RECORD_TYPE or UNION_TYPE.
* tree-object-size.cc (addr_object_size): Handle structure/union type
when it has flexible size.
* tree-streamer-in.cc (unpack_ts_type_common_value_fields): Stream
in bit no_named_args_stdarg_p properly for its corresponding type.
* tree-streamer-out.cc (pack_ts_type_common_value_fields): Stream
out bit no_named_args_stdarg_p properly for its corresponding type.
* tree.h (TYPE_INCLUDE_FLEXARRAY): New macro TYPE_INCLUDE_FLEXARRAY.

gcc/testsuite/ChangeLog:

PR tree-optimization/101832
* gcc.dg/builtin-object-size-pr101832.c: New test.
---
 gcc/c/c-decl.cc   |  11 ++
 gcc/lto/lto-common.cc |   5 +-
 gcc/print-tree.cc |   5 +
 .../gcc.dg/builtin-object-size-pr101832.c | 134 ++
 gcc/tree-core.h   |   2 +
 gcc/tree-object-size.cc   |  23 ++-
 gcc/tree-streamer-in.cc   |   5 +-
 gcc/tree-streamer-out.cc  |   5 +-
 gcc/tree.h|   7 +-
 9 files changed, 192 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index e537d33f398..14c54809b9d 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -9258,6 +9258,17 @@ finish_struct (location_t loc, tree t, tree fieldlist, 
tree attributes,
   /* Set DECL_NOT_FLEXARRAY flag for FIELD_DECL x.  */
   DECL_NOT_FLEXARRAY (x) = !is_flexible_array_member_p (is_last_field, x);
 
+  /* Set TYPE_INCLUDE_FLEXARRAY for the context of x, t.
+when x is an array and is the last field.  */
+  if (TREE_CODE (TREE_TYPE (x)) == ARRAY_TYPE)
+   TYPE_INCLUDE_FLEXARRAY (t)
+ = is_last_field && flexible_array_member_type_p (TREE_TYPE (x));
+  /* Recursively set TYPE_INCLUDE_FLEXARRAY for the context of x, t
+when x is an union or record and is the last field.  */
+  else if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (x)))
+   TYPE_INCLUDE_FLEXARRAY (t)
+ = is_last_field && TYPE_INCLUDE_FLEXARRAY (TREE_TYPE (x));
+
   if (DECL_NAME (x)
  || RECORD_OR_UNION_TYPE_P (TREE_TYPE (x)))
saw_named_field = true;
diff --git a/gcc/lto/lto-common.cc b/gcc/lto/lto-common.cc
index 882dd8971a4..9dde7118266 100644
--- a/gcc/lto/lto-common.cc
+++ b/gcc/lto/lto-common.cc
@@ -1275,7 +1275,10 @@ compare_tree_sccs_1 (tree t1, tree t2, tree **map)
   if (AGGREGATE_TYPE_P (t1))
compare_values (TYPE_TYPELESS_STORAGE);
   compare_values (TYPE_EMPTY_P);
-  compare_values (TYPE_NO_NAMED_ARGS_STDARG_P);
+  if (FUNC_OR_METHOD_TYPE_P (t1))
+   compare_values (TYPE_NO_NAMED_ARGS_STDARG_P);
+  if (RECORD_OR_UNION_TYPE_P (t1))
+   compare_values (TYPE_INCLUDE_FLEXARRAY);
   compare_values (TYPE_PACKED);
   compare_values (TYPE_RESTRICT);
   compare_values (TYPE_USER_ALIGN);
diff --git a/gcc/print-tree.cc b/gcc/print-tree.cc
index 1f3afcbbc86..efacdb7686f 100644
--- a/gcc/print-tree.cc
+++ b/gcc/print-tree.cc
@@ -631,6 +631,11 @@ print_node (FILE *file, const char *prefix, tree node, int 
indent,
  && TYPE_CXX_ODR_P (node))
fputs (" cxx-odr-p", file);
 
+  if ((code == RECORD_TYPE
+  || code == UNION_TYPE)
+ && TYPE_INCLUDE_FLEXARRAY (node))
+   fputs (" include-flexarray", file);
+
   /* The transparent-union flag is used for different things in
 different nodes.  */
   if ((code == UNION_TYPE || code == RECORD_TYPE)
diff --git a/gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c 
b/gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c
new file mode 100644
index 000..60078e11634
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c
@@ -0,0 +1,134 @@
+/* PR 101832: 
+   GCC extension accepts the case when a struct with a C99 flexible array
+   member is embedded into another struct (possibly recursively).
+   __builtin_object_size will treat such struct as flexible size.
+   

[V5][PATCH 0/2] Handle component_ref to a structure/union field including FAM for builtin_object_size

2023-03-16 Thread Qing Zhao via Gcc-patches
Hi, Joseph, Jakub and Sandra,

Could you please review this patch and let me know whether it???s ready
for committing into GCC13?

The fix to Bug PR101832 is an important patch for kernel security
purpose. it's better to be put into GCC13.

===

These are the 5th version of the patches for PR101832, to fix
builtin_object_size to correctly handle component_ref to a
structure/union field that includes a flexible array member.

also includes a documentation update for the GCC extension on embedding
a structure/union with flexible array member into another structure.
which includes a fix to PR77650.

compared to the 4th version of the patch, the major changes are:

1. Update the documentation per Sandra's comments and
suggestion.
2. per Richard's suggestion, let the new bit TYPE_INCLUDE_FLEXARRAY to
share the same bit with no_named_args_stdarg_p to save space in the IR.
and corresponding changes to support such sharing.
3. I also changed the code inside tree-object-size.cc to make it cleaner
and easier to be understood.

bootstrapped and regression tested on aarch64 and x86.

Okay for commit?

thanks.

Qing

Qing Zhao (2):
  Handle component_ref to a structre/union field including  flexible
array member [PR101832]
  Update documentation to clarify a GCC extension

 gcc/c-family/c.opt|   5 +
 gcc/c/c-decl.cc   |  19 +++
 gcc/doc/extend.texi   |  45 +-
 gcc/lto/lto-common.cc |   5 +-
 gcc/print-tree.cc |   5 +
 .../gcc.dg/builtin-object-size-pr101832.c | 134 ++
 .../gcc.dg/variable-sized-type-flex-array.c   |  31 
 gcc/tree-core.h   |   2 +
 gcc/tree-object-size.cc   |  23 ++-
 gcc/tree-streamer-in.cc   |   5 +-
 gcc/tree-streamer-out.cc  |   5 +-
 gcc/tree.h|   7 +-
 12 files changed, 280 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c
 create mode 100644 gcc/testsuite/gcc.dg/variable-sized-type-flex-array.c

-- 
2.31.1



Re: [PATCH v1] [RFC] Improve folding for comparisons with zero in tree-ssa-forwprop.

2023-03-16 Thread Philipp Tomsich
Just to add a bit more color on this one...
It was originally observed (and isolated from)
_ZN11xalanc_1_1027XalanReferenceCountedObject12addReferenceEPS0_ and
reproduces both for AArch64 and RISC-V.

The basic block (annotated with dynamic instructions executed and
percentage of total dynamic instructions) looks as follows:

>   0x00511488 4589868875 0.4638%
> _ZN11xalanc_1_1027XalanReferenceCountedObject12addReferenceEPS0_
>   4518  lw  a4,8(a0)
>   0017029b  addiw   t0,a4,1
>   00552423  sw  t0,8(a0)
>   4685  addia3,zero,1
>   00d28363  beq t0,a3,6 # 0x51149a


This change reduces the instruction count on RISC-V by one compressible
instruction (2 bytes) and on AArch64 by one instruction (4 bytes).
No execution time improvement (measured on Neoverse-N1) — as would be
expected.

--Philipp.


On Thu, 16 Mar 2023 at 17:41, Jeff Law  wrote:

>
>
> On 3/16/23 09:27, Manolis Tsamis wrote:
> > For this C testcase:
> >
> > void g();
> > void f(unsigned int *a)
> > {
> >if (++*a == 1)
> >  g();
> > }
> >
> > GCC will currently emit a comparison with 1 by using the value
> > of *a after the increment. This can be improved by comparing
> > against 0 and using the value before the increment. As a result
> > there is a potentially shorter dependancy chain (no need to wait
> > for the result of +1) and on targets with compare zero instructions
> > the generated code is one instruction shorter.
> >
> > Example from Aarch64:
> >
> > Before
> >  ldr w1, [x0]
> >  add w1, w1, 1
> >  str w1, [x0]
> >  cmp w1, 1
> >  beq .L4
> >  ret
> >
> > After
> >  ldr w1, [x0]
> >  add w2, w1, 1
> >  str w2, [x0]
> >  cbz w1, .L4
> >  ret
> >
> > gcc/ChangeLog:
> >
> >  * tree-ssa-forwprop.cc (combine_cond_expr_cond):
> >  (forward_propagate_into_comparison_1): Optimize
> >  for zero comparisons.
> Deferring to gcc-14.  Though I'm generally supportive of normalizing to
> a comparison against zero when we safely can :-)
>
> jeff
>


[PATCH] i386: Robustify vec perm blend functions for TARGET_MMX_WITH_SSE

2023-03-16 Thread Uros Bizjak via Gcc-patches
8-byte modes should be processed only for TARGET_MMX_WITH_SSE.

gcc/ChangeLog:

* config/i386/i386-expand.cc (expand_vec_perm_pblendv):
Handle 8-byte modes only with TARGET_MMX_WITH_SSE.
(expand_vec_perm_2perm_pblendv): Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 1545d4365b7..c1300dc4e26 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -20288,9 +20288,10 @@ expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
 ;
   else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
 ;
-  else if (TARGET_SSE4_1 && (GET_MODE_SIZE (vmode) == 4
-|| GET_MODE_SIZE (vmode) == 8
-|| GET_MODE_SIZE (vmode) == 16))
+  else if (TARGET_SSE4_1
+  && (GET_MODE_SIZE (vmode) == 16
+  || (TARGET_MMX_WITH_SSE && GET_MODE_SIZE (vmode) == 8)
+  || GET_MODE_SIZE (vmode) == 4))
 ;
   else
 return false;
@@ -21154,9 +21155,10 @@ expand_vec_perm_2perm_pblendv (struct 
expand_vec_perm_d *d, bool two_insn)
 ;
   else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
 ;
-  else if (TARGET_SSE4_1 && (GET_MODE_SIZE (vmode) == 16
-|| GET_MODE_SIZE (vmode) == 8
-|| GET_MODE_SIZE (vmode) == 4))
+  else if (TARGET_SSE4_1
+  && (GET_MODE_SIZE (vmode) == 16
+  || (TARGET_MMX_WITH_SSE && GET_MODE_SIZE (vmode) == 8)
+  || GET_MODE_SIZE (vmode) == 4))
 ;
   else
 return false;


Re: [PATCH] testsuite: Handle default_packed targets in gcc.dg/plugin

2023-03-16 Thread Hans-Peter Nilsson via Gcc-patches
> From: Hans-Peter Nilsson 
> Date: Thu, 16 Mar 2023 19:25:05 +0100

> That doesn't seem like a good idea.  At a glance the
> *testcode* will be simpler, but the patch will be slightly
> larger

Bah, s/but the patch will be slightly larger/and the patch
will certainly be smaller, but because less is tested/.

brgds, H-P


Re: [PATCH] testsuite: Handle default_packed targets in gcc.dg/plugin

2023-03-16 Thread David Malcolm via Gcc-patches
On Thu, 2023-03-16 at 19:25 +0100, Hans-Peter Nilsson wrote:
> > From: David Malcolm 
> > Date: Thu, 16 Mar 2023 13:55:48 -0400
> 
> > On Thu, 2023-03-09 at 19:56 +0100, Hans-Peter Nilsson wrote:
> > > It's not obvious to me whether considered best to include or
> > > exclude these tests that depend on structure layout details.
> > > If excluding, the obvious alternative to this patch is then
> > > to add a top one-liner (to dg-skip-if the test for
> > > default_packed targets or a similar excluding expression).
> > > I'm fine either way, just suggesting the following, which
> > > handles the cris-elf test-case failures I see for these
> > > tests, and causes no change in results for native
> > > x86_64-pc-linux-gnu.
> > 
> > Thanks for looking at this.
> > 
> > How about a third option: can the structs be explicitly marked as
> > being
> > packed, by adding __attribute__((__packed__)) to the various
> > structs? 
> > The tests are all about detecting problems with padding bits, and
> > presumably we can have padding bits on all targets if we explicitly
> > ask
> > for them.
> > 
> > Does that make for a simpler patch?
> 
> Did I get you right: making the layout the same for all
> targets, by -for all structs that in my patch needed
> different layout- marking them with
> __attribute__((__packed__)) and adjust numbers in warnings?
> 
> That doesn't seem like a good idea.  At a glance the
> *testcode* will be simpler, but the patch will be slightly
> larger and have a lot of "-" lines instead of "+" lines, as
> the patch cause a lot of warnings to be dropped: you'll test
> for absence of warnings instead of proper warnings.
> 
> Looks like you'll lose 24 of the padding tests; 30 lines
> where I added "target ! default_packed" and 6 where I added
> "target default_packed".
> 
> Perhaps I misunderstood?

No, I think I'm misunderstanding the problem; sorry.

I think I prefer the top one-liner dg-skip-if approach you mentioned in
your original email; it seems simplest.

Thanks
Dave




> 
> brgds, H-P
> 
> 
> > Dave
> > 
> > > 
> > > Beware that some of the tests have lines with trailing
> > > whitespace.  Where lines are changed in this patch, the
> > > trailing whitespace is removed.
> > 
> > 
> > > 
> > > Ok to commit?
> > > 
> > > -- >8 --
> > > It's a judgement call whether to just skip some of these
> > > tests rather than trying to match messages depending on the
> > > layout of structures, but better include than exclude.
> > > 
> > > * gcc.dg/plugin/infoleak-2.c,
> > > gcc.dg/plugin/infoleak-CVE-2011-1078-1.c,
> > > gcc.dg/plugin/infoleak-CVE-2011-1078-2.c,
> > > gcc.dg/plugin/infoleak-CVE-2017-18549-1.c,
> > > gcc.dg/plugin/infoleak-CVE-2017-18550-1.c,
> > > gcc.dg/plugin/infoleak-antipatterns-1.c,
> > > gcc.dg/plugin/infoleak-fixit-1.c: Handle default_packed
> > > targets.
> > > ---
> > >  gcc/testsuite/gcc.dg/plugin/infoleak-2.c    | 13
> > > ---
> > > --
> > >  .../gcc.dg/plugin/infoleak-CVE-2011-1078-1.c    | 10 +--
> > > ---
> > >  .../gcc.dg/plugin/infoleak-CVE-2011-1078-2.c    | 10 +--
> > > ---
> > >  .../gcc.dg/plugin/infoleak-CVE-2017-18549-1.c   | 10 +--
> > > ---
> > >  .../gcc.dg/plugin/infoleak-CVE-2017-18550-1.c   |  7 ---
> > >  .../gcc.dg/plugin/infoleak-antipatterns-1.c | 10 +--
> > > ---
> > >  gcc/testsuite/gcc.dg/plugin/infoleak-fixit-1.c  | 10 ++-
> > > ---
> > >  7 files changed, 38 insertions(+), 32 deletions(-)
> > > 
> > > diff --git a/gcc/testsuite/gcc.dg/plugin/infoleak-2.c
> > > b/gcc/testsuite/gcc.dg/plugin/infoleak-2.c
> > > index 252f8f25918a..4ba484b3c6be 100644
> > > --- a/gcc/testsuite/gcc.dg/plugin/infoleak-2.c
> > > +++ b/gcc/testsuite/gcc.dg/plugin/infoleak-2.c
> > > @@ -18,16 +18,19 @@ struct st
> > >    int b:1; /* { dg-message "field 'b' is uninitialized \\(1
> > > bit\\)"
> > > "field" } */
> > >     /* { dg-message "padding after field 'b' is
> > > uninitialized
> > > \\(7 bits\\)" "padding" { target *-*-* } .-1 } */
> > >    u8 d;    /* { dg-message "field 'd' is uninitialized \\(1
> > > byte\\)"
> > > } */
> > > -  int c:7; /* { dg-message "padding after field 'c' is
> > > uninitialized
> > > \\(9 bits\\)" } */
> > > -  u16 e;   /* { dg-message "padding after field 'e' is
> > > uninitialized
> > > \\(2 bytes\\)" } */  
> > > +  int c:7; /* { dg-message "padding after field 'c' is
> > > uninitialized
> > > \\(9 bits\\)" "padding" { target { ! default_packed } } } */
> > > +   /* { dg-message "padding after field 'c' is
> > > uninitialized
> > > \\(1 bit\\)" "padding" { target default_packed } .-1 } */
> > > +  u16 e;   /* { dg-message "padding after field 'e' is
> > > uninitialized
> > > \\(2 bytes\\)" "padding" { target { ! default_packed } } } */
> > >  };
> > >  
> > >  void test (void __user *dst, u16 v)
> > >  {
> > >    struct st s; /* { dg-message "region created on stack here"
> > > "where" } */
> > > -  /* { 

Re: Should -ffp-contract=off the default on GCC?

2023-03-16 Thread Qing Zhao via Gcc-patches


> On Mar 16, 2023, at 12:53 PM, Jakub Jelinek  wrote:
> 
> On Thu, Mar 16, 2023 at 04:38:41PM +, Qing Zhao via Gcc-patches wrote:
>>> NO. We have this debate every few years and such.
>> 
>> So, what’s the major reason we keep the default  that is not IEEE754 
>> compliant from the beginning? 
> 
> It is compliant.  fusedMultiplyAdd is a standard IEEE 754 operation, and
> C explicitly allows contractions.

Okay, thanks for the info.

Yes, looks like that GCC’s default behavior is correct. -:)

Qing
> 
>   Jakub
> 



Re: [PATCH] testsuite: Handle default_packed targets in gcc.dg/plugin

2023-03-16 Thread Hans-Peter Nilsson via Gcc-patches
> From: David Malcolm 
> Date: Thu, 16 Mar 2023 13:55:48 -0400

> On Thu, 2023-03-09 at 19:56 +0100, Hans-Peter Nilsson wrote:
> > It's not obvious to me whether considered best to include or
> > exclude these tests that depend on structure layout details.
> > If excluding, the obvious alternative to this patch is then
> > to add a top one-liner (to dg-skip-if the test for
> > default_packed targets or a similar excluding expression).
> > I'm fine either way, just suggesting the following, which
> > handles the cris-elf test-case failures I see for these
> > tests, and causes no change in results for native
> > x86_64-pc-linux-gnu.
> 
> Thanks for looking at this.
> 
> How about a third option: can the structs be explicitly marked as being
> packed, by adding __attribute__((__packed__)) to the various structs? 
> The tests are all about detecting problems with padding bits, and
> presumably we can have padding bits on all targets if we explicitly ask
> for them.
> 
> Does that make for a simpler patch?

Did I get you right: making the layout the same for all
targets, by -for all structs that in my patch needed
different layout- marking them with
__attribute__((__packed__)) and adjust numbers in warnings?

That doesn't seem like a good idea.  At a glance the
*testcode* will be simpler, but the patch will be slightly
larger and have a lot of "-" lines instead of "+" lines, as
the patch cause a lot of warnings to be dropped: you'll test
for absence of warnings instead of proper warnings.

Looks like you'll lose 24 of the padding tests; 30 lines
where I added "target ! default_packed" and 6 where I added
"target default_packed".

Perhaps I misunderstood?

brgds, H-P


> Dave
> 
> > 
> > Beware that some of the tests have lines with trailing
> > whitespace.  Where lines are changed in this patch, the
> > trailing whitespace is removed.
> 
> 
> > 
> > Ok to commit?
> > 
> > -- >8 --
> > It's a judgement call whether to just skip some of these
> > tests rather than trying to match messages depending on the
> > layout of structures, but better include than exclude.
> > 
> > * gcc.dg/plugin/infoleak-2.c,
> > gcc.dg/plugin/infoleak-CVE-2011-1078-1.c,
> > gcc.dg/plugin/infoleak-CVE-2011-1078-2.c,
> > gcc.dg/plugin/infoleak-CVE-2017-18549-1.c,
> > gcc.dg/plugin/infoleak-CVE-2017-18550-1.c,
> > gcc.dg/plugin/infoleak-antipatterns-1.c,
> > gcc.dg/plugin/infoleak-fixit-1.c: Handle default_packed
> > targets.
> > ---
> >  gcc/testsuite/gcc.dg/plugin/infoleak-2.c    | 13 ---
> > --
> >  .../gcc.dg/plugin/infoleak-CVE-2011-1078-1.c    | 10 +-
> >  .../gcc.dg/plugin/infoleak-CVE-2011-1078-2.c    | 10 +-
> >  .../gcc.dg/plugin/infoleak-CVE-2017-18549-1.c   | 10 +-
> >  .../gcc.dg/plugin/infoleak-CVE-2017-18550-1.c   |  7 ---
> >  .../gcc.dg/plugin/infoleak-antipatterns-1.c | 10 +-
> >  gcc/testsuite/gcc.dg/plugin/infoleak-fixit-1.c  | 10 ++
> >  7 files changed, 38 insertions(+), 32 deletions(-)
> > 
> > diff --git a/gcc/testsuite/gcc.dg/plugin/infoleak-2.c
> > b/gcc/testsuite/gcc.dg/plugin/infoleak-2.c
> > index 252f8f25918a..4ba484b3c6be 100644
> > --- a/gcc/testsuite/gcc.dg/plugin/infoleak-2.c
> > +++ b/gcc/testsuite/gcc.dg/plugin/infoleak-2.c
> > @@ -18,16 +18,19 @@ struct st
> >    int b:1; /* { dg-message "field 'b' is uninitialized \\(1 bit\\)"
> > "field" } */
> >     /* { dg-message "padding after field 'b' is uninitialized
> > \\(7 bits\\)" "padding" { target *-*-* } .-1 } */
> >    u8 d;    /* { dg-message "field 'd' is uninitialized \\(1 byte\\)"
> > } */
> > -  int c:7; /* { dg-message "padding after field 'c' is uninitialized
> > \\(9 bits\\)" } */
> > -  u16 e;   /* { dg-message "padding after field 'e' is uninitialized
> > \\(2 bytes\\)" } */  
> > +  int c:7; /* { dg-message "padding after field 'c' is uninitialized
> > \\(9 bits\\)" "padding" { target { ! default_packed } } } */
> > +   /* { dg-message "padding after field 'c' is uninitialized
> > \\(1 bit\\)" "padding" { target default_packed } .-1 } */
> > +  u16 e;   /* { dg-message "padding after field 'e' is uninitialized
> > \\(2 bytes\\)" "padding" { target { ! default_packed } } } */
> >  };
> >  
> >  void test (void __user *dst, u16 v)
> >  {
> >    struct st s; /* { dg-message "region created on stack here"
> > "where" } */
> > -  /* { dg-message "capacity: 12 bytes" "capacity" { target *-*-* }
> > .-1 } */
> > -  /* { dg-message "suggest forcing zero-initialization by providing
> > a '\\{0\\}' initializer" "fix-it" { target *-*-* } .-2 } */  
> > +  /* { dg-message "capacity: 12 bytes" "capacity" { target { !
> > default_packed } } .-1 } */
> > +  /* { dg-message "capacity: 9 bytes" "capacity" { target
> > default_packed } .-2 } */
> > +  /* { dg-message "suggest forcing zero-initialization by providing
> > a '\\{0\\}' initializer" "fix-it" { target *-*-* } .-3 } */
> >    s.e = v;
> >    

Re: [PATCH] libatomic: Fix SEQ_CST 128-bit atomic load [PR108891]

2023-03-16 Thread Wilco Dijkstra via Gcc-patches
ping


From: Wilco Dijkstra
Sent: 23 February 2023 15:11
To: GCC Patches 
Cc: Richard Sandiford ; Kyrylo Tkachov 

Subject: [PATCH] libatomic: Fix SEQ_CST 128-bit atomic load [PR108891] 
 

The LSE2 ifunc for 16-byte atomic load requires a barrier before the LDP -
without it, it effectively has Load-AcquirePC semantics similar to LDAPR,
which is less restrictive than what __ATOMIC_SEQ_CST requires.  This patch
fixes this and adds comments to make it easier to see which sequence is
used for each case. Use a load/store exclusive loop for store to simplify
testing memory ordering is correct (it is slightly faster too). 

Passes regress, OK for commit?

libatomic/
    PR libgcc/108891
    config/linux/aarch64/atomic_16.S: Fix libat_load_16_i1.
    Add comments describing the memory order.

---

diff --git a/libatomic/config/linux/aarch64/atomic_16.S 
b/libatomic/config/linux/aarch64/atomic_16.S
index 
732c3534a06678664a252bdbc53652eeab0af506..05439ce394b9653c9bcb582761ff7aaa7c8f9643
 100644
--- a/libatomic/config/linux/aarch64/atomic_16.S
+++ b/libatomic/config/linux/aarch64/atomic_16.S
@@ -72,33 +72,38 @@ name:   \
 
 ENTRY (libat_load_16_i1)
 cbnz    w1, 1f
+
+   /* RELAXED.  */
 ldp res0, res1, [x0]
 ret
 1:
-   cmp w1, ACQUIRE
-   b.hi    2f
+   cmp w1, SEQ_CST
+   b.eq    2f
+
+   /* ACQUIRE/CONSUME (Load-AcquirePC semantics).  */
 ldp res0, res1, [x0]
 dmb ishld
 ret
-2:
+
+   /* SEQ_CST.  */
+2: ldar    tmp0, [x0]  /* Block reordering with Store-Release instr.  
*/
 ldp res0, res1, [x0]
-   dmb ish
+   dmb ishld
 ret
 END (libat_load_16_i1)
 
 
 ENTRY (libat_store_16_i1)
 cbnz    w4, 1f
+
+   /* RELAXED.  */
 stp in0, in1, [x0]
 ret
-1:
-   dmb ish
-   stp in0, in1, [x0]
-   cmp w4, SEQ_CST
-   beq 2f
-   ret
-2:
-   dmb ish
+
+   /* RELEASE/SEQ_CST.  */
+1: ldaxp   xzr, tmp0, [x0]
+   stlxp   w4, in0, in1, [x0]
+   cbnz    w4, 1b
 ret
 END (libat_store_16_i1)
 
@@ -106,29 +111,33 @@ END (libat_store_16_i1)
 ENTRY (libat_exchange_16_i1)
 mov x5, x0
 cbnz    w4, 2f
-1:
-   ldxp    res0, res1, [x5]
+
+   /* RELAXED.  */
+1: ldxp    res0, res1, [x5]
 stxp    w4, in0, in1, [x5]
 cbnz    w4, 1b
 ret
 2:
 cmp w4, ACQUIRE
 b.hi    4f
-3:
-   ldaxp   res0, res1, [x5]
+
+   /* ACQUIRE/CONSUME.  */
+3: ldaxp   res0, res1, [x5]
 stxp    w4, in0, in1, [x5]
 cbnz    w4, 3b
 ret
 4:
 cmp w4, RELEASE
 b.ne    6f
-5:
-   ldxp    res0, res1, [x5]
+
+   /* RELEASE.  */
+5: ldxp    res0, res1, [x5]
 stlxp   w4, in0, in1, [x5]
 cbnz    w4, 5b
 ret
-6:
-   ldaxp   res0, res1, [x5]
+
+   /* ACQ_REL/SEQ_CST.  */
+6: ldaxp   res0, res1, [x5]
 stlxp   w4, in0, in1, [x5]
 cbnz    w4, 6b
 ret
@@ -142,6 +151,8 @@ ENTRY (libat_compare_exchange_16_i1)
 cbz w4, 2f
 cmp w4, RELEASE
 b.hs    3f
+
+   /* ACQUIRE/CONSUME.  */
 caspa   exp0, exp1, in0, in1, [x0]
 0:
 cmp exp0, tmp0
@@ -153,15 +164,18 @@ ENTRY (libat_compare_exchange_16_i1)
 stp exp0, exp1, [x1]
 mov x0, 0
 ret
-2:
-   casp    exp0, exp1, in0, in1, [x0]
+
+   /* RELAXED.  */
+2: casp    exp0, exp1, in0, in1, [x0]
 b   0b
-3:
-   b.hi    4f
+
+   /* RELEASE.  */
+3: b.hi    4f
 caspl   exp0, exp1, in0, in1, [x0]
 b   0b
-4:
-   caspal  exp0, exp1, in0, in1, [x0]
+
+   /* ACQ_REL/SEQ_CST.  */
+4: caspal  exp0, exp1, in0, in1, [x0]
 b   0b
 END (libat_compare_exchange_16_i1)
 
@@ -169,15 +183,17 @@ END (libat_compare_exchange_16_i1)
 ENTRY (libat_fetch_add_16_i1)
 mov x5, x0
 cbnz    w4, 2f
-1:
-   ldxp    res0, res1, [x5]
+
+   /* RELAXED.  */
+1: ldxp    res0, res1, [x5]
 adds    tmplo, reslo, inlo
 adc tmphi, reshi, inhi
 stxp    w4, tmp0, tmp1, [x5]
 cbnz    w4, 1b
 ret
-2:
-   ldaxp   res0, res1, [x5]
+
+   /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST.  */
+2: ldaxp   res0, res1, [x5]
 adds    tmplo, reslo, inlo
 adc tmphi, reshi, inhi
 stlxp   w4, tmp0, tmp1, [x5]
@@ -189,15 +205,17 @@ END (libat_fetch_add_16_i1)
 ENTRY (libat_add_fetch_16_i1)
 mov x5, x0
 cbnz    w4, 2f
-1:
-   ldxp    res0, res1, [x5]
+
+   /* RELAXED.  */
+1: ldxp    res0, res1, [x5]
 adds    reslo, reslo, inlo
 adc reshi, reshi, inhi
 stxp    w4, res0, res1, [x5]
 cbnz    w4, 1b
 ret
-2:
-   ldaxp   res0, res1, [x5]
+
+   /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. 

Re: [PATCH] testsuite: Handle default_packed targets in gcc.dg/plugin

2023-03-16 Thread David Malcolm via Gcc-patches
On Thu, 2023-03-09 at 19:56 +0100, Hans-Peter Nilsson wrote:
> It's not obvious to me whether considered best to include or
> exclude these tests that depend on structure layout details.
> If excluding, the obvious alternative to this patch is then
> to add a top one-liner (to dg-skip-if the test for
> default_packed targets or a similar excluding expression).
> I'm fine either way, just suggesting the following, which
> handles the cris-elf test-case failures I see for these
> tests, and causes no change in results for native
> x86_64-pc-linux-gnu.

Thanks for looking at this.

How about a third option: can the structs be explicitly marked as being
packed, by adding __attribute__((__packed__)) to the various structs? 
The tests are all about detecting problems with padding bits, and
presumably we can have padding bits on all targets if we explicitly ask
for them.

Does that make for a simpler patch?
Dave

> 
> Beware that some of the tests have lines with trailing
> whitespace.  Where lines are changed in this patch, the
> trailing whitespace is removed.


> 
> Ok to commit?
> 
> -- >8 --
> It's a judgement call whether to just skip some of these
> tests rather than trying to match messages depending on the
> layout of structures, but better include than exclude.
> 
> * gcc.dg/plugin/infoleak-2.c,
> gcc.dg/plugin/infoleak-CVE-2011-1078-1.c,
> gcc.dg/plugin/infoleak-CVE-2011-1078-2.c,
> gcc.dg/plugin/infoleak-CVE-2017-18549-1.c,
> gcc.dg/plugin/infoleak-CVE-2017-18550-1.c,
> gcc.dg/plugin/infoleak-antipatterns-1.c,
> gcc.dg/plugin/infoleak-fixit-1.c: Handle default_packed
> targets.
> ---
>  gcc/testsuite/gcc.dg/plugin/infoleak-2.c    | 13 ---
> --
>  .../gcc.dg/plugin/infoleak-CVE-2011-1078-1.c    | 10 +-
>  .../gcc.dg/plugin/infoleak-CVE-2011-1078-2.c    | 10 +-
>  .../gcc.dg/plugin/infoleak-CVE-2017-18549-1.c   | 10 +-
>  .../gcc.dg/plugin/infoleak-CVE-2017-18550-1.c   |  7 ---
>  .../gcc.dg/plugin/infoleak-antipatterns-1.c | 10 +-
>  gcc/testsuite/gcc.dg/plugin/infoleak-fixit-1.c  | 10 ++
>  7 files changed, 38 insertions(+), 32 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/plugin/infoleak-2.c
> b/gcc/testsuite/gcc.dg/plugin/infoleak-2.c
> index 252f8f25918a..4ba484b3c6be 100644
> --- a/gcc/testsuite/gcc.dg/plugin/infoleak-2.c
> +++ b/gcc/testsuite/gcc.dg/plugin/infoleak-2.c
> @@ -18,16 +18,19 @@ struct st
>    int b:1; /* { dg-message "field 'b' is uninitialized \\(1 bit\\)"
> "field" } */
>     /* { dg-message "padding after field 'b' is uninitialized
> \\(7 bits\\)" "padding" { target *-*-* } .-1 } */
>    u8 d;    /* { dg-message "field 'd' is uninitialized \\(1 byte\\)"
> } */
> -  int c:7; /* { dg-message "padding after field 'c' is uninitialized
> \\(9 bits\\)" } */
> -  u16 e;   /* { dg-message "padding after field 'e' is uninitialized
> \\(2 bytes\\)" } */  
> +  int c:7; /* { dg-message "padding after field 'c' is uninitialized
> \\(9 bits\\)" "padding" { target { ! default_packed } } } */
> +   /* { dg-message "padding after field 'c' is uninitialized
> \\(1 bit\\)" "padding" { target default_packed } .-1 } */
> +  u16 e;   /* { dg-message "padding after field 'e' is uninitialized
> \\(2 bytes\\)" "padding" { target { ! default_packed } } } */
>  };
>  
>  void test (void __user *dst, u16 v)
>  {
>    struct st s; /* { dg-message "region created on stack here"
> "where" } */
> -  /* { dg-message "capacity: 12 bytes" "capacity" { target *-*-* }
> .-1 } */
> -  /* { dg-message "suggest forcing zero-initialization by providing
> a '\\{0\\}' initializer" "fix-it" { target *-*-* } .-2 } */  
> +  /* { dg-message "capacity: 12 bytes" "capacity" { target { !
> default_packed } } .-1 } */
> +  /* { dg-message "capacity: 9 bytes" "capacity" { target
> default_packed } .-2 } */
> +  /* { dg-message "suggest forcing zero-initialization by providing
> a '\\{0\\}' initializer" "fix-it" { target *-*-* } .-3 } */
>    s.e = v;
>    copy_to_user(dst, , sizeof (struct st)); /* { dg-warning
> "potential exposure of sensitive information by copying uninitialized
> data from stack" "warning" } */
> -  /* { dg-message "10 bytes are uninitialized" "note how much" {
> target *-*-* } .-1 } */
> +  /* { dg-message "10 bytes are uninitialized" "note how much" {
> target { ! default_packed } } .-1 } */
> +  /* { dg-message "7 bytes are uninitialized" "note how much" {
> target default_packed } .-2 } */
>  }
> diff --git a/gcc/testsuite/gcc.dg/plugin/infoleak-CVE-2011-1078-1.c
> b/gcc/testsuite/gcc.dg/plugin/infoleak-CVE-2011-1078-1.c
> index 3616fbe176b3..9269b911b22f 100644
> --- a/gcc/testsuite/gcc.dg/plugin/infoleak-CVE-2011-1078-1.c
> +++ b/gcc/testsuite/gcc.dg/plugin/infoleak-CVE-2011-1078-1.c
> @@ -51,7 +51,7 @@ struct socket {
>  
>  struct sco_conninfo {
> __u16 hci_handle;
> -   __u8  dev_class[3]; /* { dg-message "padding after 

Re: [PATCH] c++: ICE with diagnosed constraint recursion [PR100288]

2023-03-16 Thread Jason Merrill via Gcc-patches

On 3/16/23 12:48, Patrick Palka wrote:

When satisfaction_cache::get detects constraint recursion, it asserts
that entry->result is empty.  This makes sense when we're initially
detecting/diagnosing recursion from the inner recursive call, but
aftewards from the outer recursive call the recursion error is treated
like any other SFINAE error encountered during satisfaction, and we set
entry->result to whatever the satisfaction value ended up being.

Perhaps we should keep entry->result cleared in this case, but that'd
require the inner recursive call to communicate to the outer recursive
call that constraint recursion occurred, likely via setting entry->result
to some sentinel value, which doesn't seem to be worth the complexity.
So this patch just relaxes the problematic assert to accept non-empty
entry->result as long as we've already issued an error.

Tested on x86_64-pc-linux-gnu, does this look OK for trunk?  Backports
seems unnecessary as the problematic assert is a checking assert.


OK.


PR c++/100288

gcc/cp/ChangeLog:

* constraint.cc (satisfaction_cache::get): Relax overly strict
checking assert in the constraint recursion case.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-recursive-sat5.C: New test.
---
  gcc/cp/constraint.cc|  2 +-
  .../g++.dg/cpp2a/concepts-recursive-sat5.C  | 13 +
  2 files changed, 14 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-recursive-sat5.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index a28c85178fe..273d15ab097 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -2705,7 +2705,7 @@ satisfaction_cache::get ()
if (entry->evaluating)
  {
/* If we get here, it means satisfaction is self-recursive.  */
-  gcc_checking_assert (!entry->result);
+  gcc_checking_assert (!entry->result || seen_error ());
if (info.noisy ())
error_at (EXPR_LOCATION (ATOMIC_CONSTR_EXPR (entry->atom)),
  "satisfaction of atomic constraint %qE depends on itself",
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-recursive-sat5.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-recursive-sat5.C
new file mode 100644
index 000..b7a02815db9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-recursive-sat5.C
@@ -0,0 +1,13 @@
+// PR c++/100288
+// { dg-do compile { target c++20 } }
+
+class A { };
+
+template  concept pipeable = requires(A a, T t) { a | t; }; // { dg-error 
"depends on itself" }
+
+template  void operator|(A, T);
+
+void f(A tab) {
+  tab | 1; // { dg-error "no match" }
+  tab | 1; // { dg-error "no match" }
+}




Re: Should -ffp-contract=off the default on GCC?

2023-03-16 Thread Jakub Jelinek via Gcc-patches
On Thu, Mar 16, 2023 at 04:38:41PM +, Qing Zhao via Gcc-patches wrote:
> > NO. We have this debate every few years and such.
> 
> So, what’s the major reason we keep the default  that is not IEEE754 
> compliant from the beginning? 

It is compliant.  fusedMultiplyAdd is a standard IEEE 754 operation, and
C explicitly allows contractions.

Jakub



[PATCH] c++: ICE with diagnosed constraint recursion [PR100288]

2023-03-16 Thread Patrick Palka via Gcc-patches
When satisfaction_cache::get detects constraint recursion, it asserts
that entry->result is empty.  This makes sense when we're initially
detecting/diagnosing recursion from the inner recursive call, but
aftewards from the outer recursive call the recursion error is treated
like any other SFINAE error encountered during satisfaction, and we set
entry->result to whatever the satisfaction value ended up being.

Perhaps we should keep entry->result cleared in this case, but that'd
require the inner recursive call to communicate to the outer recursive
call that constraint recursion occurred, likely via setting entry->result
to some sentinel value, which doesn't seem to be worth the complexity.
So this patch just relaxes the problematic assert to accept non-empty
entry->result as long as we've already issued an error.

Tested on x86_64-pc-linux-gnu, does this look OK for trunk?  Backports
seems unnecessary as the problematic assert is a checking assert.

PR c++/100288

gcc/cp/ChangeLog:

* constraint.cc (satisfaction_cache::get): Relax overly strict
checking assert in the constraint recursion case.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-recursive-sat5.C: New test.
---
 gcc/cp/constraint.cc|  2 +-
 .../g++.dg/cpp2a/concepts-recursive-sat5.C  | 13 +
 2 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-recursive-sat5.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index a28c85178fe..273d15ab097 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -2705,7 +2705,7 @@ satisfaction_cache::get ()
   if (entry->evaluating)
 {
   /* If we get here, it means satisfaction is self-recursive.  */
-  gcc_checking_assert (!entry->result);
+  gcc_checking_assert (!entry->result || seen_error ());
   if (info.noisy ())
error_at (EXPR_LOCATION (ATOMIC_CONSTR_EXPR (entry->atom)),
  "satisfaction of atomic constraint %qE depends on itself",
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-recursive-sat5.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-recursive-sat5.C
new file mode 100644
index 000..b7a02815db9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-recursive-sat5.C
@@ -0,0 +1,13 @@
+// PR c++/100288
+// { dg-do compile { target c++20 } }
+
+class A { };
+
+template  concept pipeable = requires(A a, T t) { a | t; }; // { 
dg-error "depends on itself" }
+
+template  void operator|(A, T);
+
+void f(A tab) {
+  tab | 1; // { dg-error "no match" }
+  tab | 1; // { dg-error "no match" }
+}
-- 
2.40.0



Re: [PATCH v1] [RFC] Improve folding for comparisons with zero in tree-ssa-forwprop.

2023-03-16 Thread Jeff Law via Gcc-patches




On 3/16/23 09:27, Manolis Tsamis wrote:

For this C testcase:

void g();
void f(unsigned int *a)
{
   if (++*a == 1)
 g();
}

GCC will currently emit a comparison with 1 by using the value
of *a after the increment. This can be improved by comparing
against 0 and using the value before the increment. As a result
there is a potentially shorter dependancy chain (no need to wait
for the result of +1) and on targets with compare zero instructions
the generated code is one instruction shorter.

Example from Aarch64:

Before
 ldr w1, [x0]
 add w1, w1, 1
 str w1, [x0]
 cmp w1, 1
 beq .L4
 ret

After
 ldr w1, [x0]
 add w2, w1, 1
 str w2, [x0]
 cbz w1, .L4
 ret

gcc/ChangeLog:

 * tree-ssa-forwprop.cc (combine_cond_expr_cond):
 (forward_propagate_into_comparison_1): Optimize
 for zero comparisons.
Deferring to gcc-14.  Though I'm generally supportive of normalizing to 
a comparison against zero when we safely can :-)


jeff


Re: Should -ffp-contract=off the default on GCC?

2023-03-16 Thread Qing Zhao via Gcc-patches


> On Mar 16, 2023, at 12:31 PM, Andrew Pinski  wrote:
> 
> On Thu, Mar 16, 2023 at 9:25 AM Qing Zhao via Gcc-patches
>  wrote:
>> 
>> Hi,
>> 
>> Recently, we discovered some floating point precision diffs when using GCC8 
>> to build our
>> application on arm64: After some investigation, it turns out that this is 
>> due to the
>> -ffp-contract=fast option that is on by default. Therefore, we have to 
>> explicitly add
>> -ffp-contract=off and do a full-rebuild.
>> 
>> GCC by default turns -ffp-contract=fast on.
>> https://gcc.gnu.org/onlinedocs/gcc-8.5.0/gcc/Optimize-Options.html#Optimize-Options
>> https://gcc.gnu.org/onlinedocs/gcc-12.2.0/gcc/Optimize-Options.html#Optimize-Options
>> 
>> "
>> -ffp-contract=style
>> -ffp-contract=off disables floating-point expression contraction. 
>> -ffp-contract=fast enables
>> floating-point expression contraction such as forming of fused multiply-add 
>> operations if
>> the target has native support for them. -ffp-contract=on enables 
>> floating-point expression
>> contraction if allowed by the language standard. This is currently not 
>> implemented and
>> treated equal to -ffp-contract=off.
>> 
>> The default is -ffp-contract=fast.
>> "
>> 
>> This can be shown by a small example for arm64 with gcc8.5 in 
>> https://godbolt.org/z/MxYfnG8TE.
>> Only when adding -std=c89 explicitly, this transformaton is off.
>> 
>> another exmaple also shows that Clang and MSVC only allow this 
>> transformation when speifiying
>> ffast-math and fp:fast:  https://godbolt.org/z/o54bYfPbP
>> 
>> When searching online, we found that there were similar discussions recently 
>> on the exact same issue:
>> https://github.com/dotnet/runtime/issues/64604
>> https://github.com/dotnet/runtime/issues/64591
>> 
>> a summary of these discussions is:
>> 
>> 1. "fmadd" is a fused operation and will return a different result for many 
>> inputs;
>> 2. therefore, -ffp-contract=fast is not a safe optimization to be on by 
>> default;
>> 3. Clang and MSVC only allow this when specifying ffast-math and fp:fast 
>> since this is not an
>>  IEEE754 compliant optimization;
>> 4. The reasons why GCC turns on this option by default are:
>>  A. GNU C language spec allows such transformation.
>>  B. this did not expose real problem for most X86/X64 apps previously since 
>> FMA instructions
>> didn't exist until 2013 when the FMA3 instruction set was added, and 
>> also these instructions
>> were not always available..
>> 5. Arm64 has fused multiply-add instructions as "baseline" and are always 
>> available. therefore
>>  -ffp-contract=fast exposed more serious problems on Arm64 platforms.
> 
> This summary ignores x87 and even ignores PowerPC in GCC having FMA
> even before clang/LLVM was around.

Okay. 
> 
>> 
>> our major question:
>> 
>> Should GCC turn off -ffp-contract=fast by default since it's not IEEE754 
>> compliant and more
>>  modern processors have the FMA instructions available by default?
> 
> 
> NO. We have this debate every few years and such.

So, what’s the major reason we keep the default  that is not IEEE754 compliant 
from the beginning? 

thanks.

Qing
> 
> Thanks,
> Andrew Pinski
> 
>> 
>> Thanks.
>> 
>> Qing



Re: Should -ffp-contract=off the default on GCC?

2023-03-16 Thread Andrew Pinski via Gcc-patches
On Thu, Mar 16, 2023 at 9:25 AM Qing Zhao via Gcc-patches
 wrote:
>
> Hi,
>
> Recently, we discovered some floating point precision diffs when using GCC8 
> to build our
> application on arm64: After some investigation, it turns out that this is due 
> to the
> -ffp-contract=fast option that is on by default. Therefore, we have to 
> explicitly add
> -ffp-contract=off and do a full-rebuild.
>
> GCC by default turns -ffp-contract=fast on.
> https://gcc.gnu.org/onlinedocs/gcc-8.5.0/gcc/Optimize-Options.html#Optimize-Options
> https://gcc.gnu.org/onlinedocs/gcc-12.2.0/gcc/Optimize-Options.html#Optimize-Options
>
> "
> -ffp-contract=style
> -ffp-contract=off disables floating-point expression contraction. 
> -ffp-contract=fast enables
> floating-point expression contraction such as forming of fused multiply-add 
> operations if
> the target has native support for them. -ffp-contract=on enables 
> floating-point expression
> contraction if allowed by the language standard. This is currently not 
> implemented and
> treated equal to -ffp-contract=off.
>
> The default is -ffp-contract=fast.
> "
>
> This can be shown by a small example for arm64 with gcc8.5 in 
> https://godbolt.org/z/MxYfnG8TE.
> Only when adding -std=c89 explicitly, this transformaton is off.
>
> another exmaple also shows that Clang and MSVC only allow this transformation 
> when speifiying
> ffast-math and fp:fast:  https://godbolt.org/z/o54bYfPbP
>
> When searching online, we found that there were similar discussions recently 
> on the exact same issue:
> https://github.com/dotnet/runtime/issues/64604
> https://github.com/dotnet/runtime/issues/64591
>
> a summary of these discussions is:
>
> 1. "fmadd" is a fused operation and will return a different result for many 
> inputs;
> 2. therefore, -ffp-contract=fast is not a safe optimization to be on by 
> default;
> 3. Clang and MSVC only allow this when specifying ffast-math and fp:fast 
> since this is not an
>   IEEE754 compliant optimization;
> 4. The reasons why GCC turns on this option by default are:
>   A. GNU C language spec allows such transformation.
>   B. this did not expose real problem for most X86/X64 apps previously since 
> FMA instructions
>  didn't exist until 2013 when the FMA3 instruction set was added, and 
> also these instructions
>  were not always available..
> 5. Arm64 has fused multiply-add instructions as "baseline" and are always 
> available. therefore
>   -ffp-contract=fast exposed more serious problems on Arm64 platforms.

This summary ignores x87 and even ignores PowerPC in GCC having FMA
even before clang/LLVM was around.

>
> our major question:
>
> Should GCC turn off -ffp-contract=fast by default since it's not IEEE754 
> compliant and more
>   modern processors have the FMA instructions available by default?


NO. We have this debate every few years and such.

Thanks,
Andrew Pinski

>
> Thanks.
>
> Qing


Should -ffp-contract=off the default on GCC?

2023-03-16 Thread Qing Zhao via Gcc-patches
Hi,

Recently, we discovered some floating point precision diffs when using GCC8 to 
build our 
application on arm64: After some investigation, it turns out that this is due 
to the 
-ffp-contract=fast option that is on by default. Therefore, we have to 
explicitly add 
-ffp-contract=off and do a full-rebuild.  

GCC by default turns -ffp-contract=fast on.
https://gcc.gnu.org/onlinedocs/gcc-8.5.0/gcc/Optimize-Options.html#Optimize-Options
https://gcc.gnu.org/onlinedocs/gcc-12.2.0/gcc/Optimize-Options.html#Optimize-Options

"
-ffp-contract=style
-ffp-contract=off disables floating-point expression contraction. 
-ffp-contract=fast enables
floating-point expression contraction such as forming of fused multiply-add 
operations if 
the target has native support for them. -ffp-contract=on enables floating-point 
expression
contraction if allowed by the language standard. This is currently not 
implemented and 
treated equal to -ffp-contract=off.

The default is -ffp-contract=fast.
"

This can be shown by a small example for arm64 with gcc8.5 in 
https://godbolt.org/z/MxYfnG8TE. 
Only when adding -std=c89 explicitly, this transformaton is off.

another exmaple also shows that Clang and MSVC only allow this transformation 
when speifiying
ffast-math and fp:fast:  https://godbolt.org/z/o54bYfPbP 

When searching online, we found that there were similar discussions recently on 
the exact same issue:
https://github.com/dotnet/runtime/issues/64604
https://github.com/dotnet/runtime/issues/64591

a summary of these discussions is:

1. "fmadd" is a fused operation and will return a different result for many 
inputs;  
2. therefore, -ffp-contract=fast is not a safe optimization to be on by default;
3. Clang and MSVC only allow this when specifying ffast-math and fp:fast since 
this is not an
  IEEE754 compliant optimization;
4. The reasons why GCC turns on this option by default are:
  A. GNU C language spec allows such transformation. 
  B. this did not expose real problem for most X86/X64 apps previously since 
FMA instructions
 didn't exist until 2013 when the FMA3 instruction set was added, and also 
these instructions
 were not always available.. 
5. Arm64 has fused multiply-add instructions as "baseline" and are always 
available. therefore
  -ffp-contract=fast exposed more serious problems on Arm64 platforms.  

our major question:

Should GCC turn off -ffp-contract=fast by default since it's not IEEE754 
compliant and more
  modern processors have the FMA instructions available by default?

Thanks.

Qing

Re: [PATCH] c++: noexcept and copy elision [PR109030]

2023-03-16 Thread Jason Merrill via Gcc-patches

On 3/16/23 11:48, Patrick Palka wrote:

On Thu, 16 Mar 2023, Jason Merrill wrote:


On 3/16/23 10:09, Patrick Palka wrote:

On Wed, 15 Mar 2023, Patrick Palka wrote:


On Thu, 9 Mar 2023, Jason Merrill wrote:


On 3/9/23 14:32, Patrick Palka wrote:

On Mon, 6 Mar 2023, Marek Polacek via Gcc-patches wrote:


When processing a noexcept, constructors aren't elided:
build_over_call
has
 /* It's unsafe to elide the constructor when handling
a noexcept-expression, it may evaluate to the wrong
value (c++/53025).  */
 && (force_elide || cp_noexcept_operand == 0))
so the assert I added recently needs to be relaxed a little bit.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/109030

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_call_expression): Relax assert.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/noexcept77.C: New test.
---
gcc/cp/constexpr.cc | 6 +-
gcc/testsuite/g++.dg/cpp0x/noexcept77.C | 9 +
2 files changed, 14 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept77.C

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 364695b762c..5384d0e8e46 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -2869,7 +2869,11 @@ cxx_eval_call_expression (const constexpr_ctx
*ctx,
tree t,
/* We used to shortcut trivial constructor/op= here, but
nowadays
 we can only get a trivial function here with
-fno-elide-constructors.  */
-  gcc_checking_assert (!trivial_fn_p (fun) ||
!flag_elide_constructors);
+  gcc_checking_assert (!trivial_fn_p (fun)
+  || !flag_elide_constructors
+  /* We don't elide constructors when processing
+ a noexcept-expression.  */
+  || cp_noexcept_operand);


It seems weird that we're performing constant evaluation within an
unevaluated operand.  Would it make sense to also fix this a second
way
by avoiding constant evaluation from maybe_constant_init when
cp_unevaluated_operand && !manifestly_const_eval, like in
maybe_constant_value?


Sounds good.


Hmm, while working on this I noticed we currently don't reject a version
of
g++.dg/cpp2a/constexpr-inst1.C that list initializes an aggregate instead
of
int (ever since r12-4425-g1595fe44e11a96):

struct A { int m; };
template constexpr int f() { return T::value; }
template void h(decltype(A{B ? f() : 0})); //
was int{...}
template void h(...);
void x() {
  h(0); // OK?
}

ISTM we should instantiate f here for the same reason we do in the
original version of the testcase, and for that to happen we need to
pass manifestly_const_eval=true in massage_init_elt.  Does that seem
reasonable?



FWIW the reason this came up is because I tried contriving a testcase
for the aforementioned maybe_constant_init change, and I came up with:

struct __as_receiver {
  int empty_env;
};

template
constexpr int f(T t) {
  return t.fail;
};

using type = decltype(__as_receiver{f(0)}); // OK, f no longer
instantiated

which we used to reject and afterwards accept.  But since the elements
of an initializer list are potentially constant evaluated, I wonder if
that that means f should be instantiated here after all despite the
unevaluated context?


The relevant section of the standard would seem to be
https://eel.is/c++draft/expr.const#20 ; an immediate subexpression of a
braced-init-list is potentially constant-evaluated even though it isn't
potentially-evaluated or manifestly constant-evaluated.

It seems like the call to fold_non_dependent_expr in check_narrowing ought to
cause instantiation in this case, why doesn't it?


Looks like check_narrowing isn't called at all in this aggr init case.
The call from e.g. convert_like_internal isn't reached because the
conversion for the initializer element is ck_identity, and don't ever
set conversion::check_narrowing for ck_identity conversions I think.


Ah, yes, that makes sense; an identity conversion can never be 
narrowing, so we don't care about the constant value.  So not 
instantiating seems correct, and the patch is OK.



Yet for using 'type = decltype(int{f(0)});' (similar to the example in
[temp.inst]/8) we do call check_narrowing directly from
finish_compound_literal, despite the conversion effectively being an
identity conversion.


Hmm, maybe check_narrowing should defer constant evaluation until after 
deciding that the target type is not a superset of the source type...



Here's the full patch for reference:

-- >8 --

Subject: [PATCH] c++: maybe_constant_init and unevaluated operands
[PR109030]

This testcase in this PR (already fixed by r13-6526-ge4692319fd5fc7)
illustrates that maybe_constant_init can be called on an unevaluated
operand (from massage_init_elt), so this entry point should limit
constant evaluation in that case, like maybe_constant_value does.

PR 

Re: [PATCH] c++: noexcept and copy elision [PR109030]

2023-03-16 Thread Patrick Palka via Gcc-patches
On Thu, 16 Mar 2023, Jason Merrill wrote:

> On 3/16/23 10:09, Patrick Palka wrote:
> > On Wed, 15 Mar 2023, Patrick Palka wrote:
> > 
> > > On Thu, 9 Mar 2023, Jason Merrill wrote:
> > > 
> > > > On 3/9/23 14:32, Patrick Palka wrote:
> > > > > On Mon, 6 Mar 2023, Marek Polacek via Gcc-patches wrote:
> > > > > 
> > > > > > When processing a noexcept, constructors aren't elided:
> > > > > > build_over_call
> > > > > > has
> > > > > >  /* It's unsafe to elide the constructor when handling
> > > > > > a noexcept-expression, it may evaluate to the wrong
> > > > > > value (c++/53025).  */
> > > > > >  && (force_elide || cp_noexcept_operand == 0))
> > > > > > so the assert I added recently needs to be relaxed a little bit.
> > > > > > 
> > > > > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > > > > > 
> > > > > > PR c++/109030
> > > > > > 
> > > > > > gcc/cp/ChangeLog:
> > > > > > 
> > > > > > * constexpr.cc (cxx_eval_call_expression): Relax assert.
> > > > > > 
> > > > > > gcc/testsuite/ChangeLog:
> > > > > > 
> > > > > > * g++.dg/cpp0x/noexcept77.C: New test.
> > > > > > ---
> > > > > >gcc/cp/constexpr.cc | 6 +-
> > > > > >gcc/testsuite/g++.dg/cpp0x/noexcept77.C | 9 +
> > > > > >2 files changed, 14 insertions(+), 1 deletion(-)
> > > > > >create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept77.C
> > > > > > 
> > > > > > diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
> > > > > > index 364695b762c..5384d0e8e46 100644
> > > > > > --- a/gcc/cp/constexpr.cc
> > > > > > +++ b/gcc/cp/constexpr.cc
> > > > > > @@ -2869,7 +2869,11 @@ cxx_eval_call_expression (const constexpr_ctx
> > > > > > *ctx,
> > > > > > tree t,
> > > > > >/* We used to shortcut trivial constructor/op= here, but
> > > > > > nowadays
> > > > > > we can only get a trivial function here with
> > > > > > -fno-elide-constructors.  */
> > > > > > -  gcc_checking_assert (!trivial_fn_p (fun) ||
> > > > > > !flag_elide_constructors);
> > > > > > +  gcc_checking_assert (!trivial_fn_p (fun)
> > > > > > +  || !flag_elide_constructors
> > > > > > +  /* We don't elide constructors when processing
> > > > > > + a noexcept-expression.  */
> > > > > > +  || cp_noexcept_operand);
> > > > > 
> > > > > It seems weird that we're performing constant evaluation within an
> > > > > unevaluated operand.  Would it make sense to also fix this a second
> > > > > way
> > > > > by avoiding constant evaluation from maybe_constant_init when
> > > > > cp_unevaluated_operand && !manifestly_const_eval, like in
> > > > > maybe_constant_value?
> > > > 
> > > > Sounds good.
> > > 
> > > Hmm, while working on this I noticed we currently don't reject a version
> > > of
> > > g++.dg/cpp2a/constexpr-inst1.C that list initializes an aggregate instead
> > > of
> > > int (ever since r12-4425-g1595fe44e11a96):
> > > 
> > >struct A { int m; };
> > >template constexpr int f() { return T::value; }
> > >template void h(decltype(A{B ? f() : 0})); //
> > > was int{...}
> > >template void h(...);
> > >void x() {
> > >  h(0); // OK?
> > >}
> > > 
> > > ISTM we should instantiate f here for the same reason we do in the
> > > original version of the testcase, and for that to happen we need to
> > > pass manifestly_const_eval=true in massage_init_elt.  Does that seem
> > > reasonable?
> > > 
> > 
> > FWIW the reason this came up is because I tried contriving a testcase
> > for the aforementioned maybe_constant_init change, and I came up with:
> > 
> >struct __as_receiver {
> >  int empty_env;
> >};
> > 
> >template
> >constexpr int f(T t) {
> >  return t.fail;
> >};
> > 
> >using type = decltype(__as_receiver{f(0)}); // OK, f no longer
> > instantiated
> > 
> > which we used to reject and afterwards accept.  But since the elements
> > of an initializer list are potentially constant evaluated, I wonder if
> > that that means f should be instantiated here after all despite the
> > unevaluated context?
> 
> The relevant section of the standard would seem to be
> https://eel.is/c++draft/expr.const#20 ; an immediate subexpression of a
> braced-init-list is potentially constant-evaluated even though it isn't
> potentially-evaluated or manifestly constant-evaluated.
> 
> It seems like the call to fold_non_dependent_expr in check_narrowing ought to
> cause instantiation in this case, why doesn't it?

Looks like check_narrowing isn't called at all in this aggr init case.
The call from e.g. convert_like_internal isn't reached because the
conversion for the initializer element is ck_identity, and don't ever
set conversion::check_narrowing for ck_identity conversions I think.

Yet for using 'type = decltype(int{f(0)});' (similar to the example in
[temp.inst]/8) we do call check_narrowing directly from
finish_compound_literal, despite the conversion effectively being 

Ping: [PATCH] testsuite: Handle default_packed targets in gcc.dg/plugin

2023-03-16 Thread Hans-Peter Nilsson via Gcc-patches
Pinging this patch.

> From: Hans-Peter Nilsson 
> Date: Thu, 9 Mar 2023 19:56:16 +0100
> 
> It's not obvious to me whether considered best to include or
> exclude these tests that depend on structure layout details.
> If excluding, the obvious alternative to this patch is then
> to add a top one-liner (to dg-skip-if the test for
> default_packed targets or a similar excluding expression).
> I'm fine either way, just suggesting the following, which
> handles the cris-elf test-case failures I see for these
> tests, and causes no change in results for native
> x86_64-pc-linux-gnu.
> 
> Beware that some of the tests have lines with trailing
> whitespace.  Where lines are changed in this patch, the
> trailing whitespace is removed.
> 
> Ok to commit?
> 
> -- >8 --
> It's a judgement call whether to just skip some of these
> tests rather than trying to match messages depending on the
> layout of structures, but better include than exclude.
> 
>   * gcc.dg/plugin/infoleak-2.c,
>   gcc.dg/plugin/infoleak-CVE-2011-1078-1.c,
>   gcc.dg/plugin/infoleak-CVE-2011-1078-2.c,
>   gcc.dg/plugin/infoleak-CVE-2017-18549-1.c,
>   gcc.dg/plugin/infoleak-CVE-2017-18550-1.c,
>   gcc.dg/plugin/infoleak-antipatterns-1.c,
>   gcc.dg/plugin/infoleak-fixit-1.c: Handle default_packed targets.
> ---
>  gcc/testsuite/gcc.dg/plugin/infoleak-2.c| 13 -
>  .../gcc.dg/plugin/infoleak-CVE-2011-1078-1.c| 10 +-
>  .../gcc.dg/plugin/infoleak-CVE-2011-1078-2.c| 10 +-
>  .../gcc.dg/plugin/infoleak-CVE-2017-18549-1.c   | 10 +-
>  .../gcc.dg/plugin/infoleak-CVE-2017-18550-1.c   |  7 ---
>  .../gcc.dg/plugin/infoleak-antipatterns-1.c | 10 +-
>  gcc/testsuite/gcc.dg/plugin/infoleak-fixit-1.c  | 10 ++
>  7 files changed, 38 insertions(+), 32 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/plugin/infoleak-2.c 
> b/gcc/testsuite/gcc.dg/plugin/infoleak-2.c
> index 252f8f25918a..4ba484b3c6be 100644
> --- a/gcc/testsuite/gcc.dg/plugin/infoleak-2.c
> +++ b/gcc/testsuite/gcc.dg/plugin/infoleak-2.c
> @@ -18,16 +18,19 @@ struct st
>int b:1; /* { dg-message "field 'b' is uninitialized \\(1 bit\\)" "field" 
> } */
> /* { dg-message "padding after field 'b' is uninitialized \\(7 
> bits\\)" "padding" { target *-*-* } .-1 } */
>u8 d;/* { dg-message "field 'd' is uninitialized \\(1 byte\\)" } */
> -  int c:7; /* { dg-message "padding after field 'c' is uninitialized \\(9 
> bits\\)" } */
> -  u16 e;   /* { dg-message "padding after field 'e' is uninitialized \\(2 
> bytes\\)" } */  
> +  int c:7; /* { dg-message "padding after field 'c' is uninitialized \\(9 
> bits\\)" "padding" { target { ! default_packed } } } */
> +   /* { dg-message "padding after field 'c' is uninitialized \\(1 
> bit\\)" "padding" { target default_packed } .-1 } */
> +  u16 e;   /* { dg-message "padding after field 'e' is uninitialized \\(2 
> bytes\\)" "padding" { target { ! default_packed } } } */
>  };
>  
>  void test (void __user *dst, u16 v)
>  {
>struct st s; /* { dg-message "region created on stack here" "where" } */
> -  /* { dg-message "capacity: 12 bytes" "capacity" { target *-*-* } .-1 } */
> -  /* { dg-message "suggest forcing zero-initialization by providing a 
> '\\{0\\}' initializer" "fix-it" { target *-*-* } .-2 } */  
> +  /* { dg-message "capacity: 12 bytes" "capacity" { target { ! 
> default_packed } } .-1 } */
> +  /* { dg-message "capacity: 9 bytes" "capacity" { target default_packed } 
> .-2 } */
> +  /* { dg-message "suggest forcing zero-initialization by providing a 
> '\\{0\\}' initializer" "fix-it" { target *-*-* } .-3 } */
>s.e = v;
>copy_to_user(dst, , sizeof (struct st)); /* { dg-warning "potential 
> exposure of sensitive information by copying uninitialized data from stack" 
> "warning" } */
> -  /* { dg-message "10 bytes are uninitialized" "note how much" { target 
> *-*-* } .-1 } */
> +  /* { dg-message "10 bytes are uninitialized" "note how much" { target { ! 
> default_packed } } .-1 } */
> +  /* { dg-message "7 bytes are uninitialized" "note how much" { target 
> default_packed } .-2 } */
>  }
> diff --git a/gcc/testsuite/gcc.dg/plugin/infoleak-CVE-2011-1078-1.c 
> b/gcc/testsuite/gcc.dg/plugin/infoleak-CVE-2011-1078-1.c
> index 3616fbe176b3..9269b911b22f 100644
> --- a/gcc/testsuite/gcc.dg/plugin/infoleak-CVE-2011-1078-1.c
> +++ b/gcc/testsuite/gcc.dg/plugin/infoleak-CVE-2011-1078-1.c
> @@ -51,7 +51,7 @@ struct socket {
>  
>  struct sco_conninfo {
>   __u16 hci_handle;
> - __u8  dev_class[3]; /* { dg-message "padding after field 'dev_class' is 
> uninitialized \\(1 byte\\)" } */
> + __u8  dev_class[3]; /* { dg-message "padding after field 'dev_class' is 
> uninitialized \\(1 byte\\)" "padding" { target { ! default_packed } } } */
>  };
>  
>  struct sco_conn {
> @@ -83,8 +83,8 @@ static int sco_sock_getsockopt_old_broken(struct socket 
> *sock, int optname, char
> 

[PATCH v1] [RFC] Improve folding for comparisons with zero in tree-ssa-forwprop.

2023-03-16 Thread Manolis Tsamis
For this C testcase:

void g();
void f(unsigned int *a)
{
  if (++*a == 1)
g();
}

GCC will currently emit a comparison with 1 by using the value
of *a after the increment. This can be improved by comparing
against 0 and using the value before the increment. As a result
there is a potentially shorter dependancy chain (no need to wait
for the result of +1) and on targets with compare zero instructions
the generated code is one instruction shorter.

Example from Aarch64:

Before
ldr w1, [x0]
add w1, w1, 1
str w1, [x0]
cmp w1, 1
beq .L4
ret

After
ldr w1, [x0]
add w2, w1, 1
str w2, [x0]
cbz w1, .L4
ret

gcc/ChangeLog:

* tree-ssa-forwprop.cc (combine_cond_expr_cond):
(forward_propagate_into_comparison_1): Optimize
for zero comparisons.

Signed-off-by: Manolis Tsamis 
---

 gcc/tree-ssa-forwprop.cc | 41 +++-
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index e34f0888954..93d5043821b 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -373,12 +373,13 @@ rhs_to_tree (tree type, gimple *stmt)
 /* Combine OP0 CODE OP1 in the context of a COND_EXPR.  Returns
the folded result in a form suitable for COND_EXPR_COND or
NULL_TREE, if there is no suitable simplified form.  If
-   INVARIANT_ONLY is true only gimple_min_invariant results are
-   considered simplified.  */
+   ALWAYS_COMBINE is false then only combine it the resulting
+   expression is gimple_min_invariant or considered simplified
+   compared to the original.  */
 
 static tree
 combine_cond_expr_cond (gimple *stmt, enum tree_code code, tree type,
-   tree op0, tree op1, bool invariant_only)
+   tree op0, tree op1, bool always_combine)
 {
   tree t;
 
@@ -398,17 +399,31 @@ combine_cond_expr_cond (gimple *stmt, enum tree_code 
code, tree type,
   /* Canonicalize the combined condition for use in a COND_EXPR.  */
   t = canonicalize_cond_expr_cond (t);
 
-  /* Bail out if we required an invariant but didn't get one.  */
-  if (!t || (invariant_only && !is_gimple_min_invariant (t)))
+  if (!t)
 {
   fold_undefer_overflow_warnings (false, NULL, 0);
   return NULL_TREE;
 }
 
-  bool nowarn = warning_suppressed_p (stmt, OPT_Wstrict_overflow);
-  fold_undefer_overflow_warnings (!nowarn, stmt, 0);
+  if (always_combine || is_gimple_min_invariant (t))
+{
+  bool nowarn = warning_suppressed_p (stmt, OPT_Wstrict_overflow);
+  fold_undefer_overflow_warnings (!nowarn, stmt, 0);
+  return t;
+}
 
-  return t;
+  /* If the result of folding is a zero comparison treat it preferentially.  */
+  if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison
+  && integer_zerop (TREE_OPERAND (t, 1))
+  && !integer_zerop (op1))
+{
+  bool nowarn = warning_suppressed_p (stmt, OPT_Wstrict_overflow);
+  fold_undefer_overflow_warnings (!nowarn, stmt, 0);
+  return t;
+}
+
+  fold_undefer_overflow_warnings (false, NULL, 0);
+  return NULL_TREE;
 }
 
 /* Combine the comparison OP0 CODE OP1 at LOC with the defining statements
@@ -432,7 +447,7 @@ forward_propagate_into_comparison_1 (gimple *stmt,
   if (def_stmt && can_propagate_from (def_stmt))
{
  enum tree_code def_code = gimple_assign_rhs_code (def_stmt);
- bool invariant_only_p = !single_use0_p;
+ bool always_combine = single_use0_p;
 
  rhs0 = rhs_to_tree (TREE_TYPE (op1), def_stmt);
 
@@ -442,10 +457,10 @@ forward_propagate_into_comparison_1 (gimple *stmt,
   && TREE_CODE (TREE_TYPE (TREE_OPERAND (rhs0, 0)))
  == BOOLEAN_TYPE)
  || TREE_CODE_CLASS (def_code) == tcc_comparison))
-   invariant_only_p = false;
+   always_combine = true;
 
  tmp = combine_cond_expr_cond (stmt, code, type,
-   rhs0, op1, invariant_only_p);
+   rhs0, op1, always_combine);
  if (tmp)
return tmp;
}
@@ -459,7 +474,7 @@ forward_propagate_into_comparison_1 (gimple *stmt,
{
  rhs1 = rhs_to_tree (TREE_TYPE (op0), def_stmt);
  tmp = combine_cond_expr_cond (stmt, code, type,
-   op0, rhs1, !single_use1_p);
+   op0, rhs1, single_use1_p);
  if (tmp)
return tmp;
}
@@ -470,7 +485,7 @@ forward_propagate_into_comparison_1 (gimple *stmt,
   && rhs1 != NULL_TREE)
 tmp = combine_cond_expr_cond (stmt, code, type,
  rhs0, rhs1,
- !(single_use0_p && single_use1_p));
+ single_use0_p && single_use1_p);
 
   return tmp;
 }
-- 
2.34.1



Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Jeff Law via Gcc-patches




On 3/16/23 04:11, Ajit Agarwal via Gcc-patches wrote:


Hello Richard:

On 16/03/23 3:22 pm, Richard Biener wrote:

On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal  wrote:




On 16/03/23 1:44 pm, Richard Biener wrote:

On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal  wrote:


Hello Richard:

On 16/03/23 1:10 pm, Richard Biener wrote:

On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
 wrote:


Hello All:


This patch eliminates unnecessary zero extension instruction from power 
generated assembly.
Bootstrapped and regtested on powerpc64-linux-gnu.


What makes this so special that we cannot deal with it from generic code?
In particular we do have the REE pass, why is target specific
knowledge neccessary
to eliminate the extension?



For returning bool values and comparision with integers generates the following 
by all the rtl passes.

set compare (subreg)
set if_then_else
Convert SImode -> QImode
set zero_extend to SImode from QImode
set return value 0 in one path of cfg.
set return value 1 in other path of cfg.

This pass replaces the above zero extension and conversion from QImode to 
DImode with copy operation to keep QImode in 64 bit registers in powerpc target.


Sorry, I can't parse that - as there's no testcase with the patch I
cannot even try to see what the actual RTL
looks like (without the pass).



Here is the PR with bugzilla.
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784

I can add the attached testcase with this PR in the patch.


I don't see any zero-extends there.



Here is the testcase.


bool (int a, int b)
{
   if (a > 2)
   return false;
if (b < 10)
return true;
  return false;
}

compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.

Here is the rtl after cse.
(note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
(insn 15 12 16 3 (set (reg:CC 123)
 (compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
 (const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
  (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
 (nil)))
(insn 16 15 17 3 (set (reg:SI 124)
 (const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
  (nil))
(insn 17 16 18 3 (set (reg:SI 122)
 (if_then_else:SI (gt (reg:CC 123)
 (const_int 0 [0]))
 (const_int 0 [0])
 (reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
  (expr_list:REG_DEAD (reg:SI 124)
 (expr_list:REG_DEAD (reg:CC 123)
 (nil
(insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
 (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
  (expr_list:REG_DEAD (reg:SI 122)
 (nil)))
   ; pc falls through to BB 5
(code_label 32 18 31 4 3 (nil) [1 uses])
(note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
(insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
 (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
  (nil))
(code_label 19 5 20 5 2 (nil) [0 uses])
(note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
(insn 21 20 22 5 (set (reg:DI 126 [ _1 ])
 (zero_extend:DI (reg:QI 117 [ _1 ]))) "ext.cc":8:1 5 {zero_extendqidi2}
  (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
 (nil)))
(insn 22 21 26 5 (set (reg:DI 118 [  ])
 (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
  (expr_list:REG_DEAD (reg:DI 126 [ _1 ])
 (nil)))
(insn 26 22 27 5 (set (reg/i:DI 3 3)
 (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
  (expr_list:REG_DEAD (reg:DI 118 [  ])
 (nil)))
(insn 27 26 0 5 (use (reg/i:DI 3 3)) "ext.cc":8:1 -1
  (nil))

This looks like it'd be better addressed in REE.


We've got two paths to the zero_extend.  One sets (reg 117) from a 
constant.  The other sets (reg 117) from a (subreg:QI (reg:SI)).


Handling the constant is trivial.  For the other set, we can replace the 
subreg with the zero_extend.  Presumably we'd then proceed to try and 
eliminate the zero-extend by realizing both arms of the conditional move 
are constants and thus trivially handled.


While I don't think REE would handle all this today, fixing it to handle 
this case seems like it'd be better than doing a specialized pass in the 
ppc backend.


jeff



Re: [PATCH] diagnostics: fix crash with -fdiagnostics-format=json-file

2023-03-16 Thread David Malcolm via Gcc-patches
On Tue, 2023-01-10 at 16:10 +0100, Martin Liška wrote:
> On 1/6/23 14:21, David Malcolm wrote:
> > On Fri, 2023-01-06 at 12:33 +0100, Martin Liška wrote:
> > > Patch can bootstrap on x86_64-linux-gnu and survives regression
> > > tests.
> > 
> > Thanks for the patch.
> > 
> > I noticed that you marked PR 108307 as a dup of this, which covers
> > -fdiagnostics-format=sarif-file (and a .S file as input).
> > 
> > The patch doesn't add any test coverage (for either of the
> > diagnostic
> > formats).
> > 
> > If we try to emit a diagnostic and base_file_name is NULL, and the
> > user
> > requested one of -fdiagnostics-format={json,sarif}-file, where do
> > the
> > diagnostics go?  Where should they go?
> 
> Hey.
> 
> I've done a new version of the patch where I utilize
> x_main_input_basename.
> 
> Patch can bootstrap on x86_64-linux-gnu and survives regression
> tests.
> 
> Ready to be installed?

Sorry about the long delay; the updated patch is good for trunk.

Thanks
Dave



Re: [PATCH] c++: noexcept and copy elision [PR109030]

2023-03-16 Thread Jason Merrill via Gcc-patches

On 3/16/23 10:09, Patrick Palka wrote:

On Wed, 15 Mar 2023, Patrick Palka wrote:


On Thu, 9 Mar 2023, Jason Merrill wrote:


On 3/9/23 14:32, Patrick Palka wrote:

On Mon, 6 Mar 2023, Marek Polacek via Gcc-patches wrote:


When processing a noexcept, constructors aren't elided: build_over_call
has
 /* It's unsafe to elide the constructor when handling
a noexcept-expression, it may evaluate to the wrong
value (c++/53025).  */
 && (force_elide || cp_noexcept_operand == 0))
so the assert I added recently needs to be relaxed a little bit.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/109030

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_call_expression): Relax assert.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/noexcept77.C: New test.
---
   gcc/cp/constexpr.cc | 6 +-
   gcc/testsuite/g++.dg/cpp0x/noexcept77.C | 9 +
   2 files changed, 14 insertions(+), 1 deletion(-)
   create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept77.C

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 364695b762c..5384d0e8e46 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -2869,7 +2869,11 @@ cxx_eval_call_expression (const constexpr_ctx *ctx,
tree t,
   /* We used to shortcut trivial constructor/op= here, but nowadays
we can only get a trivial function here with
-fno-elide-constructors.  */
-  gcc_checking_assert (!trivial_fn_p (fun) || !flag_elide_constructors);
+  gcc_checking_assert (!trivial_fn_p (fun)
+  || !flag_elide_constructors
+  /* We don't elide constructors when processing
+ a noexcept-expression.  */
+  || cp_noexcept_operand);


It seems weird that we're performing constant evaluation within an
unevaluated operand.  Would it make sense to also fix this a second way
by avoiding constant evaluation from maybe_constant_init when
cp_unevaluated_operand && !manifestly_const_eval, like in
maybe_constant_value?


Sounds good.


Hmm, while working on this I noticed we currently don't reject a version of
g++.dg/cpp2a/constexpr-inst1.C that list initializes an aggregate instead of
int (ever since r12-4425-g1595fe44e11a96):

   struct A { int m; };
   template constexpr int f() { return T::value; }
   template void h(decltype(A{B ? f() : 0})); // was 
int{...}
   template void h(...);
   void x() {
 h(0); // OK?
   }

ISTM we should instantiate f here for the same reason we do in the
original version of the testcase, and for that to happen we need to
pass manifestly_const_eval=true in massage_init_elt.  Does that seem
reasonable?



FWIW the reason this came up is because I tried contriving a testcase
for the aforementioned maybe_constant_init change, and I came up with:

   struct __as_receiver {
 int empty_env;
   };

   template
   constexpr int f(T t) {
 return t.fail;
   };

   using type = decltype(__as_receiver{f(0)}); // OK, f no longer 
instantiated

which we used to reject and afterwards accept.  But since the elements
of an initializer list are potentially constant evaluated, I wonder if
that that means f should be instantiated here after all despite the
unevaluated context?


The relevant section of the standard would seem to be
https://eel.is/c++draft/expr.const#20 ; an immediate subexpression of a 
braced-init-list is potentially constant-evaluated even though it isn't 
potentially-evaluated or manifestly constant-evaluated.


It seems like the call to fold_non_dependent_expr in check_narrowing 
ought to cause instantiation in this case, why doesn't it?



Here's the full patch for reference:

-- >8 --

Subject: [PATCH] c++: maybe_constant_init and unevaluated operands [PR109030]

This testcase in this PR (already fixed by r13-6526-ge4692319fd5fc7)
illustrates that maybe_constant_init can be called on an unevaluated
operand (from massage_init_elt), so this entry point should limit
constant evaluation in that case, like maybe_constant_value does.

PR c++/109030

gcc/cp/ChangeLog:

* constexpr.cc (maybe_constant_init_1): For an unevaluated
non-manifestly-constant operand, don't constant evaluate
and instead call fold_to_constant.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/decltype83.C: New test.
---
  gcc/cp/constexpr.cc |  2 ++
  gcc/testsuite/g++.dg/cpp0x/decltype83.C | 14 ++
  2 files changed, 16 insertions(+)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/decltype83.C

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 8683c00596a..f325af375c8 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -8795,6 +8795,8 @@ maybe_constant_init_1 (tree t, tree decl, bool 
allow_non_constant,
&& (TREE_STATIC (decl) || DECL_EXTERNAL (decl)));
if (is_static)
manifestly_const_eval = true;
+  if (cp_unevaluated_operand && !manifestly_const_eval)
+  

Re: [PATCH] tree-optimization/106912 - IPA profile and pure/const

2023-03-16 Thread Jakub Jelinek via Gcc-patches
On Thu, Mar 16, 2023 at 02:11:01PM +, Richard Biener wrote:
> > Let's wait for Honzas opinion.
> 
> The following is what I profile-bootstrapped and tested on 
> x86_64-unknown-linux-gnu.
> 
> Richard.
> 
> >From d438a0d84cafced85c90204cba81de0f60ad0073 Mon Sep 17 00:00:00 2001
> From: Richard Biener 
> Date: Thu, 16 Mar 2023 13:51:19 +0100
> Subject: [PATCH] tree-optimization/106912 - clear const attribute from fntype
> To: gcc-patches@gcc.gnu.org
> 
> The following makes sure that after clearing pure/const from
> instrumented function declarations we are adjusting call statements
> fntype as well to handle indirect calls and because gimple_call_flags
> looks at both decl and fntype.
> 
> Like the pure/const flag clearing on decls we refrain from touching
> calls to known functions that do not have a body in the current TU.
> 
>   PR tree-optimization/106912
>   * tree-profile.cc (tree_profiling): Update stmts only when
>   profiling or testing coverage.  Make sure to update calls
>   fntype, stripping 'const' there.
> 
>   * gcc.dg/profile-generate-4.c: New testcase.

> + if (fntype && TYPE_READONLY (fntype))
> +   gimple_call_set_fntype
> + (call, build_qualified_type (fntype, (TYPE_QUALS (fntype)
> +   & ~TYPE_QUAL_CONST)));

I think
if (fntype && TYPE_READONLY (fntype))
  {
int quals = TYPE_QUALS (fntype) & ~TYPE_QUAL_CONST;
fntype = build_qualified_type (fntype, quals);
gimple_call_set_fntype (call, fntype);
  }
would be nicer formatting for it.

Anyway, let's wait for Honza, LGTM.

> +
> + /* Update virtual operands of calls to no longer const/pure
> +functions.  */
> + update_stmt (call);
> +   }
> +   }
>  
>/* re-merge split blocks.  */
>cleanup_tree_cfg ();

Jakub



Re: [PATCH] tree-optimization/106912 - IPA profile and pure/const

2023-03-16 Thread Richard Biener via Gcc-patches
On Thu, 16 Mar 2023, Richard Biener wrote:

> On Thu, 16 Mar 2023, Jakub Jelinek wrote:
> 
> > On Thu, Mar 16, 2023 at 12:05:56PM +, Richard Biener wrote:
> > > On Thu, 16 Mar 2023, Jakub Jelinek wrote:
> > > 
> > > > On Fri, Nov 25, 2022 at 09:26:34PM +0100, Richard Biener via 
> > > > Gcc-patches wrote:
> > > > > > We could
> > > > > > probably keep tract if any instrumented code was ever inlined into a
> > > > > > given function and perhaps just start ignoring attributes set on 
> > > > > > types?
> > > > > 
> > > > > But ignoring attributes on types makes all indirect calls not
> > > > > const/pure annotatable (OK, they are not pure annotatable because of
> > > > > the above bug).  I really don't see how to conservatively solve this
> > > > > issue?
> > > > 
> > > > > Maybe we can ignore all pure/const when the cgraph state is
> > > > > in profile-instrumented state?  Of course we have multiple "APIs"
> > > > > to query that.
> > > > 
> > > > I think that's the way to go.  But we'd need to arrange somewhere 
> > > > during IPA
> > > > to add vops to all those pure/const calls if -fprofile-generate (direct 
> > > > or
> > > > indirect; not sure what exact flags) and after that make sure all our 
> > > > APIs
> > > > don't return ECF_CONST, ECF_PURE or ECF_LOOPING_CONST_OR_PURE.
> > > > Could be e.g.
> > > >   if (whatever)
> > > > flags &= ~(ECF_CONST | ECF_PURE | ECF_LOOPING_CONST_OR_PURE);
> > > > at the end of flags_from_decl_or_type, internal_fn_flags, what else?
> > > > Although, perhaps internal_fn_flags don't need to change, because 
> > > > internal
> > > > calls don't really have callees.
> > > > 
> > > > Or is just ECF_CONST enough given that ECF_PURE is on fndecls and not
> > > > types?
> > > > 
> > > > Or perhaps just start ignoring TYPE_READONLY -> ECF_CONST in
> > > > flags_from_decl_or_type if that flag is on?
> > > > 
> > > > Is that rewriting currently what tree_profiling does in the
> > > >   /* Update call statements and rebuild the cgraph.  */
> > > >   FOR_EACH_DEFINED_FUNCTION (node)
> > > > spot where it calls update_stmt on all call statements?
> > > > 
> > > > If so, could we just set that global? flag instead or in addition to
> > > > doing those node->set_const_flag (false, false); calls and
> > > > change flags_from_decl_or_type, plus I guess lto1 should set that
> > > > flag if it is global on start as well if
> > > > !flag_auto_profile
> > > > && (flag_branch_probabilities || flag_test_coverage || profile_arc_flag)
> > > > ?
> > > > 
> > > > I think just ignoring ECF_CONST from TYPE_READONLY might be best, if we
> > > > have external functions like builtins from libc/libm and don't have 
> > > > their
> > > > bodies, we can still treat them as const, the only problem is with the
> > > > indirect calls where we don't know if we do or don't have a body for
> > > > the callees and whether we've instrumented those or not.
> > > 
> > > I think we want something reflected on the IL.  Because of LTO I think
> > > we cannot ignore externs (or we have to do massaging at stream-in).
> > > 
> > > The following brute-force works for the testcase.  I suppose since
> > > we leave const/pure set on functions without body in the compile-time
> > > TU (and ignore LTO there) we could do the same here.
> > > 
> > > I also wonder if that whole function walk is necessary if not
> > > profile_arc_flag || flag_test_coverage ...
> > > 
> > > Honza - does this look OK to you?  I'll test guarding the whole
> > > outer loop with profile_arc_flag || flag_test_coverage separately.
> > > 
> > > diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc
> > > index ea9d7a23443..c8789618f96 100644
> > > --- a/gcc/tree-profile.cc
> > > +++ b/gcc/tree-profile.cc
> > > @@ -840,9 +840,29 @@ tree_profiling (void)
> > >   gimple_stmt_iterator gsi;
> > >   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next ())
> > > {
> > > - gimple *stmt = gsi_stmt (gsi);
> > > - if (is_gimple_call (stmt))
> > > -   update_stmt (stmt);
> > > + gcall *call = dyn_cast  (gsi_stmt (gsi));
> > > + if (!call)
> > > +   continue;
> > > +
> > > + if (profile_arc_flag || flag_test_coverage)
> > > +   {
> > > + /* We do not clear pure/const on decls without body.  */
> > > + tree fndecl = gimple_call_fndecl (call);
> > > + if (fndecl && !gimple_has_body_p (fndecl))
> > > +   continue;
> > > +
> > > + /* Drop the const attribute from the call type (the pure
> > > +attribute is not available on types).  */
> > > + tree fntype = gimple_call_fntype (call);
> > > + if (fntype && TYPE_READONLY (fntype))
> > > +   gimple_call_set_fntype (call, build_qualified_type
> > > +   (fntype, (TYPE_QUALS 
> > > (fntype)
> > > +   

Re: [PATCH] c++: noexcept and copy elision [PR109030]

2023-03-16 Thread Patrick Palka via Gcc-patches
On Wed, 15 Mar 2023, Patrick Palka wrote:

> On Thu, 9 Mar 2023, Jason Merrill wrote:
> 
> > On 3/9/23 14:32, Patrick Palka wrote:
> > > On Mon, 6 Mar 2023, Marek Polacek via Gcc-patches wrote:
> > > 
> > > > When processing a noexcept, constructors aren't elided: build_over_call
> > > > has
> > > >  /* It's unsafe to elide the constructor when handling
> > > > a noexcept-expression, it may evaluate to the wrong
> > > > value (c++/53025).  */
> > > >  && (force_elide || cp_noexcept_operand == 0))
> > > > so the assert I added recently needs to be relaxed a little bit.
> > > > 
> > > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > > > 
> > > > PR c++/109030
> > > > 
> > > > gcc/cp/ChangeLog:
> > > > 
> > > > * constexpr.cc (cxx_eval_call_expression): Relax assert.
> > > > 
> > > > gcc/testsuite/ChangeLog:
> > > > 
> > > > * g++.dg/cpp0x/noexcept77.C: New test.
> > > > ---
> > > >   gcc/cp/constexpr.cc | 6 +-
> > > >   gcc/testsuite/g++.dg/cpp0x/noexcept77.C | 9 +
> > > >   2 files changed, 14 insertions(+), 1 deletion(-)
> > > >   create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept77.C
> > > > 
> > > > diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
> > > > index 364695b762c..5384d0e8e46 100644
> > > > --- a/gcc/cp/constexpr.cc
> > > > +++ b/gcc/cp/constexpr.cc
> > > > @@ -2869,7 +2869,11 @@ cxx_eval_call_expression (const constexpr_ctx 
> > > > *ctx,
> > > > tree t,
> > > >   /* We used to shortcut trivial constructor/op= here, but nowadays
> > > >we can only get a trivial function here with
> > > > -fno-elide-constructors.  */
> > > > -  gcc_checking_assert (!trivial_fn_p (fun) || 
> > > > !flag_elide_constructors);
> > > > +  gcc_checking_assert (!trivial_fn_p (fun)
> > > > +  || !flag_elide_constructors
> > > > +  /* We don't elide constructors when processing
> > > > + a noexcept-expression.  */
> > > > +  || cp_noexcept_operand);
> > > 
> > > It seems weird that we're performing constant evaluation within an
> > > unevaluated operand.  Would it make sense to also fix this a second way
> > > by avoiding constant evaluation from maybe_constant_init when
> > > cp_unevaluated_operand && !manifestly_const_eval, like in
> > > maybe_constant_value?
> > 
> > Sounds good.
> 
> Hmm, while working on this I noticed we currently don't reject a version of
> g++.dg/cpp2a/constexpr-inst1.C that list initializes an aggregate instead of
> int (ever since r12-4425-g1595fe44e11a96):
> 
>   struct A { int m; };
>   template constexpr int f() { return T::value; }
>   template void h(decltype(A{B ? f() : 0})); // was 
> int{...}
>   template void h(...);
>   void x() {
> h(0); // OK?
>   }
> 
> ISTM we should instantiate f here for the same reason we do in the
> original version of the testcase, and for that to happen we need to
> pass manifestly_const_eval=true in massage_init_elt.  Does that seem
> reasonable?
> 

FWIW the reason this came up is because I tried contriving a testcase
for the aforementioned maybe_constant_init change, and I came up with:

  struct __as_receiver {
int empty_env;
  };

  template
  constexpr int f(T t) {
return t.fail;
  };

  using type = decltype(__as_receiver{f(0)}); // OK, f no longer 
instantiated

which we used to reject and afterwards accept.  But since the elements
of an initializer list are potentially constant evaluated, I wonder if
that that means f should be instantiated here after all despite the
unevaluated context?

Here's the full patch for reference:

-- >8 --

Subject: [PATCH] c++: maybe_constant_init and unevaluated operands [PR109030]

This testcase in this PR (already fixed by r13-6526-ge4692319fd5fc7)
illustrates that maybe_constant_init can be called on an unevaluated
operand (from massage_init_elt), so this entry point should limit
constant evaluation in that case, like maybe_constant_value does.

PR c++/109030

gcc/cp/ChangeLog:

* constexpr.cc (maybe_constant_init_1): For an unevaluated
non-manifestly-constant operand, don't constant evaluate
and instead call fold_to_constant.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/decltype83.C: New test.
---
 gcc/cp/constexpr.cc |  2 ++
 gcc/testsuite/g++.dg/cpp0x/decltype83.C | 14 ++
 2 files changed, 16 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/decltype83.C

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 8683c00596a..f325af375c8 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -8795,6 +8795,8 @@ maybe_constant_init_1 (tree t, tree decl, bool 
allow_non_constant,
&& (TREE_STATIC (decl) || DECL_EXTERNAL (decl)));
   if (is_static)
manifestly_const_eval = true;
+  if (cp_unevaluated_operand && !manifestly_const_eval)
+   return 

Re: [PATCH] tree-optimization/106912 - IPA profile and pure/const

2023-03-16 Thread Richard Biener via Gcc-patches
On Thu, 16 Mar 2023, Jakub Jelinek wrote:

> On Thu, Mar 16, 2023 at 12:05:56PM +, Richard Biener wrote:
> > On Thu, 16 Mar 2023, Jakub Jelinek wrote:
> > 
> > > On Fri, Nov 25, 2022 at 09:26:34PM +0100, Richard Biener via Gcc-patches 
> > > wrote:
> > > > > We could
> > > > > probably keep tract if any instrumented code was ever inlined into a
> > > > > given function and perhaps just start ignoring attributes set on 
> > > > > types?
> > > > 
> > > > But ignoring attributes on types makes all indirect calls not
> > > > const/pure annotatable (OK, they are not pure annotatable because of
> > > > the above bug).  I really don't see how to conservatively solve this
> > > > issue?
> > > 
> > > > Maybe we can ignore all pure/const when the cgraph state is
> > > > in profile-instrumented state?  Of course we have multiple "APIs"
> > > > to query that.
> > > 
> > > I think that's the way to go.  But we'd need to arrange somewhere during 
> > > IPA
> > > to add vops to all those pure/const calls if -fprofile-generate (direct or
> > > indirect; not sure what exact flags) and after that make sure all our APIs
> > > don't return ECF_CONST, ECF_PURE or ECF_LOOPING_CONST_OR_PURE.
> > > Could be e.g.
> > >   if (whatever)
> > > flags &= ~(ECF_CONST | ECF_PURE | ECF_LOOPING_CONST_OR_PURE);
> > > at the end of flags_from_decl_or_type, internal_fn_flags, what else?
> > > Although, perhaps internal_fn_flags don't need to change, because internal
> > > calls don't really have callees.
> > > 
> > > Or is just ECF_CONST enough given that ECF_PURE is on fndecls and not
> > > types?
> > > 
> > > Or perhaps just start ignoring TYPE_READONLY -> ECF_CONST in
> > > flags_from_decl_or_type if that flag is on?
> > > 
> > > Is that rewriting currently what tree_profiling does in the
> > >   /* Update call statements and rebuild the cgraph.  */
> > >   FOR_EACH_DEFINED_FUNCTION (node)
> > > spot where it calls update_stmt on all call statements?
> > > 
> > > If so, could we just set that global? flag instead or in addition to
> > > doing those node->set_const_flag (false, false); calls and
> > > change flags_from_decl_or_type, plus I guess lto1 should set that
> > > flag if it is global on start as well if
> > > !flag_auto_profile
> > > && (flag_branch_probabilities || flag_test_coverage || profile_arc_flag)
> > > ?
> > > 
> > > I think just ignoring ECF_CONST from TYPE_READONLY might be best, if we
> > > have external functions like builtins from libc/libm and don't have their
> > > bodies, we can still treat them as const, the only problem is with the
> > > indirect calls where we don't know if we do or don't have a body for
> > > the callees and whether we've instrumented those or not.
> > 
> > I think we want something reflected on the IL.  Because of LTO I think
> > we cannot ignore externs (or we have to do massaging at stream-in).
> > 
> > The following brute-force works for the testcase.  I suppose since
> > we leave const/pure set on functions without body in the compile-time
> > TU (and ignore LTO there) we could do the same here.
> > 
> > I also wonder if that whole function walk is necessary if not
> > profile_arc_flag || flag_test_coverage ...
> > 
> > Honza - does this look OK to you?  I'll test guarding the whole
> > outer loop with profile_arc_flag || flag_test_coverage separately.
> > 
> > diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc
> > index ea9d7a23443..c8789618f96 100644
> > --- a/gcc/tree-profile.cc
> > +++ b/gcc/tree-profile.cc
> > @@ -840,9 +840,29 @@ tree_profiling (void)
> >   gimple_stmt_iterator gsi;
> >   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next ())
> > {
> > - gimple *stmt = gsi_stmt (gsi);
> > - if (is_gimple_call (stmt))
> > -   update_stmt (stmt);
> > + gcall *call = dyn_cast  (gsi_stmt (gsi));
> > + if (!call)
> > +   continue;
> > +
> > + if (profile_arc_flag || flag_test_coverage)
> > +   {
> > + /* We do not clear pure/const on decls without body.  */
> > + tree fndecl = gimple_call_fndecl (call);
> > + if (fndecl && !gimple_has_body_p (fndecl))
> > +   continue;
> > +
> > + /* Drop the const attribute from the call type (the pure
> > +attribute is not available on types).  */
> > + tree fntype = gimple_call_fntype (call);
> > + if (fntype && TYPE_READONLY (fntype))
> > +   gimple_call_set_fntype (call, build_qualified_type
> > +   (fntype, (TYPE_QUALS 
> > (fntype)
> > + & 
> > ~TYPE_QUAL_CONST)));
> > +   }
> > +
> > + /* Update virtual operands of calls to no longer const/pure
> > +functions.  */
> > + update_stmt (call);
> > }
> > }
> 
> I have in the meantime 

Re: [PATCH] tree-optimization/106912 - IPA profile and pure/const

2023-03-16 Thread Jakub Jelinek via Gcc-patches
On Thu, Mar 16, 2023 at 12:05:56PM +, Richard Biener wrote:
> On Thu, 16 Mar 2023, Jakub Jelinek wrote:
> 
> > On Fri, Nov 25, 2022 at 09:26:34PM +0100, Richard Biener via Gcc-patches 
> > wrote:
> > > > We could
> > > > probably keep tract if any instrumented code was ever inlined into a
> > > > given function and perhaps just start ignoring attributes set on types?
> > > 
> > > But ignoring attributes on types makes all indirect calls not
> > > const/pure annotatable (OK, they are not pure annotatable because of
> > > the above bug).  I really don't see how to conservatively solve this
> > > issue?
> > 
> > > Maybe we can ignore all pure/const when the cgraph state is
> > > in profile-instrumented state?  Of course we have multiple "APIs"
> > > to query that.
> > 
> > I think that's the way to go.  But we'd need to arrange somewhere during IPA
> > to add vops to all those pure/const calls if -fprofile-generate (direct or
> > indirect; not sure what exact flags) and after that make sure all our APIs
> > don't return ECF_CONST, ECF_PURE or ECF_LOOPING_CONST_OR_PURE.
> > Could be e.g.
> >   if (whatever)
> > flags &= ~(ECF_CONST | ECF_PURE | ECF_LOOPING_CONST_OR_PURE);
> > at the end of flags_from_decl_or_type, internal_fn_flags, what else?
> > Although, perhaps internal_fn_flags don't need to change, because internal
> > calls don't really have callees.
> > 
> > Or is just ECF_CONST enough given that ECF_PURE is on fndecls and not
> > types?
> > 
> > Or perhaps just start ignoring TYPE_READONLY -> ECF_CONST in
> > flags_from_decl_or_type if that flag is on?
> > 
> > Is that rewriting currently what tree_profiling does in the
> >   /* Update call statements and rebuild the cgraph.  */
> >   FOR_EACH_DEFINED_FUNCTION (node)
> > spot where it calls update_stmt on all call statements?
> > 
> > If so, could we just set that global? flag instead or in addition to
> > doing those node->set_const_flag (false, false); calls and
> > change flags_from_decl_or_type, plus I guess lto1 should set that
> > flag if it is global on start as well if
> > !flag_auto_profile
> > && (flag_branch_probabilities || flag_test_coverage || profile_arc_flag)
> > ?
> > 
> > I think just ignoring ECF_CONST from TYPE_READONLY might be best, if we
> > have external functions like builtins from libc/libm and don't have their
> > bodies, we can still treat them as const, the only problem is with the
> > indirect calls where we don't know if we do or don't have a body for
> > the callees and whether we've instrumented those or not.
> 
> I think we want something reflected on the IL.  Because of LTO I think
> we cannot ignore externs (or we have to do massaging at stream-in).
> 
> The following brute-force works for the testcase.  I suppose since
> we leave const/pure set on functions without body in the compile-time
> TU (and ignore LTO there) we could do the same here.
> 
> I also wonder if that whole function walk is necessary if not
> profile_arc_flag || flag_test_coverage ...
> 
> Honza - does this look OK to you?  I'll test guarding the whole
> outer loop with profile_arc_flag || flag_test_coverage separately.
> 
> diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc
> index ea9d7a23443..c8789618f96 100644
> --- a/gcc/tree-profile.cc
> +++ b/gcc/tree-profile.cc
> @@ -840,9 +840,29 @@ tree_profiling (void)
>   gimple_stmt_iterator gsi;
>   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next ())
> {
> - gimple *stmt = gsi_stmt (gsi);
> - if (is_gimple_call (stmt))
> -   update_stmt (stmt);
> + gcall *call = dyn_cast  (gsi_stmt (gsi));
> + if (!call)
> +   continue;
> +
> + if (profile_arc_flag || flag_test_coverage)
> +   {
> + /* We do not clear pure/const on decls without body.  */
> + tree fndecl = gimple_call_fndecl (call);
> + if (fndecl && !gimple_has_body_p (fndecl))
> +   continue;
> +
> + /* Drop the const attribute from the call type (the pure
> +attribute is not available on types).  */
> + tree fntype = gimple_call_fntype (call);
> + if (fntype && TYPE_READONLY (fntype))
> +   gimple_call_set_fntype (call, build_qualified_type
> +   (fntype, (TYPE_QUALS 
> (fntype)
> + & 
> ~TYPE_QUAL_CONST)));
> +   }
> +
> + /* Update virtual operands of calls to no longer const/pure
> +functions.  */
> + update_stmt (call);
> }
> }

I have in the meantime briefly tested following.

But if you want to the above way, then at least the testcase could be
useful.  Though, not sure if the above is all that is needed.  Shouldn't
set_const_flag_1 upon TREE_READONLY (node->decl) = 0; also adjust
TREE_TYPE on the function to
  

Re: [PATCH] tree-optimization/106912 - IPA profile and pure/const

2023-03-16 Thread Richard Biener via Gcc-patches
On Thu, 16 Mar 2023, Jakub Jelinek wrote:

> On Fri, Nov 25, 2022 at 09:26:34PM +0100, Richard Biener via Gcc-patches 
> wrote:
> > > We could
> > > probably keep tract if any instrumented code was ever inlined into a
> > > given function and perhaps just start ignoring attributes set on types?
> > 
> > But ignoring attributes on types makes all indirect calls not
> > const/pure annotatable (OK, they are not pure annotatable because of
> > the above bug).  I really don't see how to conservatively solve this
> > issue?
> 
> > Maybe we can ignore all pure/const when the cgraph state is
> > in profile-instrumented state?  Of course we have multiple "APIs"
> > to query that.
> 
> I think that's the way to go.  But we'd need to arrange somewhere during IPA
> to add vops to all those pure/const calls if -fprofile-generate (direct or
> indirect; not sure what exact flags) and after that make sure all our APIs
> don't return ECF_CONST, ECF_PURE or ECF_LOOPING_CONST_OR_PURE.
> Could be e.g.
>   if (whatever)
> flags &= ~(ECF_CONST | ECF_PURE | ECF_LOOPING_CONST_OR_PURE);
> at the end of flags_from_decl_or_type, internal_fn_flags, what else?
> Although, perhaps internal_fn_flags don't need to change, because internal
> calls don't really have callees.
> 
> Or is just ECF_CONST enough given that ECF_PURE is on fndecls and not
> types?
> 
> Or perhaps just start ignoring TYPE_READONLY -> ECF_CONST in
> flags_from_decl_or_type if that flag is on?
> 
> Is that rewriting currently what tree_profiling does in the
>   /* Update call statements and rebuild the cgraph.  */
>   FOR_EACH_DEFINED_FUNCTION (node)
> spot where it calls update_stmt on all call statements?
> 
> If so, could we just set that global? flag instead or in addition to
> doing those node->set_const_flag (false, false); calls and
> change flags_from_decl_or_type, plus I guess lto1 should set that
> flag if it is global on start as well if
> !flag_auto_profile
> && (flag_branch_probabilities || flag_test_coverage || profile_arc_flag)
> ?
> 
> I think just ignoring ECF_CONST from TYPE_READONLY might be best, if we
> have external functions like builtins from libc/libm and don't have their
> bodies, we can still treat them as const, the only problem is with the
> indirect calls where we don't know if we do or don't have a body for
> the callees and whether we've instrumented those or not.

I think we want something reflected on the IL.  Because of LTO I think
we cannot ignore externs (or we have to do massaging at stream-in).

The following brute-force works for the testcase.  I suppose since
we leave const/pure set on functions without body in the compile-time
TU (and ignore LTO there) we could do the same here.

I also wonder if that whole function walk is necessary if not
profile_arc_flag || flag_test_coverage ...

Honza - does this look OK to you?  I'll test guarding the whole
outer loop with profile_arc_flag || flag_test_coverage separately.

diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc
index ea9d7a23443..c8789618f96 100644
--- a/gcc/tree-profile.cc
+++ b/gcc/tree-profile.cc
@@ -840,9 +840,29 @@ tree_profiling (void)
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next ())
{
- gimple *stmt = gsi_stmt (gsi);
- if (is_gimple_call (stmt))
-   update_stmt (stmt);
+ gcall *call = dyn_cast  (gsi_stmt (gsi));
+ if (!call)
+   continue;
+
+ if (profile_arc_flag || flag_test_coverage)
+   {
+ /* We do not clear pure/const on decls without body.  */
+ tree fndecl = gimple_call_fndecl (call);
+ if (fndecl && !gimple_has_body_p (fndecl))
+   continue;
+
+ /* Drop the const attribute from the call type (the pure
+attribute is not available on types).  */
+ tree fntype = gimple_call_fntype (call);
+ if (fntype && TYPE_READONLY (fntype))
+   gimple_call_set_fntype (call, build_qualified_type
+   (fntype, (TYPE_QUALS 
(fntype)
+ & 
~TYPE_QUAL_CONST)));
+   }
+
+ /* Update virtual operands of calls to no longer const/pure
+functions.  */
+ update_stmt (call);
}
}
 



Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Ajit Agarwal via Gcc-patches



On 16/03/23 4:26 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 11:43 AM Ajit Agarwal  wrote:
>>
>>
>>
>> On 16/03/23 4:00 pm, Richard Biener wrote:
>>> On Thu, Mar 16, 2023 at 11:12 AM Ajit Agarwal  
>>> wrote:


 Hello Richard:

 On 16/03/23 3:22 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal  
> wrote:
>>
>>
>>
>> On 16/03/23 1:44 pm, Richard Biener wrote:
>>> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal  
>>> wrote:

 Hello Richard:

 On 16/03/23 1:10 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>  wrote:
>>
>> Hello All:
>>
>>
>> This patch eliminates unnecessary zero extension instruction from 
>> power generated assembly.
>> Bootstrapped and regtested on powerpc64-linux-gnu.
>
> What makes this so special that we cannot deal with it from generic 
> code?
> In particular we do have the REE pass, why is target specific
> knowledge neccessary
> to eliminate the extension?
>

 For returning bool values and comparision with integers generates the 
 following by all the rtl passes.

 set compare (subreg)
 set if_then_else
 Convert SImode -> QImode
 set zero_extend to SImode from QImode
 set return value 0 in one path of cfg.
 set return value 1 in other path of cfg.

 This pass replaces the above zero extension and conversion from QImode 
 to DImode with copy operation to keep QImode in 64 bit registers in 
 powerpc target.
>>>
>>> Sorry, I can't parse that - as there's no testcase with the patch I
>>> cannot even try to see what the actual RTL
>>> looks like (without the pass).
>>>
>>
>> Here is the PR with bugzilla.
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
>>
>> I can add the attached testcase with this PR in the patch.
>
> I don't see any zero-extends there.
>

 Here is the testcase.


 bool (int a, int b)
 {
   if (a > 2)
   return false;
if (b < 10)
return true;
  return false;
 }

 compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.

 Here is the rtl after cse.
 (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
 (insn 15 12 16 3 (set (reg:CC 123)
 (compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
 (const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
  (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
 (nil)))
 (insn 16 15 17 3 (set (reg:SI 124)
 (const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
  (nil))
 (insn 17 16 18 3 (set (reg:SI 122)
 (if_then_else:SI (gt (reg:CC 123)
 (const_int 0 [0]))
 (const_int 0 [0])
 (reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
  (expr_list:REG_DEAD (reg:SI 124)
 (expr_list:REG_DEAD (reg:CC 123)
 (nil
 (insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
 (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
  (expr_list:REG_DEAD (reg:SI 122)
 (nil)))
   ; pc falls through to BB 5
 (code_label 32 18 31 4 3 (nil) [1 uses])
 (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
 (insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
 (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
  (nil))
 (code_label 19 5 20 5 2 (nil) [0 uses])
 (note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
 (insn 21 20 22 5 (set (reg:DI 126 [ _1 ])
 (zero_extend:DI (reg:QI 117 [ _1 ]))) "ext.cc":8:1 5 
 {zero_extendqidi2}
  (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
 (nil)))
 (insn 22 21 26 5 (set (reg:DI 118 [  ])
 (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
  (expr_list:REG_DEAD (reg:DI 126 [ _1 ])
 (nil)))
 (insn 26 22 27 5 (set (reg/i:DI 3 3)
 (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
  (expr_list:REG_DEAD (reg:DI 118 [  ])
 (nil)))
 (insn 27 26 0 5 (use (reg/i:DI 3 3)) "ext.cc":8:1 -1
  (nil))
>>>
>>> But after combine there's just
>>>
>>> (note 6 0 38 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
>>> (insn 38 6 2 2 (set (reg:DI 126)
>>> (reg:DI 3 3 [ a ])) "t.c":3:1 634 {*movdi_internal64}
>>>  (expr_list:REG_DEAD (reg:DI 3 3 [ a ])
>>> (nil)))
>>> (note 2 38 39 2 NOTE_INSN_DELETED)
>>> (insn 39 2 3 2 (set (reg:DI 127)
>>> (reg:DI 4 4 [ b ])) "t.c":3:1 634 {*movdi_internal64}
>>>  (expr_list:REG_DEAD (reg:DI 4 4 [ b ])
>>> (nil)))
>>> (insn 3 39 4 2 (set (reg/v:DI 

[PATCH] [PR96339] AArch64: Optimise svlast[ab]

2023-03-16 Thread Tejas Belagod via Gcc-patches
From: Tejas Belagod 

  This PR optimizes an SVE intrinsics sequence where
svlasta (svptrue_pat_b8 (SV_VL1), x)
  a scalar is selected based on a constant predicate and a variable vector.
  This sequence is optimized to return the correspoding element of a NEON
  vector. For eg.
svlasta (svptrue_pat_b8 (SV_VL1), x)
  returns
umovw0, v0.b[1]
  Likewise,
svlastb (svptrue_pat_b8 (SV_VL1), x)
  returns
 umovw0, v0.b[0]
  This optimization only works provided the constant predicate maps to a range
  that is within the bounds of a 128-bit NEON register.

gcc/ChangeLog:

PR target/96339
* config/aarch64/aarch64-sve-builtins-base.cc (svlast_impl::fold): Fold 
sve
calls that have a constant input predicate vector.
(svlast_impl::is_lasta): Query to check if intrinsic is svlasta.
(svlast_impl::is_lastb): Query to check if intrinsic is svlastb.
(svlast_impl::vect_all_same): Check if all vector elements are equal.

gcc/testsuite/ChangeLog:

PR target/96339
* gcc.target/aarch64/sve/acle/general-c/svlast.c: New.
* gcc.target/aarch64/sve/acle/general-c/svlast128_run.c: New.
* gcc.target/aarch64/sve/acle/general-c/svlast256_run.c: New.
* gcc.target/aarch64/sve/pcs/return_4.c (caller_bf16): Fix asm
to expect optimized code for function body.
* gcc.target/aarch64/sve/pcs/return_4_128.c (caller_bf16): Likewise.
* gcc.target/aarch64/sve/pcs/return_4_256.c (caller_bf16): Likewise.
* gcc.target/aarch64/sve/pcs/return_4_512.c (caller_bf16): Likewise.
* gcc.target/aarch64/sve/pcs/return_4_1024.c (caller_bf16): Likewise.
* gcc.target/aarch64/sve/pcs/return_4_2048.c (caller_bf16): Likewise.
* gcc.target/aarch64/sve/pcs/return_5.c (caller_bf16): Likewise.
* gcc.target/aarch64/sve/pcs/return_5_128.c (caller_bf16): Likewise.
* gcc.target/aarch64/sve/pcs/return_5_256.c (caller_bf16): Likewise.
* gcc.target/aarch64/sve/pcs/return_5_512.c (caller_bf16): Likewise.
* gcc.target/aarch64/sve/pcs/return_5_1024.c (caller_bf16): Likewise.
* gcc.target/aarch64/sve/pcs/return_5_2048.c (caller_bf16): Likewise.
---
 .../aarch64/aarch64-sve-builtins-base.cc  | 124 +++
 .../aarch64/sve/acle/general-c/svlast.c   |  63 
 .../sve/acle/general-c/svlast128_run.c| 313 +
 .../sve/acle/general-c/svlast256_run.c| 314 ++
 .../gcc.target/aarch64/sve/pcs/return_4.c |   2 -
 .../aarch64/sve/pcs/return_4_1024.c   |   2 -
 .../gcc.target/aarch64/sve/pcs/return_4_128.c |   2 -
 .../aarch64/sve/pcs/return_4_2048.c   |   2 -
 .../gcc.target/aarch64/sve/pcs/return_4_256.c |   2 -
 .../gcc.target/aarch64/sve/pcs/return_4_512.c |   2 -
 .../gcc.target/aarch64/sve/pcs/return_5.c |   2 -
 .../aarch64/sve/pcs/return_5_1024.c   |   2 -
 .../gcc.target/aarch64/sve/pcs/return_5_128.c |   2 -
 .../aarch64/sve/pcs/return_5_2048.c   |   2 -
 .../gcc.target/aarch64/sve/pcs/return_5_256.c |   2 -
 .../gcc.target/aarch64/sve/pcs/return_5_512.c |   2 -
 16 files changed, 814 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svlast.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svlast128_run.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svlast256_run.c

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index cd9cace3c9b..db2b4dcaac9 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1056,6 +1056,130 @@ class svlast_impl : public quiet
 public:
   CONSTEXPR svlast_impl (int unspec) : m_unspec (unspec) {}
 
+  bool is_lasta () const { return m_unspec == UNSPEC_LASTA; }
+  bool is_lastb () const { return m_unspec == UNSPEC_LASTB; }
+
+  bool vect_all_same (tree v , int step) const
+  {
+int i;
+int nelts = vector_cst_encoded_nelts (v);
+int first_el = 0;
+
+for (i = first_el; i < nelts; i += step)
+  if (VECTOR_CST_ENCODED_ELT (v, i) != VECTOR_CST_ENCODED_ELT (v, 
first_el))
+   return false;
+
+return true;
+  }
+
+  /* Fold a svlast{a/b} call with constant predicate to a BIT_FIELD_REF.
+ BIT_FIELD_REF lowers to a NEON element extract, so we have to make sure
+ the index of the element being accessed is in the range of a NEON vector
+ width.  */
+  gimple *fold (gimple_folder & f) const override
+  {
+tree pred = gimple_call_arg (f.call, 0);
+tree val = gimple_call_arg (f.call, 1);
+
+if (TREE_CODE (pred) == VECTOR_CST)
+  {
+   HOST_WIDE_INT pos;
+   unsigned int const_vg;
+   int i = 0;
+   int step = f.type_suffix (0).element_bytes;
+   int step_1 = gcd (step, VECTOR_CST_NPATTERNS (pred));
+   int npats = VECTOR_CST_NPATTERNS (pred);
+   unsigned 

[committed] d: Fix closure fields don't get same alignment as local variable [PR109144]

2023-03-16 Thread Iain Buclaw via Gcc-patches
Hi,

Local variables with both non-local references and explicit alignment
did not propagate their alignment to either the closure field or closure
frame type, resulting in the closure being misaligned. This is now
correctly set-up when building the frame type.

Bootstrapped and regression tested on x86_64-linux-gnu/-m32/-mx32,
committed to mainline and backported to releases/gcc-12.

I did have a look at backporting to gcc-11 too, however the D front-end
does not correctly set the alignment of local variables, so although the
code generation pass is doing the right thing, the alignment for the
local variable is never set in the first place.

Regards,
Iain.

---
PR d/109144

gcc/d/ChangeLog:

* d-codegen.cc (build_frame_type): Set frame field and type alignment.

gcc/testsuite/ChangeLog:

* gdc.dg/torture/pr109144.d: New test.
---
 gcc/d/d-codegen.cc  | 5 +
 gcc/testsuite/gdc.dg/torture/pr109144.d | 9 +
 2 files changed, 14 insertions(+)
 create mode 100644 gcc/testsuite/gdc.dg/torture/pr109144.d

diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc
index 5a041927ec9..5c6c300ecec 100644
--- a/gcc/d/d-codegen.cc
+++ b/gcc/d/d-codegen.cc
@@ -2706,6 +2706,11 @@ build_frame_type (tree ffi, FuncDeclaration *fd)
   TREE_ADDRESSABLE (field) = TREE_ADDRESSABLE (vsym);
   DECL_NONADDRESSABLE_P (field) = !TREE_ADDRESSABLE (vsym);
   TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (vsym);
+  SET_DECL_ALIGN (field, DECL_ALIGN (vsym));
+
+  /* Update alignment for frame record type.  */
+  if (TYPE_ALIGN (frame_rec_type) < DECL_ALIGN (field))
+   SET_TYPE_ALIGN (frame_rec_type, DECL_ALIGN (field));
 
   if (DECL_LANG_NRVO (vsym))
{
diff --git a/gcc/testsuite/gdc.dg/torture/pr109144.d 
b/gcc/testsuite/gdc.dg/torture/pr109144.d
new file mode 100644
index 000..32d3af7cd45
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/pr109144.d
@@ -0,0 +1,9 @@
+// { dg-do run }
+// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
+void main()
+{
+align(128) byte var;
+assert((cast(size_t) ) % 128 == 0);
+var = 73;
+assert((() => var)() == 73);
+}
-- 
2.37.2



Re: [PATCH] tree-optimization/106912 - IPA profile and pure/const

2023-03-16 Thread Jakub Jelinek via Gcc-patches
On Fri, Nov 25, 2022 at 09:26:34PM +0100, Richard Biener via Gcc-patches wrote:
> > We could
> > probably keep tract if any instrumented code was ever inlined into a
> > given function and perhaps just start ignoring attributes set on types?
> 
> But ignoring attributes on types makes all indirect calls not
> const/pure annotatable (OK, they are not pure annotatable because of
> the above bug).  I really don't see how to conservatively solve this
> issue?

> Maybe we can ignore all pure/const when the cgraph state is
> in profile-instrumented state?  Of course we have multiple "APIs"
> to query that.

I think that's the way to go.  But we'd need to arrange somewhere during IPA
to add vops to all those pure/const calls if -fprofile-generate (direct or
indirect; not sure what exact flags) and after that make sure all our APIs
don't return ECF_CONST, ECF_PURE or ECF_LOOPING_CONST_OR_PURE.
Could be e.g.
  if (whatever)
flags &= ~(ECF_CONST | ECF_PURE | ECF_LOOPING_CONST_OR_PURE);
at the end of flags_from_decl_or_type, internal_fn_flags, what else?
Although, perhaps internal_fn_flags don't need to change, because internal
calls don't really have callees.

Or is just ECF_CONST enough given that ECF_PURE is on fndecls and not
types?

Or perhaps just start ignoring TYPE_READONLY -> ECF_CONST in
flags_from_decl_or_type if that flag is on?

Is that rewriting currently what tree_profiling does in the
  /* Update call statements and rebuild the cgraph.  */
  FOR_EACH_DEFINED_FUNCTION (node)
spot where it calls update_stmt on all call statements?

If so, could we just set that global? flag instead or in addition to
doing those node->set_const_flag (false, false); calls and
change flags_from_decl_or_type, plus I guess lto1 should set that
flag if it is global on start as well if
!flag_auto_profile
&& (flag_branch_probabilities || flag_test_coverage || profile_arc_flag)
?

I think just ignoring ECF_CONST from TYPE_READONLY might be best, if we
have external functions like builtins from libc/libm and don't have their
bodies, we can still treat them as const, the only problem is with the
indirect calls where we don't know if we do or don't have a body for
the callees and whether we've instrumented those or not.

Jakub



Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Richard Biener via Gcc-patches
On Thu, Mar 16, 2023 at 11:43 AM Ajit Agarwal  wrote:
>
>
>
> On 16/03/23 4:00 pm, Richard Biener wrote:
> > On Thu, Mar 16, 2023 at 11:12 AM Ajit Agarwal  
> > wrote:
> >>
> >>
> >> Hello Richard:
> >>
> >> On 16/03/23 3:22 pm, Richard Biener wrote:
> >>> On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal  
> >>> wrote:
> 
> 
> 
>  On 16/03/23 1:44 pm, Richard Biener wrote:
> > On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal  
> > wrote:
> >>
> >> Hello Richard:
> >>
> >> On 16/03/23 1:10 pm, Richard Biener wrote:
> >>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
> >>>  wrote:
> 
>  Hello All:
> 
> 
>  This patch eliminates unnecessary zero extension instruction from 
>  power generated assembly.
>  Bootstrapped and regtested on powerpc64-linux-gnu.
> >>>
> >>> What makes this so special that we cannot deal with it from generic 
> >>> code?
> >>> In particular we do have the REE pass, why is target specific
> >>> knowledge neccessary
> >>> to eliminate the extension?
> >>>
> >>
> >> For returning bool values and comparision with integers generates the 
> >> following by all the rtl passes.
> >>
> >> set compare (subreg)
> >> set if_then_else
> >> Convert SImode -> QImode
> >> set zero_extend to SImode from QImode
> >> set return value 0 in one path of cfg.
> >> set return value 1 in other path of cfg.
> >>
> >> This pass replaces the above zero extension and conversion from QImode 
> >> to DImode with copy operation to keep QImode in 64 bit registers in 
> >> powerpc target.
> >
> > Sorry, I can't parse that - as there's no testcase with the patch I
> > cannot even try to see what the actual RTL
> > looks like (without the pass).
> >
> 
>  Here is the PR with bugzilla.
>  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
> 
>  I can add the attached testcase with this PR in the patch.
> >>>
> >>> I don't see any zero-extends there.
> >>>
> >>
> >> Here is the testcase.
> >>
> >>
> >> bool (int a, int b)
> >> {
> >>   if (a > 2)
> >>   return false;
> >>if (b < 10)
> >>return true;
> >>  return false;
> >> }
> >>
> >> compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.
> >>
> >> Here is the rtl after cse.
> >> (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
> >> (insn 15 12 16 3 (set (reg:CC 123)
> >> (compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
> >> (const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
> >>  (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
> >> (nil)))
> >> (insn 16 15 17 3 (set (reg:SI 124)
> >> (const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
> >>  (nil))
> >> (insn 17 16 18 3 (set (reg:SI 122)
> >> (if_then_else:SI (gt (reg:CC 123)
> >> (const_int 0 [0]))
> >> (const_int 0 [0])
> >> (reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
> >>  (expr_list:REG_DEAD (reg:SI 124)
> >> (expr_list:REG_DEAD (reg:CC 123)
> >> (nil
> >> (insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
> >> (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
> >>  (expr_list:REG_DEAD (reg:SI 122)
> >> (nil)))
> >>   ; pc falls through to BB 5
> >> (code_label 32 18 31 4 3 (nil) [1 uses])
> >> (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
> >> (insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
> >> (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
> >>  (nil))
> >> (code_label 19 5 20 5 2 (nil) [0 uses])
> >> (note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
> >> (insn 21 20 22 5 (set (reg:DI 126 [ _1 ])
> >> (zero_extend:DI (reg:QI 117 [ _1 ]))) "ext.cc":8:1 5 
> >> {zero_extendqidi2}
> >>  (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
> >> (nil)))
> >> (insn 22 21 26 5 (set (reg:DI 118 [  ])
> >> (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
> >>  (expr_list:REG_DEAD (reg:DI 126 [ _1 ])
> >> (nil)))
> >> (insn 26 22 27 5 (set (reg/i:DI 3 3)
> >> (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
> >>  (expr_list:REG_DEAD (reg:DI 118 [  ])
> >> (nil)))
> >> (insn 27 26 0 5 (use (reg/i:DI 3 3)) "ext.cc":8:1 -1
> >>  (nil))
> >
> > But after combine there's just
> >
> > (note 6 0 38 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
> > (insn 38 6 2 2 (set (reg:DI 126)
> > (reg:DI 3 3 [ a ])) "t.c":3:1 634 {*movdi_internal64}
> >  (expr_list:REG_DEAD (reg:DI 3 3 [ a ])
> > (nil)))
> > (note 2 38 39 2 NOTE_INSN_DELETED)
> > (insn 39 2 3 2 (set (reg:DI 127)
> > (reg:DI 4 4 [ b ])) "t.c":3:1 634 {*movdi_internal64}
> >  (expr_list:REG_DEAD (reg:DI 4 4 [ b ])
> > (nil)))
> > (insn 3 39 4 2 (set (reg/v:DI 119 [ b ])
> > (reg:DI 127)) "t.c":3:1 634 

Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Ajit Agarwal via Gcc-patches



On 16/03/23 4:00 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 11:12 AM Ajit Agarwal  wrote:
>>
>>
>> Hello Richard:
>>
>> On 16/03/23 3:22 pm, Richard Biener wrote:
>>> On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal  wrote:



 On 16/03/23 1:44 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal  
> wrote:
>>
>> Hello Richard:
>>
>> On 16/03/23 1:10 pm, Richard Biener wrote:
>>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>>>  wrote:

 Hello All:


 This patch eliminates unnecessary zero extension instruction from 
 power generated assembly.
 Bootstrapped and regtested on powerpc64-linux-gnu.
>>>
>>> What makes this so special that we cannot deal with it from generic 
>>> code?
>>> In particular we do have the REE pass, why is target specific
>>> knowledge neccessary
>>> to eliminate the extension?
>>>
>>
>> For returning bool values and comparision with integers generates the 
>> following by all the rtl passes.
>>
>> set compare (subreg)
>> set if_then_else
>> Convert SImode -> QImode
>> set zero_extend to SImode from QImode
>> set return value 0 in one path of cfg.
>> set return value 1 in other path of cfg.
>>
>> This pass replaces the above zero extension and conversion from QImode 
>> to DImode with copy operation to keep QImode in 64 bit registers in 
>> powerpc target.
>
> Sorry, I can't parse that - as there's no testcase with the patch I
> cannot even try to see what the actual RTL
> looks like (without the pass).
>

 Here is the PR with bugzilla.
 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784

 I can add the attached testcase with this PR in the patch.
>>>
>>> I don't see any zero-extends there.
>>>
>>
>> Here is the testcase.
>>
>>
>> bool (int a, int b)
>> {
>>   if (a > 2)
>>   return false;
>>if (b < 10)
>>return true;
>>  return false;
>> }
>>
>> compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.
>>
>> Here is the rtl after cse.
>> (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
>> (insn 15 12 16 3 (set (reg:CC 123)
>> (compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
>> (const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
>>  (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
>> (nil)))
>> (insn 16 15 17 3 (set (reg:SI 124)
>> (const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
>>  (nil))
>> (insn 17 16 18 3 (set (reg:SI 122)
>> (if_then_else:SI (gt (reg:CC 123)
>> (const_int 0 [0]))
>> (const_int 0 [0])
>> (reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
>>  (expr_list:REG_DEAD (reg:SI 124)
>> (expr_list:REG_DEAD (reg:CC 123)
>> (nil
>> (insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
>> (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
>>  (expr_list:REG_DEAD (reg:SI 122)
>> (nil)))
>>   ; pc falls through to BB 5
>> (code_label 32 18 31 4 3 (nil) [1 uses])
>> (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
>> (insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
>> (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
>>  (nil))
>> (code_label 19 5 20 5 2 (nil) [0 uses])
>> (note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
>> (insn 21 20 22 5 (set (reg:DI 126 [ _1 ])
>> (zero_extend:DI (reg:QI 117 [ _1 ]))) "ext.cc":8:1 5 
>> {zero_extendqidi2}
>>  (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
>> (nil)))
>> (insn 22 21 26 5 (set (reg:DI 118 [  ])
>> (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
>>  (expr_list:REG_DEAD (reg:DI 126 [ _1 ])
>> (nil)))
>> (insn 26 22 27 5 (set (reg/i:DI 3 3)
>> (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
>>  (expr_list:REG_DEAD (reg:DI 118 [  ])
>> (nil)))
>> (insn 27 26 0 5 (use (reg/i:DI 3 3)) "ext.cc":8:1 -1
>>  (nil))
> 
> But after combine there's just
> 
> (note 6 0 38 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
> (insn 38 6 2 2 (set (reg:DI 126)
> (reg:DI 3 3 [ a ])) "t.c":3:1 634 {*movdi_internal64}
>  (expr_list:REG_DEAD (reg:DI 3 3 [ a ])
> (nil)))
> (note 2 38 39 2 NOTE_INSN_DELETED)
> (insn 39 2 3 2 (set (reg:DI 127)
> (reg:DI 4 4 [ b ])) "t.c":3:1 634 {*movdi_internal64}
>  (expr_list:REG_DEAD (reg:DI 4 4 [ b ])
> (nil)))
> (insn 3 39 4 2 (set (reg/v:DI 119 [ b ])
> (reg:DI 127)) "t.c":3:1 634 {*movdi_internal64}
>  (expr_list:REG_DEAD (reg:DI 127)
> (nil)))
> (note 4 3 10 2 NOTE_INSN_FUNCTION_BEG)
> (insn 10 4 11 2 (set (reg:CC 120)
> (compare:CC (subreg/s/u:SI (reg:DI 126) 0)
> (const_int 2 [0x2]))) "t.c":4:6 755 {*cmpsi_signed}
>  (expr_list:REG_DEAD (reg:DI 126)
> (nil)))
> 

Re: [PATCH-1, rs6000] Put constant into pseudo at expand when it needs two insns [PR86106]

2023-03-16 Thread Richard Biener via Gcc-patches
On Thu, Mar 16, 2023 at 10:04 AM HAO CHEN GUI  wrote:
>
> Hi Richard,
>
> 在 2023/3/16 15:57, Richard Biener 写道:
> > So this is one way around the lack of CSE/PRE of constant operands.  I'd
> > argue that a better spot for this _might_ be LRA (split the constant out if
> > there's a free register available), postreload-[g]cse (CSE the constants) 
> > and
> > then maybe cprop_hardreg to combine back single-use constants?
> >
> > I'm not sure if careful constraints massaging like adding magic letters to
> > alternatives with constants to pessimize them for LRA, making them
> > more expensive than spilling the constant to a register but avoid
> > secondary reloads with spilling a register to the stack to make room
> > for the constant, is possible - but in theory a special constraint modifier
> > for this purpose could be invented.
>
> Thanks so much for your advice.
>
> cse/gcse doesn't take cost of constant set (the def insn of the constant) into
> consideration. So it won't replace the register with a constant as it costs 1
> insn with the register and costs 2 insn with the constant.

I think it does (and should) cost the constant set (IIRC we had some
improvements
there, or at least proposed, during this stage1).  But sure - this is why your
"trick" works.

> Finally, the single-
> use constants can't be back to 2 insn.

And that's because of the issue you point out above?

> Not sure if I understand it correctly.
> Looking forward to your advice.

My main point is that CSEing constants has impacts on register pressure
and thus should probably be done after or within register allocation.  RTL
expansion itself is probably a bad time to pro-actively split out constants
even more if, as you say, nothing puts them back.

Richard.

> Thanks
> Gui Haochen


Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Richard Biener via Gcc-patches
On Thu, Mar 16, 2023 at 11:12 AM Ajit Agarwal  wrote:
>
>
> Hello Richard:
>
> On 16/03/23 3:22 pm, Richard Biener wrote:
> > On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal  wrote:
> >>
> >>
> >>
> >> On 16/03/23 1:44 pm, Richard Biener wrote:
> >>> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal  
> >>> wrote:
> 
>  Hello Richard:
> 
>  On 16/03/23 1:10 pm, Richard Biener wrote:
> > On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
> >  wrote:
> >>
> >> Hello All:
> >>
> >>
> >> This patch eliminates unnecessary zero extension instruction from 
> >> power generated assembly.
> >> Bootstrapped and regtested on powerpc64-linux-gnu.
> >
> > What makes this so special that we cannot deal with it from generic 
> > code?
> > In particular we do have the REE pass, why is target specific
> > knowledge neccessary
> > to eliminate the extension?
> >
> 
>  For returning bool values and comparision with integers generates the 
>  following by all the rtl passes.
> 
>  set compare (subreg)
>  set if_then_else
>  Convert SImode -> QImode
>  set zero_extend to SImode from QImode
>  set return value 0 in one path of cfg.
>  set return value 1 in other path of cfg.
> 
>  This pass replaces the above zero extension and conversion from QImode 
>  to DImode with copy operation to keep QImode in 64 bit registers in 
>  powerpc target.
> >>>
> >>> Sorry, I can't parse that - as there's no testcase with the patch I
> >>> cannot even try to see what the actual RTL
> >>> looks like (without the pass).
> >>>
> >>
> >> Here is the PR with bugzilla.
> >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
> >>
> >> I can add the attached testcase with this PR in the patch.
> >
> > I don't see any zero-extends there.
> >
>
> Here is the testcase.
>
>
> bool (int a, int b)
> {
>   if (a > 2)
>   return false;
>if (b < 10)
>return true;
>  return false;
> }
>
> compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.
>
> Here is the rtl after cse.
> (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
> (insn 15 12 16 3 (set (reg:CC 123)
> (compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
> (const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
>  (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
> (nil)))
> (insn 16 15 17 3 (set (reg:SI 124)
> (const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
>  (nil))
> (insn 17 16 18 3 (set (reg:SI 122)
> (if_then_else:SI (gt (reg:CC 123)
> (const_int 0 [0]))
> (const_int 0 [0])
> (reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
>  (expr_list:REG_DEAD (reg:SI 124)
> (expr_list:REG_DEAD (reg:CC 123)
> (nil
> (insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
> (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
>  (expr_list:REG_DEAD (reg:SI 122)
> (nil)))
>   ; pc falls through to BB 5
> (code_label 32 18 31 4 3 (nil) [1 uses])
> (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
> (insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
> (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
>  (nil))
> (code_label 19 5 20 5 2 (nil) [0 uses])
> (note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
> (insn 21 20 22 5 (set (reg:DI 126 [ _1 ])
> (zero_extend:DI (reg:QI 117 [ _1 ]))) "ext.cc":8:1 5 
> {zero_extendqidi2}
>  (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
> (nil)))
> (insn 22 21 26 5 (set (reg:DI 118 [  ])
> (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
>  (expr_list:REG_DEAD (reg:DI 126 [ _1 ])
> (nil)))
> (insn 26 22 27 5 (set (reg/i:DI 3 3)
> (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
>  (expr_list:REG_DEAD (reg:DI 118 [  ])
> (nil)))
> (insn 27 26 0 5 (use (reg/i:DI 3 3)) "ext.cc":8:1 -1
>  (nil))

But after combine there's just

(note 6 0 38 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
(insn 38 6 2 2 (set (reg:DI 126)
(reg:DI 3 3 [ a ])) "t.c":3:1 634 {*movdi_internal64}
 (expr_list:REG_DEAD (reg:DI 3 3 [ a ])
(nil)))
(note 2 38 39 2 NOTE_INSN_DELETED)
(insn 39 2 3 2 (set (reg:DI 127)
(reg:DI 4 4 [ b ])) "t.c":3:1 634 {*movdi_internal64}
 (expr_list:REG_DEAD (reg:DI 4 4 [ b ])
(nil)))
(insn 3 39 4 2 (set (reg/v:DI 119 [ b ])
(reg:DI 127)) "t.c":3:1 634 {*movdi_internal64}
 (expr_list:REG_DEAD (reg:DI 127)
(nil)))
(note 4 3 10 2 NOTE_INSN_FUNCTION_BEG)
(insn 10 4 11 2 (set (reg:CC 120)
(compare:CC (subreg/s/u:SI (reg:DI 126) 0)
(const_int 2 [0x2]))) "t.c":4:6 755 {*cmpsi_signed}
 (expr_list:REG_DEAD (reg:DI 126)
(nil)))
(jump_insn 11 10 12 2 (set (pc)
(if_then_else (gt (reg:CC 120)
(const_int 0 [0]))
(label_ref:DI 32)
(pc))) "t.c":4:6 

Re: [PATCH RFC] c++: co_await and move-only type [PR105406]

2023-03-16 Thread Iain Sandoe
Hi Jason

> On 16 Mar 2023, at 03:01, Jason Merrill  wrote:
> 
> Tested x86_64-pc-linux-gnu.  As with the array issue, I know you have WIP to
> deal with larger issues, but this seems like a reasonable local fix.  Does it
> make sense to you?

Yes, thanks for fixing it,

I believe it will still be needed when pending WIP in this area is considered, 
since
that only reduces the number of temporaries we might materialize.

Iain

> 
> -- 8< --
> 
> Here we were building a temporary MoveOnlyAwaitable to hold the result of
> evaluating 'o', but since 'o' is an lvalue we should build a reference
> instead.
> 
>   PR c++/105406
> 
> gcc/cp/ChangeLog:
> 
>   * coroutines.cc (build_co_await): Handle lvalue 'o'.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/coroutines/co-await-moveonly1.C: New test.
> ---
> gcc/cp/coroutines.cc  |  9 ++-
> .../g++.dg/coroutines/co-await-moveonly1.C| 63 +++
> 2 files changed, 71 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/g++.dg/coroutines/co-await-moveonly1.C
> 
> diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
> index 7f8cbc3d95e..a2189e43db8 100644
> --- a/gcc/cp/coroutines.cc
> +++ b/gcc/cp/coroutines.cc
> @@ -1024,9 +1024,13 @@ build_co_await (location_t loc, tree a, 
> suspend_point_kind suspend_kind)
> }
>   else
> {
> -  e_proxy = get_awaitable_var (suspend_kind, o_type);
> +  tree p_type = o_type;
> +  if (glvalue_p (o))
> + p_type = cp_build_reference_type (p_type, !lvalue_p (o));
> +  e_proxy = get_awaitable_var (suspend_kind, p_type);
>   o = cp_build_modify_expr (loc, e_proxy, INIT_EXPR, o,
>   tf_warning_or_error);
> +  e_proxy = convert_from_reference (e_proxy);
> }
> 
>   /* I suppose we could check that this is contextually convertible to bool.  
> */
> @@ -1120,6 +1124,9 @@ build_co_await (location_t loc, tree a, 
> suspend_point_kind suspend_kind)
> }
>   TREE_VEC_ELT (awaiter_calls, 2) = awrs_call; /* await_resume().  */
> 
> +  if (REFERENCE_REF_P (e_proxy))
> +e_proxy = TREE_OPERAND (e_proxy, 0);
> +
>   tree await_expr = build5_loc (loc, CO_AWAIT_EXPR,
>   TREE_TYPE (TREE_TYPE (awrs_func)),
>   a, e_proxy, o, awaiter_calls,
> diff --git a/gcc/testsuite/g++.dg/coroutines/co-await-moveonly1.C 
> b/gcc/testsuite/g++.dg/coroutines/co-await-moveonly1.C
> new file mode 100644
> index 000..e2831c104bf
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/coroutines/co-await-moveonly1.C
> @@ -0,0 +1,63 @@
> +// PR c++/105406
> +// { dg-do compile { target c++20 } }
> +
> +#include 
> +#include 
> +
> +// A move-only awaitable
> +class MoveOnlyAwaitable {
> +public:
> +MoveOnlyAwaitable() = default;
> +MoveOnlyAwaitable(MoveOnlyAwaitable &&) = default;
> +MoveOnlyAwaitable =(MoveOnlyAwaitable &&) = default;
> +
> +MoveOnlyAwaitable(const MoveOnlyAwaitable &) = delete;
> +MoveOnlyAwaitable =(const MoveOnlyAwaitable &) = delete;
> +
> +bool await_ready() const noexcept { return false; }
> +void await_suspend(std::coroutine_handle<>) noexcept {}
> +void await_resume() {}
> +};
> +
> +struct task {
> +struct promise_type {
> +auto initial_suspend() const { return std::suspend_never{}; }
> +auto final_suspend() const noexcept { return std::suspend_never(); }
> +auto get_return_object() { return task{}; }
> +void return_void() {}
> +void unhandled_exception() {}
> +
> +template
> +T &_transform(T &) {
> +return static_cast(t);
> +}
> +
> +
> +};
> +
> +bool await_ready() const { return false; }
> +void await_suspend(std::coroutine_handle<> awaiter) {}
> +void await_resume() {}
> +};
> +
> +task myCoroutine() {
> +// GCC: OK
> +// clang: OK
> +{
> +co_await MoveOnlyAwaitable();
> +}
> +// GCC: OK
> +// clang: OK
> +{
> +auto moveonly = MoveOnlyAwaitable();
> +co_await std::move(moveonly);
> +}
> +
> +// GCC <= 11.2: OK
> +// GCC 11.3:ERROR:  error: use of deleted function 
> 'MoveOnlyAwaitable::MoveOnlyAwaitable(const MoveOnlyAwaitable&)
> +// clang: OK
> +{
> +auto moveonly = MoveOnlyAwaitable();
> +co_await moveonly;
> +}
> +}
> 
> base-commit: ea4dd8f512979db247c54d6b41377bb73699bcd7
> -- 
> 2.31.1
> 



Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Ajit Agarwal via Gcc-patches


Hello Richard:

On 16/03/23 3:22 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal  wrote:
>>
>>
>>
>> On 16/03/23 1:44 pm, Richard Biener wrote:
>>> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal  wrote:

 Hello Richard:

 On 16/03/23 1:10 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>  wrote:
>>
>> Hello All:
>>
>>
>> This patch eliminates unnecessary zero extension instruction from power 
>> generated assembly.
>> Bootstrapped and regtested on powerpc64-linux-gnu.
>
> What makes this so special that we cannot deal with it from generic code?
> In particular we do have the REE pass, why is target specific
> knowledge neccessary
> to eliminate the extension?
>

 For returning bool values and comparision with integers generates the 
 following by all the rtl passes.

 set compare (subreg)
 set if_then_else
 Convert SImode -> QImode
 set zero_extend to SImode from QImode
 set return value 0 in one path of cfg.
 set return value 1 in other path of cfg.

 This pass replaces the above zero extension and conversion from QImode to 
 DImode with copy operation to keep QImode in 64 bit registers in powerpc 
 target.
>>>
>>> Sorry, I can't parse that - as there's no testcase with the patch I
>>> cannot even try to see what the actual RTL
>>> looks like (without the pass).
>>>
>>
>> Here is the PR with bugzilla.
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
>>
>> I can add the attached testcase with this PR in the patch.
> 
> I don't see any zero-extends there.
>

Here is the testcase.


bool (int a, int b)
{ 
  if (a > 2)
  return false;
   if (b < 10)
   return true;
 return false;
}

compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.

Here is the rtl after cse.
(note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
(insn 15 12 16 3 (set (reg:CC 123)
(compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
(const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
 (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
(nil)))
(insn 16 15 17 3 (set (reg:SI 124)
(const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
 (nil))
(insn 17 16 18 3 (set (reg:SI 122)
(if_then_else:SI (gt (reg:CC 123)
(const_int 0 [0]))
(const_int 0 [0])
(reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
 (expr_list:REG_DEAD (reg:SI 124)
(expr_list:REG_DEAD (reg:CC 123)
(nil
(insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
(subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
 (expr_list:REG_DEAD (reg:SI 122)
(nil)))
  ; pc falls through to BB 5
(code_label 32 18 31 4 3 (nil) [1 uses])
(note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
(insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
(const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
 (nil))
(code_label 19 5 20 5 2 (nil) [0 uses])
(note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
(insn 21 20 22 5 (set (reg:DI 126 [ _1 ])
(zero_extend:DI (reg:QI 117 [ _1 ]))) "ext.cc":8:1 5 {zero_extendqidi2}
 (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
(nil)))
(insn 22 21 26 5 (set (reg:DI 118 [  ])
(reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
 (expr_list:REG_DEAD (reg:DI 126 [ _1 ])
(nil)))
(insn 26 22 27 5 (set (reg/i:DI 3 3)
(reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
 (expr_list:REG_DEAD (reg:DI 118 [  ])
(nil)))
(insn 27 26 0 5 (use (reg/i:DI 3 3)) "ext.cc":8:1 -1
 (nil))


Thanks & Regards
Ajit
 
>> Thanks & Regards
>> Ajit
>>> Richard.
>>>
 Thanks & Regards
 Ajit
>> +  In cfgexpand pass QImode is generated with
>> +  bool register value and this pass uses QI
>> +  as 64 bit registers.
>> +

>> rs6000: suboptimal code for returning bool value on target ppc.
>>
>> New pass to eliminate unnecessary zero extension. This pass
>> is registered after cse rtl pass.
>>
>> 2023-03-16  Ajit Kumar Agarwal  
>>
>> gcc/ChangeLog:
>>
>> * config/rs6000/rs6000-passes.def: Registered zero elimination
>> pass.
>> * config/rs6000/rs6000-zext-elim.cc: Add new pass.
>> * config.gcc: Add new executable.
>> * config/rs6000/rs6000-protos.h: Add new prototype for zero
>> elimination pass.
>> * config/rs6000/rs6000.cc: Add new prototype for zero
>> elimination pass.
>> * config/rs6000/t-rs6000: Add new rule.
>> * expr.cc: Modified gcc assert.
>> * explow.cc: Modified gcc assert.
>> * optabs.cc: Modified gcc assert.
>> ---
>>  gcc/config.gcc|   4 +-
>>  

Re: [PATCH] diagnostics: fix crash with -fdiagnostics-format=json-file

2023-03-16 Thread Martin Liška
PING^5

On 2/27/23 10:49, Martin Liška wrote:
> PING^4
> 
> On 2/17/23 15:52, Martin Liška wrote:
>> PING^3
>>
>> On 2/1/23 14:13, Martin Liška wrote:
>>> PING^2
>>>
>>> On 1/24/23 14:34, Martin Liška wrote:
 PING^1

 On 1/10/23 16:10, Martin Liška wrote:
> On 1/6/23 14:21, David Malcolm wrote:
>> On Fri, 2023-01-06 at 12:33 +0100, Martin Liška wrote:
>>> Patch can bootstrap on x86_64-linux-gnu and survives regression
>>> tests.
>>
>> Thanks for the patch.
>>
>> I noticed that you marked PR 108307 as a dup of this, which covers
>> -fdiagnostics-format=sarif-file (and a .S file as input).
>>
>> The patch doesn't add any test coverage (for either of the diagnostic
>> formats).
>>
>> If we try to emit a diagnostic and base_file_name is NULL, and the user
>> requested one of -fdiagnostics-format={json,sarif}-file, where do the
>> diagnostics go?  Where should they go?
>
> Hey.
>
> I've done a new version of the patch where I utilize 
> x_main_input_basename.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?
> Thanks,
> Martin

>>>
>>
> 



Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Richard Biener via Gcc-patches
On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal  wrote:
>
>
>
> On 16/03/23 1:44 pm, Richard Biener wrote:
> > On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal  wrote:
> >>
> >> Hello Richard:
> >>
> >> On 16/03/23 1:10 pm, Richard Biener wrote:
> >>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
> >>>  wrote:
> 
>  Hello All:
> 
> 
>  This patch eliminates unnecessary zero extension instruction from power 
>  generated assembly.
>  Bootstrapped and regtested on powerpc64-linux-gnu.
> >>>
> >>> What makes this so special that we cannot deal with it from generic code?
> >>> In particular we do have the REE pass, why is target specific
> >>> knowledge neccessary
> >>> to eliminate the extension?
> >>>
> >>
> >> For returning bool values and comparision with integers generates the 
> >> following by all the rtl passes.
> >>
> >> set compare (subreg)
> >> set if_then_else
> >> Convert SImode -> QImode
> >> set zero_extend to SImode from QImode
> >> set return value 0 in one path of cfg.
> >> set return value 1 in other path of cfg.
> >>
> >> This pass replaces the above zero extension and conversion from QImode to 
> >> DImode with copy operation to keep QImode in 64 bit registers in powerpc 
> >> target.
> >
> > Sorry, I can't parse that - as there's no testcase with the patch I
> > cannot even try to see what the actual RTL
> > looks like (without the pass).
> >
>
> Here is the PR with bugzilla.
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
>
> I can add the attached testcase with this PR in the patch.

I don't see any zero-extends there.

> Thanks & Regards
> Ajit
> > Richard.
> >
> >> Thanks & Regards
> >> Ajit
>  +  In cfgexpand pass QImode is generated with
>  +  bool register value and this pass uses QI
>  +  as 64 bit registers.
>  +
> >>
>  rs6000: suboptimal code for returning bool value on target ppc.
> 
>  New pass to eliminate unnecessary zero extension. This pass
>  is registered after cse rtl pass.
> 
>  2023-03-16  Ajit Kumar Agarwal  
> 
>  gcc/ChangeLog:
> 
>  * config/rs6000/rs6000-passes.def: Registered zero elimination
>  pass.
>  * config/rs6000/rs6000-zext-elim.cc: Add new pass.
>  * config.gcc: Add new executable.
>  * config/rs6000/rs6000-protos.h: Add new prototype for zero
>  elimination pass.
>  * config/rs6000/rs6000.cc: Add new prototype for zero
>  elimination pass.
>  * config/rs6000/t-rs6000: Add new rule.
>  * expr.cc: Modified gcc assert.
>  * explow.cc: Modified gcc assert.
>  * optabs.cc: Modified gcc assert.
>  ---
>   gcc/config.gcc|   4 +-
>   gcc/config/rs6000/rs6000-passes.def   |   2 +
>   gcc/config/rs6000/rs6000-protos.h |   1 +
>   gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++
>   gcc/config/rs6000/rs6000.cc   |   2 +
>   gcc/config/rs6000/t-rs6000|   5 +
>   gcc/explow.cc |   3 +-
>   gcc/expr.cc   |   4 +-
>   gcc/optabs.cc |   3 +-
>   9 files changed, 379 insertions(+), 6 deletions(-)
>   create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
> 
>  diff --git a/gcc/config.gcc b/gcc/config.gcc
>  index da3a6d3ba1f..e8ac9d882f0 100644
>  --- a/gcc/config.gcc
>  +++ b/gcc/config.gcc
>  @@ -503,7 +503,7 @@ or1k*-*-*)
>  ;;
>   powerpc*-*-*)
>  cpu_type=rs6000
>  -   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>  +   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
>  rs6000-logue.o"
>  extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>  extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
>  extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
>  @@ -538,7 +538,7 @@ riscv*)
>  ;;
>   rs6000*-*-*)
>  extra_options="${extra_options} g.opt fused-madd.opt 
>  rs6000/rs6000-tables.opt"
>  -   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>  +   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
>  rs6000-logue.o"
>  extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>  target_gtfiles="$target_gtfiles 
>  \$(srcdir)/config/rs6000/rs6000-logue.cc 
>  \$(srcdir)/config/rs6000/rs6000-call.cc"
>  target_gtfiles="$target_gtfiles 
>  \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
>  diff --git a/gcc/config/rs6000/rs6000-passes.def 
>  b/gcc/config/rs6000/rs6000-passes.def
>  index ca899d5f7af..d7500feddf1 100644
>  --- a/gcc/config/rs6000/rs6000-passes.def
>  +++ 

[PATCH] RISC-V: Fine tune vmadc/vmsbc RA constraint

2023-03-16 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/ChangeLog:

* config/riscv/vector.md: Fix bug of vmsbc

---
 gcc/config/riscv/vector.md | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index a76e8286fe5..c100407d9fa 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -2600,14 +2600,14 @@
(set (attr "avl_type") (symbol_ref "INTVAL (operands[4])"))])
 
 (define_insn "@pred_msbc_overflow"
-  [(set (match_operand: 0 "register_operand" "=vr, , ")
+  [(set (match_operand: 0 "register_operand" "=vr, vr, , ")
(unspec:
   [(minus:VI
-(match_operand:VI 1 "register_operand" "  %0,  vr,  vr")
-(match_operand:VI 2 "register_operand" "vrvi,  vr,  vi"))
+(match_operand:VI 1 "register_operand" "   0,  vr,  vr,  vr")
+(match_operand:VI 2 "register_operand" "  vr,   0,  vr,  vi"))
(unspec:
- [(match_operand 3 "vector_length_operand" "  rK,  rK,  rK")
-  (match_operand 4 "const_int_operand" "   i,   i,   i")
+ [(match_operand 3 "vector_length_operand" "  rK,  rK,  rK,  rK")
+  (match_operand 4 "const_int_operand" "   i,   i,   i,   i")
   (reg:SI VL_REGNUM)
   (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_OVERFLOW))]
   "TARGET_VECTOR"
-- 
2.36.3



[PATCH] ISC-V: Fine tune vmadc/vmsbc RA constraint

2023-03-16 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/ChangeLog:

* config/riscv/vector.md: Fix bug of vmsbc

---
 gcc/config/riscv/vector.md | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index a76e8286fe5..c100407d9fa 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -2600,14 +2600,14 @@
(set (attr "avl_type") (symbol_ref "INTVAL (operands[4])"))])
 
 (define_insn "@pred_msbc_overflow"
-  [(set (match_operand: 0 "register_operand" "=vr, , ")
+  [(set (match_operand: 0 "register_operand" "=vr, vr, , ")
(unspec:
   [(minus:VI
-(match_operand:VI 1 "register_operand" "  %0,  vr,  vr")
-(match_operand:VI 2 "register_operand" "vrvi,  vr,  vi"))
+(match_operand:VI 1 "register_operand" "   0,  vr,  vr,  vr")
+(match_operand:VI 2 "register_operand" "  vr,   0,  vr,  vi"))
(unspec:
- [(match_operand 3 "vector_length_operand" "  rK,  rK,  rK")
-  (match_operand 4 "const_int_operand" "   i,   i,   i")
+ [(match_operand 3 "vector_length_operand" "  rK,  rK,  rK,  rK")
+  (match_operand 4 "const_int_operand" "   i,   i,   i,   i")
   (reg:SI VL_REGNUM)
   (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)] UNSPEC_OVERFLOW))]
   "TARGET_VECTOR"
-- 
2.36.3



Re: [PATCH] contrib: Update instructions regarding Unicode updates

2023-03-16 Thread Richard Biener via Gcc-patches
On Thu, 16 Mar 2023, Jakub Jelinek wrote:

> Hi!
> 
> I've noticed we have instructions on how to update from newer Unicode
> standard, but it didn't mention uname2c.h regeneration.
> 
> The following patch mentions that, also mentions that the Copyright years
> of Unicode should be updated and adds a copy of NameAliases.txt which
> is used for uname2c.h generation.
> 
> Ok for trunk?

OK.

> 2023-03-16  Jakub Jelinek  
> 
>   * unicode/README: Update to mention also makeuname2c.
>   * unicode/NameAliases.txt: New file.
> 
> --- contrib/unicode/README.jj 2023-03-14 12:24:55.498729826 +0100
> +++ contrib/unicode/README2023-03-15 16:50:01.078677092 +0100
> @@ -1,17 +1,20 @@
>  This directory contains a mechanism for GCC to have its own internal
>  implementation of wcwidth functionality (cpp_wcwidth () in libcpp/charset.c),
>  as well as a mechanism to update the information about codepoints permitted 
> in
> -identifiers, which is encoded in libcpp/ucnid.h.
> +identifiers, which is encoded in libcpp/ucnid.h, and mapping between Unicode
> +names and codepoints, which is encoded in libcpp/uname2c.h.
>  
>  The idea is to produce the necessary lookup tables
> -(../../libcpp/{ucnid.h,generated_cpp_wcwidth.h}) in a reproducible way, 
> starting
> -from the following files that are distributed by the Unicode Consortium:
> +(../../libcpp/{ucnid.h,uname2c.h,generated_cpp_wcwidth.h}) in a reproducible
> +way, starting from the following files that are distributed by the Unicode
> +Consortium:
>  
>  ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
>  ftp://ftp.unicode.org/Public/UNIDATA/EastAsianWidth.txt
>  ftp://ftp.unicode.org/Public/UNIDATA/PropList.txt
>  ftp://ftp.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt
>  ftp://ftp.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt
> +ftp://ftp.unicode.org/Public/UNIDATA/NameAliases.txt
>  
>  These files have been added to source control in this directory;
>  please see unicode-license.txt for the relevant copyright information.
> @@ -44,12 +47,27 @@ The procedure to update GCC's Unicode su
>  
>  3.  Run ./gen_wcwidth.py X.Y > ../../libcpp/generated_cpp_wcwidth.h
>  (where X.Y is the version of the Unicode standard corresponding to the
> -Unicode data files being used, most recently, 14.0.0).
> +Unicode data files being used, most recently, 15.0.0).
>  
> -4.  Compile makeucnid, e.g. with:
> -  gcc -O2 ../../libcpp/makeucnid.cc -o ../../libcpp/makeucnid
> +4.  Update Unicode Copyright years in libcpp/makeucnid.cc and in
> +libcpp/makeuname2c.cc up to the year in which the Unicode
> +standard has been released.
>  
> -5.  Generate ucnid.h as follows:
> +5.  Compile makeucnid, e.g. with:
> +  g++ -O2 ../../libcpp/makeucnid.cc -o ../../libcpp/makeucnid
> +
> +6.  Generate ucnid.h as follows:
>../../libcpp/makeucnid ../../libcpp/ucnid.tab UnicodeData.txt \
>   DerivedNormalizationProps.txt DerivedCoreProperties.txt \
>   > ../../libcpp/ucnid.h
> +
> +7.  Read the corresponding Unicode's standard and update correspondingly
> +generated_ranges table in libcpp/makeuname2c.cc (in Unicode 15 all
> +the needed information was in Table 4-8).
> +
> +8.  Compile makeuname2c, e.g. with:
> +  g++ -O2 ../../libcpp/makeuname2c.cc -o ../../libcpp/makeuname2c
> +
> +9:  Generate uname2c.h as follows:
> +  ../../libcpp/makeuname2c UnicodeData.txt NameAliases.txt \
> + > ../../libcpp/uname2c.h
> --- contrib/unicode/NameAliases.txt.jj2023-03-15 16:48:53.457663393 
> +0100
> +++ contrib/unicode/NameAliases.txt   2022-08-04 00:00:41.0 +0200
> @@ -0,0 +1,570 @@
> +# NameAliases-15.0.0.txt
> +# Date: 2022-07-26, 20:13:00 GMT [KW]
> +# © 2022 Unicode®, Inc.
> +# For terms of use, see https://www.unicode.org/terms_of_use.html
> +#
> +# Unicode Character Database
> +# For documentation, see https://www.unicode.org/reports/tr44/
> +#
> +# This file is a normative contributory data file in the
> +# Unicode Character Database.
> +#
> +# This file defines the formal name aliases for Unicode characters.
> +#
> +# For informative aliases, see NamesList.txt
> +#
> +# The formal name aliases are divided into five types, each with a distinct 
> label.
> +#
> +# Type Labels:
> +#
> +# 1. correction
> +#  Corrections for serious problems in the character names
> +# 2. control
> +#  ISO 6429 names for C0 and C1 control functions, and other
> +#  commonly occurring names for control codes
> +# 3. alternate
> +#  A few widely used alternate names for format characters
> +# 4. figment
> +#  Several documented labels for C1 control code points which
> +#  were never actually approved in any standard
> +# 5. abbreviation
> +#  Commonly occurring abbreviations (or acronyms) for control codes,
> +#  format characters, spaces, and variation selectors
> +#
> +# The formal name aliases are part of the Unicode character namespace, which
> +# includes the character names and the 

[PATCH] contrib: Update instructions regarding Unicode updates

2023-03-16 Thread Jakub Jelinek via Gcc-patches
Hi!

I've noticed we have instructions on how to update from newer Unicode
standard, but it didn't mention uname2c.h regeneration.

The following patch mentions that, also mentions that the Copyright years
of Unicode should be updated and adds a copy of NameAliases.txt which
is used for uname2c.h generation.

Ok for trunk?

2023-03-16  Jakub Jelinek  

* unicode/README: Update to mention also makeuname2c.
* unicode/NameAliases.txt: New file.

--- contrib/unicode/README.jj   2023-03-14 12:24:55.498729826 +0100
+++ contrib/unicode/README  2023-03-15 16:50:01.078677092 +0100
@@ -1,17 +1,20 @@
 This directory contains a mechanism for GCC to have its own internal
 implementation of wcwidth functionality (cpp_wcwidth () in libcpp/charset.c),
 as well as a mechanism to update the information about codepoints permitted in
-identifiers, which is encoded in libcpp/ucnid.h.
+identifiers, which is encoded in libcpp/ucnid.h, and mapping between Unicode
+names and codepoints, which is encoded in libcpp/uname2c.h.
 
 The idea is to produce the necessary lookup tables
-(../../libcpp/{ucnid.h,generated_cpp_wcwidth.h}) in a reproducible way, 
starting
-from the following files that are distributed by the Unicode Consortium:
+(../../libcpp/{ucnid.h,uname2c.h,generated_cpp_wcwidth.h}) in a reproducible
+way, starting from the following files that are distributed by the Unicode
+Consortium:
 
 ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
 ftp://ftp.unicode.org/Public/UNIDATA/EastAsianWidth.txt
 ftp://ftp.unicode.org/Public/UNIDATA/PropList.txt
 ftp://ftp.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt
 ftp://ftp.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt
+ftp://ftp.unicode.org/Public/UNIDATA/NameAliases.txt
 
 These files have been added to source control in this directory;
 please see unicode-license.txt for the relevant copyright information.
@@ -44,12 +47,27 @@ The procedure to update GCC's Unicode su
 
 3.  Run ./gen_wcwidth.py X.Y > ../../libcpp/generated_cpp_wcwidth.h
 (where X.Y is the version of the Unicode standard corresponding to the
-Unicode data files being used, most recently, 14.0.0).
+Unicode data files being used, most recently, 15.0.0).
 
-4.  Compile makeucnid, e.g. with:
-  gcc -O2 ../../libcpp/makeucnid.cc -o ../../libcpp/makeucnid
+4.  Update Unicode Copyright years in libcpp/makeucnid.cc and in
+libcpp/makeuname2c.cc up to the year in which the Unicode
+standard has been released.
 
-5.  Generate ucnid.h as follows:
+5.  Compile makeucnid, e.g. with:
+  g++ -O2 ../../libcpp/makeucnid.cc -o ../../libcpp/makeucnid
+
+6.  Generate ucnid.h as follows:
   ../../libcpp/makeucnid ../../libcpp/ucnid.tab UnicodeData.txt \
DerivedNormalizationProps.txt DerivedCoreProperties.txt \
> ../../libcpp/ucnid.h
+
+7.  Read the corresponding Unicode's standard and update correspondingly
+generated_ranges table in libcpp/makeuname2c.cc (in Unicode 15 all
+the needed information was in Table 4-8).
+
+8.  Compile makeuname2c, e.g. with:
+  g++ -O2 ../../libcpp/makeuname2c.cc -o ../../libcpp/makeuname2c
+
+9:  Generate uname2c.h as follows:
+  ../../libcpp/makeuname2c UnicodeData.txt NameAliases.txt \
+   > ../../libcpp/uname2c.h
--- contrib/unicode/NameAliases.txt.jj  2023-03-15 16:48:53.457663393 +0100
+++ contrib/unicode/NameAliases.txt 2022-08-04 00:00:41.0 +0200
@@ -0,0 +1,570 @@
+# NameAliases-15.0.0.txt
+# Date: 2022-07-26, 20:13:00 GMT [KW]
+# © 2022 Unicode®, Inc.
+# For terms of use, see https://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see https://www.unicode.org/reports/tr44/
+#
+# This file is a normative contributory data file in the
+# Unicode Character Database.
+#
+# This file defines the formal name aliases for Unicode characters.
+#
+# For informative aliases, see NamesList.txt
+#
+# The formal name aliases are divided into five types, each with a distinct 
label.
+#
+# Type Labels:
+#
+# 1. correction
+#  Corrections for serious problems in the character names
+# 2. control
+#  ISO 6429 names for C0 and C1 control functions, and other
+#  commonly occurring names for control codes
+# 3. alternate
+#  A few widely used alternate names for format characters
+# 4. figment
+#  Several documented labels for C1 control code points which
+#  were never actually approved in any standard
+# 5. abbreviation
+#  Commonly occurring abbreviations (or acronyms) for control codes,
+#  format characters, spaces, and variation selectors
+#
+# The formal name aliases are part of the Unicode character namespace, which
+# includes the character names and the names of named character sequences.
+# The inclusion of ISO 6429 names and other commonly occurring names and
+# abbreviations for control codes and format characters as formal name aliases
+# is to help avoid name collisions between Unicode character names and the
+# 

[committed] libcpp: Update Unicode copyright years

2023-03-16 Thread Jakub Jelinek via Gcc-patches
Hi!

I've noticed I forgot to update copyright years when updating from
Unicode 15.0.0 (and makeucnid.cc had it hopelessly obsolete).

Committed as obvious to trunk.

2023-03-16  Jakub Jelinek  

* makeucnid.cc (write_copyright): Update Unicode copyright years
up to 2022.
* makeuname2c.cc (write_copyright): Likewise.
* ucnid.h: Regenerated.
* uname2c.h: Regenerated.

--- libcpp/makeucnid.cc.jj  2023-01-16 11:52:16.329730439 +0100
+++ libcpp/makeucnid.cc 2023-03-15 16:27:32.068353772 +0100
@@ -467,7 +467,7 @@ write_copyright (void)
.\n\
 \n\
 \n\
-   Copyright (C) 1991-2005 Unicode, Inc.  All rights reserved.\n\
+   Copyright (C) 1991-2022 Unicode, Inc.  All rights reserved.\n\
Distributed under the Terms of Use in\n\
http://www.unicode.org/copyright.html.\n\
 \n\
--- libcpp/makeuname2c.cc.jj2023-01-16 11:52:16.329730439 +0100
+++ libcpp/makeuname2c.cc   2023-03-15 16:28:11.994771274 +0100
@@ -669,7 +669,7 @@ write_copyright (void)
.\n\
 \n\
 \n\
-   Copyright (C) 1991-2021 Unicode, Inc.  All rights reserved.\n\
+   Copyright (C) 1991-2022 Unicode, Inc.  All rights reserved.\n\
Distributed under the Terms of Use in\n\
http://www.unicode.org/copyright.html.\n\
 \n\
--- libcpp/ucnid.h.jj   2023-01-16 11:52:16.330730424 +0100
+++ libcpp/ucnid.h  2023-03-15 16:27:55.362013940 +0100
@@ -16,7 +16,7 @@
.
 
 
-   Copyright (C) 1991-2005 Unicode, Inc.  All rights reserved.
+   Copyright (C) 1991-2022 Unicode, Inc.  All rights reserved.
Distributed under the Terms of Use in
http://www.unicode.org/copyright.html.
 
--- libcpp/uname2c.h.jj 2023-01-16 11:52:16.335730351 +0100
+++ libcpp/uname2c.h2023-03-15 16:28:29.542515265 +0100
@@ -16,7 +16,7 @@
.
 
 
-   Copyright (C) 1991-2021 Unicode, Inc.  All rights reserved.
+   Copyright (C) 1991-2022 Unicode, Inc.  All rights reserved.
Distributed under the Terms of Use in
http://www.unicode.org/copyright.html.
 

Jakub



Re: [PATCH-1, rs6000] Put constant into pseudo at expand when it needs two insns [PR86106]

2023-03-16 Thread HAO CHEN GUI via Gcc-patches
Hi Richard,

在 2023/3/16 15:57, Richard Biener 写道:
> So this is one way around the lack of CSE/PRE of constant operands.  I'd
> argue that a better spot for this _might_ be LRA (split the constant out if
> there's a free register available), postreload-[g]cse (CSE the constants) and
> then maybe cprop_hardreg to combine back single-use constants?
> 
> I'm not sure if careful constraints massaging like adding magic letters to
> alternatives with constants to pessimize them for LRA, making them
> more expensive than spilling the constant to a register but avoid
> secondary reloads with spilling a register to the stack to make room
> for the constant, is possible - but in theory a special constraint modifier
> for this purpose could be invented.

Thanks so much for your advice.

cse/gcse doesn't take cost of constant set (the def insn of the constant) into
consideration. So it won't replace the register with a constant as it costs 1
insn with the register and costs 2 insn with the constant. Finally, the single-
use constants can't be back to 2 insn. Not sure if I understand it correctly.
Looking forward to your advice.

Thanks
Gui Haochen


Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Ajit Agarwal via Gcc-patches



On 16/03/23 1:44 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal  wrote:
>>
>> Hello Richard:
>>
>> On 16/03/23 1:10 pm, Richard Biener wrote:
>>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>>>  wrote:

 Hello All:


 This patch eliminates unnecessary zero extension instruction from power 
 generated assembly.
 Bootstrapped and regtested on powerpc64-linux-gnu.
>>>
>>> What makes this so special that we cannot deal with it from generic code?
>>> In particular we do have the REE pass, why is target specific
>>> knowledge neccessary
>>> to eliminate the extension?
>>>
>>
>> For returning bool values and comparision with integers generates the 
>> following by all the rtl passes.
>>
>> set compare (subreg)
>> set if_then_else
>> Convert SImode -> QImode
>> set zero_extend to SImode from QImode
>> set return value 0 in one path of cfg.
>> set return value 1 in other path of cfg.
>>
>> This pass replaces the above zero extension and conversion from QImode to 
>> DImode with copy operation to keep QImode in 64 bit registers in powerpc 
>> target.
> 
> Sorry, I can't parse that - as there's no testcase with the patch I
> cannot even try to see what the actual RTL
> looks like (without the pass).
> 

Here is the PR with bugzilla. 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784

I can add the attached testcase with this PR in the patch.

Thanks & Regards
Ajit 
> Richard.
> 
>> Thanks & Regards
>> Ajit
 +  In cfgexpand pass QImode is generated with
 +  bool register value and this pass uses QI
 +  as 64 bit registers.
 +
>>
 rs6000: suboptimal code for returning bool value on target ppc.

 New pass to eliminate unnecessary zero extension. This pass
 is registered after cse rtl pass.

 2023-03-16  Ajit Kumar Agarwal  

 gcc/ChangeLog:

 * config/rs6000/rs6000-passes.def: Registered zero elimination
 pass.
 * config/rs6000/rs6000-zext-elim.cc: Add new pass.
 * config.gcc: Add new executable.
 * config/rs6000/rs6000-protos.h: Add new prototype for zero
 elimination pass.
 * config/rs6000/rs6000.cc: Add new prototype for zero
 elimination pass.
 * config/rs6000/t-rs6000: Add new rule.
 * expr.cc: Modified gcc assert.
 * explow.cc: Modified gcc assert.
 * optabs.cc: Modified gcc assert.
 ---
  gcc/config.gcc|   4 +-
  gcc/config/rs6000/rs6000-passes.def   |   2 +
  gcc/config/rs6000/rs6000-protos.h |   1 +
  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++
  gcc/config/rs6000/rs6000.cc   |   2 +
  gcc/config/rs6000/t-rs6000|   5 +
  gcc/explow.cc |   3 +-
  gcc/expr.cc   |   4 +-
  gcc/optabs.cc |   3 +-
  9 files changed, 379 insertions(+), 6 deletions(-)
  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc

 diff --git a/gcc/config.gcc b/gcc/config.gcc
 index da3a6d3ba1f..e8ac9d882f0 100644
 --- a/gcc/config.gcc
 +++ b/gcc/config.gcc
 @@ -503,7 +503,7 @@ or1k*-*-*)
 ;;
  powerpc*-*-*)
 cpu_type=rs6000
 -   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
 +   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
 rs6000-logue.o"
 extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
 extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
 extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
 @@ -538,7 +538,7 @@ riscv*)
 ;;
  rs6000*-*-*)
 extra_options="${extra_options} g.opt fused-madd.opt 
 rs6000/rs6000-tables.opt"
 -   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
 +   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
 rs6000-logue.o"
 extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
 target_gtfiles="$target_gtfiles 
 \$(srcdir)/config/rs6000/rs6000-logue.cc 
 \$(srcdir)/config/rs6000/rs6000-call.cc"
 target_gtfiles="$target_gtfiles 
 \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
 diff --git a/gcc/config/rs6000/rs6000-passes.def 
 b/gcc/config/rs6000/rs6000-passes.def
 index ca899d5f7af..d7500feddf1 100644
 --- a/gcc/config/rs6000/rs6000-passes.def
 +++ b/gcc/config/rs6000/rs6000-passes.def
 @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
   The power8 does not have instructions that automaticaly do the byte 
 swaps
   for loads and stores.  */
INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
 +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
 

Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Richard Biener via Gcc-patches
On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal  wrote:
>
> Hello Richard:
>
> On 16/03/23 1:10 pm, Richard Biener wrote:
> > On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
> >  wrote:
> >>
> >> Hello All:
> >>
> >>
> >> This patch eliminates unnecessary zero extension instruction from power 
> >> generated assembly.
> >> Bootstrapped and regtested on powerpc64-linux-gnu.
> >
> > What makes this so special that we cannot deal with it from generic code?
> > In particular we do have the REE pass, why is target specific
> > knowledge neccessary
> > to eliminate the extension?
> >
>
> For returning bool values and comparision with integers generates the 
> following by all the rtl passes.
>
> set compare (subreg)
> set if_then_else
> Convert SImode -> QImode
> set zero_extend to SImode from QImode
> set return value 0 in one path of cfg.
> set return value 1 in other path of cfg.
>
> This pass replaces the above zero extension and conversion from QImode to 
> DImode with copy operation to keep QImode in 64 bit registers in powerpc 
> target.

Sorry, I can't parse that - as there's no testcase with the patch I
cannot even try to see what the actual RTL
looks like (without the pass).

Richard.

> Thanks & Regards
> Ajit
> >> +  In cfgexpand pass QImode is generated with
> >> +  bool register value and this pass uses QI
> >> +  as 64 bit registers.
> >> +
>
> >> rs6000: suboptimal code for returning bool value on target ppc.
> >>
> >> New pass to eliminate unnecessary zero extension. This pass
> >> is registered after cse rtl pass.
> >>
> >> 2023-03-16  Ajit Kumar Agarwal  
> >>
> >> gcc/ChangeLog:
> >>
> >> * config/rs6000/rs6000-passes.def: Registered zero elimination
> >> pass.
> >> * config/rs6000/rs6000-zext-elim.cc: Add new pass.
> >> * config.gcc: Add new executable.
> >> * config/rs6000/rs6000-protos.h: Add new prototype for zero
> >> elimination pass.
> >> * config/rs6000/rs6000.cc: Add new prototype for zero
> >> elimination pass.
> >> * config/rs6000/t-rs6000: Add new rule.
> >> * expr.cc: Modified gcc assert.
> >> * explow.cc: Modified gcc assert.
> >> * optabs.cc: Modified gcc assert.
> >> ---
> >>  gcc/config.gcc|   4 +-
> >>  gcc/config/rs6000/rs6000-passes.def   |   2 +
> >>  gcc/config/rs6000/rs6000-protos.h |   1 +
> >>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++
> >>  gcc/config/rs6000/rs6000.cc   |   2 +
> >>  gcc/config/rs6000/t-rs6000|   5 +
> >>  gcc/explow.cc |   3 +-
> >>  gcc/expr.cc   |   4 +-
> >>  gcc/optabs.cc |   3 +-
> >>  9 files changed, 379 insertions(+), 6 deletions(-)
> >>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
> >>
> >> diff --git a/gcc/config.gcc b/gcc/config.gcc
> >> index da3a6d3ba1f..e8ac9d882f0 100644
> >> --- a/gcc/config.gcc
> >> +++ b/gcc/config.gcc
> >> @@ -503,7 +503,7 @@ or1k*-*-*)
> >> ;;
> >>  powerpc*-*-*)
> >> cpu_type=rs6000
> >> -   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> >> +   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
> >> rs6000-logue.o"
> >> extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
> >> extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
> >> extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
> >> @@ -538,7 +538,7 @@ riscv*)
> >> ;;
> >>  rs6000*-*-*)
> >> extra_options="${extra_options} g.opt fused-madd.opt 
> >> rs6000/rs6000-tables.opt"
> >> -   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> >> +   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
> >> rs6000-logue.o"
> >> extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
> >> target_gtfiles="$target_gtfiles 
> >> \$(srcdir)/config/rs6000/rs6000-logue.cc 
> >> \$(srcdir)/config/rs6000/rs6000-call.cc"
> >> target_gtfiles="$target_gtfiles 
> >> \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
> >> diff --git a/gcc/config/rs6000/rs6000-passes.def 
> >> b/gcc/config/rs6000/rs6000-passes.def
> >> index ca899d5f7af..d7500feddf1 100644
> >> --- a/gcc/config/rs6000/rs6000-passes.def
> >> +++ b/gcc/config/rs6000/rs6000-passes.def
> >> @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
> >>   The power8 does not have instructions that automaticaly do the byte 
> >> swaps
> >>   for loads and stores.  */
> >>INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
> >> +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
> >> +
> >>
> >>/* Pass to do the PCREL_OPT optimization that combines the load of an
> >>   external symbol's address along with a single load or store using 
> >> that
> >> diff --git a/gcc/config/rs6000/rs6000-protos.h 
> >> 

Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Ajit Agarwal via Gcc-patches
Hello Richard:

On 16/03/23 1:10 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>  wrote:
>>
>> Hello All:
>>
>>
>> This patch eliminates unnecessary zero extension instruction from power 
>> generated assembly.
>> Bootstrapped and regtested on powerpc64-linux-gnu.
> 
> What makes this so special that we cannot deal with it from generic code?
> In particular we do have the REE pass, why is target specific
> knowledge neccessary
> to eliminate the extension?
>

For returning bool values and comparision with integers generates the following 
by all the rtl passes.
 
set compare (subreg)
set if_then_else
Convert SImode -> QImode
set zero_extend to SImode from QImode
set return value 0 in one path of cfg.
set return value 1 in other path of cfg.

This pass replaces the above zero extension and conversion from QImode to 
DImode with copy operation to keep QImode in 64 bit registers in powerpc target.

Thanks & Regards
Ajit
>> +  In cfgexpand pass QImode is generated with
>> +  bool register value and this pass uses QI
>> +  as 64 bit registers.
>> +

>> rs6000: suboptimal code for returning bool value on target ppc.
>>
>> New pass to eliminate unnecessary zero extension. This pass
>> is registered after cse rtl pass.
>>
>> 2023-03-16  Ajit Kumar Agarwal  
>>
>> gcc/ChangeLog:
>>
>> * config/rs6000/rs6000-passes.def: Registered zero elimination
>> pass.
>> * config/rs6000/rs6000-zext-elim.cc: Add new pass.
>> * config.gcc: Add new executable.
>> * config/rs6000/rs6000-protos.h: Add new prototype for zero
>> elimination pass.
>> * config/rs6000/rs6000.cc: Add new prototype for zero
>> elimination pass.
>> * config/rs6000/t-rs6000: Add new rule.
>> * expr.cc: Modified gcc assert.
>> * explow.cc: Modified gcc assert.
>> * optabs.cc: Modified gcc assert.
>> ---
>>  gcc/config.gcc|   4 +-
>>  gcc/config/rs6000/rs6000-passes.def   |   2 +
>>  gcc/config/rs6000/rs6000-protos.h |   1 +
>>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++
>>  gcc/config/rs6000/rs6000.cc   |   2 +
>>  gcc/config/rs6000/t-rs6000|   5 +
>>  gcc/explow.cc |   3 +-
>>  gcc/expr.cc   |   4 +-
>>  gcc/optabs.cc |   3 +-
>>  9 files changed, 379 insertions(+), 6 deletions(-)
>>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
>>
>> diff --git a/gcc/config.gcc b/gcc/config.gcc
>> index da3a6d3ba1f..e8ac9d882f0 100644
>> --- a/gcc/config.gcc
>> +++ b/gcc/config.gcc
>> @@ -503,7 +503,7 @@ or1k*-*-*)
>> ;;
>>  powerpc*-*-*)
>> cpu_type=rs6000
>> -   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>> +   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
>> rs6000-logue.o"
>> extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>> extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
>> extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
>> @@ -538,7 +538,7 @@ riscv*)
>> ;;
>>  rs6000*-*-*)
>> extra_options="${extra_options} g.opt fused-madd.opt 
>> rs6000/rs6000-tables.opt"
>> -   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>> +   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
>> rs6000-logue.o"
>> extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>> target_gtfiles="$target_gtfiles 
>> \$(srcdir)/config/rs6000/rs6000-logue.cc 
>> \$(srcdir)/config/rs6000/rs6000-call.cc"
>> target_gtfiles="$target_gtfiles 
>> \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
>> diff --git a/gcc/config/rs6000/rs6000-passes.def 
>> b/gcc/config/rs6000/rs6000-passes.def
>> index ca899d5f7af..d7500feddf1 100644
>> --- a/gcc/config/rs6000/rs6000-passes.def
>> +++ b/gcc/config/rs6000/rs6000-passes.def
>> @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
>>   The power8 does not have instructions that automaticaly do the byte 
>> swaps
>>   for loads and stores.  */
>>INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
>> +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
>> +
>>
>>/* Pass to do the PCREL_OPT optimization that combines the load of an
>>   external symbol's address along with a single load or store using that
>> diff --git a/gcc/config/rs6000/rs6000-protos.h 
>> b/gcc/config/rs6000/rs6000-protos.h
>> index 1a4fc1df668..f6cf2d673d4 100644
>> --- a/gcc/config/rs6000/rs6000-protos.h
>> +++ b/gcc/config/rs6000/rs6000-protos.h
>> @@ -340,6 +340,7 @@ namespace gcc { class context; }
>>  class rtl_opt_pass;
>>
>>  extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
>> +extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
>>  extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
>>  extern bool 

Re: [PATCH-1, rs6000] Put constant into pseudo at expand when it needs two insns [PR86106]

2023-03-16 Thread Richard Biener via Gcc-patches
On Thu, Mar 16, 2023 at 6:35 AM HAO CHEN GUI via Gcc-patches
 wrote:
>
> Hi,
>   Currently, rs6000 directly expands to 2 insns if an integer constant is the
> second operand and it needs two insns. For example, addi/addis and ori/oris.
> It may not benefit when the constant is used for more than 2 times in an
> extended basic block, just like the case in PR shows.
>
>   One possible solution is to force the constant in pseudo at expand and let
> propagation pass and combine pass decide if the pseudo should be replaced
> with the constant or not by comparing the rtx/insn cost.
>
>   It generates a constant move if the constant is forced to a pseudo. There
> is one constant move if it's used only once. The combine pass can combine
> the constant move and add/ior/xor insn and eliminate the move as the insn
> cost reduces. There are multiple moves if the constant is used for several
> times. In an extended basic block, these constant moves are merged to one by
> propagation pass. The combine pass can't replace the pseudo with the constant
> as it is no cost saving.
>
>   In an extreme case, the constant is used twice in an extended basic block.
> The cost(latency) is unchanged between putting constant in pseudo and
> generating 2 insns. The dependence of instructions reduces but one more
> register is used. In other case, it should be always optimal to put constant
> in a pseudo.
>
>   This patch changes the expander of integer add and force constant to a
> pseudo when it needs 2 insn. Also a combine and split pattern is defined.

So this is one way around the lack of CSE/PRE of constant operands.  I'd
argue that a better spot for this _might_ be LRA (split the constant out if
there's a free register available), postreload-[g]cse (CSE the constants) and
then maybe cprop_hardreg to combine back single-use constants?

I'm not sure if careful constraints massaging like adding magic letters to
alternatives with constants to pessimize them for LRA, making them
more expensive than spilling the constant to a register but avoid
secondary reloads with spilling a register to the stack to make room
for the constant, is possible - but in theory a special constraint modifier
for this purpose could be invented.

Richard.

>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
>
> Thanks
> Gui Haochen
>
> ChangeLog
> 2023-03-14  Haochen Gui 
>
> gcc/
> * config/rs6000/predicates.md (add_2insn_cint_operand): New predicate
> which returns true when op is a 32-bit but not a 16-bit signed
> integer constant.
> * config/rs6000/rs6000.md (add3): Put the second operand into
> register when it's a constant and need 2 add insns.
> (*add_2insn): New insn_and_split for 2-insn add.
>
>
> patch.diff
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index a1764018545..09e59a48cd3 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -282,6 +282,13 @@ (define_predicate "s32bit_cint_operand"
>(and (match_code "const_int")
> (match_test "(0x8000 + UINTVAL (op)) >> 32 == 0")))
>
> +;; Return 1 if op is a 32-bit but not 16-bit constant signed integer
> +(define_predicate "add_2insn_cint_operand"
> +  (and (match_code "const_int")
> +   (and (match_operand 0 "s32bit_cint_operand")
> +   (and (not (match_operand 0 "short_cint_operand"))
> +(not (match_operand 0 "upper16_cint_operand"))
> +
>  ;; Return 1 if op is a constant 32-bit unsigned
>  (define_predicate "c32bit_cint_operand"
>(and (match_code "const_int")
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 6011f5bf76a..dba41e3df90 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -1796,12 +1796,44 @@ (define_expand "add3"
>/* The ordering here is important for the prolog expander.
>  When space is allocated from the stack, adding 'low' first may
>  produce a temporary deallocation (which would be bad).  */
> -  emit_insn (gen_add3 (tmp, operands[1], GEN_INT (rest)));
> -  emit_insn (gen_add3 (operands[0], tmp, GEN_INT (low)));
> -  DONE;
> +  if (!can_create_pseudo_p ())
> +   {
> + emit_insn (gen_add3 (tmp, operands[1], GEN_INT (rest)));
> + emit_insn (gen_add3 (operands[0], tmp, GEN_INT (low)));
> + DONE;
> +   }
> +
> +  operands[2] = force_reg (mode, operands[2]);
>  }
>  })
>
> +/* The ordering here is important for the prolog expander.
> +   When space is allocated from the stack, adding 'low' first may
> +   produce a temporary deallocation (which would be bad).  */
> +
> +(define_insn_and_split "*add_2insn"
> +  [(set (match_operand:GPR 0 "gpc_reg_operand" "=b")
> +   (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%b")
> + (match_operand:GPR 2 "add_2insn_cint_operand" "n")))]
> +  "!TARGET_PREFIXED"
> +  "#"
> +  "&& 1"
> +  

Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Richard Biener via Gcc-patches
On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
 wrote:
>
> Hello All:
>
>
> This patch eliminates unnecessary zero extension instruction from power 
> generated assembly.
> Bootstrapped and regtested on powerpc64-linux-gnu.

What makes this so special that we cannot deal with it from generic code?
In particular we do have the REE pass, why is target specific
knowledge neccessary
to eliminate the extension?

> Thanks & Regards
> Ajit
>
> rs6000: suboptimal code for returning bool value on target ppc.
>
> New pass to eliminate unnecessary zero extension. This pass
> is registered after cse rtl pass.
>
> 2023-03-16  Ajit Kumar Agarwal  
>
> gcc/ChangeLog:
>
> * config/rs6000/rs6000-passes.def: Registered zero elimination
> pass.
> * config/rs6000/rs6000-zext-elim.cc: Add new pass.
> * config.gcc: Add new executable.
> * config/rs6000/rs6000-protos.h: Add new prototype for zero
> elimination pass.
> * config/rs6000/rs6000.cc: Add new prototype for zero
> elimination pass.
> * config/rs6000/t-rs6000: Add new rule.
> * expr.cc: Modified gcc assert.
> * explow.cc: Modified gcc assert.
> * optabs.cc: Modified gcc assert.
> ---
>  gcc/config.gcc|   4 +-
>  gcc/config/rs6000/rs6000-passes.def   |   2 +
>  gcc/config/rs6000/rs6000-protos.h |   1 +
>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++
>  gcc/config/rs6000/rs6000.cc   |   2 +
>  gcc/config/rs6000/t-rs6000|   5 +
>  gcc/explow.cc |   3 +-
>  gcc/expr.cc   |   4 +-
>  gcc/optabs.cc |   3 +-
>  9 files changed, 379 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
>
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index da3a6d3ba1f..e8ac9d882f0 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -503,7 +503,7 @@ or1k*-*-*)
> ;;
>  powerpc*-*-*)
> cpu_type=rs6000
> -   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> +   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
> rs6000-logue.o"
> extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
> extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
> extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
> @@ -538,7 +538,7 @@ riscv*)
> ;;
>  rs6000*-*-*)
> extra_options="${extra_options} g.opt fused-madd.opt 
> rs6000/rs6000-tables.opt"
> -   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> +   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
> rs6000-logue.o"
> extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
> target_gtfiles="$target_gtfiles 
> \$(srcdir)/config/rs6000/rs6000-logue.cc 
> \$(srcdir)/config/rs6000/rs6000-call.cc"
> target_gtfiles="$target_gtfiles 
> \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
> diff --git a/gcc/config/rs6000/rs6000-passes.def 
> b/gcc/config/rs6000/rs6000-passes.def
> index ca899d5f7af..d7500feddf1 100644
> --- a/gcc/config/rs6000/rs6000-passes.def
> +++ b/gcc/config/rs6000/rs6000-passes.def
> @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
>   The power8 does not have instructions that automaticaly do the byte 
> swaps
>   for loads and stores.  */
>INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
> +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
> +
>
>/* Pass to do the PCREL_OPT optimization that combines the load of an
>   external symbol's address along with a single load or store using that
> diff --git a/gcc/config/rs6000/rs6000-protos.h 
> b/gcc/config/rs6000/rs6000-protos.h
> index 1a4fc1df668..f6cf2d673d4 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -340,6 +340,7 @@ namespace gcc { class context; }
>  class rtl_opt_pass;
>
>  extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
> +extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
>  extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
>  extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
>  extern bool rs6000_quadword_masked_address_p (const_rtx exp);
> diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc 
> b/gcc/config/rs6000/rs6000-zext-elim.cc
> new file mode 100644
> index 000..777c7a5a387
> --- /dev/null
> +++ b/gcc/config/rs6000/rs6000-zext-elim.cc
> @@ -0,0 +1,361 @@
> +/* Subroutine to eliminate redundant zero extend for power architecture.
> +   Copyright (C) 1991-2023 Free Software Foundation, Inc.
> +
> +   This file is part of GCC.
> +
> +   GCC is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published
> +   by the Free Software Foundation; either version 3, or (at your
> +   option) 

Re: [PATCH] [testsuite] fix array element count

2023-03-16 Thread Richard Biener via Gcc-patches
On Wed, 15 Mar 2023, Alexandre Oliva wrote:

> 
> This test is similar to pr103116-1.c, but instead of writing to
> 4*COUNT elements of x, it writes to 8*COUNT elements, but the
> definition of x seems to have been adjusted along with the loop.  Fix
> the array size so that it doesn't scribble over unrelated
> statically-allocated objects.
> 
> Regstrapped on ppc64-linux-gnu.  Also tested with gcc-11 on vxworks7r2
> (x86- and x86_64-), where the scribbling caused visible runtime effects.
> Ok to install?  I'm tempted to put this in as obvious.

OK (it's really obvious).

Richard.

> 
> for  gcc/testsuite/ChangeLog
> 
>   * gcc.dg/vect/pr103116-2.c (x): Fix array size.
> ---
>  gcc/testsuite/gcc.dg/vect/pr103116-2.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/vect/pr103116-2.c 
> b/gcc/testsuite/gcc.dg/vect/pr103116-2.c
> index 2f4ed0f404c76..aa9797a94074c 100644
> --- a/gcc/testsuite/gcc.dg/vect/pr103116-2.c
> +++ b/gcc/testsuite/gcc.dg/vect/pr103116-2.c
> @@ -31,7 +31,7 @@ loop (TYPE *restrict x, TYPE *restrict y)
>  }
>  }
>  
> -TYPE x[COUNT * 4];
> +TYPE x[COUNT * 8];
>  
>  int
>  main (void)
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)


Re: [PATCH] [testsuite] test for weak_undefined support and add options

2023-03-16 Thread Alexandre Oliva via Gcc-patches
On Mar 15, 2023, Alexandre Oliva  wrote:

> Regstrapped on ppc64-linux-gnu.  Also tested (with gcc-12) on multiple
> *-vxworks7r2 targets (arm, aarch64, ppc64, x86, x86_64).  Ok to install?

Further testing revealed a problem in my attempted use of lappend in
aapcs64.exp, in the previous version of the patch.  Fixed in this one,
retested on aarch64-vxworks7r2.  Ok to install?


for  gcc/testsuite/ChangeLog

* lib/target-supports.exp (add_options_for_weak_undefined):
New.
(check_effective_target_weak_undefined): Use it.
(check_effective_target_posix_memalign): New.
* gcc.dg/torture/pr53922.c: Drop skips and custom options in
favor of effective target requirement and added options for
weak_undefined symbols.
* gcc.dg/torture/pr90020.c: Likewise.
* gcc.dg/addr_equal-1.c: Likewise.
* gcc.target/aarch64/aapcs64/aapcs64.exp: Likewise, for
abitest.S-using tests.
* gcc.dg/torture/pr60092.c: Likewise, but in favor of
posix_memalign tests.
* gcc.dg/vect/vect-tail-nomask-1.c: Likewise.
---
 gcc/testsuite/gcc.dg/addr_equal-1.c|5 ++--
 gcc/testsuite/gcc.dg/torture/pr53922.c |   10 ++--
 gcc/testsuite/gcc.dg/torture/pr60092.c |   12 ++---
 gcc/testsuite/gcc.dg/torture/pr90020.c |7 ++---
 gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c |   11 ++--
 .../gcc.target/aarch64/aapcs64/aapcs64.exp |   17 -
 gcc/testsuite/lib/target-supports.exp  |   26 ++--
 7 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/addr_equal-1.c 
b/gcc/testsuite/gcc.dg/addr_equal-1.c
index 35fa0102e3063..db65dea4a8d2a 100644
--- a/gcc/testsuite/gcc.dg/addr_equal-1.c
+++ b/gcc/testsuite/gcc.dg/addr_equal-1.c
@@ -1,9 +1,8 @@
-/* { dg-do run { target { nonpic || pie_enabled } } } */
-/* { dg-require-weak "" } */
+/* { dg-do run { target { { nonpic || pie_enabled } && weak_undefined } } } */
 /* { dg-require-alias "" } */
 /* { dg-options "-O2 -fdelete-null-pointer-checks" } */
-/* { dg-skip-if "" { powerpc-ibm-aix* } } */
 /* { dg-skip-if "function pointers can be NULL" { keeps_null_pointer_checks } 
} */
+/* { dg-add-options weak_undefined } */
 void abort (void);
 extern int undef_var0, undef_var1;
 extern __attribute__ ((weak)) int weak_undef_var0;
diff --git a/gcc/testsuite/gcc.dg/torture/pr53922.c 
b/gcc/testsuite/gcc.dg/torture/pr53922.c
index b3f2c1a58f830..07359d6764964 100644
--- a/gcc/testsuite/gcc.dg/torture/pr53922.c
+++ b/gcc/testsuite/gcc.dg/torture/pr53922.c
@@ -1,11 +1,5 @@
-/* { dg-do run } */
-/* { dg-require-weak "" } */
-/* { dg-skip-if "No undefined" { *-*-mingw* } } */
-/* { dg-skip-if "No undefined weak" { *-*-aix* } } */
-/* { dg-skip-if "No undefined weak" { hppa*-*-hpux* && { ! lp64 } } } */
-/* { dg-skip-if "No undefined weak" { nvptx-*-* } } */
-/* { dg-options "-Wl,-undefined,dynamic_lookup" { target *-*-darwin* } } */
-/* { dg-additional-options "-Wl,-flat_namespace" { target *-*-darwin[89]* } } 
*/
+/* { dg-do run { target { weak_undefined } } } */
+/* { dg-add-options weak_undefined } */
 
 int x(int a)
 {
diff --git a/gcc/testsuite/gcc.dg/torture/pr60092.c 
b/gcc/testsuite/gcc.dg/torture/pr60092.c
index 74e7c174a8323..102ba6e0d9f87 100644
--- a/gcc/testsuite/gcc.dg/torture/pr60092.c
+++ b/gcc/testsuite/gcc.dg/torture/pr60092.c
@@ -1,12 +1,7 @@
-/* { dg-do run } */
-/* { dg-require-weak "" } */
-/* { dg-skip-if "No undefined weak" { hppa*-*-hpux* && { ! lp64 } } } */
-/* { dg-skip-if "No undefined weak" { nvptx-*-* } } */
-/* { dg-additional-options "-Wl,-undefined,dynamic_lookup" { target 
*-*-darwin* } } */
-/* { dg-additional-options "-Wl,-flat_namespace" { target *-*-darwin[89]* } } 
*/
+/* { dg-do run { target { posix_memalign } } } */
 
 typedef __SIZE_TYPE__ size_t;
-extern int posix_memalign(void **memptr, size_t alignment, size_t size) 
__attribute__((weak));
+extern int posix_memalign(void **memptr, size_t alignment, size_t size);
 extern void abort(void);
 int
 main (void)
@@ -14,9 +9,6 @@ main (void)
   void *p;
   int ret;
 
-  if (!posix_memalign)
-return 0;
-
   p = (void *)
   ret = posix_memalign (, sizeof (void *), -1);
   if (p != (void *))
diff --git a/gcc/testsuite/gcc.dg/torture/pr90020.c 
b/gcc/testsuite/gcc.dg/torture/pr90020.c
index 657c4ccfe45c4..40035aa758e6e 100644
--- a/gcc/testsuite/gcc.dg/torture/pr90020.c
+++ b/gcc/testsuite/gcc.dg/torture/pr90020.c
@@ -1,8 +1,5 @@
-/* { dg-do run } */
-/* { dg-skip-if "No undefined weak" { hppa*-*-hpux* || powerpc-ibm-aix* } } */
-/* { dg-require-weak "" } */
-/* { dg-additional-options "-Wl,-undefined,dynamic_lookup" { target 
*-*-darwin* } } */
-/* { dg-additional-options "-Wl,-flat_namespace" { target *-*-darwin[89]* } } 
*/
+/* { dg-do run  { target { weak_undefined } } } */
+/* { dg-add-options weak_undefined } */
 
 void __attribute__((noinline,noclone))
 check (int i)
diff --git