[PATCH 0/4] [rs6000] ROP support

2021-04-25 Thread Bill Schmidt via Gcc-patches
Add POWER10 support for hashst[p] and hashchk[p] operations.  When
the -mrop-protect option is selected, any function that loads the link
register from memory before returning must have protection in the
prologue and epilogue to ensure the link register save location has
not been compromised.  If -mprivileged is also specified, the
protection instructions generated require supervisor privilege.

The patches are broken up into logical chunks:
 - Option handling
 - Instruction generation
 - Predefined macro handling
 - Test cases

Bootstrapped and tested on a POWER10 system with no regressions.
Tests on a kernel that enables user-space ROP mitigation were
successful.  Is this series ok for trunk?  I would also like to later
backport these patches to GCC for the 11.2 release.

Thanks!
Bill

Bill Schmidt (4):
  rs6000: Add -mrop-protect and -mprivileged flags
  rs6000: Emit ROP-protect instructions in prologue and epilogue
  rs6000: Conditionally define __ROP_PROTECT__
  rs6000: Add ROP tests

 gcc/config/rs6000/rs6000-c.c |  3 +
 gcc/config/rs6000/rs6000-internal.h  |  2 +
 gcc/config/rs6000/rs6000-logue.c | 86 +---
 gcc/config/rs6000/rs6000.c   |  7 ++
 gcc/config/rs6000/rs6000.md  | 39 +++
 gcc/config/rs6000/rs6000.opt |  6 ++
 gcc/doc/invoke.texi  | 19 +-
 gcc/testsuite/gcc.target/powerpc/rop-1.c | 16 +
 gcc/testsuite/gcc.target/powerpc/rop-2.c | 16 +
 gcc/testsuite/gcc.target/powerpc/rop-3.c | 19 ++
 gcc/testsuite/gcc.target/powerpc/rop-4.c | 14 
 gcc/testsuite/gcc.target/powerpc/rop-5.c | 17 +
 12 files changed, 231 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-3.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-4.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-5.c

-- 
2.27.0



[PATCH 4/4] rs6000: Add ROP tests

2021-04-25 Thread Bill Schmidt via Gcc-patches
2021-03-25  Bill Schmidt  

gcc/testsuite/
* gcc.target/powerpc/rop-1.c: New.
* gcc.target/powerpc/rop-2.c: New.
* gcc.target/powerpc/rop-3.c: New.
* gcc.target/powerpc/rop-4.c: New.
* gcc.target/powerpc/rop-5.c: New.
---
 gcc/testsuite/gcc.target/powerpc/rop-1.c | 16 
 gcc/testsuite/gcc.target/powerpc/rop-2.c | 16 
 gcc/testsuite/gcc.target/powerpc/rop-3.c | 19 +++
 gcc/testsuite/gcc.target/powerpc/rop-4.c | 14 ++
 gcc/testsuite/gcc.target/powerpc/rop-5.c | 17 +
 5 files changed, 82 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-3.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-4.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-5.c

diff --git a/gcc/testsuite/gcc.target/powerpc/rop-1.c 
b/gcc/testsuite/gcc.target/powerpc/rop-1.c
new file mode 100644
index 000..cf8e2b01dda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rop-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
+
+/* Verify that ROP-protect instructions are inserted when a
+   call is present.  */
+
+extern void foo (void);
+
+int bar ()
+{
+  foo ();
+  return 5;
+}
+
+/* { dg-final { scan-assembler {\mhashst\M} } } */
+/* { dg-final { scan-assembler {\mhashchk\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/rop-2.c 
b/gcc/testsuite/gcc.target/powerpc/rop-2.c
new file mode 100644
index 000..dde403b0ef5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rop-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect -mprivileged" } */
+
+/* Verify that privileged ROP-protect instructions are inserted when a
+   call is present.  */
+
+extern void foo (void);
+
+int bar ()
+{
+  foo ();
+  return 5;
+}
+
+/* { dg-final { scan-assembler {\mhashstp\M} } } */
+/* { dg-final { scan-assembler {\mhashchkp\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/rop-3.c 
b/gcc/testsuite/gcc.target/powerpc/rop-3.c
new file mode 100644
index 000..054f94fda99
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rop-3.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { power10_hw } } } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
+
+/* Verify that ROP-protect instructions execute correctly when a
+   call is present.  */
+
+void __attribute__((noinline)) foo ()
+{
+  asm ("");
+}
+
+int main ()
+{
+  foo ();
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/rop-4.c 
b/gcc/testsuite/gcc.target/powerpc/rop-4.c
new file mode 100644
index 000..e2be8b2c035
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rop-4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
+
+/* Verify that no ROP-protect instructions are inserted when no
+   call is present.  */
+
+
+int bar ()
+{
+  return 5;
+}
+
+/* { dg-final { scan-assembler-not {\mhashst\M} } } */
+/* { dg-final { scan-assembler-not {\mhashchk\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/rop-5.c 
b/gcc/testsuite/gcc.target/powerpc/rop-5.c
new file mode 100644
index 000..b759fa59979
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rop-5.c
@@ -0,0 +1,17 @@
+/* { dg-do run { target { power10_hw } } } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
+
+/* Verify that __ROP_PROTECT__ is predefined for -mrop-protect.  */
+
+extern void abort (void);
+
+int main ()
+{
+#ifndef __ROP_PROTECT__
+  abort ();
+#endif
+  return 0;
+}
+
-- 
2.27.0



[PATCH 2/4] rs6000: Emit ROP-protect instructions in prologue and epilogue

2021-04-25 Thread Bill Schmidt via Gcc-patches
Insert the hashst and hashchk instructions when -mrop-protect has been
selected.  The encrypted save slot for ROP mitigation is placed
between the parameter save area and the alloca space (if any;
otherwise the local variable space).

Note that ROP-mitigation instructions are currently only provided for
the ELFv2 ABI.

2021-03-25  Bill Schmidt  

gcc/
* config/rs6000/rs6000-internal.h (rs6000_stack): Add
rop_check_save_offset and rop_check_size.
* config/rs6000/rs6000-logue.c (rs6000_stack_info): Compute
rop_check_size and rop_check_save_offset.
(debug_stack_info): Dump rop_save_offset and rop_check_size.
(rs6000_emit_prologue): Assert if WORLD_SAVE used with
-mrop-protect; emit hashst[p] in prologue; emit hashchk[p] in
epilogue.
* config/rs6000/rs6000.md (unspec): Add UNSPEC_HASHST[P] and
UNSPEC_HASHCHK[P].
(hashst): New define_insn.
(hashstp): Likewise.
(hashchk): Likewise.
(hashchkp): Likewise.
---
 gcc/config/rs6000/rs6000-internal.h |  2 +
 gcc/config/rs6000/rs6000-logue.c| 86 +
 gcc/config/rs6000/rs6000.md | 39 +
 3 files changed, 116 insertions(+), 11 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-internal.h 
b/gcc/config/rs6000/rs6000-internal.h
index 428a7861a98..8fc77ba6138 100644
--- a/gcc/config/rs6000/rs6000-internal.h
+++ b/gcc/config/rs6000/rs6000-internal.h
@@ -39,6 +39,7 @@ typedef struct rs6000_stack {
   int gp_save_offset;  /* offset to save GP regs from initial SP */
   int fp_save_offset;  /* offset to save FP regs from initial SP */
   int altivec_save_offset; /* offset to save AltiVec regs from initial SP 
*/
+  int rop_check_save_offset;   /* offset to save ROP check from initial SP */
   int lr_save_offset;  /* offset to save LR from initial SP */
   int cr_save_offset;  /* offset to save CR from initial SP */
   int vrsave_save_offset;  /* offset to save VRSAVE from initial SP */
@@ -53,6 +54,7 @@ typedef struct rs6000_stack {
   int gp_size; /* size of saved GP registers */
   int fp_size; /* size of saved FP registers */
   int altivec_size;/* size of saved AltiVec registers */
+  int rop_check_size;  /* size of ROP check slot */
   int cr_size; /* size to hold CR if not in fixed area */
   int vrsave_size; /* size to hold VRSAVE */
   int altivec_padding_size;/* size of altivec alignment padding */
diff --git a/gcc/config/rs6000/rs6000-logue.c b/gcc/config/rs6000/rs6000-logue.c
index b0ac183ceff..10cf7a2de93 100644
--- a/gcc/config/rs6000/rs6000-logue.c
+++ b/gcc/config/rs6000/rs6000-logue.c
@@ -595,19 +595,21 @@ rs6000_savres_strategy (rs6000_stack_t *info,
+---+
| Parameter save area (+padding*) (P)   |  32
+---+
-   | Alloca space (A)  |  32+P
+   | Optional ROP check slot (R)   |  32+P
+---+
-   | Local variable space (L)  |  32+P+A
+   | Alloca space (A)  |  32+P+R
+---+
-   | Save area for AltiVec registers (W)   |  32+P+A+L
+   | Local variable space (L)  |  32+P+R+A
+---+
-   | AltiVec alignment padding (Y) |  32+P+A+L+W
+   | Save area for AltiVec registers (W)   |  32+P+R+A+L
+---+
-   | Save area for GP registers (G)|  32+P+A+L+W+Y
+   | AltiVec alignment padding (Y) |  32+P+R+A+L+W
+---+
-   | Save area for FP registers (F)|  32+P+A+L+W+Y+G
+   | Save area for GP registers (G)|  32+P+R+A+L+W+Y
+---+
-   old SP->| back chain to caller's caller |  32+P+A+L+W+Y+G+F
+   | Save area for FP registers (F)|  32+P+R+A+L+W+Y+G
+   +---+
+   old SP->| back chain to caller's caller |  32+P+R+A+L+W+Y+G+F
+---+
 
  * If the alloca area is present, the parameter save area is
@@ -717,6 +719,19 @@ rs6000_stack_info (void)
   /* Does this function call anything (apart from sibling calls)?  */
   info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
 
+  if (TARGET_POWER10 && info->calls_p
+  && DEFAULT_ABI == ABI_ELFv2 && rs6000_rop_protect)
+info->rop_check_size = 8;
+  else if (rs6000_rop_protect && DEFAULT_ABI != ABI_ELFv2)
+{
+  /* We 

[PATCH 3/4] rs6000: Conditionally define __ROP_PROTECT__

2021-04-25 Thread Bill Schmidt via Gcc-patches
2021-03-25  Bill Schmidt  

gcc/
* config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define
__ROP_PROTECT__ if -mrop-protect is selected.
---
 gcc/config/rs6000/rs6000-c.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 0f8a629ff5a..afcb5bb6e39 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -602,6 +602,9 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags,
   /* Whether pc-relative code is being generated.  */
   if ((flags & OPTION_MASK_PCREL) != 0)
 rs6000_define_or_undefine_macro (define_p, "__PCREL__");
+  /* Tell the user -mrop-protect is in play.  */
+  if (rs6000_rop_protect)
+rs6000_define_or_undefine_macro (define_p, "__ROP_PROTECT__");
 }
 
 void
-- 
2.27.0



[PATCH 1/4] rs6000: Add -mrop-protect and -mprivileged flags

2021-04-25 Thread Bill Schmidt via Gcc-patches
2021-03-25  Bill Schmidt  

gcc/
* config/rs6000/rs6000.c (rs6000_option_override_internal):
Disable shrink wrap when inserting ROP-protect instructions.
* config/rs6000/rs6000.opt (mrop-protect): New option.
(mprivileged): Likewise.
* doc/invoke.texi: Document mrop-protect and mprivileged.
---
 gcc/config/rs6000/rs6000.c   |  7 +++
 gcc/config/rs6000/rs6000.opt |  6 ++
 gcc/doc/invoke.texi  | 19 +--
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 844fee88cf3..d13ed6e7ff4 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4036,6 +4036,13 @@ rs6000_option_override_internal (bool global_init_p)
   && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
 
+  /* If we are inserting ROP-protect instructions, disable shrink wrap.  */
+  if (rs6000_rop_protect)
+{
+  flag_shrink_wrap = 0;
+  flag_shrink_wrap_separate = 0;
+}
+
   /* If we can shrink-wrap the TOC register save separately, then use
  -msave-toc-indirect unless explicitly disabled.  */
   if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 0dbdf753673..d116fd12f7e 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -619,3 +619,9 @@ Generate (do not generate) MMA instructions.
 
 mrelative-jumptables
 Target Undocumented Var(rs6000_relative_jumptables) Init(1) Save
+
+mrop-protect
+Target Var(rs6000_rop_protect) Init(0)
+
+mprivileged
+Target Var(rs6000_privileged) Init(0)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e98b0962b9f..36bd0bf9b3b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1238,7 +1238,8 @@ See RS/6000 and PowerPC Options.
 -mgnu-attribute  -mno-gnu-attribute @gol
 -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg} @gol
 -mstack-protector-guard-offset=@var{offset} -mprefixed -mno-prefixed @gol
--mpcrel -mno-pcrel -mmma -mno-mmma}
+-mpcrel -mno-pcrel -mmma -mno-mmma -mrop-protect -mno-rop-protect @gol
+-mprivileged -mno-privileged}
 
 @emph{RX Options}
 @gccoptlist{-m64bit-doubles  -m32bit-doubles  -fpu  -nofpu@gol
@@ -27019,7 +27020,8 @@ following options:
 -mmulhw  -mdlmzb  -mmfpgpr  -mvsx @gol
 -mcrypto  -mhtm  -mpower8-fusion  -mpower8-vector @gol
 -mquad-memory  -mquad-memory-atomic  -mfloat128 @gol
--mfloat128-hardware -mprefixed -mpcrel -mmma}
+-mfloat128-hardware -mprefixed -mpcrel -mmma @gol
+-mrop-protect -mprivileged}
 
 The particular options set for any particular CPU varies between
 compiler versions, depending on what setting seems to produce optimal
@@ -28024,6 +28026,19 @@ store instructions when the option 
@option{-mcpu=future} is used.
 Generate (do not generate) the MMA instructions when the option
 @option{-mcpu=future} is used.
 
+@item -mrop-protect
+@itemx -mno-rop-protect
+@opindex mrop-protect
+@opindex mno-rop-protect
+Generate (do not generate) ROP protection instructions when the option
+@option{-mcpu=power10} is used.
+
+@item -mprivileged
+@itemx -mno-privileged
+@opindex mprivileged
+@opindex mno-privileged
+Generate (do not generate) instructions for privileged state.
+
 @item -mblock-ops-unaligned-vsx
 @itemx -mno-block-ops-unaligned-vsx
 @opindex block-ops-unaligned-vsx
-- 
2.27.0



[PATCH v2] Generate offset adjusted operation for op_by_pieces operations

2021-04-25 Thread H.J. Lu via Gcc-patches
On Fri, Apr 23, 2021 at 09:12:10AM +0200, Richard Biener wrote:
> On Fri, Apr 23, 2021 at 1:35 AM H.J. Lu via Gcc-patches
>  wrote:
> >
> > For op_by_pieces operations between two areas of memory on non-strict
> > alignment target, add -foverlap-op-by-pieces=[off|on|max-memset] to
> > generate overlapping operations to minimize number of operations if it
> > is not a stack push which must not overlap.
> >
> > When operating on LENGTH bytes of memory, -foverlap-op-by-pieces=on
> > starts with the widest usable integer size, MAX_SIZE, for LENGTH bytes
> > and finishes with the smallest usable integer size, MIN_SIZE, for the
> > remaining bytes where MAX_SIZE >= MIN_SIZE.  If MIN_SIZE > the remaining
> > bytes, the last operation is performed on MIN_SIZE bytes of overlapping
> > memory from the previous operation.
> >
> > For memset with non-zero byte, -foverlap-op-by-pieces=max-memset generates
> > an overlapping fill with MAX_SIZE if the number of the remaining bytes is
> > greater than one.
> >
> > Tested on Linux/x86-64 with both -foverlap-op-by-pieces enabled and
> > disabled by default.
> 
> Neither the user documentation nor the patch description tells me what
> "generate overlapping operations" does.  I _suspect_ it's doing an
> offset adjusted read/write of the last piece of a memory region to
> avoid doing more than one smaller operations.  Thus for a region
> of size 7 and 4-byte granular ops you'd do operations at
> offset 0 and 3 rather than one at 0, a two-byte at offset 4 and
> a one-byte at offset 7.

That is correct.

> 
> When the tail is of power-of-two size you still generate non-overlapping
> ops?

No.

> 
> For memmove there's a correctness issue so you have to make sure
> to first load the last two ops before performing the stores which
> increases register pressure.

I believe memmove is handled correctly.

> 
> I'm not sure we want a -f option to control this - not all targets will
> be able to support this.  So I'd use a target hook or rather extend
> the existing use_by_pieces_infrastructure_p hook with an alternate
> return (some flags bitmask I guess).  We do have one extra
> target hook, compare_by_pieces_branch_ratio, so by that using
> an alternate hook might be also OK.

Fixed.

> 
> Adding a -m option in targets that want this user-controllable would
> be OK of course.
> 

I can add a -m option to control if neeed.

Here is the v2 patch.  Changes are:

1. Added a target hook, TARGET_OVERLAP_OP_BY_PIECES_P.
2. Added a pointer argument to pieces_addr::adjust to pass the RTL
information from the previous iteraton to m_constfn.
3. Updated builtin_memset_read_str and builtin_memset_gen_str to
generate the new RTL from the previous RTL info.

OK for master branch?

Thanks.

H.J.

Add an overlap_op_by_pieces_p target hook for op_by_pieces operations
between two areas of memory to generate one offset adjusted operation
in the smallest integer mode for the remaining bytes on the last piece
operation of a memory region to avoid doing more than one smaller
operations.

Pass the RTL information from the previous iteration to m_constfn in
op_by_pieces operation so that builtin_memset_[read|gen]_str can
generate the new RTL from the previous RTL.

Tested on Linux/x86-64.

gcc/

PR middl-end/90773
* builtins.c (builtin_memcpy_read_str): Add a dummy argument.
(builtin_strncpy_read_str): Likewise.
(builtin_memset_read_str): Add an argument for the previous RTL
information and generate the new RTL from the previous RTL info.
(builtin_memset_gen_str): Likewise.
* builtins.h (builtin_strncpy_read_str): Update the prototype.
(builtin_memset_read_str): Likewise.
* expr.c (by_pieces_ninsns): If targetm.overlap_op_by_pieces_p()
returns true, round up size and alignment to the widest integer
mode for maximum size.
(pieces_addr::adjust): Add a pointer to by_pieces_prev argument
and pass it to m_constfn.
(op_by_pieces_d): Add get_usable_mode, m_push and
m_overlap_op_by_pieces.
(op_by_pieces_d::op_by_pieces_d): Add a bool argument to
initialize m_push.  Initialize m_overlap_op_by_pieces with
targetm.overlap_op_by_pieces_p ().
(op_by_pieces_d::get_usable_mode): New.
(op_by_pieces_d::run): Use get_usable_mode to get the largest
usable integer mode, pass the previous info to pieces_addr::adjust
and generate overlapping operations if m_overlap_op_by_pieces is
true.
(PUSHG_P): New.
(move_by_pieces_d::move_by_pieces_d): Updated for op_by_pieces_d
change.
(store_by_pieces_d::store_by_pieces_d): Updated for op_by_pieces_d
change.
(can_store_by_pieces): Use by_pieces_constfn on constfun.
(store_by_pieces): Use by_pieces_constfn on constfun.  Updated
for op_by_pieces_d change.
(clear_by_pieces_1): Add a dummy argument.
(clear_by_pieces): Updated for 

RE: [RFC] bpf.2: Use standard types and attributes

2021-04-25 Thread David Laight via Gcc-patches
From: Zack Weinberg
> Sent: 25 April 2021 20:17
> 
> On Sat, Apr 24, 2021 at 4:43 PM David Laight via Libc-alpha
>  wrote:
> > From: Alexei Starovoitov
> > > On Fri, Apr 23, 2021 at 4:15 PM Alejandro Colomar 
> > >  wrote:
> ...
> > > > Some pages also document attributes, using GNU syntax
> > > > '__attribute__((xxx))'.  Update those to use the shorter and more
> > > > portable C2x syntax, which hasn't been standardized yet, but is
> > > > already implemented in GCC, and available through either --std=c2x
> > > > or any of the --std=gnu... options.
> ..
> > And the code below is no more portable that a #pragma'.
> > It is probably worse than __attribute__((aligned(8)))
> > +uint64_t [[gnu::aligned(8)]] value;
> > The standards committee are smoking dope again.
> > At least the '__aligned_u64 value;' form stands a reasonable
> > chance of being converted by cpp into whatever your compiler supports.
> 
> Is it actually necessary to mention the alignment overrides at all in
> the manpages?  They are only relevant to people working at the level
> of physical layout of the data in RAM, and those people are probably
> going to have to consult the header file anyway.

Depends, if the man page defines the structure - it needs to
contain its definition.
If theory the man page ought to be the definition, and the code
do what the man page says happens.

An alternative is for the man page to say that the structure
contains some fields - without prescribing the order, or
stopping the implementation adding additional fields (or even
changing the actual numeric type).
This is more common in the standards documents.
IMHO The Linux pages really ought to say how linux does things.
(With notes about portability.)

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)


Re: [RFC] bpf.2: Use standard types and attributes

2021-04-25 Thread Zack Weinberg
On Sat, Apr 24, 2021 at 4:43 PM David Laight via Libc-alpha
 wrote:
> From: Alexei Starovoitov
> > On Fri, Apr 23, 2021 at 4:15 PM Alejandro Colomar  
> > wrote:
...
> > > Some pages also document attributes, using GNU syntax
> > > '__attribute__((xxx))'.  Update those to use the shorter and more
> > > portable C2x syntax, which hasn't been standardized yet, but is
> > > already implemented in GCC, and available through either --std=c2x
> > > or any of the --std=gnu... options.
..
> And the code below is no more portable that a #pragma'.
> It is probably worse than __attribute__((aligned(8)))
> +uint64_t [[gnu::aligned(8)]] value;
> The standards committee are smoking dope again.
> At least the '__aligned_u64 value;' form stands a reasonable
> chance of being converted by cpp into whatever your compiler supports.

Is it actually necessary to mention the alignment overrides at all in
the manpages?  They are only relevant to people working at the level
of physical layout of the data in RAM, and those people are probably
going to have to consult the header file anyway.

zw


Re: [RFC] bpf.2: Use standard types and attributes

2021-04-25 Thread Zack Weinberg
On Sun, Apr 25, 2021 at 12:52 PM Alexei Starovoitov via Libc-alpha
 wrote:
> On Sat, Apr 24, 2021 at 10:56 AM Alejandro Colomar (man-pages)
>  wrote:
> >
> > Hello Alexei,
> >
> > On 4/24/21 1:20 AM, Alexei Starovoitov wrote:
> > > Nack.
> > > The man page should describe the kernel api the way it is in .h file.
> >
> > Why?
>
> Because man page must describe the linux uapi headers the way they
> are installed in the system and not invent alternative implementations.
> The users will include those .h with __u32 and will see them in their code.
> Man page saying something else is a dangerous lie.

Why do you consider it _dangerous_ for the manpages to replace __u32
with uint32_t, when we know by construction that the two types will
always be the same?  Alejandro's preference for the types standardized
by ISO C seems perfectly reasonable to me for documentation; people
reading the documentation can be expected to already know what they
mean, unlike the  Linux-specifc __[iu]NN types.  Also, all else being
equal, documentation should avoid use of symbols in the ISO C reserved
namespace.

If anything I would argue that it is the uapi headers that should be
changed, to use the  types.

zw


Re: [RFC] bpf.2: Use standard types and attributes

2021-04-25 Thread Alexei Starovoitov via Gcc-patches
On Sat, Apr 24, 2021 at 10:56 AM Alejandro Colomar (man-pages)
 wrote:
>
> Hello Alexei,
>
> On 4/24/21 1:20 AM, Alexei Starovoitov wrote:
> > Nack.
> > The man page should describe the kernel api the way it is in .h file.
>
> Why?

Because man page must describe the linux uapi headers the way they
are installed in the system and not invent alternative implementations.
The users will include those .h with __u32 and will see them in their code.
Man page saying something else is a dangerous lie.

> using uint32_t in every situation where __u32 is expected.  They're both
> typedefs for the same basic type.

That's irrelevant. Languages like golang have their own bpf.h equivalent
that matches /usr/include/linux/bpf.h.

> I can understand why Linux will keep using u32 types (and their __ user
> space variants), but that doesn't mean user space programs need to use
> the same type.

No one says that the users must use __u32. See golang example.
But if the users do #include  they will get them and man page
must describe that.

> If we have a standard syntax for fixed-width integral types (and for
> anything, actually), the manual pages should probably follow it,
> whenever possible.

Absolutely not. linux man page must describe linux.


Re: [Patch, fortran] PR fortran/82376 - Duplicate function call using -fcheck=pointer

2021-04-25 Thread Paul Richard Thomas via Gcc-patches
Hi José!

The fix is fine.

Note however that the testcase will pass even without the fix because you
haven't included the
! { dg-options "-fcheck=pointer" }.

In fact, I suggest that you use the version of the tescase that I have
attached that does not run but counts the number of occurrences of 'new' in
the tree dump.

OK for master, certainly for 11-branch, when it is open again, and for
10-branch after a wait.

Are you reliant on others to commit and push your patches or have you done
the FSF paperwork?

Thanks

Paul




On Thu, 22 Apr 2021 at 21:50, José Rui Faustino de Sousa via Fortran <
fort...@gcc.gnu.org> wrote:

> Hi All!
>
> Proposed patch to:
>
> PR82376 - Duplicate function call using -fcheck=pointer
>
> Patch tested only on x86_64-pc-linux-gnu.
>
> Evaluate function result and then pass a pointer, instead of a reference
> to the function itself, thus avoiding multiple evaluations of the function.
>
> Thank you very much.
>
> Best regards,
> José Rui
>
> Fortran: Fix double function call with -fcheck=pointer [PR]
>
> gcc/fortran/ChangeLog:
>
> PR fortran/82376
> * trans-expr.c (gfc_conv_procedure_call): Evaluate function result
> and then pass a pointer.
>
> gcc/testsuite/ChangeLog:
>
> PR fortran/82376
> * gfortran.dg/PR82376.f90: New test.
>
>

-- 
"If you can't explain it simply, you don't understand it well enough" -
Albert Einstein
! { dg-do compile }
! { dg-options "-fdump-tree-original -fcheck=pointer" }
!
! Test the fix for PR82376. The pointer check was doubling up the call
! to new. The fix reduces the count of 'new' from 5 to 4.
!
! Contributed by José Rui Faustino de Sousa  
!
program main_p

  integer, parameter :: n = 10

  type :: foo_t
integer, pointer :: v =>null()
  end type foo_t

  integer, save :: pcnt = 0

  type(foo_t) :: int
  integer :: i

  do i = 1, n
call init(int, i)
if(.not.associated(int%v)) stop 1
if(int%v/=i) stop 2
if(pcnt/=i) stop 3
  end do

contains

  function new(data) result(this)
integer, target, intent(in) :: data

integer, pointer :: this

nullify(this)
this => data
pcnt = pcnt + 1
return
  end function new

  subroutine init(this, data)
type(foo_t), intent(out) :: this
integer, intent(in)  :: data

call set(this, new(data))
return
  end subroutine init

  subroutine set(this, that)
type(foo_t), intent(inout) :: this
integer, target, intent(in):: that

this%v => that
return
  end subroutine set

end program main_p
! { dg-final { scan-tree-dump-times "new" 4 "original" } }

Re: [PATCH 1/3] Come up with startswith function.

2021-04-25 Thread Arnaud Charlet
> Thank you for a quick reply.
> There's an updated version of the patch.

The Ada part is OK now, thanks.


Re: [PATCH] Fix logic error in 32-bit trampolines, PR target/98952

2021-04-25 Thread Bill Schmidt via Gcc-patches

On 4/23/21 6:58 PM, Segher Boessenkool wrote:

On Fri, Apr 23, 2021 at 06:24:07PM -0400, Michael Meissner wrote:

On Thu, Apr 22, 2021 at 05:56:32PM -0500, Segher Boessenkool wrote:

As Will says, it looks like the ELFv2 version has the same bug.  Please
fix that the same way.

Yes it has the same bug.  However in practice it would never be hit, since this
bug is 32-bit, and we only build 64-bit systems with ELF v2.  I did fix it.

Hrm, in that case, why do we have that code at all?!


Okay for trunk.  Okay for backport to 11 when that branch opens again.
Does this need more backports?  (Those should follow after 11 of
course).

Bill mentioned we may want to backport this to earlier branches before they are
frozen.  Tulio, are backports to earlier revisions important?

Well, the bug has been there since the original commit to (then)
tramp.asm, which was 25 years ago, and only now people noticed ;-)

We should have a backport to GCC 11 at least.  Older is up to you (and
Tulio).
This was reported to us as a compatibility problem with Clang that was 
holding up porting a language runtime to Power.  Since this is very 
obviously a bug, I would like to be aggressive about backporting it to 
previous releases to avoid any other such problems.  Thanks for considering!


Bill




Segher


Re: [PATCH] [i386] Optimize __builtin_shuffle when it's used to zero the upper bits of the dest. [PR target/94680]

2021-04-25 Thread Hongtao Liu via Gcc-patches
On Fri, Apr 23, 2021 at 5:13 PM Jakub Jelinek  wrote:
>
> On Fri, Apr 23, 2021 at 12:53:58PM +0800, Hongtao Liu via Gcc-patches wrote:
> > +  if (!CONST_INT_P (er))
> > + return 0;
> > +  ei = INTVAL (er);
> > +  if (i < nelt2 && ei != i)
> > + return 0;
> > +  if (i >= nelt2
> > +  && (ei < nelt || ei >= nelt<<1))
>
> Formatting:
> 1) you have spaces followed by tab, remove the spaces; but,
>   if (i >= nelt2 && (ei < nelt || ei >= nelt<<1))
>fits on one line, so keep it on one line.
> 2) nelt<<1 should be nelt << 1 with spaces around the <<
>

Done.

> > -(define_insn "*vec_concatv4si_0"
> > -  [(set (match_operand:V4SI 0 "register_operand"   "=v,x")
> > - (vec_concat:V4SI
> > -   (match_operand:V2SI 1 "nonimmediate_operand" "vm,?!*y")
> > -   (match_operand:V2SI 2 "const0_operand"   " C,C")))]
> > +(define_insn "*vec_concat_0"
> > +  [(set (match_operand:VI124_128 0 "register_operand"   "=v,x")
> > + (vec_concat:VI124_128
> > +   (match_operand: 1 "nonimmediate_operand" "vm,?!*y")
> > +   (match_operand: 2 "const0_operand"   " C,C")))]
> >"TARGET_SSE2"
> >"@
> > %vmovq\t{%1, %0|%0, %1}
> > @@ -22154,6 +22157,24 @@ (define_insn "avx_vec_concat"
> > (set_attr "prefix" "maybe_evex")
> > (set_attr "mode" "")])
> >
> > +(define_insn_and_split "*vec_concat_0"
>
> Would be better to use a different pattern name, *vec_concat_0
> is already used in the above define_insn.
> Use some additional suffix after _0?
>

Changed to "*vec_concat_0_1"

> > +  return __builtin_shuffle (x, (v32qi) { 0, 0, 0, 0, 0, 0, 0, 0,
> > +  0, 0, 0, 0, 0, 0, 0, 0,
> > +  0, 0, 0, 0, 0, 0, 0, 0,
> > +  0, 0, 0, 0, 0, 0, 0, 0 },
> > +(v32qi) { 0, 1, 2, 3, 4, 5, 6, 7,
> > +  8, 9, 10, 11, 12, 13, 14, 15,
> > +  32, 49, 34, 58, 36, 53, 38, 39,
> > +  40, 60, 42, 43, 63, 45, 46, 47 });
>
> In this testcase the shuffles in the part taking indexes from the zero
> vector are nicely randomized.
>
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr94680.c
> > @@ -0,0 +1,78 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-mavx512bw -mavx512vbmi -O2" } */
> > +/* { dg-final { scan-assembler-times {(?n)vmov[a-z0-9]*[ \t]*%ymm[0-9]} 6} 
> > } */
> > +/* { dg-final { scan-assembler-not "pxor" } } */
> > +
> > +
> > +typedef float v16sf __attribute__((vector_size(64)));
> > +typedef double v8df __attribute__ ((vector_size (64)));
> > +typedef long long v8di __attribute__((vector_size(64)));
> > +typedef int v16si __attribute__((vector_size(64)));
> > +typedef short v32hi __attribute__ ((vector_size (64)));
> > +typedef char v64qi __attribute__ ((vector_size (64)));
> > +
> > +v8df
> > +foo_v8df (v8df x)
> > +{
> > +  return __builtin_shuffle (x, (v8df) { 0, 0, 0, 0, 0, 0, 0, 0 },
> > + (v8di) { 0, 1, 2, 3, 8, 9, 10, 11 });
> > +}
> > +
> > +v8di
> > +foo_v8di (v8di x)
> > +{
> > +  return __builtin_shuffle (x, (v8di) { 0, 0, 0, 0, 0, 0, 0, 0 },
> > + (v8di) { 0, 1, 2, 3, 8, 9, 10, 11 });
> > +}
> > +
> > +v16sf
> > +foo_v16sf (v16sf x)
> > +{
> > +  return __builtin_shuffle (x, (v16sf)  { 0, 0, 0, 0, 0, 0, 0, 0,
> > +0, 0, 0, 0, 0, 0, 0, 0 },
> > +(v16si) { 0, 1, 2, 3, 4, 5, 6, 7,
> > +  16, 17, 18, 19, 20, 21, 22, 23 });
> > +}
> > +
> > +v16si
> > +foo_v16si (v16si x)
> > +{
> > +return __builtin_shuffle (x, (v16si)  { 0, 0, 0, 0, 0, 0, 0, 0,
> > +0, 0, 0, 0, 0, 0, 0, 0 },
> > +(v16si) { 0, 1, 2, 3, 4, 5, 6, 7,
> > +  16, 17, 18, 19, 20, 21, 22, 23 });
> > +}
> > +
> > +v32hi
> > +foo_v32hi (v32hi x)
> > +{
> > +  return __builtin_shuffle (x, (v32hi) { 0, 0, 0, 0, 0, 0, 0, 0,
> > +  0, 0, 0, 0, 0, 0, 0, 0,
> > +  0, 0, 0, 0, 0, 0, 0, 0,
> > +  0, 0, 0, 0, 0, 0, 0, 0 },
> > +(v32hi) { 0, 1, 2, 3, 4, 5, 6, 7,
> > +  8, 9, 10, 11, 12, 13, 14, 15,
> > +  32, 33, 34, 35, 36, 37, 38, 39,
> > +  40,41, 42, 43, 44, 45, 46, 47 });
> > +}
> > +
> > +v64qi
> > +foo_v64qi (v64qi x)
> > +{
> > +  return __builtin_shuffle (x, (v64qi) { 0, 0, 0, 0, 0, 0, 0, 0,
> > +  0, 0, 0, 0, 0, 0, 0, 0,
> > +  0, 0, 0, 0, 0, 0, 0, 0,
> > +  0, 0, 0, 0, 0, 0, 0, 0,
> > +  0, 0,