On Wed, Apr 18, 2018 at 8:55 AM, H.J. Lu <hjl.to...@gmail.com> wrote:
> On Wed, Apr 18, 2018 at 7:00 AM, Tsimbalist, Igor V
> <igor.v.tsimbal...@intel.com> wrote:
>>> -----Original Message-----
>>> From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-
>>> ow...@gcc.gnu.org] On Behalf Of H.J. Lu
>>> Sent: Wednesday, April 18, 2018 3:25 PM
>>> To: GCC Patches <gcc-patches@gcc.gnu.org>; Tsimbalist, Igor V
>>> <igor.v.tsimbal...@intel.com>
>>> Cc: Uros Bizjak <ubiz...@gmail.com>
>>> Subject: PING: [PATCH] i386: Insert ENDBR after __morestack call
>>>
>>> On Fri, Apr 13, 2018 at 5:56 AM, H.J. Lu <hongjiu...@intel.com> wrote:
>>> > Since __morestack will jump back to its callee via indirect call, we
>>> > need to insert ENDBR after calling __morestack.
>>> >
>>> > OK for trunk?
>>> >
>>> > H.J.
>>> > ----
>>> > gcc/
>>> >
>>> >         PR target/85388
>>> >         * config/i386/i386.c (ix86_expand_split_stack_prologue): Insert
>>> >         ENDBR after calling __morestack.
>>> >
>>> > gcc/testsuite/
>>> >
>>> >         PR target/85388
>>> >         * gcc.dg/pr85388-1.c: New test.
>>> >         * gcc.dg/pr85388-2.c: Likewise.
>>> >         * gcc.dg/pr85388-3.c: Likewise.
>>> >         * gcc.dg/pr85388-4.c: Likewise.
>>> >         * gcc.dg/pr85388-5.c: Likewise.
>>> >         * gcc.dg/pr85388-6.c: Likewise.
>>> > ---
>>> >  gcc/config/i386/i386.c           | 11 ++++++-
>>> >  gcc/testsuite/gcc.dg/pr85388-1.c | 50
>>> +++++++++++++++++++++++++++++
>>> >  gcc/testsuite/gcc.dg/pr85388-2.c | 56
>>> ++++++++++++++++++++++++++++++++
>>> >  gcc/testsuite/gcc.dg/pr85388-3.c | 65
>>> +++++++++++++++++++++++++++++++++++++
>>> >  gcc/testsuite/gcc.dg/pr85388-4.c | 69
>>> ++++++++++++++++++++++++++++++++++++++++
>>> >  gcc/testsuite/gcc.dg/pr85388-5.c | 54
>>> +++++++++++++++++++++++++++++++
>>> >  gcc/testsuite/gcc.dg/pr85388-6.c | 56
>>> ++++++++++++++++++++++++++++++++
>>> >  7 files changed, 360 insertions(+), 1 deletion(-)
>>> >  create mode 100644 gcc/testsuite/gcc.dg/pr85388-1.c
>>> >  create mode 100644 gcc/testsuite/gcc.dg/pr85388-2.c
>>> >  create mode 100644 gcc/testsuite/gcc.dg/pr85388-3.c
>>> >  create mode 100644 gcc/testsuite/gcc.dg/pr85388-4.c
>>> >  create mode 100644 gcc/testsuite/gcc.dg/pr85388-5.c
>>> >  create mode 100644 gcc/testsuite/gcc.dg/pr85388-6.c
>>> >
>>> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>>> > index 03e5c433574..8b4fd8ae30b 100644
>>> > --- a/gcc/config/i386/i386.c
>>> > +++ b/gcc/config/i386/i386.c
>>> > @@ -15242,7 +15242,16 @@ ix86_expand_split_stack_prologue (void)
>>> >       instruction--we need control flow to continue at the subsequent
>>> >       label.  Therefore, we use an unspec.  */
>>> >    gcc_assert (crtl->args.pops_args < 65536);
>>> > -  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
>>> > +  rtx_insn *ret_insn
>>> > +    = emit_insn (gen_split_stack_return (GEN_INT 
>>> > (crtl->args.pops_args)));
>>> > +
>>> > +  if ((flag_cf_protection & CF_BRANCH) && TARGET_IBT)
>>> > +    {
>>> > +      /* Insert ENDBR since __morestack will jump back here via indirect
>>> > +        call.  */
>>> > +      rtx cet_eb = gen_nop_endbr ();
>>> > +      emit_insn_after (cet_eb, ret_insn);
>>> > +    }
>>> >
>>> >    /* If we are in 64-bit mode and this function uses a static chain,
>>> >       we saved %r10 in %rax before calling _morestack.  */
>>>
>>> PING:
>>>
>>> https://gcc.gnu.org/ml/gcc-patches/2018-04/msg00669.html
>>>
>>
>> OK.
>
> I am going to check it in.
>

This is what I checked in.


-- 
H.J.
From 5cad7ddc0ee6f862414b411fae19516fe2f17e49 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Fri, 13 Apr 2018 05:27:09 -0700
Subject: [PATCH] i386: Insert ENDBR after __morestack call

Since __morestack will jump back to its callee via indirect call, we
need to insert ENDBR after calling __morestack.

gcc/

	PR target/85388
	* config/i386/i386.c (ix86_expand_split_stack_prologue): Insert
	ENDBR after calling __morestack.

gcc/testsuite/

	PR target/85388
	* gcc.dg/pr85388-1.c: New test.
	* gcc.dg/pr85388-2.c: Likewise.
	* gcc.dg/pr85388-3.c: Likewise.
	* gcc.dg/pr85388-4.c: Likewise.
	* gcc.dg/pr85388-5.c: Likewise.
	* gcc.dg/pr85388-6.c: Likewise.
---
 gcc/config/i386/i386.c           | 11 ++++++-
 gcc/testsuite/gcc.dg/pr85388-1.c | 50 +++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/pr85388-2.c | 56 ++++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/pr85388-3.c | 65 +++++++++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/pr85388-4.c | 69 ++++++++++++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/pr85388-5.c | 54 +++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/pr85388-6.c | 56 ++++++++++++++++++++++++++++++++
 7 files changed, 360 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr85388-1.c
 create mode 100644 gcc/testsuite/gcc.dg/pr85388-2.c
 create mode 100644 gcc/testsuite/gcc.dg/pr85388-3.c
 create mode 100644 gcc/testsuite/gcc.dg/pr85388-4.c
 create mode 100644 gcc/testsuite/gcc.dg/pr85388-5.c
 create mode 100644 gcc/testsuite/gcc.dg/pr85388-6.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9074526b8a1..d24c81b0dfe 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -15265,7 +15265,16 @@ ix86_expand_split_stack_prologue (void)
      instruction--we need control flow to continue at the subsequent
      label.  Therefore, we use an unspec.  */
   gcc_assert (crtl->args.pops_args < 65536);
-  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
+  rtx_insn *ret_insn
+    = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
+
+  if ((flag_cf_protection & CF_BRANCH))
+    {
+      /* Insert ENDBR since __morestack will jump back here via indirect
+	 call.  */
+      rtx cet_eb = gen_nop_endbr ();
+      emit_insn_after (cet_eb, ret_insn);
+    }
 
   /* If we are in 64-bit mode and this function uses a static chain,
      we saved %r10 in %rax before calling _morestack.  */
diff --git a/gcc/testsuite/gcc.dg/pr85388-1.c b/gcc/testsuite/gcc.dg/pr85388-1.c
new file mode 100644
index 00000000000..86d4737e32b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr85388-1.c
@@ -0,0 +1,50 @@
+/* This test needs to use setrlimit to set the stack size, so it can
+   only run on Unix.  */
+/* { dg-do run { target { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* } } } */
+/* { dg-require-effective-target cet } */
+/* { dg-require-effective-target split_stack } */
+/* { dg-options "-fsplit-stack -fcf-protection -mcet" } */
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+
+/* Use a noinline function to ensure that the buffer is not removed
+   from the stack.  */
+static void use_buffer (char *buf) __attribute__ ((noinline));
+static void
+use_buffer (char *buf)
+{
+  buf[0] = '\0';
+}
+
+/* Each recursive call uses 10,000 bytes.  We call it 1000 times,
+   using a total of 10,000,000 bytes.  If -fsplit-stack is not
+   working, that will overflow our stack limit.  */
+
+static void
+down (int i)
+{
+  char buf[10000];
+
+  if (i > 0)
+    {
+      use_buffer (buf);
+      down (i - 1);
+    }
+}
+
+int
+main (void)
+{
+  struct rlimit r;
+
+  /* We set a stack limit because we are usually invoked via make, and
+     make sets the stack limit to be as large as possible.  */
+  r.rlim_cur = 8192 * 1024;
+  r.rlim_max = 8192 * 1024;
+  if (setrlimit (RLIMIT_STACK, &r) != 0)
+    abort ();
+  down (1000);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/pr85388-2.c b/gcc/testsuite/gcc.dg/pr85388-2.c
new file mode 100644
index 00000000000..fd13d984c50
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr85388-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run { target { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* } } } */
+/* { dg-require-effective-target cet } */
+/* { dg-require-effective-target split_stack } */
+/* { dg-require-effective-target pthread_h } */
+/* { dg-options "-pthread -fsplit-stack -fcf-protection -mcet" } */
+
+#include <stdlib.h>
+#include <pthread.h>
+
+/* Use a noinline function to ensure that the buffer is not removed
+   from the stack.  */
+static void use_buffer (char *buf) __attribute__ ((noinline));
+static void
+use_buffer (char *buf)
+{
+  buf[0] = '\0';
+}
+
+/* Each recursive call uses 10,000 bytes.  We call it 1000 times,
+   using a total of 10,000,000 bytes.  If -fsplit-stack is not
+   working, that will overflow our stack limit.  */
+
+static void
+down (int i)
+{
+  char buf[10000];
+
+  if (i > 0)
+    {
+      use_buffer (buf);
+      down (i - 1);
+    }
+}
+
+static void *
+thread_routine (void *arg __attribute__ ((unused)))
+{
+  down (1000);
+  return NULL;
+}
+
+int
+main (void)
+{
+  int i;
+  pthread_t tid;
+  void *dummy;
+
+  i = pthread_create (&tid, NULL, thread_routine, NULL);
+  if (i != 0)
+    abort ();
+  i = pthread_join (tid, &dummy);
+  if (i != 0)
+    abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/pr85388-3.c b/gcc/testsuite/gcc.dg/pr85388-3.c
new file mode 100644
index 00000000000..730d2be9c22
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr85388-3.c
@@ -0,0 +1,65 @@
+/* This test needs to use setrlimit to set the stack size, so it can
+   only run on Unix.  */
+/* { dg-do run { target { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* } } } */
+/* { dg-require-effective-target cet } */
+/* { dg-require-effective-target split_stack } */
+/* { dg-options "-fsplit-stack -fcf-protection -mcet" } */
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+
+/* Use a noinline function to ensure that the buffer is not removed
+   from the stack.  */
+static void use_buffer (char *buf) __attribute__ ((noinline));
+static void
+use_buffer (char *buf)
+{
+  buf[0] = '\0';
+}
+
+/* Each recursive call uses 10,000 bytes.  We call it 1000 times,
+   using a total of 10,000,000 bytes.  If -fsplit-stack is not
+   working, that will overflow our stack limit.  */
+
+static void
+down (int i, ...)
+{
+  char buf[10000];
+  va_list ap;
+
+  va_start (ap, i);
+  if (va_arg (ap, int) != 1
+      || va_arg (ap, int) != 2
+      || va_arg (ap, int) != 3
+      || va_arg (ap, int) != 4
+      || va_arg (ap, int) != 5
+      || va_arg (ap, int) != 6
+      || va_arg (ap, int) != 7
+      || va_arg (ap, int) != 8
+      || va_arg (ap, int) != 9
+      || va_arg (ap, int) != 10)
+    abort ();
+
+  if (i > 0)
+    {
+      use_buffer (buf);
+      down (i - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+    }
+}
+
+int
+main (void)
+{
+  struct rlimit r;
+
+  /* We set a stack limit because we are usually invoked via make, and
+     make sets the stack limit to be as large as possible.  */
+  r.rlim_cur = 8192 * 1024;
+  r.rlim_max = 8192 * 1024;
+  if (setrlimit (RLIMIT_STACK, &r) != 0)
+    abort ();
+  down (1000, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/pr85388-4.c b/gcc/testsuite/gcc.dg/pr85388-4.c
new file mode 100644
index 00000000000..03937d0e735
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr85388-4.c
@@ -0,0 +1,69 @@
+/* This test needs to use setrlimit to set the stack size, so it can
+   only run on Unix.  */
+/* { dg-do run { target { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* } } } */
+/* { dg-require-effective-target cet } */
+/* { dg-require-effective-target split_stack } */
+/* { dg-options "-fsplit-stack -fcf-protection -mcet" } */
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+
+/* Use a noinline function to ensure that the buffer is not removed
+   from the stack.  */
+static void use_buffer (char *buf, size_t) __attribute__ ((noinline));
+static void
+use_buffer (char *buf, size_t c)
+{
+  size_t i;
+
+  for (i = 0; i < c; ++i)
+    buf[i] = (char) i;
+}
+
+/* Each recursive call uses 10 * i bytes.  We call it 1000 times,
+   using a total of 5,000,000 bytes.  If -fsplit-stack is not working,
+   that will overflow our stack limit.  */
+
+static void
+down1 (int i)
+{
+  char buf[10 * i];
+
+  if (i > 0)
+    {
+      use_buffer (buf, 10 * i);
+      down1 (i - 1);
+    }
+}
+
+/* Same thing, using alloca.  */
+
+static void
+down2 (int i)
+{
+  char *buf = alloca (10 * i);
+
+  if (i > 0)
+    {
+      use_buffer (buf, 10 * i);
+      down2 (i - 1);
+    }
+}
+
+int
+main (void)
+{
+  struct rlimit r;
+
+  /* We set a stack limit because we are usually invoked via make, and
+     make sets the stack limit to be as large as possible.  */
+  r.rlim_cur = 8192 * 1024;
+  r.rlim_max = 8192 * 1024;
+  if (setrlimit (RLIMIT_STACK, &r) != 0)
+    abort ();
+  down1 (1000);
+  down2 (1000);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/pr85388-5.c b/gcc/testsuite/gcc.dg/pr85388-5.c
new file mode 100644
index 00000000000..7462a40892c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr85388-5.c
@@ -0,0 +1,54 @@
+/* { dg-do run { target { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* } } } */
+/* { dg-require-effective-target cet } */
+/* { dg-require-effective-target split_stack } */
+/* { dg-options "-fsplit-stack -fcf-protection -mcet" } */
+
+/* This test is like split-3.c, but tests with a smaller stack frame,
+   since that uses a different prologue.  */
+
+#include <stdarg.h>
+#include <stdlib.h>
+
+/* Use a noinline function to ensure that the buffer is not removed
+   from the stack.  */
+static void use_buffer (char *buf) __attribute__ ((noinline));
+static void
+use_buffer (char *buf)
+{
+  buf[0] = '\0';
+}
+
+/* When using gold, the call to abort will force a stack split.  */
+
+static void
+down (int i, ...)
+{
+  char buf[1];
+  va_list ap;
+
+  va_start (ap, i);
+  if (va_arg (ap, int) != 1
+      || va_arg (ap, int) != 2
+      || va_arg (ap, int) != 3
+      || va_arg (ap, int) != 4
+      || va_arg (ap, int) != 5
+      || va_arg (ap, int) != 6
+      || va_arg (ap, int) != 7
+      || va_arg (ap, int) != 8
+      || va_arg (ap, int) != 9
+      || va_arg (ap, int) != 10)
+    abort ();
+
+  if (i > 0)
+    {
+      use_buffer (buf);
+      down (i - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+    }
+}
+
+int
+main (void)
+{
+  down (1000, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/pr85388-6.c b/gcc/testsuite/gcc.dg/pr85388-6.c
new file mode 100644
index 00000000000..23b5d8e3df6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr85388-6.c
@@ -0,0 +1,56 @@
+/* { dg-do run { target { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* } } } */
+/* { dg-require-effective-target cet } */
+/* { dg-require-effective-target split_stack } */
+/* { dg-options "-fsplit-stack -O2 -fcf-protection -mcet" } */
+/* { dg-options "-fsplit-stack -O2 -mno-accumulate-outgoing-args -fcf-protection -mcet" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */
+
+/* A case that used to fail on 32-bit x86 when optimizing and not
+   using -maccumulate-args.  The stack adjustment of the alloca got
+   mixed up with the arguments pushed on the stack to the function
+   before the call of alloca.  */
+
+#include <stdlib.h>
+
+typedef struct { const char* s; int l; } s;
+
+typedef unsigned long long align16 __attribute__ ((aligned(16)));
+
+s gobats (const void *, int) __attribute__ ((noinline));
+
+s
+gobats (const void* p __attribute__ ((unused)),
+	int l __attribute__ ((unused)))
+{
+  s v;
+  v.s = 0;
+  v.l = 0;
+  return v;
+}
+
+void check_aligned (void *p) __attribute__ ((noinline));
+
+void
+check_aligned (void *p)
+{
+  if (((__SIZE_TYPE__) p & 0xf) != 0)
+    abort ();
+}
+
+void gap (void *) __attribute__ ((noinline));
+
+void gap (void *p)
+{
+  align16 a;
+  check_aligned (&a);
+}
+
+int
+main (int argc, char **argv)
+{
+  s *space;
+  gobats(0, 16);
+  space = (s *) alloca(sizeof(s) + 1);
+  *space = (s){0, 16};
+  gap(space);
+  return 0;
+}
-- 
2.14.3

Reply via email to