This patch adds handling of new omp_simt_enter/omp_simt_exit named insns
in the NVPTX backend.
* config/nvptx/nvptx-protos.h (nvptx_output_simt_enter): Declare.
(nvptx_output_simt_exit): Declare.
* config/nvptx/nvptx.c (nvptx_init_unisimt_predicate): Use
cfun->machine->unisimt_location. Handle NULL unisimt_predicate.
(init_softstack_frame): Move initialization of crtl->is_leaf to...
(nvptx_declare_function_name): ...here. Emit declaration of local
memory space buffer for omp_simt_enter insn.
(nvptx_output_unisimt_switch): New.
(nvptx_output_softstack_switch): New.
(nvptx_output_simt_enter): New.
(nvptx_output_simt_exit): New.
* config/nvptx/nvptx.h (struct machine_function): New fields
has_simtreg, unisimt_location, simt_stack_size, simt_stack_align.
* config/nvptx/nvptx.md (UNSPECV_SIMT_ENTER): New unspec.
(UNSPECV_SIMT_EXIT): Ditto.
(omp_simt_enter_insn): New insn.
(omp_simt_enter): New expansion.
(omp_simt_exit): New insn.
* config/nvptx/nvptx.opt (msoft-stack-reserve-local): New option.
---
gcc/config/nvptx/nvptx-protos.h | 2 +
gcc/config/nvptx/nvptx.c | 163 +++-
gcc/config/nvptx/nvptx.h | 6 ++
gcc/config/nvptx/nvptx.md | 39 ++
gcc/config/nvptx/nvptx.opt | 4 +
5 files changed, 196 insertions(+), 18 deletions(-)
diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h
index aaea3ba..16b316f 100644
--- a/gcc/config/nvptx/nvptx-protos.h
+++ b/gcc/config/nvptx/nvptx-protos.h
@@ -53,5 +53,7 @@ extern const char *nvptx_output_mov_insn (rtx, rtx);
extern const char *nvptx_output_call_insn (rtx_insn *, rtx, rtx);
extern const char *nvptx_output_return (void);
extern const char *nvptx_output_set_softstack (unsigned);
+extern const char *nvptx_output_simt_enter (rtx, rtx, rtx);
+extern const char *nvptx_output_simt_exit (rtx);
#endif
#endif
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 647855c..83f4610 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -1048,11 +1048,6 @@ init_softstack_frame (FILE *file, unsigned alignment, HOST_WIDE_INT size)
fprintf (file, "\t\tsub.u%d %s, %s, " HOST_WIDE_INT_PRINT_DEC ";\n",
bits, reg_stack, reg_frame, size);
- /* Usually 'crtl->is_leaf' is computed during register allocator
- initialization (which is not done on NVPTX) or for pressure-sensitive
- optimizations. Initialize it here, except if already set. */
- if (!crtl->is_leaf)
-crtl->is_leaf = leaf_function_p ();
if (!crtl->is_leaf)
fprintf (file, "\t\tst.shared.u%d [%s], %s;\n",
bits, reg_sspslot, reg_stack);
@@ -1080,24 +1075,29 @@ nvptx_init_axis_predicate (FILE *file, int regno, const char *name)
static void
nvptx_init_unisimt_predicate (FILE *file)
{
+ cfun->machine->unisimt_location = gen_reg_rtx (Pmode);
+ int loc = REGNO (cfun->machine->unisimt_location);
int bits = POINTER_SIZE;
- int master = REGNO (cfun->machine->unisimt_master);
- int pred = REGNO (cfun->machine->unisimt_predicate);
+ fprintf (file, "\t.reg.u%d %%r%d;\n", bits, loc);
fprintf (file, "\t{\n");
fprintf (file, "\t\t.reg.u32 %%ustmp0;\n");
fprintf (file, "\t\t.reg.u%d %%ustmp1;\n", bits);
- fprintf (file, "\t\t.reg.u%d %%ustmp2;\n", bits);
fprintf (file, "\t\tmov.u32 %%ustmp0, %%tid.y;\n");
fprintf (file, "\t\tmul%s.u32 %%ustmp1, %%ustmp0, 4;\n",
bits == 64 ? ".wide" : ".lo");
- fprintf (file, "\t\tmov.u%d %%ustmp2, __nvptx_uni;\n", bits);
- fprintf (file, "\t\tadd.u%d %%ustmp2, %%ustmp2, %%ustmp1;\n", bits);
- fprintf (file, "\t\tld.shared.u32 %%r%d, [%%ustmp2];\n", master);
- fprintf (file, "\t\tmov.u32 %%ustmp0, %%tid.x;\n");
- /* Compute 'master lane index' as 'tid.x & __nvptx_uni[tid.y]'. */
- fprintf (file, "\t\tand.b32 %%r%d, %%r%d, %%ustmp0;\n", master, master);
- /* Compute predicate as 'tid.x == master'. */
- fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, %%ustmp0;\n", pred, master);
+ fprintf (file, "\t\tmov.u%d %%r%d, __nvptx_uni;\n", bits, loc);
+ fprintf (file, "\t\tadd.u%d %%r%d, %%r%d, %%ustmp1;\n", bits, loc, loc);
+ if (cfun->machine->unisimt_predicate)
+{
+ int master = REGNO (cfun->machine->unisimt_master);
+ int pred = REGNO (cfun->machine->unisimt_predicate);
+ fprintf (file, "\t\tld.shared.u32 %%r%d, [%%r%d];\n", master, loc);
+ fprintf (file, "\t\tmov.u32 %%ustmp0, %%laneid;\n");
+ /* Compute 'master lane index' as 'laneid & __nvptx_uni[tid.y]'. */
+ fprintf (file, "\t\tand.b32 %%r%d, %%r%d, %%ustmp0;\n", master, master);
+ /* Compute predicate as 'laneid == master'. */
+ fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, %%ustmp0;\n", pred, master);
+}
fprintf (file, "\t}\n");
need_unisimt_decl = true;
}
@@ -1224,6 +1224,12 @@ nvptx_declare_function_name (FILE