This patch implements variable stack allocation for alloca/VLA on NVPTX if
-msoft-stack is enabled.  In addition to moving the stack pointer, we need to
copy the updated pointer into __nvptx_stacks[tid.y].

        * config/nvptx/nvptx.c (nvptx_declare_function_name): Emit %outargs
        using .local %outargs_ar only if not TARGET_SOFT_STACK.  Emit %outargs
        under TARGET_SOFT_STACK by offsetting from %frame.
        (nvptx_get_drap_rtx): Return %argp as the DRAP if needed.
        * config/nvptx/nvptx.md (nvptx_register_operand): Allow %outargs under
        TARGET_SOFT_STACK.
        (nvptx_nonimmediate_operand): Ditto.
        (allocate_stack): Implement for TARGET_SOFT_STACK.  Remove unused code.
        (allocate_stack_<mode>): Remove unused pattern.
        (set_softstack_insn): New pattern.
        (restore_stack_block): Handle for TARGET_SOFT_STACK.
---

I have committed this patch to the gomp-nvptx branch.  Bernd, Nathan, I would
appreciate it if you could comment on the 'define_predicate' changes in nvptx.md.
There are three predicates that start like this:

  if (REG_P (op))
    return !HARD_REGISTER_P (op);
  if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))
    return false;
  if (GET_CODE (op) == SUBREG)
    return false;

For stack adjustments I need to allow operations on the stack pointer.  For
now I've implemented that as a fairly straightforward shortcut, but I guess it
doesn't look very nice.  What is the reason for rejecting "hard registers" there
in the first place?  In any case, I'd like your input if you see a better way
to handle it.

Also, note that there's either a bug or a cleanup opportunity: the third "if"
statement is clearly more general than the second, which makes the second one
redundant.

No regressions on check-c testsuite (with 'alloca' effective-target enabled).

Thanks.
Alexander

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index b12a7a8..599e460 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -883,7 +883,7 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
   HOST_WIDE_INT sz = crtl->outgoing_args_size;
   if (sz == 0)
     sz = 1;
-  if (cfun->machine->has_call_with_varargs)
+  if (!TARGET_SOFT_STACK && cfun->machine->has_call_with_varargs)
     {
       fprintf (file, "\t.reg.u%d %%outargs;\n"
               "\t.local.align 8 .b8 %%outargs_ar["
@@ -897,7 +897,8 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
   sz = get_frame_size ();
   if (sz == 0 && cfun->machine->has_call_with_sc)
     sz = 1;
-  if (sz > 0)
+  bool need_sp = cfun->calls_alloca || cfun->machine->has_call_with_varargs;
+  if (sz > 0 || TARGET_SOFT_STACK && need_sp)
     {
       int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT;
 
@@ -923,10 +924,15 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
          if (alignment > keep_align)
            fprintf (file, "\tand.b%d %%frame, %%frame, %d;\n",
                     bits, -alignment);
+         fprintf (file, "\t.reg.u%d %%outargs;\n", bits);
+         sz = crtl->outgoing_args_size;
+         gcc_assert (sz % keep_align == 0);
+         fprintf (file, "\tsub.u%d %%outargs, %%frame, "
+                  HOST_WIDE_INT_PRINT_DEC ";\n", bits, sz);
          /* crtl->is_leaf is not initialized because RA is not run.  */
          if (!leaf_function_p ())
            {
-             fprintf (file, "\tst.shared.u%d [%%fstmp2], %%frame;\n", bits);
+             fprintf (file, "\tst.shared.u%d [%%fstmp2], %%outargs;\n", bits);
              cfun->machine->using_softstack = true;
            }
          need_softstack_decl = true;
@@ -996,6 +1002,8 @@ nvptx_function_ok_for_sibcall (tree, tree)
 static rtx
 nvptx_get_drap_rtx (void)
 {
+  if (TARGET_SOFT_STACK && stack_realign_drap)
+    return arg_pointer_rtx;
   return NULL_RTX;
 }
 
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index ae1909d..130c809 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -69,6 +69,8 @@ (define_attr "divergent" "false,true"
 (define_predicate "nvptx_register_operand"
   (match_code "reg,subreg")
 {
+  if (TARGET_SOFT_STACK && op == stack_pointer_rtx)
+    return true;
   if (REG_P (op))
     return !HARD_REGISTER_P (op);
   if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))
@@ -123,6 +125,8 @@ (define_predicate "nvptx_general_operand"
 (define_predicate "nvptx_nonimmediate_operand"
   (match_code "reg,subreg,mem")
 {
+  if (TARGET_SOFT_STACK && op == stack_pointer_rtx)
+    return true;
   if (REG_P (op))
     return (op != frame_pointer_rtx
            && op != arg_pointer_rtx
@@ -1061,31 +1065,41 @@ (define_expand "allocate_stack"
    (match_operand 1 "nvptx_register_operand")]
   ""
 {
+  if (TARGET_SOFT_STACK)
+    {
+      emit_move_insn (stack_pointer_rtx,
+                     gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
+      emit_insn (gen_set_softstack_insn (stack_pointer_rtx));
+      emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+      DONE;
+    }
   /* The ptx documentation specifies an alloca intrinsic (for 32 bit
      only)  but notes it is not implemented.  The assembler emits a
      confused error message.  Issue a blunt one now instead.  */
   sorry ("target cannot support alloca.");
   emit_insn (gen_nop ());
   DONE;
-  if (TARGET_ABI64)
-    emit_insn (gen_allocate_stack_di (operands[0], operands[1]));
-  else
-    emit_insn (gen_allocate_stack_si (operands[0], operands[1]));
-  DONE;
 })
 
-(define_insn "allocate_stack_<mode>"
-  [(set (match_operand:P 0 "nvptx_register_operand" "=R")
-        (unspec:P [(match_operand:P 1 "nvptx_register_operand" "R")]
-                   UNSPEC_ALLOCA))]
-  ""
-  "%.\\tcall (%0), %%alloca, (%1);")
+(define_insn "set_softstack_insn"
+  [(unspec [(match_operand 0 "nvptx_register_operand" "R")] UNSPEC_ALLOCA)]
+  "TARGET_SOFT_STACK"
+{
+  return (cfun->machine->using_softstack
+         ? "%.\\tst.shared%t0\\t[%%fstmp2], %0;"
+         : "");
+})
 
 (define_expand "restore_stack_block"
   [(match_operand 0 "register_operand" "")
    (match_operand 1 "register_operand" "")]
   ""
 {
+  if (TARGET_SOFT_STACK)
+    {
+      emit_move_insn (operands[0], operands[1]);
+      emit_insn (gen_set_softstack_insn (operands[0]));
+    }
   DONE;
 })
 
-- 
1.8.3.1

Reply via email to