i got sick of not having arguments in ddb stack traces on amd64.
they are missing because the first 6 arguments are generally
passed in registers, not on the stack. recovering the arguments
otherwise seems to rely on dwarf, which is complicated and not
something i think is right for ddb.

solaris has a simple solution to this, described at
https://blogs.oracle.com/sherrym/entry/obtaining_function_arguments_on_amd64.
the short version of this is they tweaked their compilers to have
functions save arguments on the stack, and their debug tools to
look at the stack to see them.

this ports their changes to gcc 3 to our gcc, as found at
https://github.com/joyent/gcc/commit/023cc9a4c9c698bed1f51d38eac850d327fc1146#diff-94a81875bb757178dee11b23d7cffee2
(thanks to kettenis@ for finding that).

i have a quick and dirty change to amd64 to take advantage of this
in the kernel, which in turn lets ddb do the following:

dlg@r630 ~$ sudo sysctl ddb.trigger=1 
Stopped at      Debugger+0x9:   leave
ddb{0}> ps
  PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
*36541  196789  53744      0  7         0x3                sysctl
39635  476331  37586   1000  3    0x100083  ttyin         ksh
37586    1910  72467   1000  3        0x90  select        sshd
72467  369897  23716      0  3        0x92  poll          sshd
53744  289772      1   1000  3    0x10008b  pause         ksh
28701   47347      1      0  3    0x100083  ttyin         getty
90069  344953      1      0  3    0x100083  ttyin         getty
38545  186981      1      0  3    0x100083  ttyin         getty
35956   54159      1      0  3    0x100083  ttyin         getty
13341   36346      1      0  3    0x100083  ttyin         getty
35543  128666      1      0  3    0x100098  poll          cron
 9182    3366      1      0  3    0x100080  kqread        vmd
 4770  514282      1    107  3    0x100092  kqread        vmd
14941   85380      1    107  3    0x100092  kqread        vmd
34895   34348      1      0  3        0x92  kqread        vmd
66406     989  92896     95  3    0x100092  kqread        smtpd
69193  282017  92896    103  3    0x100092  kqread        smtpd
52352   95484  92896     95  3    0x100092  kqread        smtpd
 1494  510202  92896     95  3    0x100092  kqread        smtpd
40895   51314  92896     95  3    0x100092  kqread        smtpd
91469   12329  92896     95  3    0x100092  kqread        smtpd
ddb{0}> tr /p 0t476331
sleep_finish(ffff800020f5e8b0,1,119,ffff800020f5e8b0,ffff8000020b5610,ffffffff8
193b008) at sleep_finish+0xc2
tsleep(ffff8000020b5610,119,ffffffff8193b008,0,0,443) at tsleep+0x164
ttysleep(ffff8000020b5600,ffff8000020b5610,119,ffffffff8193b008,0,ffff800020f5e
a00) at ttysleep+0x42
ttread(ffff8000020b5600,ffff800020f5ed20,0,1,ffff800001e53e80,ffff8000fffff978)
at ttread+0x1c5
ptsread(500,ffff800020f5ed20,0,500,ffff800020f5ed20,ffffff087c0fb1b8) at ptsrea
d+0x191
spec_read(ffff800020f5eb80,2001,ffffff0785e14c18,ffffff087c0fb1b8,1,0) at spec_
read+0x301
VOP_READ(ffffff087c0fb1b8,ffff800020f5ed20,0,ffffff087f7d3900,d9a1671edd3aaa2,1
) at VOP_READ+0x4f
vn_read(ffffff0785e14be8,ffffff0785e14c18,ffff800020f5ed20,ffffff087f7d3900,fff
fff0785e14be8,ffff800020f5eeb0) at vn_read+0xaf
dofilereadv(ffff8000fffff978,0,ffffff0785e14be8,ffff800020f5ede0,1,0) at dofile
readv+0x247
sys_read(ffff8000fffff978,ffff800020f5ee60,ffff800020f5eeb0,1,1,ffff800020f5ef2
0) at sys_read+0x95
syscall() at syscall+0x29f
--- syscall (number 3) ---
end of kernel
end trace frame: 0x7f7fffff59df, count: -11
0x1a9c77f5220a:
ddb{0}> tr
Debugger(ffffff087d95b800,7f7fffff5000,10,ffff800020f63d00,286,8) at Debugger+0
x9
ddb_sysctl(ffff800020f63db4,1,7f7fffff3a20,ffff800020f63de0,7f7fffff3a14,4) at d
db_sysctl+0x1c9
sys_sysctl(ffff800020eb2950,ffff800020f63e60,ffff800020f63eb0,1,ca,1) at sys_sy
sctl+0x239
syscall() at syscall+0x29f
--- syscall (number 202) ---
end of kernel
end trace frame: 0x7f7fffff5bbf, count: -4
0x71a6b2106ea:
ddb{0}>

ok?

Index: share/man/man1/gcc-local.1
===================================================================
RCS file: /cvs/src/share/man/man1/gcc-local.1,v
retrieving revision 1.51
diff -u -p -r1.51 gcc-local.1
--- share/man/man1/gcc-local.1  14 Feb 2017 12:50:15 -0000      1.51
+++ share/man/man1/gcc-local.1  24 Apr 2017 23:18:10 -0000
@@ -160,6 +160,14 @@ which includes additional functions to b
 have local array definitions
 or have references to local frame addresses.
 .It
+On amd64,
+.Fl msave-args
+can be passed to the compiler to have functions save their register
+arguments on the stack, while maintaining compatibility with the
+System V AMD64 ABI.
+This enables tools and debuggers that understand this semantic to
+trivially generate stack traces that include function arguments.
+.It
 On the alpha, amd64, arm, hppa, i386, mips64, powerpc, sh and sparc64
 architectures,
 .Nm gcc
Index: gnu/gcc/gcc/dwarf2.h
===================================================================
RCS file: /cvs/src/gnu/gcc/gcc/dwarf2.h,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 dwarf2.h
--- gnu/gcc/gcc/dwarf2.h        15 Oct 2009 17:11:28 -0000      1.1.1.1
+++ gnu/gcc/gcc/dwarf2.h        24 Apr 2017 23:18:10 -0000
@@ -371,6 +371,8 @@ enum dwarf_attribute
     DW_AT_GNU_vector = 0x2107,
     /* VMS extensions.  */
     DW_AT_VMS_rtnbeg_pd_address = 0x2201,
+    /* Sun extension.  */
+    DW_AT_SUN_amd64_parmdump = 0x2224,
     /* UPC extension.  */
     DW_AT_upc_threads_scaled = 0x3210,
     /* PGI (STMicroelectronics) extensions.  */
Index: gnu/gcc/gcc/dwarf2out.c
===================================================================
RCS file: /cvs/src/gnu/gcc/gcc/dwarf2out.c,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 dwarf2out.c
--- gnu/gcc/gcc/dwarf2out.c     15 Oct 2009 17:11:28 -0000      1.1.1.1
+++ gnu/gcc/gcc/dwarf2out.c     24 Apr 2017 23:18:10 -0000
@@ -11960,6 +11960,10 @@ gen_subprogram_die (tree decl, dw_die_re
   /* Add the calling convention attribute if requested.  */
   add_calling_convention_attribute (subr_die, TREE_TYPE (decl));
 
+#ifdef TARGET_SAVE_ARGS
+  if (TARGET_SAVE_ARGS)
+    add_AT_flag (subr_die, DW_AT_SUN_amd64_parmdump, 1);
+#endif
 }
 
 /* Generate a DIE to represent a declared data object.  */
Index: gnu/gcc/gcc/config/i386/i386.c
===================================================================
RCS file: /cvs/src/gnu/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.5
diff -u -p -r1.5 i386.c
--- gnu/gcc/gcc/config/i386/i386.c      1 Dec 2015 15:18:29 -0000       1.5
+++ gnu/gcc/gcc/config/i386/i386.c      24 Apr 2017 23:18:10 -0000
@@ -997,6 +997,10 @@ struct stack_local_entry GTY(())
 
    saved frame pointer if frame_pointer_needed
                                              <- HARD_FRAME_POINTER
+   [-msave-args]
+
+   [padding0]
+
    [saved regs]
 
    [padding1]          \
@@ -1009,6 +1013,8 @@ struct stack_local_entry GTY(())
   */
 struct ix86_frame
 {
+  int nmsave_args;
+  int padding0;
   int nregs;
   int padding1;
   int va_arg_size;
@@ -1164,6 +1170,7 @@ static const char *ix86_mangle_fundament
 static tree ix86_stack_protect_fail (void);
 static rtx ix86_internal_arg_pointer (void);
 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
+static void pro_epilogue_adjust_stack (rtx, rtx, rtx, int);
 
 /* This function is only used on Solaris.  */
 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
@@ -1897,6 +1904,8 @@ override_options (void)
      }
   else
     {
+      if (TARGET_SAVE_ARGS)
+        error ("-msave-args makes no sense in the 32-bit mode");
       /* i386 ABI does not specify red zone.  It still makes sense to use it
          when programmer takes care to stack from being destroyed.  */
       if (!(target_flags_explicit & MASK_NO_RED_ZONE))
@@ -4798,7 +4807,7 @@ ix86_can_use_return_insn_p (void)
     return 0;
 
   ix86_compute_frame_layout (&frame);
-  return frame.to_allocate == 0 && frame.nregs == 0;
+  return frame.to_allocate == 0 && frame.nmsave_args == 0 && frame.nregs == 0;
 }
 
 /* Value should be nonzero if functions must have frame pointers.
@@ -4818,6 +4827,9 @@ ix86_frame_pointer_required (void)
   if (SUBTARGET_FRAME_POINTER_REQUIRED)
     return 1;
 
+  if (TARGET_SAVE_ARGS)
+    return 1;
+
   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
      the frame pointer by default.  Turn it back on now if we've not
      got a leaf function.  */
@@ -5075,6 +5087,18 @@ ix86_nsaved_regs (void)
   return nregs;
 }
 
+/* Return number of arguments to be saved on the stack with
+   -msave-args.  */
+
+static int
+ix86_nsaved_args (void)
+{
+  if (TARGET_SAVE_ARGS)
+    return current_function_args_info.regno - current_function_returns_struct;
+  else
+    return 0;
+}
+
 /* Return the offset between two registers, one to be eliminated, and the other
    its replacement, at the start of a routine.  */
 
@@ -5114,6 +5138,7 @@ ix86_compute_frame_layout (struct ix86_f
 
   frame->local_size = size;
   frame->nregs = ix86_nsaved_regs ();
+  frame->nmsave_args = ix86_nsaved_args ();
   total_size = size;
 
   stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
@@ -5155,6 +5180,11 @@ ix86_compute_frame_layout (struct ix86_f
   else
     frame->save_regs_using_mov = false;
 
+  if (TARGET_SAVE_ARGS)
+    {
+       cfun->machine->use_fast_prologue_epilogue = true;
+       frame->save_regs_using_mov = true;
+    }
 
   /* Skip return address and saved base pointer.  */
   offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
@@ -5174,6 +5204,16 @@ ix86_compute_frame_layout (struct ix86_f
   if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
     stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
 
+  /* Argument save area */
+  if (TARGET_SAVE_ARGS)
+    {
+       offset += frame->nmsave_args * UNITS_PER_WORD;
+       frame->padding0 = (frame->nmsave_args % 2) * UNITS_PER_WORD;
+       offset += frame->padding0;
+    }
+  else
+    frame->padding0 = 0;
+
   /* Register save area */
   offset += frame->nregs * UNITS_PER_WORD;
 
@@ -5231,8 +5271,10 @@ ix86_compute_frame_layout (struct ix86_f
     (size + frame->padding1 + frame->padding2
      + frame->outgoing_arguments_size + frame->va_arg_size);
 
-  if ((!frame->to_allocate && frame->nregs <= 1)
-      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
+  if (!TARGET_SAVE_ARGS
+      && ((!frame->to_allocate && frame->nregs <= 1)
+         || (TARGET_64BIT
+             && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000)))
     frame->save_regs_using_mov = false;
 
   if (TARGET_RED_ZONE && current_function_sp_is_unchanging
@@ -5241,7 +5283,11 @@ ix86_compute_frame_layout (struct ix86_f
     {
       frame->red_zone_size = frame->to_allocate;
       if (frame->save_regs_using_mov)
-       frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
+      {
+         frame->red_zone_size
+           += (frame->nregs + frame->nmsave_args) * UNITS_PER_WORD;
+         frame->red_zone_size += frame->padding0;
+      }
       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
     }
@@ -5250,6 +5296,8 @@ ix86_compute_frame_layout (struct ix86_f
   frame->to_allocate -= frame->red_zone_size;
   frame->stack_pointer_offset -= frame->red_zone_size;
 #if 0
+  fprintf (stderr, "nmsave_args: %i\n", frame->nmsave_args);
+  fprintf (stderr, "padding0: %i\n", frame->padding0);
   fprintf (stderr, "nregs: %i\n", frame->nregs);
   fprintf (stderr, "size: %i\n", size);
   fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
@@ -5273,6 +5321,22 @@ ix86_emit_save_regs (void)
   unsigned int regno;
   rtx insn;
 
+  if (TARGET_SAVE_ARGS)
+    {
+      int i;
+      int nsaved = ix86_nsaved_args ();
+      int start = cfun->returns_struct;
+      for (i = start; i < start + nsaved; i++)
+       {
+         regno = x86_64_int_parameter_registers[i];
+         insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
+         RTX_FRAME_RELATED_P (insn) = 1;
+       }
+      if (nsaved % 2 != 0)
+       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+                                  GEN_INT (-UNITS_PER_WORD), -1);
+    }
+
   for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
     if (ix86_save_reg (regno, true))
       {
@@ -5298,6 +5362,25 @@ ix86_emit_save_regs_using_mov (rtx point
        RTX_FRAME_RELATED_P (insn) = 1;
        offset += UNITS_PER_WORD;
       }
+
+  if (TARGET_SAVE_ARGS)
+    {
+      int i;
+      int nsaved = ix86_nsaved_args ();
+      int start = cfun->returns_struct;
+      if (nsaved % 2 != 0)
+       offset += UNITS_PER_WORD;
+      for (i = start + nsaved - 1; i >= start; i--)
+       {
+         regno = x86_64_int_parameter_registers[i];
+         insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
+                                                Pmode, offset),
+                                gen_rtx_REG (Pmode, regno));
+         RTX_FRAME_RELATED_P (insn) = 1;
+         offset += UNITS_PER_WORD;
+       }
+    }
+
 }
 
 /* Expand prologue or epilogue stack adjustment.
@@ -5470,14 +5553,16 @@ ix86_expand_prologue (void)
   if (!frame.save_regs_using_mov)
     ix86_emit_save_regs ();
   else
-    allocate += frame.nregs * UNITS_PER_WORD;
+    allocate += (frame.nregs + frame.nmsave_args) * UNITS_PER_WORD
+      + frame.padding0;
 
   /* When using red zone we may start register saving before allocating
      the stack frame saving one cycle of the prologue.  */
   if (TARGET_RED_ZONE && frame.save_regs_using_mov)
     ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
                                   : stack_pointer_rtx,
-                                  -frame.nregs * UNITS_PER_WORD);
+                                  -(frame.nregs + frame.nmsave_args)
+                                   * UNITS_PER_WORD - frame.padding0);
 
   if (allocate == 0)
     ;
@@ -5514,7 +5599,8 @@ ix86_expand_prologue (void)
            t = plus_constant (hard_frame_pointer_rtx,
                               allocate
                               - frame.to_allocate
-                              - frame.nregs * UNITS_PER_WORD);
+                              - (frame.nregs + frame.nmsave_args)
+                                * UNITS_PER_WORD - frame.padding0);
          else
            t = plus_constant (stack_pointer_rtx, allocate);
          emit_move_insn (eax, gen_rtx_MEM (SImode, t));
@@ -5523,11 +5609,13 @@ ix86_expand_prologue (void)
 
   if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
     {
-      if (!frame_pointer_needed || !frame.to_allocate)
+      if (!TARGET_SAVE_ARGS &&
+       (!frame_pointer_needed || !frame.to_allocate))
         ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
       else
         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
-                                      -frame.nregs * UNITS_PER_WORD);
+                                      -(frame.nregs + frame.nmsave_args)
+                                       * UNITS_PER_WORD - frame.padding0);
     }
 
   pic_reg_used = false;
@@ -5611,10 +5699,11 @@ ix86_expand_epilogue (int style)
      must be taken for the normal return case of a function using
      eh_return: the eax and edx registers are marked as saved, but not
      restored along this path.  */
-  offset = frame.nregs;
+  offset = frame.nregs + frame.nmsave_args;
   if (current_function_calls_eh_return && style != 2)
     offset -= 2;
   offset *= -UNITS_PER_WORD;
+  offset -= frame.padding0;
 
   /* If we're only restoring one register and sp is not valid then
      using a move instruction to restore the register since it's
@@ -5670,14 +5759,16 @@ ix86_expand_epilogue (int style)
            {
              tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              tmp = plus_constant (tmp, (frame.to_allocate
-                                         + frame.nregs * UNITS_PER_WORD));
+                                         + (frame.nregs + frame.nmsave_args)
+                                          * UNITS_PER_WORD + frame.padding0));
              emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
            }
        }
       else if (!frame_pointer_needed)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (frame.to_allocate
-                                           + frame.nregs * UNITS_PER_WORD),
+                                           + (frame.nregs + frame.nmsave_args)
+                                            * UNITS_PER_WORD + frame.padding0),
                                   style);
       /* If not an i386, mov & pop is faster than "leave".  */
       else if (TARGET_USE_LEAVE || optimize_size
@@ -5717,6 +5808,10 @@ ix86_expand_epilogue (int style)
            else
              emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
          }
+      if (frame.nmsave_args)
+        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+                                GEN_INT (frame.nmsave_args * UNITS_PER_WORD
+                                         + frame.padding0), style);
       if (frame_pointer_needed)
        {
          /* Leave results in shorter dependency chains on CPUs that are
Index: gnu/gcc/gcc/config/i386/i386.opt
===================================================================
RCS file: /cvs/src/gnu/gcc/gcc/config/i386/i386.opt,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 i386.opt
--- gnu/gcc/gcc/config/i386/i386.opt    15 Oct 2009 17:11:30 -0000      1.1.1.1
+++ gnu/gcc/gcc/config/i386/i386.opt    24 Apr 2017 23:18:10 -0000
@@ -221,6 +221,10 @@ mtls-direct-seg-refs
 Target Report Mask(TLS_DIRECT_SEG_REFS)
 Use direct references against %gs when accessing tls data
 
+msave-args
+Target Report Mask(SAVE_ARGS)
+Save integer arguments on the stack at function entry
+
 mtune=
 Target RejectNegative Joined Var(ix86_tune_string)
 Schedule code for given CPU
Index: gnu/gcc/gcc/doc/invoke.texi
===================================================================
RCS file: /cvs/src/gnu/gcc/gcc/doc/invoke.texi,v
retrieving revision 1.3
diff -u -p -r1.3 invoke.texi
--- gnu/gcc/gcc/doc/invoke.texi 14 Jan 2014 02:03:57 -0000      1.3
+++ gnu/gcc/gcc/doc/invoke.texi 24 Apr 2017 23:18:10 -0000
@@ -9806,6 +9806,10 @@ building of shared libraries are not sup
 Generate code for the large model: This model makes no assumptions
 about addresses and sizes of sections.  Currently GCC does not implement
 this model.
+
+@item -msave-args
+@opindex msave-args
+Save integer arguments on the stack at function entry.
 @end table
 
 @node IA-64 Options

Reply via email to