Author: wma
Date: Wed Apr  6 05:13:36 2016
New Revision: 297611
URL: https://svnweb.freebsd.org/changeset/base/297611

Log:
  Implement dtrace_getupcstack in ARM64
  
  Allow using DTRACE for performance analysis of userspace
  applications - the function call stack can be captured.
  This is almost an exact copy of the AMD64 solution.
  
  Obtained from:         Semihalf
  Sponsored by:          Cavium
  Reviewed by:           emaste, gnn, jhibbits
  Differential Revision: https://reviews.freebsd.org/D5779

Modified:
  head/sys/arm64/include/frame.h
  head/sys/cddl/dev/dtrace/aarch64/dtrace_isa.c

Modified: head/sys/arm64/include/frame.h
==============================================================================
--- head/sys/arm64/include/frame.h      Wed Apr  6 04:58:20 2016        
(r297610)
+++ head/sys/arm64/include/frame.h      Wed Apr  6 05:13:36 2016        
(r297611)
@@ -49,6 +49,11 @@ struct trapframe {
        uint64_t tf_x[30];
 };
 
+struct arm64_frame {           /* one frame record, as walked by the DTrace user stack unwinder */
+       struct arm64_frame      *f_frame;       /* saved frame pointer: next frame record up the stack */
+       u_long                  f_retaddr;      /* saved return address stored alongside it */
+};
+
 /*
  * Signal frame, pushed onto the user stack
  */

Modified: head/sys/cddl/dev/dtrace/aarch64/dtrace_isa.c
==============================================================================
--- head/sys/cddl/dev/dtrace/aarch64/dtrace_isa.c       Wed Apr  6 04:58:20 
2016        (r297610)
+++ head/sys/cddl/dev/dtrace/aarch64/dtrace_isa.c       Wed Apr  6 05:13:36 
2016        (r297611)
@@ -57,6 +57,7 @@
  */
 #define        MAX_FUNCTION_SIZE 0x10000
 #define        MAX_PROLOGUE_SIZE 0x100
+#define        MAX_USTACK_DEPTH  2048  /* cap on user frames walked, guards against circular stacks */
 
 uint8_t dtrace_fuword8_nocheck(void *);
 uint16_t dtrace_fuword16_nocheck(void *);
@@ -111,11 +112,127 @@ dtrace_getpcstack(pc_t *pcstack, int pcs
        }
 }
 
+/*
+ * Walk a userland frame-pointer chain and collect the program counters.
+ *
+ *   pcstack        output array, or NULL to walk without storing anything
+ *   pcstack_limit  number of slots available in pcstack
+ *   pc             first program counter to report
+ *   fp             address of the first frame record to read
+ *
+ * The walk stops when pc or fp becomes zero, when pcstack has been filled,
+ * when reading user memory raised CPU_DTRACE_FAULT (the fault is cleared),
+ * or when the stack looks malformed.  A malformed stack is one that is
+ * deeper than MAX_USTACK_DEPTH or that contains a frame record whose saved
+ * frame pointer refers back to that same record; in both cases
+ * CPU_DTRACE_BADSTACK is set and the offending fp is stored in
+ * cpuc_dtrace_illval.
+ *
+ * Returns the number of loop iterations started.  This equals the number
+ * of pcs visited, except that it is one greater when the
+ * MAX_USTACK_DEPTH check trips, because the counter is incremented as part
+ * of that check.
+ */
+static int
+dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc,
+    uintptr_t fp)
+{
+       volatile uint16_t *flags =
+           (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
+       int ret = 0;
+       uintptr_t oldfp;
+
+       ASSERT(pcstack == NULL || pcstack_limit > 0);
+
+       while (pc != 0) {
+               /*
+                * We limit the number of times we can go around this
+                * loop to account for a circular stack.
+                */
+               if (ret++ >= MAX_USTACK_DEPTH) {
+                       *flags |= CPU_DTRACE_BADSTACK;
+                       cpu_core[curcpu].cpuc_dtrace_illval = fp;
+                       break;
+               }
+
+               if (pcstack != NULL) {
+                       *pcstack++ = (uint64_t)pc;
+                       pcstack_limit--;
+                       if (pcstack_limit <= 0)
+                               break;
+               }
+
+               if (fp == 0)
+                       break;
+
+               /*
+                * Remember the frame record we are about to leave so that a
+                * record pointing back at itself can be detected below.
+                * Without this assignment the comparison would read an
+                * uninitialized variable.
+                */
+               oldfp = fp;
+
+               pc = dtrace_fuword64((void *)(fp +
+                   offsetof(struct arm64_frame, f_retaddr)));
+               fp = dtrace_fuword64((void *)fp);
+
+               if (fp == oldfp) {
+                       *flags |= CPU_DTRACE_BADSTACK;
+                       cpu_core[curcpu].cpuc_dtrace_illval = fp;
+                       break;
+               }
+
+               /*
+                * ARM64TODO:
+                *     This workaround might not be necessary. It needs to be
+                *     revised and removed from all architectures if found
+                *     unwanted. Leaving the original x86 comment for reference.
+                *
+                * This is totally bogus:  if we faulted, we're going to clear
+                * the fault and break.  This is to deal with the apparently
+                * broken Java stacks on x86.
+                */
+               if (*flags & CPU_DTRACE_FAULT) {
+                       *flags &= ~CPU_DTRACE_FAULT;
+                       break;
+               }
+       }
+
+       return (ret);
+}
+
 void
 dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
 {
+       proc_t *p = curproc;
+       struct trapframe *tf;
+       uintptr_t pc, sp, fp;
+       volatile uint16_t *flags =
+           (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
+       int n;
+
+       if (*flags & CPU_DTRACE_FAULT)
+               return;
+
+       if (pcstack_limit <= 0)
+               return;
+
+       /*
+        * If there's no user context we still need to zero the stack.
+        */
+       if (p == NULL || (tf = curthread->td_frame) == NULL)
+               goto zero;
+
+       *pcstack++ = (uint64_t)p->p_pid;
+       pcstack_limit--;
+
+       if (pcstack_limit <= 0)
+               return;
+
+       pc = tf->tf_elr;
+       sp = tf->tf_sp;
+       fp = tf->tf_x[29];
 
-       printf("IMPLEMENT ME: %s\n", __func__);
+       if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
+               /*
+                * In an entry probe.  The frame pointer has not yet been
+                * pushed (that happens in the function prologue).  The
+                * best approach is to add the current pc as a missing top
+                * of stack and back the pc up to the caller, which is stored
+                * at the current stack pointer address since the call
+                * instruction puts it there right before the branch.
+                */
+
+               *pcstack++ = (uint64_t)pc;
+               pcstack_limit--;
+               if (pcstack_limit <= 0)
+                       return;
+
+               pc = tf->tf_lr;
+       }
+
+       n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp);
+       ASSERT(n >= 0);
+       ASSERT(n <= pcstack_limit);
+
+       pcstack += n;
+       pcstack_limit -= n;
+
+zero:
+       while (pcstack_limit-- > 0)
+               *pcstack++ = 0;
 }
 
 int
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to