On Thu, Nov 12, 2015 at 3:02 PM, Barret Rhoden <[email protected]> wrote:
> > +static void kprof_usage_fail(void)
> > +{
> > + static const char *ctlstring = "clear|start|stop|timer";
> > + const char* const * cmds = profiler_configure_cmds();
> > + size_t i, size, csize;
> > + char msgbuf[128];
> > +
> > + strncpy(msgbuf, ctlstring, sizeof(msgbuf));
>
This is arguably incorrect. If one decreased the size of 'msgbuf' or made
ctlstring larger so that sizeof(msgbuf) <= strlen(ctlstring), this
would omit the trailing NUL on 'msgbuf'. Even if that's not true, this is
overwriting every byte of 'msgbuf' beyond the copy of 'ctlstring' with
0s, seemingly unnecessarily.
> > + size = MIN(strlen(ctlstring), sizeof(msgbuf) - 1);
>
Strlen(ctlstring) could then cause a crash in the previous example.
> > + for (i = 0; cmds[i]; i++) {
> > + csize = strlen(cmds[i]);
> > + if ((csize + size + 2) > sizeof(msgbuf))
> > + break;
> > + msgbuf[size] = '|';
> > + memcpy(msgbuf + size + 1, cmds[i], csize);
> > + size += csize + 1;
> > + }
> > + msgbuf[size] = 0;
>
This loop could be replaced with,
strlcpy(msgbuf, ctlstring, sizeof(msgbuf));
for (int i = 0; i < nelem(cmds); i++) {
strlcat(msgbuf, "|", sizeof(msgbuf));
strlcat(msgbuf, cmds[i], sizeof(msgbuf));
}
> > +
> > + error(EFAIL, msgbuf);
> > +}
>
> This is cool, and the pattern tends to come up a lot, where we'd like to
> generate usage information. Perhaps there's a Plan 9 helper for this
> already?
> (I don't know off the top of my head, maybe the Plan 9 guys do). We
> already
> have stuff that parses command strings. If we don't have a helper, we can
> make
> one, so that every device doesn't have to redo this logic.
>
> Another part to it is that a device has several layers of arguments and
> commands
> - it'd be neat if that was clearly written down somewhere and then we could
> generate these switches and error messages more easily. This isn't
> something
> that needs to be done now, but I like your kprof_usage_fail and it brought
> the
> topic up. =)
>
> > +static struct trace_printk_buffer *kprof_get_printk_buffer(void)
> > +{
> > + static struct trace_printk_buffer boot_tpb;
> > + static struct trace_printk_buffer *cpu_tpbs;
> > +
> > + if (unlikely(booting))
> > + return &boot_tpb;
> > + if (unlikely(!cpu_tpbs)) {
> > + /* Poor man per-CPU data structure. I really do no like
> littering global
> > + * data structures with module specific data.
> > + */
> > + spin_lock_irqsave(&ktrace_lock);
> > + if (!cpu_tpbs)
> > + cpu_tpbs = kzmalloc(num_cores * sizeof(struct
> trace_printk_buffer),
> > + 0);
>
> If this alloc fails, (flags == 0), then we'll return 0 (+coreid) and PF
> later.
>
> > + spin_unlock_irqsave(&ktrace_lock);
> > }
> > +
> > + return cpu_tpbs + core_id();
> > }
>
>
> > --- a/kern/include/kdebug.h
> > +++ b/kern/include/kdebug.h
> > @@ -46,7 +48,7 @@ int printdump(char *buf, int buflen, uint8_t *data);
> > extern bool printx_on;
> > void set_printx(int mode);
> > #define printx(args...) if (printx_on) printk(args)
> > -#define trace_printx(args...) if (printx_on) trace_printk(args)
> > +#define trace_printx(args...) if (printx_on) trace_printk(TRUE, args)
> ^FALSE?
> We just want to print text, not a backtrace. That was also my intent for
> trace_printk (it's just the print, not a backtrace).
>
> > --- a/kern/src/mm.c
> > +++ b/kern/src/mm.c
> > @@ -24,6 +24,7 @@
> > #include <kmalloc.h>
> > #include <vfs.h>
> > #include <smp.h>
> > +#include <profiler.h>
> >
> > struct kmem_cache *vmr_kcache;
> >
> > @@ -692,6 +693,9 @@ void *do_mmap(struct proc *p, uintptr_t addr, size_t
> len, int prot, int flags,
> > }
> > }
> > spin_unlock(&p->vmr_lock);
> > +
> > + profiler_notify_mmap(p, addr, len, prot, flags, file, offset);
> > +
>
> Do you need to know when something was unmapped?
>
>
> > diff --git a/kern/src/process.c b/kern/src/process.c
> > index bd26789e3ef1..2e4c69518e8a 100644
> > --- a/kern/src/process.c
> > +++ b/kern/src/process.c
> > @@ -331,6 +331,8 @@ error_t proc_alloc(struct proc **pp, struct proc
> *parent, int flags)
> > kmem_cache_free(proc_cache, p);
> > return -ENOFREEPID;
> > }
> > + if (parent && parent->binary_path)
> > + kstrdup(&p->binary_path, parent->binary_path);
>
> Is this to fix a leak where paths weren't set for forked processes? If
> so, this
> is an example of a fixup that could have been squashed into a previous
> commit.
>
> > --- a/kern/src/profiler.c
> > +++ b/kern/src/profiler.c
>
> > +static inline char* vb_encode_uint64(char* data, uint64_t n)
> > +{
> > + for (; n >= 0x80; n >>= 7)
> > + *data++ = (char) (n | 0x80);
> > + *data++ = (char) n;
> > +
> > + return data;
> > +}
>
> This probably should be in a library or header (so we can reuse it) and
> with a
> little explanation. It looks like we take 7 bits of n at a time and push
> it
> into *data, with bit 0x80 signifying we're done or not.
>
>
> > +static struct block *profiler_buffer_write(struct profiler_cpu_context
> *cpu_buf,
> > +
> struct block *b)
> > {
> > - return (((uint64_t) 0xee01) << 48) | ((uint64_t) cpu << 16) |
> > - (uint64_t) nbt;
> > + if (b) {
> > + qibwrite(profiler_queue, b);
> > +
> > + if (qlen(profiler_queue) > profiler_queue_limit) {
> > + b = qget(profiler_queue);
> > + if (likely(b)) {
> > + cpu_buf->dropped_data_size += BLEN(b);
> > + freeb(b);
> > + }
>
> This seems like a candidate for a feature added to qio, such as a "drop
> from the
> front" mode.
>
> > + }
> > + }
> > +
> > + return iallocb(profiler_cpu_buffer_size);
> > }
>
> This function seems a little weird. If you're given a block, you write it.
> That makes sense. But regardless, you alloc a new block?
>
> Seems like we should not be too cute by having write return a fresh block,
> and
> just do the allocation separately.
>
> > +static void profiler_emit_current_system_status(void)
> > {
> > - struct block *b = cpu_buf->block;
> > - size_t totalsize = sizeof(struct op_sample) +
> > - size * sizeof(entry->sample->data[0]);
> > -
> > - if (unlikely((!b) || (b->lim - b->wp) < totalsize)) {
> > - if (b)
> > - qibwrite(profiler_queue, b);
> > - /* For now. Later, we will grab a block off the
> > - * emptyblock queue.
> > - */
> > - cpu_buf->block = b = iallocb(profiler_cpu_buffer_size);
> > - if (unlikely(!b)) {
> > - printk("%s: fail\n", __func__);
> > - return NULL;
> > + void enum_proc(struct vm_region *vmr, void *opaque)
> > + {
> > + struct proc *p = (struct proc *) opaque;
> > +
> > + profiler_notify_mmap(p, vmr->vm_base, vmr->vm_end -
> vmr->vm_base,
> > + vmr->vm_prot,
> vmr->vm_flags, vmr->vm_file,
> > + vmr->vm_foff);
> > + }
> > +
> > + ERRSTACK(2);
>
> I think you only need 1 here. It's based on the number of waserrors.
>
> > + struct process_set pset;
> > +
> > + proc_get_set(&pset);
> > + if (waserror()) {
> > + proc_free_set(&pset);
> > + nexterror();
> > + }
> > +
> > + for (size_t i = 0; i < pset.num_processes; i++)
> > + enumerate_vrms(pset.procs[i], enum_proc, pset.procs[i]);
> ^vmrs
>
> > + poperror();
> > + proc_free_set(&pset);
> > +}
> > +
> > +static inline int profiler_is_tracing(struct profiler_cpu_context
> *cpu_buf)
>
> Since this is named "is_tracing", it should be a bool, and return
> TRUE/False to
> avoid any confusion.
>
>
> > -static inline int profiler_add_sample(struct profiler_cpu_context
> *cpu_buf,
> > -
> uintptr_t pc, unsigned long event)
> > +static void alloc_cpu_buffers(void)
> > {
> > ERRSTACK(1);
> > - struct op_entry entry;
> > - struct block *b;
> > + int i;
> >
> > + profiler_queue = qopen(profiler_queue_limit, 0, NULL, NULL);
> > + if (!profiler_queue)
> > + error(-ENOMEM, NULL);
> ^just ENOMEM
>
> > if (waserror()) {
> > - poperror();
> > - printk("%s: failed\n", __func__);
> > - return 1;
> > + free_cpu_buffers();
> > + nexterror();
> > }
> >
> > - b = profiler_cpu_buffer_write_reserve(cpu_buf, &entry, 0);
> > - if (likely(b)) {
> > - entry.sample->hdr = profiler_create_header(core_id(), 1);
> > - entry.sample->event = (uint64_t) event;
> > - profiler_cpu_buffer_add_data(&entry, &pc, 1);
> > - }
> > - poperror();
> > + qdropoverflow(profiler_queue, 1);
> > + qnonblock(profiler_queue, 1);
>
> As mentioned in a previous patch, you might not want qnonblock.
>
> Incidentally, qnonblock and qdropoverflow take a bool, not an int. So TRUE
> instead of 1.
>
> > + profiler_percpu_ctx =
> > + kzmalloc(sizeof(*profiler_percpu_ctx) * num_cores,
> KMALLOC_WAIT);
> > + if (!profiler_percpu_ctx)
> > + error(-ENOMEM, NULL);
>
> Since you did a KMALLOC_WAIT, you will never have !profiler_percpu_ctx
>
> > -static inline void profiler_begin_trace(struct profiler_cpu_context
> *cpu_buf)
> > +int profiler_configure(struct cmdbuf *cb)
> > {
> > - cpu_buf->tracing = 1;
> > + if (!strcmp(cb->f[0], "prof_qlimit")) {
> > + if (cb->nf < 2)
> > + error(EFAIL, "prof_qlimit KB");
> > + profiler_queue_limit = atoi(cb->f[1]) * 1024;
>
> Probably want a sanity check. Also, what happens when this changes after
> the
> queue was already allocated?
>
> > + } else if (!strcmp(cb->f[0], "prof_cpubufsz")) {
> > + if (cb->nf < 2)
> > + error(EFAIL, "prof_cpubufsz KB");
> > + profiler_cpu_buffer_size = atoi(cb->f[1]) * 1024;
>
> Is there any danger with the user setting this to be a very small value
> (like
> 0)? It looks like the assumption in profiler_cpu_buffer_write_reserve()
> is
> that a fresh allocation (done by profiler_buffer_write()) is enough for
> size
> bytes.
>
> > + } else if (!strcmp(cb->f[0], "prof_btdepth")) {
> > + if (cb->nf < 2)
> > + error(EFAIL, "prof_btdepth DEPTH");
> > + profiler_backtrace_depth = atoi(cb->f[1]);
>
> It is dangerous to have the user control this. It's a stack allocation.
> Even
> if it wasn't, we'd need a sanity check of some sort.
>
> > +void profiler_cleanup(void)
> > +{
> > + sem_down(&profiler_mtx);
> > + profiler_users--;
> > + if (profiler_users == 0)
> > + free_cpu_buffers();
> > + sem_up(&profiler_mtx);
> > }
>
> I'm still concerned about this. If the only source of profiling data is
> from
> the timer IRQs, then your current stuff seems fine. (You disable the
> timers,
> and thus the handlers, before freeing this stuff.) But if we ever have any
> other use of this, then we'll need to be careful.
>
> But there's more to it than that. The assumption here is that once
> profiler_users == 0, then there is no code, (RKM, pending IRQ, etc.) that
> has
> access to profiler_percpu_ctx or profiler_queue.
>
> Just as an example, say someone is calling profiler_notify_mmap(), which
> checks
> profiler_percpu_ctx. They get past the if () check, then concurrently
> someone
> calls profiler_cleanup and triggers free_cpu_buffers. Then the original
> thread
> eventually calls profiler_push_pid_mmap, then qwrite, then page faults.
>
> So a user could theoretically trigger a PF. This is part of the reason
> why I
> was reluctant to have you try and free the buffers.
>
> > @@ -572,6 +571,7 @@ static int sys_proc_create(struct proc *p, char
> *path, size_t path_l,
> > user_memdup_free(p, kargenv);
> > __proc_ready(new_p);
> > pid = new_p->pid;
> > + profiler_notify_new_process(new_p);
> > proc_decref(new_p); /* give up the reference created in
> proc_create() */
> > return pid;
> > error_load_elf:
> > @@ -728,6 +728,7 @@ static ssize_t sys_fork(env_t* e)
> >
> > printd("[PID %d] fork PID %d\n", e->pid, env->pid);
> > ret = env->pid;
> > + profiler_notify_new_process(env);
> > proc_decref(env); /* give up the reference created in
> proc_alloc() */
> > return ret;
> > }
>
> Do you need to update things when a process changes its binary path?
> (exec)
>
> > From c0ab4ec0d729ad8e0555b3beef72340f90c23712 Mon Sep 17 00:00:00 2001
> > From: Davide Libenzi <[email protected]>
> > Date: Sat, 7 Nov 2015 18:47:16 -0800
> > Subject: Enabled /prof/kptrace collection of anything which goes into
> cprintf
> >
> > Enabled /prof/kptrace collection of anything which goes into cprintf,
> > printk, and its associates.
> > The kptrace collector is a circular buffer whose default size is 128KB.
>
> What happens when the ring is full? New stuff drops? My main concern
> here is
> that trace_vprintk could somehow cause an issue and hold up the real
> printk,
> which would be hard to debug.
>
> > --- a/kern/drivers/dev/kprof.c
> > +++ b/kern/drivers/dev/kprof.c
> > @@ -19,6 +19,7 @@
> > #include <error.h>
> > #include <pmap.h>
> > #include <smp.h>
> > +#include <time.h>
> > #include <circular_buffer.h>
> > #include <umem.h>
> > #include <profiler.h>
> > @@ -62,7 +63,7 @@ struct dirtab kproftab[] = {
> > {"mpstat-raw", {Kmpstatrawqid}, 0, 0600},
> > };
> >
> > -extern int booting;
> > +extern system_timing_t system_timing;
>
> Should be able to get system_timing from a header.
>
> > static struct kprof kprof;
> > static bool ktrace_init_done = FALSE;
> > static spinlock_t ktrace_lock = SPINLOCK_INITIALIZER_IRQSAVE;
> > @@ -567,7 +568,7 @@ static struct trace_printk_buffer
> *kprof_get_printk_buffer(void)
> > static struct trace_printk_buffer boot_tpb;
> > static struct trace_printk_buffer *cpu_tpbs;
> >
> > - if (unlikely(booting))
> > + if (unlikely(!num_cores))
> > return &boot_tpb;
>
> That seems a little odd. I'd think if we're still booting, we'd use the
> boot_tpb. Was there some corner case that triggered this? I understand
> this
> one:
>
> > - if (likely(!booting))
> > + if (likely(system_timing.tsc_freq))
> > tsc2timespec(read_tsc(), &ts_now);
>
> --
> You received this message because you are subscribed to the Google Groups
> "Akaros" group.
> To unsubscribe from this group and stop receiving emails from it, send an
> email to [email protected].
> To post to this group, send email to [email protected].
> For more options, visit https://groups.google.com/d/optout.
>
--
You received this message because you are subscribed to the Google Groups
"Akaros" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To post to this group, send email to [email protected].
For more options, visit https://groups.google.com/d/optout.