On Mon, Jun 22, 2009 at 01:49:31PM +0200, Ingo Molnar wrote:
> > How do you justify your usage of ioctl() in this context?
> 
> We can certainly do a separate sys_perf_counter_ctrl() syscall - and
> we will do that if people think the extra syscall slot is worth it
> in this case.
> 
> The (mild) counter-argument so far was that the current ioctls are
> very simple over "IO" attributes of counters:
> 
>  - enable
>  - disable
>  - reset
>  - refresh
>  - set-period
> 
> So they could be considered 'IO controls' in the classic sense and
> act as a (mild) exception to the 'dont use ioctls' rule.
> 
> They are not some weird tacked-on syscall functionality - they
> modify the IO properties of counters: on/off, value and rate. If
> they go beyond that we'll put it all into a separate syscall and
> deprecate the ioctl (which will have a relatively short half-time
> due to the tools being hosted in the kernel repo).
> 
> This could happen right now in fact, if people think it's worth it.

Yet another multiplexer doesn't buy us anything over ioctls unless it
adds more structure.  PERF_COUNTER_IOC_ENABLE/PERF_COUNTER_IOC_DISABLE/
PERF_COUNTER_IOC_RESET are calls without any argument, so it's kinda
impossible to add more structure.  perf_counter_refresh has an integer
argument, and perf_counter_period as well (with a slightly more
complicated calling convention due to passing a pointer to the 64bit
integer).  I don't see how moving this to syscalls would improve
things.

But talking about syscalls, the sys_perf_counter_open prototype is
really ugly - it uses either the pid or cpu argument, which is a pretty
clear indicator that it should actually be two syscalls.

Incomplete patch without touching the actual wire-up below to
demonstrate it:


Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c        2009-06-22 14:43:35.323966162 
+0200
+++ linux-2.6/kernel/perf_counter.c     2009-06-22 14:57:30.223807475 +0200
@@ -1396,41 +1396,14 @@ __perf_counter_init_context(struct perf_
        ctx->task = task;
 }
 
-static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
+static struct perf_counter_context *find_get_pid_context(pid_t pid)
 {
        struct perf_counter_context *parent_ctx;
        struct perf_counter_context *ctx;
-       struct perf_cpu_context *cpuctx;
        struct task_struct *task;
        unsigned long flags;
        int err;
 
-       /*
-        * If cpu is not a wildcard then this is a percpu counter:
-        */
-       if (cpu != -1) {
-               /* Must be root to operate on a CPU counter: */
-               if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-                       return ERR_PTR(-EACCES);
-
-               if (cpu < 0 || cpu > num_possible_cpus())
-                       return ERR_PTR(-EINVAL);
-
-               /*
-                * We could be clever and allow to attach a counter to an
-                * offline CPU and activate it when the CPU comes up, but
-                * that's for later.
-                */
-               if (!cpu_isset(cpu, cpu_online_map))
-                       return ERR_PTR(-ENODEV);
-
-               cpuctx = &per_cpu(perf_cpu_context, cpu);
-               ctx = &cpuctx->ctx;
-               get_ctx(ctx);
-
-               return ctx;
-       }
-
        rcu_read_lock();
        if (!pid)
                task = current;
@@ -3727,6 +3700,16 @@ static int perf_copy_attr(struct perf_co
        if (attr->read_format & ~(PERF_FORMAT_MAX-1))
                return -EINVAL;
 
+       if (!attr->exclude_kernel) {
+               if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+                       return -EACCES;
+       }
+
+       if (attr->freq) {
+               if (attr->sample_freq > sysctl_perf_counter_sample_rate)
+                       return -EINVAL;
+       }
+
 out:
        return ret;
 
@@ -3736,52 +3719,16 @@ err_size:
        goto out;
 }
 
-/**
- * sys_perf_counter_open - open a performance counter, associate it to a 
task/cpu
- *
- * @attr_uptr: event type attributes for monitoring/sampling
- * @pid:               target pid
- * @cpu:               target cpu
- * @group_fd:          group leader counter fd
- */
-SYSCALL_DEFINE5(perf_counter_open,
-               struct perf_counter_attr __user *, attr_uptr,
-               pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
+static int do_perf_counter_open(struct perf_counter_attr *attr,
+               struct perf_counter_context *ctx, int cpu, int group_fd)
 {
        struct perf_counter *counter, *group_leader;
-       struct perf_counter_attr attr;
-       struct perf_counter_context *ctx;
        struct file *counter_file = NULL;
        struct file *group_file = NULL;
        int fput_needed = 0;
        int fput_needed2 = 0;
        int ret;
 
-       /* for future expandability... */
-       if (flags)
-               return -EINVAL;
-
-       ret = perf_copy_attr(attr_uptr, &attr);
-       if (ret)
-               return ret;
-
-       if (!attr.exclude_kernel) {
-               if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
-                       return -EACCES;
-       }
-
-       if (attr.freq) {
-               if (attr.sample_freq > sysctl_perf_counter_sample_rate)
-                       return -EINVAL;
-       }
-
-       /*
-        * Get the target context (task or percpu):
-        */
-       ctx = find_get_context(pid, cpu);
-       if (IS_ERR(ctx))
-               return PTR_ERR(ctx);
-
        /*
         * Look up the group leader (we will attach this counter to it):
         */
@@ -3810,11 +3757,11 @@ SYSCALL_DEFINE5(perf_counter_open,
                /*
                 * Only a group leader can be exclusive or pinned
                 */
-               if (attr.exclusive || attr.pinned)
+               if (attr->exclusive || attr->pinned)
                        goto err_put_context;
        }
 
-       counter = perf_counter_alloc(&attr, cpu, ctx, group_leader,
+       counter = perf_counter_alloc(attr, cpu, ctx, group_leader,
                                     GFP_KERNEL);
        ret = PTR_ERR(counter);
        if (IS_ERR(counter))
@@ -3857,6 +3804,68 @@ err_put_context:
        goto out_fput;
 }
 
+SYSCALL_DEFINE4(perf_counter_open_pid,
+               struct perf_counter_attr __user *, attr_uptr,
+               pid_t, pid, int, group_fd, unsigned long, flags)
+{
+       struct perf_counter_attr attr;
+       struct perf_counter_context *ctx;
+       int ret;
+
+       /* for future expandability... */
+       if (flags)
+               return -EINVAL;
+
+       ret = perf_copy_attr(attr_uptr, &attr);
+       if (ret)
+               return ret;
+
+       ctx = find_get_pid_context(pid);
+       if (IS_ERR(ctx))
+               return PTR_ERR(ctx);
+
+       return do_perf_counter_open(&attr, ctx, -1, group_fd);
+}
+
+SYSCALL_DEFINE4(perf_counter_open_cpu,
+               struct perf_counter_attr __user *, attr_uptr,
+               int, cpu, int, group_fd, unsigned long, flags)
+{
+       struct perf_counter_attr attr;
+       struct perf_counter_context *ctx;
+       struct perf_cpu_context *cpuctx;
+       int ret;
+
+       /* for future expandability... */
+       if (flags)
+               return -EINVAL;
+
+       ret = perf_copy_attr(attr_uptr, &attr);
+       if (ret)
+               return ret;
+
+       /* Must be root to operate on a CPU counter: */
+       if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+               return -EACCES;
+
+       if (cpu < 0 || cpu > num_possible_cpus())
+               return -EINVAL;
+
+       /*
+        * We could be clever and allow to attach a counter to an
+        * offline CPU and activate it when the CPU comes up, but
+        * that's for later.
+        */
+       if (!cpu_isset(cpu, cpu_online_map))
+               return -ENODEV;
+
+       cpuctx = &per_cpu(perf_cpu_context, cpu);
+       ctx = &cpuctx->ctx;
+       get_ctx(ctx);
+
+       return do_perf_counter_open(&attr, ctx, cpu, group_fd);
+}
+
 /*
  * inherit a counter from parent task to child task:
  */
@@ -4027,7 +4036,7 @@ void perf_counter_exit_task(struct task_
        __perf_counter_task_sched_out(child_ctx);
 
        /*
-        * Take the context lock here so that if find_get_context is
+        * Take the context lock here so that if find_get_pid_context is
         * reading child->perf_counter_ctxp, we wait until it has
         * incremented the context's refcount before we do put_ctx below.
         */

> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
---end quoted text---

------------------------------------------------------------------------------
Are you an open source citizen? Join us for the Open Source Bridge conference!
Portland, OR, June 17-19. Two days of sessions, one day of unconference: $250.
Need another reason to go? 24-hour hacker lounge. Register today!
http://ad.doubleclick.net/clk;215844324;13503038;v?http://opensourcebridge.org
_______________________________________________
perfmon2-devel mailing list
perfmon2-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/perfmon2-devel

Reply via email to