timerfd v1 - signalfd core ...

Oleg Nesterov Sat, 10 Mar 2007 07:54:23 -0800

Davide Libenzi wrote:
>
> +int signalfd_deliver(struct sighand_struct *sighand, int sig,
> +                  struct siginfo *info)
> +{
> +     int nsig = 0;
> +     struct list_head *pos;
> +     struct signalfd_ctx *ctx;
> +
> +     list_for_each(pos, &sighand->sfdlist) {
> +             ctx = list_entry(pos, struct signalfd_ctx, lnk);


list_for_each_entry()

> +             /*
> +              * We use a negative signal value as a way to broadcast that the
> +              * sighand has been orphaned, so that we can notify all the
> +              * listeners about this. Remeber the ctx->sigmask is inverted,
> +              * so if the user is interested in a signal, that corresponding
> +              * bit will be zero.
> +              */
> +             if (sig < 0 || !sigismember(&ctx->sigmask, sig)) {
> +                     __wake_up_locked(&ctx->wqh,
> +                                      TASK_UNINTERRUPTIBLE | 
> TASK_INTERRUPTIBLE);

wake_up_locked(&ctx->wqh)

> +asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t 
> sizemask)
> +{
> [...snip...]
> +     if (ufd == -1) {
> +             error = -ENOMEM;
> +             ctx = kmem_cache_alloc(signalfd_ctx_cachep, GFP_KERNEL);
> +             if (!ctx)
> +                     goto err_exit;
> +
> +             init_waitqueue_head(&ctx->wqh);
> +             ctx->sigmask = sigmask;
> +             ctx->tsk = current;
> +             get_task_struct(current);
> +
> +             /*
> +              * We also increment the sighand count to make sure
> +              * it doesn't go away from us in poll() when the task
> +              * exits (which can happen if the fd is passed to
> +              * another process with unix domain sockets.
> +              *
> +              * This also guarantees that an execve() will reallocate
> +              * the signal state, and thus avoids security concerns
> +              * with a untrusted process that passes off the signal
> +              * queue fd to another, and then does a suid execve.
> +              */
> +             ctx->sighand = current->sighand;
> +             atomic_inc(&ctx->sighand->count);

I personally don't like this. de_thread() was/is the source of numerous
problems, and this patch adds yet another subtle dependency. The usage of
"private" __cleanup_sighand() is not good per se, imho.

Also, this is not so flexible, we can't take S_ISUID into account. It seems
logical to preserve ctx after a "normal" exec.

I think, we don't need signalfd_ctx->sighand at all, please see below.

> +     } else {
> +             error = -EBADF;
> +             file = fget(ufd);
> +             if (!file)
> +                     goto err_exit;
> +             ctx = file->private_data;
> +             error = -EINVAL;
> +             if (file->f_op != &signalfd_fops) {
> +                     fput(file);
> +                     goto err_exit;
> +             }
> +             spin_lock_irq(&ctx->sighand->siglock);
> +             ctx->sigmask = sigmask;
> +             spin_unlock_irq(&ctx->sighand->siglock);
> +             wake_up(&ctx->wqh);

Race with signalfd_read()->__add_wait_queue().

> +static unsigned int signalfd_poll(struct file *file, poll_table *wait)
> +{
> +     struct signalfd_ctx *ctx = file->private_data;
> +     struct sighand_struct *sighand = ctx->sighand;
> +     unsigned int events = 0;
> +     unsigned long flags;
> +
> +     poll_wait(file, &ctx->wqh, wait);
> +
> +     spin_lock_irqsave(&sighand->siglock, flags);
> +     /*
> +      * Let the caller get a POLLIN in this case, ala socket recv() when
> +      * the peer disconnect. The check for the changed sighand must be
> +      * done before calling next_signal(), since if sighand changed, a call
> +      * to next_signal() would crash. It'd be possible to avoid grabbing a
> +      * lock by incrementing the tsk->signal count like we do with the
> +      * tsk->sighand, but the code in __exit_signal() assumes that the
> +      * tsk->signal can be freed only there, and this would require
> +      * some code restructuring that I'm living out at this time.
> +      */
> +     if (sighand != ctx->tsk->sighand || ctx->tsk->signal == NULL ||

We don't need "ctx->tsk->signal == NULL". tsk->signal == NULL (when checked
under ->siglock) implies tsk->sighand == NULL. This is covered by the first
"sighand != ctx->tsk->sighand"  check.

> +static ssize_t signalfd_read(struct file *file, char *buf, size_t count,
> +                          loff_t *ppos)
> +{
> [...snip...]
> +             for (;;) {
> +                     if (sighand != ctx->tsk->sighand) {
> +                             /*
> +                              * Let the caller read zero byte, ala socket 
> recv()
> +                              * when the peer disconnect. This test must be 
> done
> +                              * before doing a dequeue_signal(), because if 
> the
> +                              * task's sighand changed, a dequeue_signal() 
> is going
> +                              * to crash (tsk->signal is set to NULL).
> +                              */
> +                             res = 0;
> +                             break;
> +                     }
> +                     set_current_state(TASK_INTERRUPTIBLE);
> +                     if ((signo = dequeue_signal(ctx->tsk, &ctx->sigmask,
> +                                                 &info)) != 0)
> +                             break;
> +                     if (signal_pending(current)) {
> +                             res = -EINTR;

-ERESTARTSYS ?

> +                             break;
> +                     }
> +                     spin_unlock_irq(&sighand->siglock);
> +                     schedule();
> +                     spin_lock_irq(&sighand->siglock);
> +             }
> +             __remove_wait_queue(&ctx->wqh, &wait);
> +             set_current_state(TASK_RUNNING);

We don't need mb() here.

> +void signal_fill_info(struct siginfo *dinfo, int sig, struct siginfo *sinfo)
> +{
> +     switch ((unsigned long) sinfo) {
> +     case (unsigned long) SEND_SIG_NOINFO:
> +             dinfo->si_signo = sig;
> +             dinfo->si_errno = 0;
> +             dinfo->si_code = SI_USER;
> +             dinfo->si_pid = current->pid;
> +             dinfo->si_uid = current->uid;
> +             break;
> +     case (unsigned long) SEND_SIG_PRIV:
> +             dinfo->si_signo = sig;
> +             dinfo->si_errno = 0;
> +             dinfo->si_code = SI_KERNEL;
> +             dinfo->si_pid = 0;
> +             dinfo->si_uid = 0;
> +             break;
> +     default:
> +             copy_siginfo(dinfo, sinfo);
> +     }
> +}

This change seems unneeded?


I'd suggest to remove signalfd_ctx->sighand. de_thread()/exit_signal() call

        signalfd_exit_task(struct sighand_struct *sighand)
        {
                list_for_each_entry_safe(ctx, sighand->sfdlist)
                        if (ctx->tsk == current) {
                                wake_up_locked(ctx->wqh);
                                list_del_init(ctx->lnk);
                        }
        }

signalfd_read()/signalfd_poll use

        static struct sighand_struct *ctx_try_to_lock(struct signalfd_ctx *ctx, 
flags)
        {
                struct sighand_struct *ret;

                rcu_read_lock();
                ret = lock_task_sighand(ctx->task);
                rcu_read_unlock();

                if (ret && list_empty(ctx->lnk)) {
                        unlock_task_sighand(ctx->task);
                        ret = NULL;
                }

                return ret;
        }

instead of "spin_lock_irq(ctx->sighand)" + "if (ctx->sighand != 
ctx->tsk->sighand)".

Possible?

Note that signalfd_exit_task() is generic, could be used in another context,
de_thread() can avoid the call if !suid.

How about CONFIG_SIGNALFD, btw?

Davide, could you please cc me? I am not subscribed to lkml, noticed the new
version by accident.

Oleg.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch 2/9] signalfd/timerfd v1 - signalfd core ...

Reply via email to