On Thu, Jan 23, 2025 at 11:17:34PM +0100, Peter Zijlstra wrote:
> On Thu, Jan 23, 2025 at 11:48:07AM -0800, Josh Poimboeuf wrote:
> > > > >       cookie = READ_ONCE(current->unwind_info.cookie);
> > > > >       do {
> > > > >               if (cookie)
> > > > >                       return cookie;
> > > > >               cookie = ctx_to_cookie(cpu, ctr+1);
> > > > >       } while (!try_cmpxchg64(&current->unwind_info.cookie, &cookie, 
> > > > > cookie));
> > 
> > Should not the 2nd argument be &zero?
> 
> This I suppose
> 
>       cookie = READ_ONCE(current->unwind_info.cookie);
>       do {
>               if (cookie)
>                       return cookie;
>       } while (!try_cmpxchg64(&current->unwind_info.cookie, &cookie,
>                               ctx_to_cookie(cpu, ctr+1)));

Here's what I have at the moment.  unwind_deferred_request() is vastly
simplified thanks to your suggestions, no more NMI-specific muck.
work->pending is replaced with work->cookie so the task work can use it
in case it crossed the cookie-clearing boundary (return-to-user with
IRQs disabled).

static inline bool nmi_supported(void)
{
        return IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG);
}

/*
 * Read the task context cookie, initializing it if this is the first request
 * since the most recent entry from user.
 */
static u64 get_cookie(void)
{
        struct unwind_task_info *info = &current->unwind_info;
        u64 ctr, cookie, old_cookie = 0;
        unsigned int cpu;

        cookie = READ_ONCE(info->cookie);
        if (cookie)
                return cookie;

        guard(irqsave)();

        cpu = raw_smp_processor_id();
        ctr = __this_cpu_read(unwind_ctx_ctr);

        cookie = ctx_to_cookie(cpu, ++ctr);

        /* Make sure the cookie is non-zero. */
        if (!cookie)
                cookie = ctx_to_cookie(cpu, ++ctr);

        /* Write the task cookie unless an NMI just now swooped in to do so. */
        if (!try_cmpxchg64(&info->cookie, &old_cookie, cookie))
                return old_cookie;

        /* Success, update the context counter. */
        __this_cpu_write(unwind_ctx_ctr, ctr);

        return cookie;
}

/*
 * Schedule a user space unwind to be done in task work before exiting the
 * kernel.
 *
 * The returned cookie output is a unique identifer for the current task entry
 * context.  Its value will also be passed to the callback function.  It can be
 * used to stitch kernel and user stack traces together in post-processing.
 *
 * It's valid to call this function multiple times for the same @work within
 * the same task entry context.  Each call will return the same cookie.  If the
 * callback is already pending, an error will be returned along with the
 * cookie.  If the callback is not pending because it has already been
 * previously called for the same entry context, it will be called again with
 * the same stack trace and cookie.
 *
 * Thus are three possible return scenarios:
 *
 *   * return != 0, *cookie == 0: the operation failed, no pending callback.
 *
 *   * return != 0, *cookie != 0: the callback is already pending. The cookie
 *     can still be used to correlate with the pending callback.
 *
 *   * return == 0, *cookie != 0: the callback queued successfully.  The
 *     callback is guaranteed to be called with the given cookie.
 */
int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
{
        enum task_work_notify_mode mode = in_nmi() ? TWA_NMI_CURRENT : 
TWA_RESUME;
        u64 zero = 0;
        int ret;

        *cookie = 0;

        if (!current->mm || !user_mode(task_pt_regs(current)))
                return -EINVAL;

        if (!nmi_supported() && in_nmi())
                return -EINVAL;

        /* Return a valid @cookie even if the callback is already pending. */
        *cookie = READ_ONCE(work->cookie);
        if (*cookie)
                return -EEXIST;

        *cookie = get_cookie();

        /* Claim the work unless an NMI just now swooped in to do so. */
        if (!try_cmpxchg64(&work->cookie, &zero, *cookie))
                return -EEXIST;

        /* The work has been claimed, now schedule it. */
        ret = task_work_add(current, &work->work, mode);
        if (WARN_ON_ONCE(ret)) {
                WRITE_ONCE(work->cookie, 0);
                return ret;
        }

        return 0;
}

-- 
Josh

Reply via email to