On Mon, Jan 26 2026 at 17:33, Yuwen Chen wrote:
> This test item has extremely high requirements for timing and can only

Extremely high?

The main thread waits for 10000us, aka 10 milliseconds, to allow the
waiter thread to reach futex_wait().
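
IIRC that's this in futex_requeue.c:

        usleep(WAKE_WAIT_US);	/* WAKE_WAIT_US == 10000, i.e. 10ms */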

If anything is extreme then it's the 10 milliseconds wait, not the
requirements. Please write factual changelogs and not fairy tales.

> pass the test under specific conditions. The following situations will
> lead to test failure:
>
>     MainThread                  Thread1
>         │
>   pthread_create-------------------┐
>         │                          │
>  futex_cmp_requeue                 │
>         │                     futex_wait
>         │                          │
>
> If the child thread is not waiting in the futex_wait function when the
> main thread reaches the futex_cmp_requeue function, the test will
> fail.

That's a known issue for all futex selftests when the test system is
under extreme load. That's why there is a gracious 10ms wait, which is
annoyingly long already.

Also, why is this special for the requeue_single test case?

It's exactly the same issue for all futex selftests, including the
multi-waiter one in the very same file, no?

> This patch avoids this problem by checking whether the child thread is
> in a sleeping state in the main thread.

# git grep 'This patch' Documentation/process

>  volatile futex_t *f1;
> +static pthread_barrier_t barrier;
>  
>  void *waiterfn(void *arg)
>  {
>       struct timespec to;
> +     atomic_int *tid = (atomic_int *)arg;

https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#coding-style-notes

All over the place.

> -     to.tv_sec = 0;
> -     to.tv_nsec = timeout_ns;
> +     to.tv_sec = timeout_s;
> +     to.tv_nsec = 0;
> +
> +     atomic_store(tid, gettid());

Why do you need an atomic store here?

pthread_barrier_wait() is a full memory barrier already, no?
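
IOW, a plain variable published via the barrier should be sufficient,
e.g. (sketch; waiter_tid is made up for illustration):

        static pid_t waiter_tid;

        void *waiterfn(void *arg)
        {
                /* Plain store, ordered by the barrier below */
                waiter_tid = gettid();
                pthread_barrier_wait(&barrier);
                ...
        }

The main thread can read waiter_tid safely once its own
pthread_barrier_wait() returned.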

> +     pthread_barrier_wait(&barrier);
>  
>       if (futex_wait(f1, *f1, &to, 0))
>               printf("waiter failed errno %d\n", errno);
> @@ -29,22 +35,52 @@ void *waiterfn(void *arg)
>       return NULL;
>  }
>  
> +static int get_thread_state(pid_t pid)
> +{
> +     FILE *fp;
> +     char buf[80], tag[80];
> +     char val = 0;
> +
> +     snprintf(buf, sizeof(buf), "/proc/%d/status", pid);
> +     fp = fopen(buf, "r");
> +     if (!fp)
> +             return -1;
> +
> +     while (fgets(buf, sizeof(buf), fp))

Lacks curly braces on the while...

> +             if (sscanf(buf, "%s %c", tag, &val) == 2 && !strcmp(tag, "State:")) {
> +                     fclose(fp);
> +                     return val;
> +             }
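
IOW:

        while (fgets(buf, sizeof(buf), fp)) {
                if (sscanf(buf, "%s %c", tag, &val) == 2 &&
                    !strcmp(tag, "State:")) {
                        fclose(fp);
                        return val;
                }
        }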

What's wrong with reading /proc/$PID/wchan ?

It's exactly as (un)reliable as /proc/$PID/status because both can
report the desired state _before_ the thread reaches the inner workings
of the test-related sys_futex(..., FUTEX_WAIT), e.g. while the thread is
still blocked on some unrelated futex inside glibc.

> +     fclose(fp);
> +     return -1;
> +}
> +


>  TEST(requeue_single)
>  {
>       volatile futex_t _f1 = 0;
>       volatile futex_t f2 = 0;
>       pthread_t waiter[10];
> -     int res;
> +     atomic_int tid = 0;
> +     int res, state, retry = 100;
>  
>       f1 = &_f1;
> +     pthread_barrier_init(&barrier, NULL, 2);
>  
>       /*
>        * Requeue a waiter from f1 to f2, and wake f2.
>        */
> -     if (pthread_create(&waiter[0], NULL, waiterfn, NULL))
> +     if (pthread_create(&waiter[0], NULL, waiterfn, &tid))
>               ksft_exit_fail_msg("pthread_create failed\n");
>  
> -     usleep(WAKE_WAIT_US);
> +     pthread_barrier_wait(&barrier);
> +     pthread_barrier_destroy(&barrier);
> +     while ((state = get_thread_state(atomic_load(&tid))) != 'S') {
> +             usleep(WAKE_WAIT_US / 100);
> +
> +             if (state < 0 || retry-- <= 0)
> +                     break;
> +     }

That's a disgusting hack. Are you going to copy this stuff around into
_all_ futex selftests, which suffer from exactly the same problem?
Please grep for 'WAKE_WAIT_US' to see them all.
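
# git grep -l 'WAKE_WAIT_US' tools/testing/selftests/futex/functional/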

Something like the uncompiled below in a "library" C source which is
linked into every futex test case:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define WAIT_THREAD_RETRIES             100
#define WAIT_THREAD_DELAY_US            100

static int wait_for_thread(FILE *fp)
{
        char buf[80];

        for (int i = 0; i < WAIT_THREAD_RETRIES; i++) {
                if (!fgets(buf, sizeof(buf), fp))
                        return -EIO;
                /* Blocked in the kernel futex code? wchan starts with "futex" */
                if (!strncmp(buf, "futex", 5))
                        return 0;
                usleep(WAIT_THREAD_DELAY_US);
                /* Drop the stdio buffer so the next fgets() reads fresh data */
                rewind(fp);
        }
        return -ETIMEDOUT;
}

int futex_wait_for_thread(pid_t tid)
{
        char fname[80];
        FILE *fp;
        int res;

        snprintf(fname, sizeof(fname), "/proc/%d/wchan", tid);
        fp = fopen(fname, "r");
        if (!fp)
                return -EIO;
        res = wait_for_thread(fp);
        fclose(fp);
        return res;
}
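
Polling wchan is obviously still best effort, but at least it checks for
the thread actually being blocked on a futex instead of just being in
sleep state for whatever unrelated reason, and the retry loop puts an
upper bound on the wait.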

No?

While at it create a helper mechanism which avoids copying the whole
pthread_create()/barrier() muck around to every single test case:

struct thread_data {
        pthread_t               thread;
        pthread_barrier_t       barrier;
        pid_t                   tid;
        void                    (*threadfn)(void *);
        void                    *arg;
};

static void *futex_thread_fn(void *arg)
{
        struct thread_data *td = arg;

        /* gettid() requires _GNU_SOURCE and glibc >= 2.30 */
        td->tid = gettid();
        pthread_barrier_wait(&td->barrier);
        td->threadfn(td->arg);
        return NULL;
}

int futex_thread_create(struct thread_data *td, void (*threadfn)(void *),
                        void *arg)
{
        int ret;

        pthread_barrier_init(&td->barrier, NULL, 2);
        td->tid = 0;
        td->threadfn = threadfn;
        td->arg = arg;

        ret = pthread_create(&td->thread, NULL, futex_thread_fn, td);
        if (ret)
                return ret;

        /* Wait for the thread to publish its TID */
        pthread_barrier_wait(&td->barrier);
        pthread_barrier_destroy(&td->barrier);
        return futex_wait_for_thread(td->tid);
}

or something like that. That will at least fix all the futex muck and
w/o looking I'm sure that's something which other selftests might find
useful too.
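
Usage in a test case then boils down to something like this (sketch;
requeue_waiterfn is the existing waiterfn adapted to the void returning
signature):

        static void requeue_waiterfn(void *arg)
        {
                struct timespec to = { .tv_sec = 10 };

                if (futex_wait(f1, *f1, &to, 0))
                        printf("waiter failed errno %d\n", errno);
        }

        ...
        struct thread_data td;

        if (futex_thread_create(&td, requeue_waiterfn, NULL))
                ksft_exit_fail_msg("failed to start waiter\n");

        /* The waiter is known to sit in futex_wait(). No usleep() needed */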

The upside of such a change is that the futex selftests' runtime will be
significantly lower because the hideous 10ms waits can be avoided, which
is an actual improvement and not a made-up extreme requirement...

Thanks,

        tglx
