On Mon, 2026-02-23 at 16:25 +0000, Tvrtko Ursulin wrote:
> 
> On 19/02/2026 14:07, Marco Pagani wrote:
> > Add a new test suite to simulate concurrent job submissions to the DRM
> > scheduler. The suite includes two initial test cases: (i) a primary test
> > case for parallel job submission and (ii) a secondary test case for
> > interleaved job submission and completion. In the first test case, worker
> > threads concurrently submit jobs to the scheduler and then the timeline is
> > manually advanced. In the second test case, worker threads periodically
> > submit a sequence of jobs to the mock scheduler. Periods are chosen as
> > harmonic, starting from a base period, to allow interleaving between
> > submission and completion. To avoid impractically large execution times,
> > periods are cycled. The timeline is advanced automatically when the jobs
> > completes. This models how job submission from userspace (in process
> > context) may interleave with job completion (hrtimer callback interrupt
> > context, in the test case) by a physical device.

I still maintain the opinion I expressed last time: the commit
message should make explicit why the patch / test is added, which this
one doesn't do. It just says "We add X", but not "Currently, the problem
is that YZ, thus we need X".
(also breaking longer commit messages into paragraphs is nice)

Also see my comments about interleaved submits below.

> > 
> > Signed-off-by: Marco Pagani <[email protected]>
> > ---
> > Changes in v1:
> > - Split the original suite into two test cases (Tvrtko Ursulin).
> > - General test refactoring and variable renaming for clarity.
> > - Changed parameters to have a fairer workload distribution.
> > - Improved cleanup logic.
> > - Added kunit_info() prints for tracing workers.
> > ---
> >   drivers/gpu/drm/scheduler/tests/tests_basic.c | 338 ++++++++++++++++++
> >   1 file changed, 338 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c 
> > b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> > index 82a41a456b0a..391edcbaf43a 100644
> > --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
> > +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> > @@ -2,6 +2,8 @@
> >   /* Copyright (c) 2025 Valve Corporation */
> >   
> >   #include <linux/delay.h>
> > +#include <linux/completion.h>
> > +#include <linux/workqueue.h>
> >   
> >   #include "sched_tests.h"
> >   
> > @@ -235,6 +237,341 @@ static void drm_sched_basic_cancel(struct kunit *test)
> >     KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
> >   }
> >   
> > +struct sched_concurrent_context {
> > +   struct drm_mock_scheduler *sched;
> > +   struct workqueue_struct *sub_wq;
> > +   struct kunit *test;
> > +   struct completion wait_go;
> > +};
> > +
> > +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
> > +                       struct drm_mock_scheduler *);
> > +
> > +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, 
> > drm_mock_sched_entity_free,
> > +                       struct drm_mock_sched_entity *);
> > +
> > +static void complete_destroy_workqueue(void *context)
> > +{
> > +   struct sched_concurrent_context *ctx = (struct sched_concurrent_context 
> > *)context;
> > +
> > +   complete_all(&ctx->wait_go);
> > +

nit: do we need that empty line?

> > +   destroy_workqueue(ctx->sub_wq);
> > +}
> > +
> > +static int drm_sched_concurrent_init(struct kunit *test)
> > +{
> > +   struct sched_concurrent_context *ctx;
> > +   int ret;
> > +
> > +   ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
> > +   KUNIT_ASSERT_NOT_NULL(test, ctx);
> > +
> > +   init_completion(&ctx->wait_go);
> > +
> > +   ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
> > +
> > +   ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, 
> > ctx->sched);
> > +   KUNIT_ASSERT_EQ(test, ret, 0);
> > +
> > +   /* Use an unbounded workqueue to maximize job submission concurrency */
> > +   ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
> > +                                 WQ_UNBOUND_MAX_ACTIVE);
> > +   KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
> > +
> > +   ret = kunit_add_action_or_reset(test, complete_destroy_workqueue, ctx);
> > +   KUNIT_ASSERT_EQ(test, ret, 0);
> > +
> > +   ctx->test = test;
> > +   test->priv = ctx;
> > +
> > +   return 0;
> > +}
> > +
> > +struct drm_sched_parallel_params {
> > +   const char *description;
> > +   unsigned int num_jobs;
> > +   unsigned int num_workers;
> > +};
> > +
> > +static const struct drm_sched_parallel_params drm_sched_parallel_cases[] = 
> > {
> > +   {
> > +           .description = "Workers submitting multiple jobs against a 
> > single entity",
> 
> Each worker has own entity so the description is a bit confusing.

Do you have a suggestion for a better one?

> 
> > +           .num_jobs = 4,
> > +           .num_workers = 16,
> > +   },
> > +};
> > +
> > +static void
> > +drm_sched_parallel_desc(const struct drm_sched_parallel_params *params, 
> > char *desc)
> > +{
> > +   strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
> > +}
> > +
> > +KUNIT_ARRAY_PARAM(drm_sched_parallel, drm_sched_parallel_cases, 
> > drm_sched_parallel_desc);
> > +
> > +struct parallel_worker {
> > +   struct work_struct work;
> > +   struct sched_concurrent_context *ctx;
> > +   struct drm_mock_sched_entity *entity;
> > +   struct drm_mock_sched_job **jobs;
> > +   unsigned int id;
> > +};
> > +
> > +static void drm_sched_parallel_worker(struct work_struct *work)
> > +{
> > +   const struct drm_sched_parallel_params *params;
> > +   struct sched_concurrent_context *test_ctx;
> > +   struct parallel_worker *worker;
> > +   unsigned int i;
> > +
> > +   worker = container_of(work, struct parallel_worker, work);
> > +   test_ctx = worker->ctx;
> > +   params = test_ctx->test->param_value;
> > +
> > +   wait_for_completion(&test_ctx->wait_go);
> > +
> > +   kunit_info(test_ctx->test, "Parallel worker %u started\n", worker->id);
> > +
> > +   for (i = 0; i < params->num_jobs; i++)
> > +           drm_mock_sched_job_submit(worker->jobs[i]);
> > +}
> > +
> > +/*
> > + * Spawns workers that submit a sequence of jobs to the mock scheduler.
> > + * Once all jobs are submitted, the timeline is manually advanced.
> > + */
> > +static void drm_sched_parallel_submit_test(struct kunit *test)
> > +{
> > +   struct sched_concurrent_context *ctx = test->priv;
> > +   const struct drm_sched_parallel_params *params = test->param_value;
> > +   struct parallel_worker *workers;
> > +   unsigned int i, j, completed_jobs;
> > +   bool done;
> > +   int ret;
> > +
> > +   workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
> > +                           GFP_KERNEL);
> > +   KUNIT_ASSERT_NOT_NULL(test, workers);
> > +
> > +   /*
> > +    * Init workers only after all jobs and entities have been successfully
> > +    * allocated. In this way, the cleanup logic for when an assertion fail
> > +    * can be simplified.
> > +    */
> > +   for (i = 0; i < params->num_workers; i++) {
> > +           workers[i].id = i;
> > +           workers[i].ctx = ctx;
> > +           workers[i].entity = drm_mock_sched_entity_new(test,
> > +                                                         
> > DRM_SCHED_PRIORITY_NORMAL,
> > +                                                         ctx->sched);
> > +
> > +           ret = kunit_add_action_or_reset(test, 
> > drm_mock_sched_entity_free_wrap,
> > +                                           workers[i].entity);
> > +           KUNIT_ASSERT_EQ(test, ret, 0);
> > +
> > +           workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
> > +                                           sizeof(*workers[i].jobs), 
> > GFP_KERNEL);
> > +           KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
> > +
> > +           for (j = 0; j < params->num_jobs; j++)
> > +                   workers[i].jobs[j] = drm_mock_sched_job_new(test,
> > +                                                               
> > workers[i].entity);

The kernel doesn't strictly require the 80 column limit anymore. In
some cases it can make sense to exceed it a bit to avoid an ugly line
break.

Often one can make the code a bit more readable by creating a helper
variable such as

struct xy *pos = &workers[i];

You need workers[i] in many places, so that definitely seems handy.


> > +   }
> > +
> > +   for (i = 0; i < params->num_workers; i++) {
> > +           INIT_WORK(&workers[i].work, drm_sched_parallel_worker);
> > +           queue_work(ctx->sub_wq, &workers[i].work);
> > +   }
> > +
> > +   complete_all(&ctx->wait_go);
> > +   flush_workqueue(ctx->sub_wq);
> > +
> > +   for (i = 0; i < params->num_workers; i++) {
> > +           for (j = 0; j < params->num_jobs; j++) {
> > +                   done = 
> > drm_mock_sched_job_wait_scheduled(workers[i].jobs[j],
> > +                                                            HZ);

same

> > +                   KUNIT_ASSERT_TRUE(test, done);
> > +
> > +                   done = 
> > drm_mock_sched_job_is_finished(workers[i].jobs[j]);
> > +                   KUNIT_ASSERT_FALSE(test, done);
> 
> This second assert does not seem to be bringing much value, but instead 
> makes the reader ask themselves why it is there. Remove it?
> 
> Hmm in fact this whole loop could be removed. All that it is needed 
> below is to wait for the last job to be completed.

I suppose it's being tested whether all jobs are finished. That sounds
clean and not harmful to me.

> 
> > +           }
> > +   }
> > +
> > +   completed_jobs = drm_mock_sched_advance(ctx->sched,
> > +                                           params->num_workers * 
> > params->num_jobs);
> > +   KUNIT_ASSERT_EQ(test, completed_jobs, params->num_workers * 
> > params->num_jobs);

`params` doesn't change anymore at this point, so the lengthy
multiplication, which is needed in two places, can be made a const at
the top, making the code more readable.

> > +
> > +   for (i = 0; i < params->num_workers; i++) {
> > +           for (j = 0; j < params->num_jobs; j++) {
> > +                   done = 
> > drm_mock_sched_job_is_finished(workers[i].jobs[j]);
> > +                   KUNIT_ASSERT_TRUE(test, done);
> > +           }
> > +   }
> 
> So here, after advancing you just need to wait on the last job to complete.
> 
> Not a deal breaker to have it verbose but usually the less is better and 
> easier to spot the key thing being tested and what are the functional steps.

Depends a bit on what Marco intended. Looks like "better safe than
sorry" to me?

> 
> > +}
> > +
> > +struct drm_sched_interleaved_params {
> > +   const char *description;
> > +   unsigned int job_base_period_us;
> > +   unsigned int periods_cycle;
> > +   unsigned int num_jobs;
> > +   unsigned int num_workers;
> > +};
> > +
> > +static const struct drm_sched_interleaved_params 
> > drm_sched_interleaved_cases[] = {
> > +   {
> > +           .description = "Workers submitting single jobs against a single 
> > entity",
> 
> As with the parallel description.

I think (see again my comment about the commit message) what the test
wants to implement is threaded submission to an entity shared between
threads. That can certainly be made more verbose.

> 
> > +           .job_base_period_us = 100,
> > +           .periods_cycle = 10,
> > +           .num_jobs = 1,
> > +           .num_workers = 32,
> > +   },
> > +   {
> > +           .description = "Workers submitting multiple jobs against a 
> > single entity",
> > +           .job_base_period_us = 100,
> > +           .periods_cycle = 10,
> > +           .num_jobs = 4,
> > +           .num_workers = 16,
> > +   },
> > +};
> > +
> > +static void
> > +drm_sched_interleaved_desc(const struct drm_sched_interleaved_params 
> > *params, char *desc)
> > +{
> > +   strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
> > +}
> > +
> > +KUNIT_ARRAY_PARAM(drm_sched_interleaved, drm_sched_interleaved_cases,
> > +             drm_sched_interleaved_desc);
> > +
> > +struct interleaved_worker {
> > +   struct work_struct work;
> > +   struct sched_concurrent_context *ctx;
> > +   struct drm_mock_sched_entity *entity;
> > +   struct drm_mock_sched_job **jobs;
> > +   unsigned int id;
> > +   unsigned int period_us;
> > +   unsigned int timeout_us;
> > +};
> > +
> > +static void drm_sched_interleaved_worker(struct work_struct *work)
> > +{
> > +   const struct drm_sched_interleaved_params *params;
> > +   struct sched_concurrent_context *test_ctx;
> > +   struct interleaved_worker *worker;
> > +   unsigned int i;
> > +   bool done;
> > +
> > +   worker = container_of(work, struct interleaved_worker, work);
> > +   test_ctx = worker->ctx;
> > +   params = test_ctx->test->param_value;
> > +
> > +   wait_for_completion(&test_ctx->wait_go);
> > +
> > +   kunit_info(test_ctx->test, "Interleaved worker %u with period %u us 
> > started\n",
> > +              worker->id, worker->period_us);
> > +
> > +   /* Release jobs as a periodic sequence */
> > +   for (i = 0; i < params->num_jobs; i++) {
> > +           drm_mock_sched_job_set_duration_us(worker->jobs[i], 
> > worker->period_us);
> > +           drm_mock_sched_job_submit(worker->jobs[i]);
> > +
> > +           done = drm_mock_sched_job_wait_finished(worker->jobs[i],

worker->jobs[i] is used at 3 places and can be made a helper variable.

> > +                                                   
> > usecs_to_jiffies(worker->timeout_us));
> > +           if (!done)
> > +                   kunit_info(test_ctx->test, "Job %u of worker %u 
> > timedout\n",
> > +                              i, worker->id);
> > +   }
> > +}
> > +
> > +/*
> > + * Spawns workers that submit a periodic sequence of jobs to the mock 
> > scheduler.
> > + * Uses harmonic periods to allow interleaving and cycles through them to 
> > prevent
> > + * excessively large execution times.
> > 

What are "large execution times", like too large for the test
framework?

> >  Since the scheduler serializes jobs from all
> > + * workers, the timeout is set to the hyperperiod with a 2x safety factor. 
> > Entities
> > + * and jobs are pre-allocated in the main thread to avoid using KUnit 
> > assertions in
> > + * the workers.
> > + */
> > +static void drm_sched_interleaved_submit_test(struct kunit *test)

It appears to me that this is effectively a separate test, which might
want its own patch. What is the motivation behind adding it?

> > +{
> > +   struct sched_concurrent_context *ctx = test->priv;
> > +   const struct drm_sched_interleaved_params *params = test->param_value;
> > +   struct interleaved_worker *workers;
> > +   unsigned int period_max_us, timeout_us;
> > +   unsigned int i, j;
> > +   bool done;
> > +   int ret;
> > +
> > +   workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
> > +                           GFP_KERNEL);
> > +   KUNIT_ASSERT_NOT_NULL(test, workers);
> > +
> > +   period_max_us = params->job_base_period_us * (1 << 
> > params->periods_cycle);
> > +   timeout_us = params->num_workers * period_max_us * 2;
> > +
> > +   /*
> > +    * Init workers only after all jobs and entities have been successfully
> > +    * allocated. In this way, the cleanup logic for when an assertion fail
> > +    * can be simplified.
> > +    */
> > +   for (i = 0; i < params->num_workers; i++) {
> > +           workers[i].id = i;
> > +           workers[i].ctx = ctx;
> > +           workers[i].timeout_us = timeout_us;

helper variable for workers[i].


P.


> > +
> > +           if (i % params->periods_cycle == 0)
> > +                   workers[i].period_us = params->job_base_period_us;
> > +           else
> > +                   workers[i].period_us = workers[i - 1].period_us * 2;
> 
> Are the two if statements effectively equivalent to:
> 
>   period_us = params->job_base_period_us << (i % params->periods_cycle);
> 
> ?
> 
> Also, do you have an idea how to locally calculate a suitable 
> periods_cycle instead of having to come up with a number in the 
> drm_sched_interleaved_params array. Just strikes me as hard to know what 
> to put in there if someone would want to add a test.
> 
> > +
> > +           workers[i].entity = drm_mock_sched_entity_new(test,
> > +                                                         
> > DRM_SCHED_PRIORITY_NORMAL,
> > +                                                         ctx->sched);
> > +
> > +           ret = kunit_add_action_or_reset(test, 
> > drm_mock_sched_entity_free_wrap,
> > +                                           workers[i].entity);
> > +           KUNIT_ASSERT_EQ(test, ret, 0);
> > +
> > +           workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
> > +                                           sizeof(*workers[i].jobs), 
> > GFP_KERNEL);
> > +           KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
> > +
> > +           for (j = 0; j < params->num_jobs; j++) {
> > +                   workers[i].jobs[j] = drm_mock_sched_job_new(test,
> > +                                                               
> > workers[i].entity);
> > +                   done = 
> > drm_mock_sched_job_is_finished(workers[i].jobs[j]);
> > +                   KUNIT_ASSERT_FALSE(test, done);
> 
> Same as above, this is asserted by a basic test case so I'd just remove it.
> 
> > +           }
> > +   }
> > +
> > +   for (i = 0; i < params->num_workers; i++) {
> > +           INIT_WORK(&workers[i].work, drm_sched_interleaved_worker);
> > +           queue_work(ctx->sub_wq, &workers[i].work);
> > +   }
> > +
> > +   complete_all(&ctx->wait_go);
> > +   flush_workqueue(ctx->sub_wq);
> > +
> > +   for (i = 0; i < params->num_workers; i++) {
> > +           for (j = 0; j < params->num_jobs; j++) {
> > +                   done = 
> > drm_mock_sched_job_is_finished(workers[i].jobs[j]);
> > +                   KUNIT_ASSERT_TRUE(test, done);
> > +           }
> > +   }
> 
> Here as well you could wait just for the last job per worker.
> 
> On the overal submission pattern - Don't you end up with a very uneven 
> activity between the workers? Because the job duration is doubling and 
> workers are single-shot, once the low index workers are done they are
> done, and all that remains are the higher ones, and so the wave of fewer 
> and fewer active workers continues. Low index worker do not end up 
> racing with the job completions of the high index workers but only 
> between themselves, and even that with the cycle=10 workers=16 case is 
> even more limited.
> 
> Alternative approach could be to set a per test time budget and just 
> keep the workers submitting until over. It would be simpler to 
> understand and there would be more submit/complete overlap.
> 
> Regards,
> 
> Tvrtko
> 
> > +}
> > +
> > +static struct kunit_case drm_sched_concurrent_tests[] = {
> > +   KUNIT_CASE_PARAM(drm_sched_parallel_submit_test, 
> > drm_sched_parallel_gen_params),
> > +   KUNIT_CASE_PARAM(drm_sched_interleaved_submit_test, 
> > drm_sched_interleaved_gen_params),
> > +   {}
> > +};
> > +
> > +static struct kunit_suite drm_sched_concurrent = {
> > +   .name = "drm_sched_concurrent_tests",
> > +   .init = drm_sched_concurrent_init,
> > +   .test_cases = drm_sched_concurrent_tests,
> > +};
> > +
> >   static struct kunit_case drm_sched_cancel_tests[] = {
> >     KUNIT_CASE(drm_sched_basic_cancel),
> >     {}
> > @@ -556,6 +893,7 @@ static struct kunit_suite drm_sched_credits = {
> >   };
> >   
> >   kunit_test_suites(&drm_sched_basic,
> > +             &drm_sched_concurrent,
> >               &drm_sched_timeout,
> >               &drm_sched_cancel,
> >               &drm_sched_priority,
> 

Reply via email to