The event doesn't limit the number of outstanding jobs. It's like a fence - you can either wait for it or not. The limitation is that you must have exactly one event instance for each active job. util_queue_fence is a better name IMO. I don't plan to extend the feature set beyond that.
Marek On Jun 14, 2016 1:26 PM, "Nicolai Hähnle" <[email protected]> wrote: > On 13.06.2016 19:34, Marek Olšák wrote: > >> From: Marek Olšák <[email protected]> >> >> --- >> src/gallium/auxiliary/Makefile.sources | 2 + >> src/gallium/auxiliary/util/u_queue.c | 129 >> ++++++++++++++++++++++++++ >> src/gallium/auxiliary/util/u_queue.h | 80 ++++++++++++++++ >> src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 23 ++--- >> src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 4 +- >> src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 63 +------------ >> src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 11 +-- >> 7 files changed, 229 insertions(+), 83 deletions(-) >> create mode 100644 src/gallium/auxiliary/util/u_queue.c >> create mode 100644 src/gallium/auxiliary/util/u_queue.h >> >> diff --git a/src/gallium/auxiliary/Makefile.sources >> b/src/gallium/auxiliary/Makefile.sources >> index 7b3853e..ab58358 100644 >> --- a/src/gallium/auxiliary/Makefile.sources >> +++ b/src/gallium/auxiliary/Makefile.sources >> @@ -274,6 +274,8 @@ C_SOURCES := \ >> util/u_pstipple.c \ >> util/u_pstipple.h \ >> util/u_pwr8.h \ >> + util/u_queue.c \ >> + util/u_queue.h \ >> util/u_range.h \ >> util/u_rect.h \ >> util/u_resource.c \ >> diff --git a/src/gallium/auxiliary/util/u_queue.c >> b/src/gallium/auxiliary/util/u_queue.c >> new file mode 100644 >> index 0000000..311b591 >> --- /dev/null >> +++ b/src/gallium/auxiliary/util/u_queue.c >> @@ -0,0 +1,129 @@ >> +/* >> + * Copyright © 2016 Advanced Micro Devices, Inc. >> + * All Rights Reserved. 
>> + * >> + * Permission is hereby granted, free of charge, to any person obtaining >> + * a copy of this software and associated documentation files (the >> + * "Software"), to deal in the Software without restriction, including >> + * without limitation the rights to use, copy, modify, merge, publish, >> + * distribute, sub license, and/or sell copies of the Software, and to >> + * permit persons to whom the Software is furnished to do so, subject to >> + * the following conditions: >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES >> + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND >> + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS >> + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE >> + * USE OR OTHER DEALINGS IN THE SOFTWARE. >> + * >> + * The above copyright notice and this permission notice (including the >> + * next paragraph) shall be included in all copies or substantial >> portions >> + * of the Software. 
>> + */ >> + >> +#include "u_queue.h" >> + >> +static PIPE_THREAD_ROUTINE(util_queue_thread_func, param) >> +{ >> + struct util_queue *queue = (struct util_queue*)param; >> + unsigned i; >> + >> + while (1) { >> + struct util_queue_job job; >> + >> + pipe_semaphore_wait(&queue->queued); >> + if (queue->kill_thread) >> + break; >> + >> + pipe_mutex_lock(queue->lock); >> + job = queue->jobs[0]; >> + for (i = 1; i < queue->num_jobs; i++) >> + queue->jobs[i - 1] = queue->jobs[i]; >> + queue->jobs[--queue->num_jobs].job = NULL; >> + pipe_mutex_unlock(queue->lock); >> + >> + pipe_semaphore_signal(&queue->has_space); >> + >> + if (job.job) { >> + queue->execute_job(job.job); >> + pipe_semaphore_signal(&job.event->done); >> + } >> + } >> + >> + /* signal remaining jobs before terminating */ >> + pipe_mutex_lock(queue->lock); >> + for (i = 0; i < queue->num_jobs; i++) { >> + pipe_semaphore_signal(&queue->jobs[i].event->done); >> + queue->jobs[i].job = NULL; >> + } >> + queue->num_jobs = 0; >> + pipe_mutex_unlock(queue->lock); >> + return 0; >> +} >> + >> +void >> +util_queue_init(struct util_queue *queue, >> + void (*execute_job)(void *)) >> +{ >> + memset(queue, 0, sizeof(*queue)); >> + queue->execute_job = execute_job; >> + pipe_mutex_init(queue->lock); >> + pipe_semaphore_init(&queue->has_space, ARRAY_SIZE(queue->jobs)); >> + pipe_semaphore_init(&queue->queued, 0); >> + queue->thread = pipe_thread_create(util_queue_thread_func, queue); >> +} >> + >> +void >> +util_queue_destroy(struct util_queue *queue) >> +{ >> + queue->kill_thread = 1; >> + pipe_semaphore_signal(&queue->queued); >> + pipe_thread_wait(queue->thread); >> + pipe_semaphore_destroy(&queue->has_space); >> + pipe_semaphore_destroy(&queue->queued); >> + pipe_mutex_destroy(queue->lock); >> +} >> + >> +void >> +util_queue_event_init(struct util_queue_event *event) >> +{ >> + pipe_semaphore_init(&event->done, 1); >> +} >> + >> +void >> +util_queue_event_destroy(struct util_queue_event *event) >> +{ >> + 
pipe_semaphore_destroy(&event->done); >> +} >> + >> +void >> +util_queue_add_job(struct util_queue *queue, >> + void *job, >> + struct util_queue_event *event) >> +{ >> + /* Set the semaphore to "busy". */ >> + pipe_semaphore_wait(&event->done); >> + >> + /* if the queue is full, wait until there is space */ >> + pipe_semaphore_wait(&queue->has_space); >> + >> + pipe_mutex_lock(queue->lock); >> + assert(queue->num_jobs < ARRAY_SIZE(queue->jobs)); >> + queue->jobs[queue->num_jobs].job = job; >> + queue->jobs[queue->num_jobs].event = event; >> + queue->num_jobs++; >> + pipe_mutex_unlock(queue->lock); >> + pipe_semaphore_signal(&queue->queued); >> +} >> + >> +void >> +util_queue_job_wait(struct util_queue_event *event) >> +{ >> + /* wait and set the semaphore to "busy" */ >> + pipe_semaphore_wait(&event->done); >> + /* set the semaphore to "idle" */ >> + pipe_semaphore_signal(&event->done); >> +} >> diff --git a/src/gallium/auxiliary/util/u_queue.h >> b/src/gallium/auxiliary/util/u_queue.h >> new file mode 100644 >> index 0000000..b7c1f44 >> --- /dev/null >> +++ b/src/gallium/auxiliary/util/u_queue.h >> @@ -0,0 +1,80 @@ >> +/* >> + * Copyright © 2016 Advanced Micro Devices, Inc. >> + * All Rights Reserved. >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining >> + * a copy of this software and associated documentation files (the >> + * "Software"), to deal in the Software without restriction, including >> + * without limitation the rights to use, copy, modify, merge, publish, >> + * distribute, sub license, and/or sell copies of the Software, and to >> + * permit persons to whom the Software is furnished to do so, subject to >> + * the following conditions: >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES >> + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND >> + * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS >> + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE >> + * USE OR OTHER DEALINGS IN THE SOFTWARE. >> + * >> + * The above copyright notice and this permission notice (including the >> + * next paragraph) shall be included in all copies or substantial >> portions >> + * of the Software. >> + */ >> + >> +/* Job queue with execution in a separate thread. >> + * >> + * Jobs can be added from any thread. After that, the wait call can be >> used >> + * to wait for completion of the job. >> + */ >> + >> +#ifndef U_QUEUE_H >> +#define U_QUEUE_H >> + >> +#include "os/os_thread.h" >> + >> +/* Job completion event. >> + * Put this into your job structure. >> + */ >> +struct util_queue_event { >> + pipe_semaphore done; >> +}; >> + >> +struct util_queue_job { >> + void *job; >> + struct util_queue_event *event; >> +}; >> + >> +/* Put this into your context. */ >> +struct util_queue { >> + pipe_mutex lock; >> + pipe_semaphore has_space; >> + pipe_semaphore queued; >> + pipe_thread thread; >> + int kill_thread; >> + int num_jobs; >> + struct util_queue_job jobs[8]; >> + void (*execute_job)(void *job); >> +}; >> + >> +void util_queue_init(struct util_queue *queue, >> + void (*execute_job)(void *)); >> +void util_queue_destroy(struct util_queue *queue); >> +void util_queue_event_init(struct util_queue_event *event); >> +void util_queue_event_destroy(struct util_queue_event *event); >> + >> +void util_queue_add_job(struct util_queue *queue, >> + void *job, >> + struct util_queue_event *event); >> +void util_queue_job_wait(struct util_queue_event *event); >> > > I think the util_queue_event part of the interface is basically impossible > to understand without knowledge of the code. 
It does two things: > > - limit the number of outstanding jobs > - wait for outstanding jobs to finish > > I think it should be called util_queue_writer for this reason (or perhaps > _submitter, but _writer is shorter): > > void util_queue_writer_init(struct util_queue_writer *writer); > void util_queue_writer_destroy(struct util_queue_writer *writer); > void util_queue_writer_wait(struct util_queue_writer *writer); > > void util_queue_add_job(struct util_queue *queue, > struct util_queue_writer *writer, > void *job); > > This would also logically allow a future extension where each writer can > have multiple jobs outstanding. In that case, the _writer structure would > live outside the job structure. > > [An alternative that occurs to me now is to rename util_queue_event to > util_queue_job and expect users of this utility to use that as a "base > struct" for their job structure. That would simplify util_queue_add_job to > take only one parameter. I think both alternatives have some merit.] 
> > Nicolai > > + >> +/* util_queue needs to be cleared to zeroes for this to work */ >> +static inline bool >> +util_queue_is_initialized(struct util_queue *queue) >> +{ >> + return queue->thread != 0; >> +} >> + >> +#endif >> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c >> b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c >> index fefa5d6..737f0c4 100644 >> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c >> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c >> @@ -605,7 +605,7 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx, >> return NULL; >> } >> >> - pipe_semaphore_init(&cs->flush_completed, 1); >> + util_queue_event_init(&cs->flush_completed); >> >> cs->ctx = ctx; >> cs->flush_cs = flush; >> @@ -872,8 +872,9 @@ static void amdgpu_add_fence_dependencies(struct >> amdgpu_cs *acs) >> } >> } >> >> -void amdgpu_cs_submit_ib(struct amdgpu_cs *acs) >> +void amdgpu_cs_submit_ib(void *job) >> { >> + struct amdgpu_cs *acs = (struct amdgpu_cs*)job; >> struct amdgpu_winsys *ws = acs->ctx->ws; >> struct amdgpu_cs_context *cs = acs->cst; >> int i, r; >> @@ -957,14 +958,11 @@ cleanup: >> void amdgpu_cs_sync_flush(struct radeon_winsys_cs *rcs) >> { >> struct amdgpu_cs *cs = amdgpu_cs(rcs); >> + struct amdgpu_winsys *ws = cs->ctx->ws; >> >> /* Wait for any pending ioctl of this CS to complete. */ >> - if (cs->ctx->ws->thread) { >> - /* wait and set the semaphore to "busy" */ >> - pipe_semaphore_wait(&cs->flush_completed); >> - /* set the semaphore to "idle" */ >> - pipe_semaphore_signal(&cs->flush_completed); >> - } >> + if (util_queue_is_initialized(&ws->cs_queue)) >> + util_queue_job_wait(&cs->flush_completed); >> } >> >> DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE) >> @@ -1052,10 +1050,9 @@ static void amdgpu_cs_flush(struct >> radeon_winsys_cs *rcs, >> cs->cst = cur; >> >> /* Submit. */ >> - if (ws->thread && (flags & RADEON_FLUSH_ASYNC)) { >> - /* Set the semaphore to "busy". 
*/ >> - pipe_semaphore_wait(&cs->flush_completed); >> - amdgpu_ws_queue_cs(ws, cs); >> + if ((flags & RADEON_FLUSH_ASYNC) && >> + util_queue_is_initialized(&ws->cs_queue)) { >> + util_queue_add_job(&ws->cs_queue, cs, &cs->flush_completed); >> } else { >> amdgpu_cs_submit_ib(cs); >> } >> @@ -1077,7 +1074,7 @@ static void amdgpu_cs_destroy(struct >> radeon_winsys_cs *rcs) >> struct amdgpu_cs *cs = amdgpu_cs(rcs); >> >> amdgpu_cs_sync_flush(rcs); >> - pipe_semaphore_destroy(&cs->flush_completed); >> + util_queue_event_destroy(&cs->flush_completed); >> p_atomic_dec(&cs->ctx->ws->num_cs); >> pb_reference(&cs->main.big_ib_buffer, NULL); >> FREE(cs->main.base.prev); >> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h >> b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h >> index cc1516c..ff50345 100644 >> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h >> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h >> @@ -111,7 +111,7 @@ struct amdgpu_cs { >> void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle >> **fence); >> void *flush_data; >> >> - pipe_semaphore flush_completed; >> + struct util_queue_event flush_completed; >> }; >> >> struct amdgpu_fence { >> @@ -218,6 +218,6 @@ bool amdgpu_fence_wait(struct pipe_fence_handle >> *fence, uint64_t timeout, >> bool absolute); >> void amdgpu_cs_sync_flush(struct radeon_winsys_cs *rcs); >> void amdgpu_cs_init_functions(struct amdgpu_winsys *ws); >> -void amdgpu_cs_submit_ib(struct amdgpu_cs *cs); >> +void amdgpu_cs_submit_ib(void *job); >> >> #endif >> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c >> b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c >> index 7016221..7ef3529 100644 >> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c >> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c >> @@ -308,14 +308,9 @@ static void amdgpu_winsys_destroy(struct >> radeon_winsys *rws) >> { >> struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws; >> >> - if (ws->thread) { >> - ws->kill_thread = 1; >> - 
pipe_semaphore_signal(&ws->cs_queued); >> - pipe_thread_wait(ws->thread); >> - } >> - pipe_semaphore_destroy(&ws->cs_queue_has_space); >> - pipe_semaphore_destroy(&ws->cs_queued); >> - pipe_mutex_destroy(ws->cs_queue_lock); >> + if (util_queue_is_initialized(&ws->cs_queue)) >> + util_queue_destroy(&ws->cs_queue); >> + >> pipe_mutex_destroy(ws->bo_fence_lock); >> pb_cache_deinit(&ws->bo_cache); >> pipe_mutex_destroy(ws->global_bo_list_lock); >> @@ -400,53 +395,7 @@ static int compare_dev(void *key1, void *key2) >> return key1 != key2; >> } >> >> -void amdgpu_ws_queue_cs(struct amdgpu_winsys *ws, struct amdgpu_cs *cs) >> -{ >> - pipe_semaphore_wait(&ws->cs_queue_has_space); >> - >> - pipe_mutex_lock(ws->cs_queue_lock); >> - assert(ws->num_enqueued_cs < ARRAY_SIZE(ws->cs_queue)); >> - ws->cs_queue[ws->num_enqueued_cs++] = cs; >> - pipe_mutex_unlock(ws->cs_queue_lock); >> - pipe_semaphore_signal(&ws->cs_queued); >> -} >> - >> -static PIPE_THREAD_ROUTINE(amdgpu_cs_thread_func, param) >> -{ >> - struct amdgpu_winsys *ws = (struct amdgpu_winsys *)param; >> - struct amdgpu_cs *cs; >> - unsigned i; >> - >> - while (1) { >> - pipe_semaphore_wait(&ws->cs_queued); >> - if (ws->kill_thread) >> - break; >> - >> - pipe_mutex_lock(ws->cs_queue_lock); >> - cs = ws->cs_queue[0]; >> - for (i = 1; i < ws->num_enqueued_cs; i++) >> - ws->cs_queue[i - 1] = ws->cs_queue[i]; >> - ws->cs_queue[--ws->num_enqueued_cs] = NULL; >> - pipe_mutex_unlock(ws->cs_queue_lock); >> - >> - pipe_semaphore_signal(&ws->cs_queue_has_space); >> - >> - if (cs) { >> - amdgpu_cs_submit_ib(cs); >> - pipe_semaphore_signal(&cs->flush_completed); >> - } >> - } >> - pipe_mutex_lock(ws->cs_queue_lock); >> - for (i = 0; i < ws->num_enqueued_cs; i++) { >> - pipe_semaphore_signal(&ws->cs_queue[i]->flush_completed); >> - ws->cs_queue[i] = NULL; >> - } >> - pipe_mutex_unlock(ws->cs_queue_lock); >> - return 0; >> -} >> - >> DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE) >> -static 
PIPE_THREAD_ROUTINE(amdgpu_cs_thread_func, param); >> >> static bool amdgpu_winsys_unref(struct radeon_winsys *rws) >> { >> @@ -541,14 +490,10 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t >> screen_create) >> >> LIST_INITHEAD(&ws->global_bo_list); >> pipe_mutex_init(ws->global_bo_list_lock); >> - pipe_mutex_init(ws->cs_queue_lock); >> pipe_mutex_init(ws->bo_fence_lock); >> >> - pipe_semaphore_init(&ws->cs_queue_has_space, >> ARRAY_SIZE(ws->cs_queue)); >> - pipe_semaphore_init(&ws->cs_queued, 0); >> - >> if (sysconf(_SC_NPROCESSORS_ONLN) > 1 && debug_get_option_thread()) >> - ws->thread = pipe_thread_create(amdgpu_cs_thread_func, ws); >> + util_queue_init(&ws->cs_queue, amdgpu_cs_submit_ib); >> >> /* Create the screen at the end. The winsys must be initialized >> * completely. >> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h >> b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h >> index d6734f7..b13a17e 100644 >> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h >> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h >> @@ -35,7 +35,7 @@ >> #include "pipebuffer/pb_cache.h" >> #include "gallium/drivers/radeon/radeon_winsys.h" >> #include "addrlib/addrinterface.h" >> -#include "os/os_thread.h" >> +#include "util/u_queue.h" >> #include <amdgpu.h> >> >> struct amdgpu_cs; >> @@ -59,13 +59,7 @@ struct amdgpu_winsys { >> struct radeon_info info; >> >> /* multithreaded IB submission */ >> - pipe_mutex cs_queue_lock; >> - pipe_semaphore cs_queue_has_space; >> - pipe_semaphore cs_queued; >> - pipe_thread thread; >> - int kill_thread; >> - int num_enqueued_cs; >> - struct amdgpu_cs *cs_queue[8]; >> + struct util_queue cs_queue; >> >> struct amdgpu_gpu_info amdinfo; >> ADDR_HANDLE addrlib; >> @@ -84,7 +78,6 @@ amdgpu_winsys(struct radeon_winsys *base) >> return (struct amdgpu_winsys*)base; >> } >> >> -void amdgpu_ws_queue_cs(struct amdgpu_winsys *ws, struct amdgpu_cs *cs); >> void amdgpu_surface_init_functions(struct amdgpu_winsys *ws); >> ADDR_HANDLE 
amdgpu_addr_create(struct amdgpu_winsys *ws); >> >> >>
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
