Module: Mesa Branch: main Commit: f36fd4ac4c631cfac2e40566f7644a2a401f6ee5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f36fd4ac4c631cfac2e40566f7644a2a401f6ee5
Author: Boris Brezillon <[email protected]> Date: Fri Aug 6 14:03:25 2021 +0200 panfrost: Prepare scoreboard helpers to per-gen XML Signed-off-by: Boris Brezillon <[email protected]> Reviewed-by: Alyssa Rosenzweig <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12935> --- src/panfrost/lib/meson.build | 1 - src/panfrost/lib/pan_scoreboard.c | 221 -------------------------------------- src/panfrost/lib/pan_scoreboard.h | 203 +++++++++++++++++++++++++++++++--- 3 files changed, 191 insertions(+), 234 deletions(-) diff --git a/src/panfrost/lib/meson.build b/src/panfrost/lib/meson.build index bd7d35d5d64..522149466b3 100644 --- a/src/panfrost/lib/meson.build +++ b/src/panfrost/lib/meson.build @@ -90,7 +90,6 @@ libpanfrost_lib_files = files( 'pan_tiler.c', 'pan_texture.c', 'pan_shader.c', - 'pan_scoreboard.c', 'pan_scratch.c', 'pan_props.c', 'pan_util.c', diff --git a/src/panfrost/lib/pan_scoreboard.c b/src/panfrost/lib/pan_scoreboard.c deleted file mode 100644 index a904f05de4a..00000000000 --- a/src/panfrost/lib/pan_scoreboard.c +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Copyright (C) 2019 Collabora, Ltd. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <string.h> -#include "pan_scoreboard.h" -#include "pan_device.h" -#include "panfrost-quirks.h" - -/* - * There are various types of Mali jobs: - * - * - WRITE_VALUE: generic write primitive, used to zero tiler field - * - VERTEX: runs a vertex shader - * - TILER: runs tiling and sets up a fragment shader - * - FRAGMENT: runs fragment shaders and writes out - * - COMPUTE: runs a compute shader - * - FUSED: vertex+tiler fused together, implicit intradependency (Bifrost) - * - GEOMETRY: runs a geometry shader (unimplemented) - * - CACHE_FLUSH: unseen in the wild, theoretically cache flush - * - * In between a full batch and a single Mali job is the "job chain", a series - * of Mali jobs together forming a linked list. Within the job chain, each Mali - * job can set (up to) two dependencies on other earlier jobs in the chain. - * This dependency graph forms a scoreboard. The general idea of a scoreboard - * applies: when there is a data dependency of job B on job A, job B sets one - * of its dependency indices to job A, ensuring that job B won't start until - * job A finishes. - * - * More specifically, here are a set of rules: - * - * - A write value job must appear if and only if there is at least one tiler - * job, and tiler jobs must depend on it. - * - * - Vertex jobs and tiler jobs are independent. - * - * - A tiler job must have a dependency on its data source. If it's getting - * data from a vertex job, it depends on the vertex job. If it's getting data - * from software, this is null. - * - * - Tiler jobs must depend on the write value job (chained or otherwise). - * - * - Tiler jobs must be strictly ordered. So each tiler job must depend on the - * previous job in the chain. - * - * - Jobs linking via next_job has no bearing on order of execution, rather it - * just establishes the linked list of jobs, EXCEPT: - * - * - A job's dependencies must appear earlier in the linked list (job chain). - * - * Justification for each rule: - * - * - Write value jobs are used to write a zero into a magic tiling field, which - * enables tiling to work. If tiling occurs, they are needed; if it does not, - * we cannot emit them since then tiling partially occurs and it's bad. - * - * - The hardware has no notion of a "vertex/tiler job" (at least not our - * hardware -- other revs have fused jobs, but --- crap, this just got even - * more complicated). They are independent units that take in data, process - * it, and spit out data. - * - * - Any job must depend on its data source, in fact, or risk a - * read-before-write hazard. Tiler jobs get their data from vertex jobs, ergo - * tiler jobs depend on the corresponding vertex job (if it's there). - * - * - The tiler is not thread-safe; this dependency prevents race conditions - * between two different jobs trying to write to the tiler outputs at the - * same time. - * - * - Internally, jobs are scoreboarded; the next job fields just form a linked - * list to allow the jobs to be read in; the execution order is from - * resolving the dependency fields instead. - * - * - The hardware cannot set a dependency on a job it doesn't know about yet, - * and dependencies are processed in-order of the next job fields. - * - */ - -/* Generates, uploads, and queues a a new job. All fields are written in order - * except for next_job accounting (TODO: Should we be clever and defer the - * upload of the header here until next job to keep the access pattern totally - * linear? Or is that just a micro op at this point?). Returns the generated - * index for dep management. - * - * Inject is used to inject a job at the front, for wallpapering. If you are - * not wallpapering and set this, dragons will eat you. */ - -unsigned -panfrost_add_job( - struct pan_pool *pool, - struct pan_scoreboard *scoreboard, - enum mali_job_type type, - bool barrier, bool suppress_prefetch, - unsigned local_dep, unsigned global_dep, - const struct panfrost_ptr *job, - bool inject) -{ - if (type == MALI_JOB_TYPE_TILER) { - /* Tiler jobs must be chained, and on Midgard, the first tiler - * job must depend on the write value job, whose index we - * reserve now */ - - if (!pan_is_bifrost(pool->dev) && !scoreboard->write_value_index) - scoreboard->write_value_index = ++scoreboard->job_index; - - if (scoreboard->tiler_dep && !inject) - global_dep = scoreboard->tiler_dep; - else if (!pan_is_bifrost(pool->dev)) - global_dep = scoreboard->write_value_index; - } - - /* Assign the index */ - unsigned index = ++scoreboard->job_index; - - pan_pack(job->cpu, JOB_HEADER, header) { - header.type = type; - header.barrier = barrier; - header.suppress_prefetch = suppress_prefetch; - header.index = index; - header.dependency_1 = local_dep; - header.dependency_2 = global_dep; - - if (inject) - header.next = scoreboard->first_job; - } - - if (inject) { - assert(type == MALI_JOB_TYPE_TILER && "only for blit shaders"); - - if (scoreboard->first_tiler) { - /* Manual update of the dep2 field. This is bad, - * don't copy this pattern. - */ - scoreboard->first_tiler->opaque[5] = - scoreboard->first_tiler_dep1 | (index << 16); - } - - scoreboard->first_tiler = (void *)job->cpu; - scoreboard->first_tiler_dep1 = local_dep; - scoreboard->first_job = job->gpu; - return index; - } - - /* Form a chain */ - if (type == MALI_JOB_TYPE_TILER) { - if (!scoreboard->first_tiler) { - scoreboard->first_tiler = (void *)job->cpu; - scoreboard->first_tiler_dep1 = local_dep; - } - scoreboard->tiler_dep = index; - } - - if (scoreboard->prev_job) { - /* Manual update of the next pointer. This is bad, don't copy - * this pattern. - * TODO: Find a way to defer last job header emission until we - * have a new job to queue or the batch is ready for execution. - */ - scoreboard->prev_job->opaque[6] = job->gpu; - scoreboard->prev_job->opaque[7] = job->gpu >> 32; - } else { - scoreboard->first_job = job->gpu; - } - - scoreboard->prev_job = (struct mali_job_header_packed *)job->cpu; - return index; -} - -/* Generates a write value job, used to initialize the tiler structures. Note - * this is called right before frame submission. */ - -struct panfrost_ptr -panfrost_scoreboard_initialize_tiler(struct pan_pool *pool, - struct pan_scoreboard *scoreboard, - mali_ptr polygon_list) -{ - struct panfrost_ptr transfer = { 0 }; - - /* Check if we even need tiling */ - if (pan_is_bifrost(pool->dev) || !scoreboard->first_tiler) - return transfer; - - /* Okay, we do. Let's generate it. We'll need the job's polygon list - * regardless of size. */ - - transfer = pan_pool_alloc_desc(pool, WRITE_VALUE_JOB); - - pan_section_pack(transfer.cpu, WRITE_VALUE_JOB, HEADER, header) { - header.type = MALI_JOB_TYPE_WRITE_VALUE; - header.index = scoreboard->write_value_index; - header.next = scoreboard->first_job; - } - - pan_section_pack(transfer.cpu, WRITE_VALUE_JOB, PAYLOAD, payload) { - payload.address = polygon_list; - payload.type = MALI_WRITE_VALUE_TYPE_ZERO; - } - - scoreboard->first_job = transfer.gpu; - return transfer; -} diff --git a/src/panfrost/lib/pan_scoreboard.h b/src/panfrost/lib/pan_scoreboard.h index 78d5945a925..ada142843b8 100644 --- a/src/panfrost/lib/pan_scoreboard.h +++ b/src/panfrost/lib/pan_scoreboard.h @@ -54,19 +54,198 @@ struct pan_scoreboard { unsigned write_value_index; }; -unsigned -panfrost_add_job( - struct pan_pool *pool, - struct pan_scoreboard *scoreboard, - enum mali_job_type type, - bool barrier, bool suppress_prefetch, - unsigned local_dep, unsigned global_dep, - const struct panfrost_ptr *job, - bool inject); - -struct panfrost_ptr +#ifdef PAN_ARCH +/* + * There are various types of Mali jobs: + * + * - WRITE_VALUE: generic write primitive, used to zero tiler field + * - VERTEX: runs a vertex shader + * - TILER: runs tiling and sets up a fragment shader + * - FRAGMENT: runs fragment shaders and writes out + * - COMPUTE: runs a compute shader + * - FUSED: vertex+tiler fused together, implicit intradependency (Bifrost) + * - GEOMETRY: runs a geometry shader (unimplemented) + * - CACHE_FLUSH: unseen in the wild, theoretically cache flush + * + * In between a full batch and a single Mali job is the "job chain", a series + * of Mali jobs together forming a linked list. Within the job chain, each Mali + * job can set (up to) two dependencies on other earlier jobs in the chain. + * This dependency graph forms a scoreboard. The general idea of a scoreboard + * applies: when there is a data dependency of job B on job A, job B sets one + * of its dependency indices to job A, ensuring that job B won't start until + * job A finishes. + * + * More specifically, here are a set of rules: + * + * - A write value job must appear if and only if there is at least one tiler + * job, and tiler jobs must depend on it. + * + * - Vertex jobs and tiler jobs are independent. + * + * - A tiler job must have a dependency on its data source. If it's getting + * data from a vertex job, it depends on the vertex job. If it's getting data + * from software, this is null. + * + * - Tiler jobs must depend on the write value job (chained or otherwise). + * + * - Tiler jobs must be strictly ordered. So each tiler job must depend on the + * previous job in the chain. + * + * - Jobs linking via next_job has no bearing on order of execution, rather it + * just establishes the linked list of jobs, EXCEPT: + * + * - A job's dependencies must appear earlier in the linked list (job chain). + * + * Justification for each rule: + * + * - Write value jobs are used to write a zero into a magic tiling field, which + * enables tiling to work. If tiling occurs, they are needed; if it does not, + * we cannot emit them since then tiling partially occurs and it's bad. + * + * - The hardware has no notion of a "vertex/tiler job" (at least not our + * hardware -- other revs have fused jobs, but --- crap, this just got even + * more complicated). They are independent units that take in data, process + * it, and spit out data. + * + * - Any job must depend on its data source, in fact, or risk a + * read-before-write hazard. Tiler jobs get their data from vertex jobs, ergo + * tiler jobs depend on the corresponding vertex job (if it's there). + * + * - The tiler is not thread-safe; this dependency prevents race conditions + * between two different jobs trying to write to the tiler outputs at the + * same time. + * + * - Internally, jobs are scoreboarded; the next job fields just form a linked + * list to allow the jobs to be read in; the execution order is from + * resolving the dependency fields instead. + * + * - The hardware cannot set a dependency on a job it doesn't know about yet, + * and dependencies are processed in-order of the next job fields. + * + */ + +/* Generates, uploads, and queues a a new job. All fields are written in order + * except for next_job accounting (TODO: Should we be clever and defer the + * upload of the header here until next job to keep the access pattern totally + * linear? Or is that just a micro op at this point?). Returns the generated + * index for dep management. + * + * Inject is used to inject a job at the front, for wallpapering. If you are + * not wallpapering and set this, dragons will eat you. */ + +static inline unsigned +panfrost_add_job(struct pan_pool *pool, + struct pan_scoreboard *scoreboard, + enum mali_job_type type, + bool barrier, bool suppress_prefetch, + unsigned local_dep, unsigned global_dep, + const struct panfrost_ptr *job, + bool inject) +{ + if (type == MALI_JOB_TYPE_TILER) { + /* Tiler jobs must be chained, and on Midgard, the first tiler + * job must depend on the write value job, whose index we + * reserve now */ + + if (PAN_ARCH <= 5 && !scoreboard->write_value_index) + scoreboard->write_value_index = ++scoreboard->job_index; + + if (scoreboard->tiler_dep && !inject) + global_dep = scoreboard->tiler_dep; + else if (PAN_ARCH <= 5) + global_dep = scoreboard->write_value_index; + } + + /* Assign the index */ + unsigned index = ++scoreboard->job_index; + + pan_pack(job->cpu, JOB_HEADER, header) { + header.type = type; + header.barrier = barrier; + header.suppress_prefetch = suppress_prefetch; + header.index = index; + header.dependency_1 = local_dep; + header.dependency_2 = global_dep; + + if (inject) + header.next = scoreboard->first_job; + } + + if (inject) { + assert(type == MALI_JOB_TYPE_TILER && "only for blit shaders"); + + if (scoreboard->first_tiler) { + /* Manual update of the dep2 field. This is bad, + * don't copy this pattern. + */ + scoreboard->first_tiler->opaque[5] = + scoreboard->first_tiler_dep1 | (index << 16); + } + + scoreboard->first_tiler = (void *)job->cpu; + scoreboard->first_tiler_dep1 = local_dep; + scoreboard->first_job = job->gpu; + return index; + } + + /* Form a chain */ + if (type == MALI_JOB_TYPE_TILER) { + if (!scoreboard->first_tiler) { + scoreboard->first_tiler = (void *)job->cpu; + scoreboard->first_tiler_dep1 = local_dep; + } + scoreboard->tiler_dep = index; + } + + if (scoreboard->prev_job) { + /* Manual update of the next pointer. This is bad, don't copy + * this pattern. + * TODO: Find a way to defer last job header emission until we + * have a new job to queue or the batch is ready for execution. + */ + scoreboard->prev_job->opaque[6] = job->gpu; + scoreboard->prev_job->opaque[7] = job->gpu >> 32; + } else { + scoreboard->first_job = job->gpu; + } + + scoreboard->prev_job = (struct mali_job_header_packed *)job->cpu; + return index; +} + +/* Generates a write value job, used to initialize the tiler structures. Note + * this is called right before frame submission. */ + +static inline struct panfrost_ptr panfrost_scoreboard_initialize_tiler(struct pan_pool *pool, struct pan_scoreboard *scoreboard, - mali_ptr polygon_list); + mali_ptr polygon_list) +{ + struct panfrost_ptr transfer = { 0 }; + + /* Check if we even need tiling */ + if (PAN_ARCH >= 6 || !scoreboard->first_tiler) + return transfer; + + /* Okay, we do. Let's generate it. We'll need the job's polygon list + * regardless of size. */ + + transfer = pan_pool_alloc_desc(pool, WRITE_VALUE_JOB); + + pan_section_pack(transfer.cpu, WRITE_VALUE_JOB, HEADER, header) { + header.type = MALI_JOB_TYPE_WRITE_VALUE; + header.index = scoreboard->write_value_index; + header.next = scoreboard->first_job; + } + + pan_section_pack(transfer.cpu, WRITE_VALUE_JOB, PAYLOAD, payload) { + payload.address = polygon_list; + payload.type = MALI_WRITE_VALUE_TYPE_ZERO; + } + + scoreboard->first_job = transfer.gpu; + return transfer; +} +#endif /* PAN_ARCH */ #endif
