[Intel-gfx] [PATCH 12/25] drm/i915/guc: Implement multi-lrc submission
Implement multi-lrc submission via a single workqueue entry and single H2G. The workqueue entry contains an updated tail value for each request, of all the contexts in the multi-lrc submission, and updates these values simultaneously. As such, the tasklet and bypass path have been updated to coalesce requests into a single submission. v2: (John Harrison) - s/wqe/wqi - Use FIELD_PREP macros - Add GEM_BUG_ONs to ensure length fits within field - Add comment / white space to intel_guc_write_barrier (Kernel test robot) - Make need_tasklet a static function v3: (Docs) - A comment for submission_stall_reason v4: (Kernel test robot) - Initialize return value in bypass tasklet submit function (John Harrison) - Add comment near work queue defs - Add BUILD_BUG_ON to ensure WQ_SIZE is a power of 2 - Update write_barrier comment to talk about work queue v5: (John Harrison) - Fix typo in work queue comment Reviewed-by: John Harrison Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gt/uc/intel_guc.c| 29 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.h| 11 + drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 24 +- drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 30 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 323 +++--- drivers/gpu/drm/i915/i915_request.h | 8 + 6 files changed, 350 insertions(+), 75 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 8f8182bf7c11..6e228343e8cb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -756,3 +756,32 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p) } } } + +void intel_guc_write_barrier(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + if (i915_gem_object_is_lmem(guc->ct.vma->obj)) { + /* +* Ensure intel_uncore_write_fw can be used rather than +* intel_uncore_write. +*/ + GEM_BUG_ON(guc->send_regs.fw_domains); + + /* +* This register is used by the i915 and GuC for MMIO based +* communication. 
Once we are in this code CTBs are the only +* method the i915 uses to communicate with the GuC so it is +* safe to write to this register (a value of 0 is NOP for MMIO +* communication). If we ever start mixing CTBs and MMIOs a new +* register will have to be chosen. This function is also used +* to enforce ordering of a work queue item write and an update +* to the process descriptor. When a work queue is being used, +* CTBs are also the only mechanism of communication. +*/ + intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0); + } else { + /* wmb() sufficient for a barrier if in smem */ + wmb(); + } +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 4ca197f400ba..31cf9fb48c7e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -46,6 +46,15 @@ struct intel_guc { * submitted until the stalled request is processed. */ struct i915_request *stalled_request; + /** +* @submission_stall_reason: reason why submission is stalled +*/ + enum { + STALL_NONE, + STALL_REGISTER_CONTEXT, + STALL_MOVE_LRC_TAIL, + STALL_ADD_REQUEST, + } submission_stall_reason; /* intel_guc_recv interrupt related state */ /** @irq_lock: protects GuC irq state */ @@ -367,4 +376,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc); void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); +void intel_guc_write_barrier(struct intel_guc *guc); + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 0a3504bc0b61..a0cc34be7b56 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -383,28 +383,6 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct) return ++ct->requests.last_fence; } -static void write_barrier(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_gt *gt = guc_to_gt(guc); - - if 
(i915_gem_object_is_lmem(guc->ct.vma->obj)) { - GEM_BUG_ON(guc->send_regs.fw_domains); - /* -* This register is used by the i915 and GuC for MMIO based -* communication. Once we are in this code CTBs are the only -* method the i915 uses to communicate with the GuC so it is -* safe to write to this register (a value of 0 is NOP for MMIO
Re: [Intel-gfx] [PATCH 12/25] drm/i915/guc: Implement multi-lrc submission
On 10/13/2021 13:42, Matthew Brost wrote: Implement multi-lrc submission via a single workqueue entry and single H2G. The workqueue entry contains an updated tail value for each request, of all the contexts in the multi-lrc submission, and updates these values simultaneously. As such, the tasklet and bypass path have been updated to coalesce requests into a single submission. v2: (John Harrison) - s/wqe/wqi - Use FIELD_PREP macros - Add GEM_BUG_ONs to ensure length fits within field - Add comment / white space to intel_guc_write_barrier (Kernel test robot) - Make need_tasklet a static function v3: (Docs) - A comment for submission_stall_reason v4: (Kernel test robot) - Initialize return value in bypass tasklet submit function (John Harrison) - Add comment near work queue defs - Add BUILD_BUG_ON to ensure WQ_SIZE is a power of 2 - Update write_barrier comment to talk about work queue Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gt/uc/intel_guc.c| 29 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.h| 11 + drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 24 +- drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 30 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 323 +++--- drivers/gpu/drm/i915/i915_request.h | 8 + 6 files changed, 350 insertions(+), 75 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 8f8182bf7c11..6e228343e8cb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -756,3 +756,32 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p) } } } + +void intel_guc_write_barrier(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + if (i915_gem_object_is_lmem(guc->ct.vma->obj)) { + /* +* Ensure intel_uncore_write_fw can be used rather than +* intel_uncore_write. +*/ + GEM_BUG_ON(guc->send_regs.fw_domains); + + /* +* This register is used by the i915 and GuC for MMIO based +* communication. 
Once we are in this code CTBs are the only +* method the i915 uses to communicate with the GuC so it is +* safe to write to this register (a value of 0 is NOP for MMIO +* communication). If we ever start mixing CTBs and MMIOs a new +* register will have to be chosen. This function is also used +* to enforce ordering of a work queue item write and an update +* to the process descriptor. When a work queue is being used, +* CTBs are also the only mechanism of communication. +*/ + intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0); + } else { + /* wmb() sufficient for a barrier if in smem */ + wmb(); + } +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 4ca197f400ba..31cf9fb48c7e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -46,6 +46,15 @@ struct intel_guc { * submitted until the stalled request is processed. */ struct i915_request *stalled_request; + /** +* @submission_stall_reason: reason why submission is stalled +*/ + enum { + STALL_NONE, + STALL_REGISTER_CONTEXT, + STALL_MOVE_LRC_TAIL, + STALL_ADD_REQUEST, + } submission_stall_reason; /* intel_guc_recv interrupt related state */ /** @irq_lock: protects GuC irq state */ @@ -367,4 +376,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc); void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); +void intel_guc_write_barrier(struct intel_guc *guc); + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 0a3504bc0b61..a0cc34be7b56 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -383,28 +383,6 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct) return ++ct->requests.last_fence; } -static void write_barrier(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_gt *gt = guc_to_gt(guc); - - if 
(i915_gem_object_is_lmem(guc->ct.vma->obj)) { - GEM_BUG_ON(guc->send_regs.fw_domains); - /* -* This register is used by the i915 and GuC for MMIO based -* communication. Once we are in this code CTBs are the only -* method the i915 uses to communicate with the GuC so it is -* safe to write to this register (a value of 0 is NOP for MMIO -
[Intel-gfx] [PATCH 12/25] drm/i915/guc: Implement multi-lrc submission
Implement multi-lrc submission via a single workqueue entry and single H2G. The workqueue entry contains an updated tail value for each request, of all the contexts in the multi-lrc submission, and updates these values simultaneously. As such, the tasklet and bypass path have been updated to coalesce requests into a single submission. v2: (John Harrison) - s/wqe/wqi - Use FIELD_PREP macros - Add GEM_BUG_ONs to ensure length fits within field - Add comment / white space to intel_guc_write_barrier (Kernel test robot) - Make need_tasklet a static function v3: (Docs) - A comment for submission_stall_reason v4: (Kernel test robot) - Initialize return value in bypass tasklet submit function (John Harrison) - Add comment near work queue defs - Add BUILD_BUG_ON to ensure WQ_SIZE is a power of 2 - Update write_barrier comment to talk about work queue Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gt/uc/intel_guc.c| 29 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.h| 11 + drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 24 +- drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 30 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 323 +++--- drivers/gpu/drm/i915/i915_request.h | 8 + 6 files changed, 350 insertions(+), 75 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 8f8182bf7c11..6e228343e8cb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -756,3 +756,32 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p) } } } + +void intel_guc_write_barrier(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + if (i915_gem_object_is_lmem(guc->ct.vma->obj)) { + /* +* Ensure intel_uncore_write_fw can be used rather than +* intel_uncore_write. +*/ + GEM_BUG_ON(guc->send_regs.fw_domains); + + /* +* This register is used by the i915 and GuC for MMIO based +* communication. 
Once we are in this code CTBs are the only +* method the i915 uses to communicate with the GuC so it is +* safe to write to this register (a value of 0 is NOP for MMIO +* communication). If we ever start mixing CTBs and MMIOs a new +* register will have to be chosen. This function is also used +* to enforce ordering of a work queue item write and an update +* to the process descriptor. When a work queue is being used, +* CTBs are also the only mechanism of communication. +*/ + intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0); + } else { + /* wmb() sufficient for a barrier if in smem */ + wmb(); + } +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 4ca197f400ba..31cf9fb48c7e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -46,6 +46,15 @@ struct intel_guc { * submitted until the stalled request is processed. */ struct i915_request *stalled_request; + /** +* @submission_stall_reason: reason why submission is stalled +*/ + enum { + STALL_NONE, + STALL_REGISTER_CONTEXT, + STALL_MOVE_LRC_TAIL, + STALL_ADD_REQUEST, + } submission_stall_reason; /* intel_guc_recv interrupt related state */ /** @irq_lock: protects GuC irq state */ @@ -367,4 +376,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc); void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); +void intel_guc_write_barrier(struct intel_guc *guc); + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 0a3504bc0b61..a0cc34be7b56 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -383,28 +383,6 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct) return ++ct->requests.last_fence; } -static void write_barrier(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_gt *gt = guc_to_gt(guc); - - if 
(i915_gem_object_is_lmem(guc->ct.vma->obj)) { - GEM_BUG_ON(guc->send_regs.fw_domains); - /* -* This register is used by the i915 and GuC for MMIO based -* communication. Once we are in this code CTBs are the only -* method the i915 uses to communicate with the GuC so it is -* safe to write to this register (a value of 0 is NOP for MMIO -* communication). If we ever start mixing CTBs and MMIOs a new -