[Intel-gfx] [PATCH v6 6/9] drm/i915/gt: Ensure memory quiesced before invalidation for all engines

2023-07-20 Thread Andi Shyti
Commit af9e423a8aae ("drm/i915/gt: Ensure memory quiesced before
invalidation") has made sure that the memory is quiesced before
invalidating the AUX CCS table. Do it for all the other engines
and not just RCS.

Signed-off-by: Andi Shyti 
Cc: Jonathan Cavitt 
Cc: Matt Roper 
---
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 46 ++--
 1 file changed, 36 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c 
b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 1b1dadacfbf42..3bedab8d61db1 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -309,19 +309,45 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 
mode)
 int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
 {
intel_engine_mask_t aux_inv = 0;
-   u32 cmd, *cs;
+   u32 cmd = 4;
+   u32 *cs;
 
-   cmd = 4;
-   if (mode & EMIT_INVALIDATE) {
+   if (mode & EMIT_INVALIDATE)
cmd += 2;
 
-   if (HAS_AUX_CCS(rq->engine->i915) &&
-   (rq->engine->class == VIDEO_DECODE_CLASS ||
-rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
-   aux_inv = rq->engine->mask &
-   ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
-   if (aux_inv)
-   cmd += 4;
+   if (HAS_AUX_CCS(rq->engine->i915))
+   aux_inv = rq->engine->mask &
+ ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
+
+   /*
+* On Aux CCS platforms the invalidation of the Aux
+* table requires quiescing memory traffic beforehand
+*/
+   if (aux_inv) {
+   u32 bit_group_0 = 0;
+   u32 bit_group_1 = 0;
+
+   cmd += 4;
+
+   bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
+
+   switch (rq->engine->class) {
+   case VIDEO_DECODE_CLASS:
+   bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+   bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+   bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+   bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
+   bit_group_1 |= PIPE_CONTROL_CS_STALL;
+
+   intel_emit_pipe_control_cs(rq, bit_group_0, bit_group_1,
+  LRC_PPHWSP_SCRATCH_ADDR);
+
+   break;
+
+   case VIDEO_ENHANCEMENT_CLASS:
+   case COMPUTE_CLASS:
+   case COPY_ENGINE_CLASS:
+   break;
}
}
 
-- 
2.40.1



Re: [Intel-gfx] [PATCH v6 6/9] drm/i915/gt: Ensure memory quiesced before invalidation for all engines

2023-07-20 Thread Andi Shyti
Hi Nirmoy,

> +   if (aux_inv) {
> +   u32 bit_group_0 = 0;
> +   u32 bit_group_1 = 0;
> +
> +   cmd += 4;
> +
> +   bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
> +
> +   switch (rq->engine->class) {
> +   case VIDEO_DECODE_CLASS:
> +   bit_group_1 |= 
> PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
> +   bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
> +   bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
> +   bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
> +   bit_group_1 |= PIPE_CONTROL_CS_STALL;
> +
> +   intel_emit_pipe_control_cs(rq, bit_group_0, 
> bit_group_1,
> +  
> LRC_PPHWSP_SCRATCH_ADDR);
> 
> 
> I think pipe control is only for compute and render engines.
> 
> +
> +   break;
> +
> +   case VIDEO_ENHANCEMENT_CLASS:
> +   case COMPUTE_CLASS:
> 
> Don't think gen12_emit_flush_xcs() will get called for compute engine.
> 
> intel_guc_submission_setup() --> rcs_submission_override() replaces
> gen12_emit_flush_xcs() with gen12_emit_flush_rcs()
> 
> for compute and render.

Yes, I got confused here... this part is bogus. I will try
to clean things up and try again.

Andi


Re: [Intel-gfx] [PATCH v6 6/9] drm/i915/gt: Ensure memory quiesced before invalidation for all engines

2023-07-20 Thread Nirmoy Das

Hi Andi,

On 7/19/2023 1:07 PM, Andi Shyti wrote:

Commit af9e423a8aae ("drm/i915/gt: Ensure memory quiesced before
invalidation") has made sure that the memory is quiesced before
invalidating the AUX CCS table. Do it for all the other engines
and not just RCS.

Signed-off-by: Andi Shyti
Cc: Jonathan Cavitt
Cc: Matt Roper
---
  drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 46 ++--
  1 file changed, 36 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c 
b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 1b1dadacfbf42..3bedab8d61db1 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -309,19 +309,45 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 
mode)
  int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
  {
intel_engine_mask_t aux_inv = 0;
-   u32 cmd, *cs;
+   u32 cmd = 4;
+   u32 *cs;
  
-	cmd = 4;

-   if (mode & EMIT_INVALIDATE) {
+   if (mode & EMIT_INVALIDATE)
cmd += 2;
  
-		if (HAS_AUX_CCS(rq->engine->i915) &&

-   (rq->engine->class == VIDEO_DECODE_CLASS ||
-rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
-   aux_inv = rq->engine->mask &
-   ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
-   if (aux_inv)
-   cmd += 4;
+   if (HAS_AUX_CCS(rq->engine->i915))
+   aux_inv = rq->engine->mask &
+ ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
+
+   /*
+* On Aux CCS platforms the invalidation of the Aux
+* table requires quiescing memory traffic beforehand
+*/
+   if (aux_inv) {
+   u32 bit_group_0 = 0;
+   u32 bit_group_1 = 0;
+
+   cmd += 4;
+
+   bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
+
+   switch (rq->engine->class) {
+   case VIDEO_DECODE_CLASS:
+   bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+   bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+   bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+   bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
+   bit_group_1 |= PIPE_CONTROL_CS_STALL;
+
+   intel_emit_pipe_control_cs(rq, bit_group_0, bit_group_1,
+  LRC_PPHWSP_SCRATCH_ADDR);



I think pipe control is only for compute and render engines.


+
+   break;
+
+   case VIDEO_ENHANCEMENT_CLASS:
+   case COMPUTE_CLASS:


Don't think gen12_emit_flush_xcs() will get called for compute engine.

intel_guc_submission_setup() --> rcs_submission_override() replaces
gen12_emit_flush_xcs() with gen12_emit_flush_rcs() for compute and render.

Regards,
Nirmoy



+   case COPY_ENGINE_CLASS:
+   break;
}
}
  

[Intel-gfx] [PATCH v6 6/9] drm/i915/gt: Ensure memory quiesced before invalidation for all engines

2023-07-19 Thread Andi Shyti
Commit af9e423a8aae ("drm/i915/gt: Ensure memory quiesced before
invalidation") has made sure that the memory is quiesced before
invalidating the AUX CCS table. Do it for all the other engines
and not just RCS.

Signed-off-by: Andi Shyti 
Cc: Jonathan Cavitt 
Cc: Matt Roper 
---
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 46 ++--
 1 file changed, 36 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c 
b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 1b1dadacfbf42..3bedab8d61db1 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -309,19 +309,45 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 
mode)
 int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
 {
intel_engine_mask_t aux_inv = 0;
-   u32 cmd, *cs;
+   u32 cmd = 4;
+   u32 *cs;
 
-   cmd = 4;
-   if (mode & EMIT_INVALIDATE) {
+   if (mode & EMIT_INVALIDATE)
cmd += 2;
 
-   if (HAS_AUX_CCS(rq->engine->i915) &&
-   (rq->engine->class == VIDEO_DECODE_CLASS ||
-rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
-   aux_inv = rq->engine->mask &
-   ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
-   if (aux_inv)
-   cmd += 4;
+   if (HAS_AUX_CCS(rq->engine->i915))
+   aux_inv = rq->engine->mask &
+ ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
+
+   /*
+* On Aux CCS platforms the invalidation of the Aux
+* table requires quiescing memory traffic beforehand
+*/
+   if (aux_inv) {
+   u32 bit_group_0 = 0;
+   u32 bit_group_1 = 0;
+
+   cmd += 4;
+
+   bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
+
+   switch (rq->engine->class) {
+   case VIDEO_DECODE_CLASS:
+   bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+   bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+   bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+   bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
+   bit_group_1 |= PIPE_CONTROL_CS_STALL;
+
+   intel_emit_pipe_control_cs(rq, bit_group_0, bit_group_1,
+  LRC_PPHWSP_SCRATCH_ADDR);
+
+   break;
+
+   case VIDEO_ENHANCEMENT_CLASS:
+   case COMPUTE_CLASS:
+   case COPY_ENGINE_CLASS:
+   break;
}
}
 
-- 
2.40.1