Re: [Intel-gfx] [PATCH v9 3/7] drm/i915: Define and use GuC and CTB TLB invalidation routines

2023-10-10 Thread Tvrtko Ursulin



On 09/10/2023 18:29, Jonathan Cavitt wrote:

From: Prathap Kumar Valsan 

The GuC firmware had defined the interface for Translation Look-Aside
Buffer (TLB) invalidation.  We should use this interface when
invalidating the engine and GuC TLBs.
Add additional functionality to intel_gt_invalidate_tlb, invalidating
the GuC TLBs and falling back to GT invalidation when the GuC is
disabled.
The invalidation is done by sending a request directly to the GuC
tlb_lookup that invalidates the table.  The invalidation is submitted as
a wait request and is performed in the CT event handler.  This means we
cannot perform this TLB invalidation path if the CT is not enabled.
If the request isn't fulfilled within two seconds, it is treated as
an invalidation error, since such a delay indicates either a lost
request or a severe GuC overload.

With this new invalidation routine, we can perform GuC-based GGTT
invalidations.  GuC-based GGTT invalidation is incompatible with
MMIO invalidation so we should not perform MMIO invalidation when
GuC-based GGTT invalidation is expected.

The additional complexity incurred in this patch will be necessary for
range-based TLB invalidations, which will be enabled on platforms in the future.

Signed-off-by: Prathap Kumar Valsan 
Signed-off-by: Bruce Chang 
Signed-off-by: Chris Wilson 
Signed-off-by: Umesh Nerlige Ramappa 
Signed-off-by: Jonathan Cavitt 
Signed-off-by: Aravind Iddamsetty 
Signed-off-by: Fei Yang 
CC: Andi Shyti 
---
  drivers/gpu/drm/i915/gt/intel_ggtt.c  |  34 +++-
  drivers/gpu/drm/i915/gt/intel_tlb.c   |  16 +-
  .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |  33 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc.h|  22 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |   4 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   1 +
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 188 +-
  7 files changed, 286 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 4d7d88b92632b..a1f7bdc602996 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -206,22 +206,38 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
  }
  
+static void guc_ggtt_ct_invalidate(struct intel_gt *gt)

+{
+   struct intel_uncore *uncore = gt->uncore;
+   intel_wakeref_t wakeref;
+
+   with_intel_runtime_pm_if_active(uncore->rpm, wakeref) {
+   struct intel_guc *guc = &gt->uc.guc;
+
+   intel_guc_invalidate_tlb_guc(guc);
+   }
+}
+
  static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
  {
struct drm_i915_private *i915 = ggtt->vm.i915;
+   struct intel_gt *gt;
  
-	gen8_ggtt_invalidate(ggtt);

-
-   if (GRAPHICS_VER(i915) >= 12) {
-   struct intel_gt *gt;
+   if (!HAS_GUC_TLB_INVALIDATION(i915))
+   gen8_ggtt_invalidate(ggtt);
  
-		list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)

+   list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
+   if (HAS_GUC_TLB_INVALIDATION(i915) &&
+   intel_guc_is_ready(&gt->uc.guc)) {
+   guc_ggtt_ct_invalidate(gt);
+   } else if (GRAPHICS_VER(i915) >= 12) {
intel_uncore_write_fw(gt->uncore,
  GEN12_GUC_TLB_INV_CR,
  GEN12_GUC_TLB_INV_CR_INVALIDATE);
-   } else {
-   intel_uncore_write_fw(ggtt->vm.gt->uncore,
- GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   } else {
+   intel_uncore_write_fw(gt->uncore,
+ GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   }
}
  }
  
@@ -1243,7 +1259,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)

ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
}
  
-	if (intel_uc_wants_guc(&ggtt->vm.gt->uc))

+   if (intel_uc_wants_guc_submission(&ggtt->vm.gt->uc))
ggtt->invalidate = guc_ggtt_invalidate;
else
ggtt->invalidate = gen8_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c 
b/drivers/gpu/drm/i915/gt/intel_tlb.c
index 139608c30d978..4bb13d1890e37 100644
--- a/drivers/gpu/drm/i915/gt/intel_tlb.c
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
@@ -12,6 +12,7 @@
  #include "intel_gt_print.h"
  #include "intel_gt_regs.h"
  #include "intel_tlb.h"
+#include "uc/intel_guc.h"
  
  /*

   * HW architecture suggest typical invalidation time at 40us,
@@ -131,11 +132,24 @@ void intel_gt_invalidate_tlb_full(struct intel_gt *gt, 
u32 seqno)
return;
  
  	with_intel_gt_pm_if_awake(gt, wakeref) {

+   struct intel_guc *guc = &gt->uc.guc;
+
mutex_lock(&gt->tlb.invalidate_lock);
if (tlb_seqno_passed(gt, seqno))
goto 

[Intel-gfx] [PATCH v9 3/7] drm/i915: Define and use GuC and CTB TLB invalidation routines

2023-10-09 Thread Jonathan Cavitt
From: Prathap Kumar Valsan 

The GuC firmware had defined the interface for Translation Look-Aside
Buffer (TLB) invalidation.  We should use this interface when
invalidating the engine and GuC TLBs.
Add additional functionality to intel_gt_invalidate_tlb, invalidating
the GuC TLBs and falling back to GT invalidation when the GuC is
disabled.
The invalidation is done by sending a request directly to the GuC
tlb_lookup that invalidates the table.  The invalidation is submitted as
a wait request and is performed in the CT event handler.  This means we
cannot perform this TLB invalidation path if the CT is not enabled.
If the request isn't fulfilled within two seconds, it is treated as
an invalidation error, since such a delay indicates either a lost
request or a severe GuC overload.

With this new invalidation routine, we can perform GuC-based GGTT
invalidations.  GuC-based GGTT invalidation is incompatible with
MMIO invalidation so we should not perform MMIO invalidation when
GuC-based GGTT invalidation is expected.

The additional complexity incurred in this patch will be necessary for
range-based TLB invalidations, which will be enabled on platforms in the future.

Signed-off-by: Prathap Kumar Valsan 
Signed-off-by: Bruce Chang 
Signed-off-by: Chris Wilson 
Signed-off-by: Umesh Nerlige Ramappa 
Signed-off-by: Jonathan Cavitt 
Signed-off-by: Aravind Iddamsetty 
Signed-off-by: Fei Yang 
CC: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  34 +++-
 drivers/gpu/drm/i915/gt/intel_tlb.c   |  16 +-
 .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |  33 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|  22 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |   4 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   1 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 188 +-
 7 files changed, 286 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 4d7d88b92632b..a1f7bdc602996 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -206,22 +206,38 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 }
 
+static void guc_ggtt_ct_invalidate(struct intel_gt *gt)
+{
+   struct intel_uncore *uncore = gt->uncore;
+   intel_wakeref_t wakeref;
+
+   with_intel_runtime_pm_if_active(uncore->rpm, wakeref) {
+   struct intel_guc *guc = &gt->uc.guc;
+
+   intel_guc_invalidate_tlb_guc(guc);
+   }
+}
+
 static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 {
struct drm_i915_private *i915 = ggtt->vm.i915;
+   struct intel_gt *gt;
 
-   gen8_ggtt_invalidate(ggtt);
-
-   if (GRAPHICS_VER(i915) >= 12) {
-   struct intel_gt *gt;
+   if (!HAS_GUC_TLB_INVALIDATION(i915))
+   gen8_ggtt_invalidate(ggtt);
 
-   list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
+   list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
+   if (HAS_GUC_TLB_INVALIDATION(i915) &&
+   intel_guc_is_ready(&gt->uc.guc)) {
+   guc_ggtt_ct_invalidate(gt);
+   } else if (GRAPHICS_VER(i915) >= 12) {
intel_uncore_write_fw(gt->uncore,
  GEN12_GUC_TLB_INV_CR,
  GEN12_GUC_TLB_INV_CR_INVALIDATE);
-   } else {
-   intel_uncore_write_fw(ggtt->vm.gt->uncore,
- GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   } else {
+   intel_uncore_write_fw(gt->uncore,
+ GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   }
}
 }
 
@@ -1243,7 +1259,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
}
 
-   if (intel_uc_wants_guc(&ggtt->vm.gt->uc))
+   if (intel_uc_wants_guc_submission(&ggtt->vm.gt->uc))
ggtt->invalidate = guc_ggtt_invalidate;
else
ggtt->invalidate = gen8_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c 
b/drivers/gpu/drm/i915/gt/intel_tlb.c
index 139608c30d978..4bb13d1890e37 100644
--- a/drivers/gpu/drm/i915/gt/intel_tlb.c
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
@@ -12,6 +12,7 @@
 #include "intel_gt_print.h"
 #include "intel_gt_regs.h"
 #include "intel_tlb.h"
+#include "uc/intel_guc.h"
 
 /*
  * HW architecture suggest typical invalidation time at 40us,
@@ -131,11 +132,24 @@ void intel_gt_invalidate_tlb_full(struct intel_gt *gt, 
u32 seqno)
return;
 
with_intel_gt_pm_if_awake(gt, wakeref) {
+   struct intel_guc *guc = &gt->uc.guc;
+
mutex_lock(&gt->tlb.invalidate_lock);
if (tlb_seqno_passed(gt, seqno))
goto unlock;
 
-   mmio_invalidate_full(gt);