Re: [PATCH 1/3] drm/i915/gt: Support fixed CCS mode

2023-12-28 Thread Andi Shyti
Hi Joonas,

> > +   /*
> > +* Track fixed mapping between CCS engines and compute slices.
> > +*
> > +* In order to w/a HW that has the inability to dynamically load
> > +* balance between CCS engines and EU in the compute slices, we 
> > have to
> > +* reconfigure a static mapping on the fly. We track the current CCS
> > +* configuration (determined by inspection of the user's engine
> > +* selection during execbuf) and compare it against the current
> > +* CCS_MODE (which maps CCS engines to compute slices).  If there is
> > +* only a single engine selected, we can map it to all available
> > +* compute slices for maximal single task performance 
> > (fast/narrow). If
> > +* there are more then one engine selected, we have to reduce the
> > +* number of slices allocated to each engine (wide/slow), fairly
> > +* distributing the EU between the equivalent engines.
> > +*/
> 
> This comment is outdated as we don't consider execbuf but the sysfs
> configuration.

yes, I removed some parts of the original comment already, but
yes, I agree it can be improved.

Thanks,
Andi


Re: [PATCH 1/3] drm/i915/gt: Support fixed CCS mode

2023-12-22 Thread Joonas Lahtinen
Quoting Andi Shyti (2023-12-21 19:08:22)
> The CCS mode involves assigning CCS engines to slices depending
> on the number of slices and the number of engines the user wishes
> to set.
> 
> In this patch, the default CCS setting is established during the
> initial GT settings. It involves assigning only one CCS to all
> the slices.
> 
> Based on a patch by Chris Wilson 
> and Tejas Upadhyay .
> 
> Signed-off-by: Andi Shyti 
> Cc: Chris Wilson 
> Cc: Joonas Lahtinen 
> Cc: Niranjana Vishwanathapura 
> Cc: Tejas Upadhyay 



> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -207,6 +207,26 @@ struct intel_gt {
> [MAX_ENGINE_INSTANCE + 1];
> enum intel_submission_method submission_method;
>  
> +   /*
> +* Track fixed mapping between CCS engines and compute slices.
> +*
> +* In order to w/a HW that has the inability to dynamically load
> +* balance between CCS engines and EU in the compute slices, we have 
> to
> +* reconfigure a static mapping on the fly. We track the current CCS
> +* configuration (determined by inspection of the user's engine
> +* selection during execbuf) and compare it against the current
> +* CCS_MODE (which maps CCS engines to compute slices).  If there is
> +* only a single engine selected, we can map it to all available
> +* compute slices for maximal single task performance (fast/narrow). 
> If
> +* there are more then one engine selected, we have to reduce the
> +* number of slices allocated to each engine (wide/slow), fairly
> +* distributing the EU between the equivalent engines.
> +*/

This comment is outdated as we don't consider execbuf but the sysfs
configuration.

Regards, Joonas

> +   struct {
> +   struct mutex mutex;
> +   u32 mode;
> +   } ccs;
> +
> /*
>  * Default address space (either GGTT or ppGTT depending on arch).
>  *


[PATCH 1/3] drm/i915/gt: Support fixed CCS mode

2023-12-21 Thread Andi Shyti
The CCS mode involves assigning CCS engines to slices depending
on the number of slices and the number of engines the user wishes
to set.

In this patch, the default CCS setting is established during the
initial GT settings. It involves assigning only one CCS to all
the slices.

Based on a patch by Chris Wilson 
and Tejas Upadhyay .

Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Niranjana Vishwanathapura 
Cc: Tejas Upadhyay 
---
 drivers/gpu/drm/i915/Makefile   |  1 +
 drivers/gpu/drm/i915/gt/intel_gt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 81 +
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h | 16 
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 13 
 drivers/gpu/drm/i915/gt/intel_gt_types.h| 20 +
 drivers/gpu/drm/i915/i915_drv.h |  2 +
 7 files changed, 139 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e777686190ca..1dce15d6306b 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -119,6 +119,7 @@ gt-y += \
gt/intel_ggtt_fencing.o \
gt/intel_gt.o \
gt/intel_gt_buffer_pool.o \
+   gt/intel_gt_ccs_mode.o \
gt/intel_gt_clock_utils.o \
gt/intel_gt_debugfs.o \
gt/intel_gt_engines_debugfs.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index a425db5ed3a2..e83c7b80c07a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -17,6 +17,7 @@
 #include "intel_engine_regs.h"
 #include "intel_ggtt_gmch.h"
 #include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
 #include "intel_gt_buffer_pool.h"
 #include "intel_gt_clock_utils.h"
 #include "intel_gt_debugfs.h"
@@ -47,6 +48,7 @@ void intel_gt_common_init_early(struct intel_gt *gt)
init_llist_head(>->watchdog.list);
INIT_WORK(>->watchdog.work, intel_gt_watchdog_work);
 
+   intel_gt_init_ccs_mode(gt);
intel_gt_init_buffer_pool(gt);
intel_gt_init_reset(gt);
intel_gt_init_requests(gt);
@@ -195,6 +197,9 @@ int intel_gt_init_hw(struct intel_gt *gt)
 
intel_gt_init_swizzling(gt);
 
+   /* Configure CCS mode */
+   intel_gt_apply_ccs_mode(gt);
+
/*
 * At least 830 can leave some of the unused rings
 * "active" (ie. head != tail) after resume which
@@ -860,6 +865,7 @@ void intel_gt_driver_late_release_all(struct 
drm_i915_private *i915)
 
for_each_gt(gt, i915, id) {
intel_uc_driver_late_release(>->uc);
+   intel_gt_fini_ccs_mode(gt);
intel_gt_fini_requests(gt);
intel_gt_fini_reset(gt);
intel_gt_fini_timelines(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c 
b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
new file mode 100644
index ..fab8a77bded2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
@@ -0,0 +1,81 @@
+//SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "i915_drv.h"
+
+#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
+#include "intel_gt_regs.h"
+#include "intel_gt_types.h"
+
+static void __intel_gt_apply_ccs_mode(struct intel_gt *gt)
+{
+   u32 mode = XEHP_CCS_MODE_CSLICE_0_3_MASK; /* disable all by default */
+   int num_slices = hweight32(CCS_MASK(gt));
+   int num_engines = gt->ccs.mode;
+   int slice = 0;
+   int i;
+
+   if (!num_engines)
+   return;
+
+   /*
+* Loop over all available slices and assign each a user engine.
+*
+* With 1 engine (ccs0):
+*   slice 0, 1, 2, 3: ccs0
+*
+* With 2 engines (ccs0, ccs1):
+*   slice 0, 2: ccs0
+*   slice 1, 3: ccs1
+*
+* With 4 engines (ccs0, ccs1, ccs2, ccs3):
+*   slice 0: ccs0
+*   slice 1: ccs1
+*   slice 2: ccs2
+*   slice 3: ccs3
+*
+* Since the number of slices and the number of engines is
+* known, and we ensure that there is an exact multiple of
+* engines for slices, the double loop becomes a loop over each
+* slice.
+*/
+   for (i = num_slices / num_engines; i < num_slices; i++) {
+   struct intel_engine_cs *engine;
+   intel_engine_mask_t tmp;
+
+   for_each_engine_masked(engine, gt, ALL_CCS(gt), tmp) {
+   /* If a slice is fused off, leave disabled */
+   while (!(CCS_MASK(gt) & BIT(slice)))
+   slice++;
+
+   mode &= ~XEHP_CCS_MODE_CSLICE(slice, 
XEHP_CCS_MODE_CSLICE_MASK);
+   mode |= XEHP_CCS_MODE_CSLICE(slice, engine->instance);
+
+   /* assign the next slice */
+