Re: [PATCH] drm/i915/gt: Automate CCS Mode setting during engine resets

2024-04-26 Thread Andi Shyti
Hi,

On Fri, Apr 26, 2024 at 02:07:23AM +0200, Andi Shyti wrote:
> We missed setting the CCS mode during resume and engine resets.
> Create a workaround to be added in the engine's workaround list.
> This workaround sets the XEHP_CCS_MODE value at every reset.
> 
> The issue can be reproduced by running:
> 
>   $ clpeak --kernel-latency
> 
> Without resetting the CCS mode, we encounter a fence timeout:
> 
>   Fence expiration time out i915-0000:03:00.0:clpeak[2387]:2!
> 
> Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/10895
> Fixes: 6db31251bb26 ("drm/i915/gt: Enable only one CCS for compute workload")
> Reported-by: Gnattu OC 
> Signed-off-by: Andi Shyti 
> Cc: Chris Wilson 
> Cc: Joonas Lahtinen 
> Cc: Matt Roper 
> Cc: <stable@vger.kernel.org> # v6.2+

based on the discussion on the issue and as agreed with Gnattu:

Tested-by: Gnattu OC 

Thank you again, gnattu,
Andi


[PATCH] drm/i915/gt: Automate CCS Mode setting during engine resets

2024-04-25 Thread Andi Shyti
We missed setting the CCS mode during resume and engine resets.
Create a workaround to be added in the engine's workaround list.
This workaround sets the XEHP_CCS_MODE value at every reset.

The issue can be reproduced by running:

  $ clpeak --kernel-latency

Without resetting the CCS mode, we encounter a fence timeout:

  Fence expiration time out i915-0000:03:00.0:clpeak[2387]:2!

Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/10895
Fixes: 6db31251bb26 ("drm/i915/gt: Enable only one CCS for compute workload")
Reported-by: Gnattu OC 
Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Matt Roper 
Cc: <stable@vger.kernel.org> # v6.2+
---
Hi Gnattu,

thanks again for reporting this issue and for your prompt
replies on the issue. Would you give this patch a chance?

Andi

 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 6 +++---
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h | 2 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 +++-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c 
b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
index 044219c5960a..99b71bb7da0a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
@@ -8,14 +8,14 @@
 #include "intel_gt_ccs_mode.h"
 #include "intel_gt_regs.h"
 
-void intel_gt_apply_ccs_mode(struct intel_gt *gt)
+unsigned int intel_gt_apply_ccs_mode(struct intel_gt *gt)
 {
int cslice;
u32 mode = 0;
int first_ccs = __ffs(CCS_MASK(gt));
 
if (!IS_DG2(gt->i915))
-   return;
+   return 0;
 
/* Build the value for the fixed CCS load balancing */
for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
@@ -35,5 +35,5 @@ void intel_gt_apply_ccs_mode(struct intel_gt *gt)
 XEHP_CCS_MODE_CSLICE_MASK);
}
 
-   intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);
+   return mode;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h 
b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
index 9e5549caeb26..55547f2ff426 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
@@ -8,6 +8,6 @@
 
 struct intel_gt;
 
-void intel_gt_apply_ccs_mode(struct intel_gt *gt);
+unsigned int intel_gt_apply_ccs_mode(struct intel_gt *gt);
 
 #endif /* __INTEL_GT_CCS_MODE_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 68b6aa11bcf7..58693923bf6c 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2703,6 +2703,7 @@ add_render_compute_tuning_settings(struct intel_gt *gt,
 static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct 
i915_wa_list *wal)
 {
struct intel_gt *gt = engine->gt;
+   u32 mode;
 
if (!IS_DG2(gt->i915))
return;
@@ -2719,7 +2720,8 @@ static void ccs_engine_wa_mode(struct intel_engine_cs 
*engine, struct i915_wa_li
 * After having disabled automatic load balancing we need to
 * assign all slices to a single CCS. We will call it CCS mode 1
 */
-   intel_gt_apply_ccs_mode(gt);
+   mode = intel_gt_apply_ccs_mode(gt);
+   wa_masked_en(wal, XEHP_CCS_MODE, mode);
 }
 
 /*
-- 
2.43.0



Re: ✗ Fi.CI.IGT: failure for series starting with [v2,1/2] drm/i915: Refactor confusing __intel_gt_reset() (rev2)

2024-04-24 Thread Andi Shyti
Hi Nirmoy,

On Wed, Apr 24, 2024 at 10:56:36AM +0200, Nirmoy Das wrote:
> 
> On 4/24/2024 10:16 AM, Patchwork wrote:
> 
> Patch Details
> 
> Series:  series starting with [v2,1/2] drm/i915: Refactor confusing
>  __intel_gt_reset() (rev2)
> URL: https://patchwork.freedesktop.org/series/132731/
> State:   failure
> Details: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132731v2/
>  index.html
> 
> CI Bug Log - changes from CI_DRM_14633_full -> Patchwork_132731v2_full
> 
> Summary
> 
> FAILURE
> 
> Serious unknown changes coming with Patchwork_132731v2_full absolutely 
> need
> to be
> verified manually.
> 
> If you think the reported changes have nothing to do with the changes
> introduced in Patchwork_132731v2_full, please notify your bug team ('
> i915-ci-in...@lists.freedesktop.org') to allow them
> to document this new failure mode, which will reduce false positives in 
> CI.
> 
> External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132731v2/
> index.html
> 
> Participating hosts (9 -> 8)
> 
> Missing (1): shard-dg2-set2
> 
> Possible new issues
> 
> Here are the unknown changes that may have been introduced in
> Patchwork_132731v2_full:
> 
> IGT changes
> 
> Possible regressions
> 
>   □ igt@gem_exec_await@wide-all:
> 
>   ☆ shard-dg1: NOTRUN -> INCOMPLETE
>   □ igt@gem_exec_gttfill@engines@ccs0:
> 
>   ☆ shard-dg2: NOTRUN -> INCOMPLETE
> 
> These are unrelated as the change only effects where GuC submission disabled.
> 
> Andi, could you please help me merge this one. My dev machine is still broken.

merged into drm-intel-gt-next.

Thanks,
Andi


Re: [PATCH v2 2/2] drm/i915: Fix gt reset with GuC submission is disabled

2024-04-23 Thread Andi Shyti
Hi Nirmoy,

> > > Currently intel_gt_reset() kills the GuC and then resets requested
> > > engines. This is problematic because there is a dedicated CSB FIFO
> > > which only GuC can access and if that FIFO fills up, the hardware
> > > will block on the next context switch until there is space that means
> > > the system is effectively hung. If an engine is reset whilst actively
> > > executing a context, a CSB entry will be sent to say that the context
> > > has gone idle. Thus if reset happens on a very busy system then
> > > killing GuC before killing the engines will lead to deadlock because
> > > of filled up CSB FIFO.
> > is this a fix?
> 
> I went quite far back in the commit logs, and it appears to me that we've
> always been using the current reset flow.
> 
> I believe we don't perform a GT reset immediately after sending a number of
> requests, which is what the current failed test is doing.
> 
> So, I don't think there will be any visible impact on the user with the
> current flow.

Agree... good thinking here... we often abuse on the Fixes tag.

Thanks,
Andi


Re: [PATCH] drm/i915/gt: Refactor uabi engine class/instance list creation

2024-04-23 Thread Andi Shyti
Hi Nirmoy,

On Tue, Apr 23, 2024 at 11:02:06AM +0200, Nirmoy Das wrote:
> On 4/17/2024 12:49 AM, Andi Shyti wrote:

...

>  void intel_engines_driver_register(struct drm_i915_private *i915)
>  {
> -   u16 name_instance, other_instance = 0;
> +   u16 class_instance[I915_LAST_UABI_ENGINE_CLASS + 2] = { };
> 
> +2 is confusing here. I think we need a better macro.

This is to avoid buffer overflow, otherwise we will always hit
the GEM_BUG_ON below.

> struct legacy_ring ring = {};
> struct list_head *it, *next;
> struct rb_node **p, *prev;

...

> +   GEM_BUG_ON(uabi_class >=
> +  ARRAY_SIZE(i915->engine_uabi_class_count));
> +   i915->engine_uabi_class_count[uabi_class]++;
> 
> Shouldn't this be  i915->engine_uabi_class_count[uabi_class] =  class_instance
> [uabi_class]; ?

that's mostly a counter, we don't really need to be on sync with
the real instance of the engines.

> What I see is that this patch mainly adding this class_instance array and rest
> looks the same.
> May be it make sense to add other upcoming  patches to better understand why 
> we
> need this patch.

yes, this patch simplifies the coming patches and the logic
inside, as well.

I just wanted to anticipate some of the refactorings so that we
could speed up the reviews. There is no functional change in
here, that's why I thought it was harmless.

Thanks for taking a look into it.

Andi


Re: [PATCH v2] drm/i915/gem: Downgrade stolen lmem setup warning

2024-04-23 Thread Andi Shyti
Hi Jonathan,

On Mon, Apr 22, 2024 at 06:59:59AM -0700, Jonathan Cavitt wrote:
> In the case where lmem_size < dsm_base, hardware is reporting that
> stolen lmem is unusable.  In this case, instead of throwing a warning,
> we can continue execution as normal by disabling stolen LMEM support.
> For example, this change will allow the following error report from
> ATS-M to no longer apply:
> 
> <6> [144.859887] pcieport :4b:00.0: bridge window [mem 
> 0xb100-0xb11f]
> <6> [144.859900] pcieport :4b:00.0: bridge window [mem 
> 0x3bbc-0x3bbc17ff 64bit pref]
> <6> [144.859917] pcieport :4c:01.0: PCI bridge to [bus 4d-4e]
> <6> [144.859932] pcieport :4c:01.0: bridge window [mem 
> 0xb100-0xb11f]
> <6> [144.859945] pcieport :4c:01.0: bridge window [mem 
> 0x3bbc-0x3bbc17ff 64bit pref]
> <6> [144.859984] i915 0000:4d:00.0: [drm] BAR2 resized to 256M
> <6> [144.860640] i915 0000:4d:00.0: [drm] Using a reduced BAR size of 256MiB. 
> Consider enabling 'Resizable BAR' or similar, if available in the BIOS.
> <4> [144.860719] ---[ cut here ]---
> <4> [144.860727] WARNING: CPU: 17 PID: 1815 at 
> drivers/gpu/drm/i915/gem/i915_gem_stolen.c:939 
> i915_gem_stolen_lmem_setup+0x38c/0x430 [i915]
> <4> [144.861430] Modules linked in: i915 snd_intel_dspcfg snd_hda_codec 
> snd_hwdep snd_hda_core snd_pcm vgem drm_shmem_helper prime_numbers 
> i2c_algo_bit ttm video drm_display_helper drm_buddy fuse x86_pkg_temp_thermal 
> coretemp kvm_intel kvm ixgbe mdio irqbypass ptp crct10dif_pclmul crc32_pclmul 
> ghash_clmulni_intel pps_core i2c_i801 mei_me i2c_smbus mei wmi 
> acpi_power_meter [last unloaded: i915]
> <4> [144.861611] CPU: 17 PID: 1815 Comm: i915_module_loa Tainted: G U 
> 6.8.0-rc5-drmtip_1515-g78f49af27723+ #1
> <4> [144.861624] Hardware name: Intel Corporation WHITLEY/WHITLEY, BIOS 
> SE5C6200.86B.0020.P41.2109300305 09/30/2021
> <4> [144.861632] RIP: 0010:i915_gem_stolen_lmem_setup+0x38c/0x430 [i915]
> <4> [144.862287] Code: ff 41 c1 e4 05 e9 ac fe ff ff 4d 63 e4 48 89 ef 48 85 
> ed 74 04 48 8b 7d 08 48 c7 c6 10 a3 7b a0 e8 e9 90 43 e1 e9 ee fd ff ff <0f> 
> 0b 49 c7 c4 ed ff ff ff e9 e0 fd ff ff 0f b7 d2 48 c7 c6 00 d9
> <4> [144.862299] RSP: 0018:c90005607980 EFLAGS: 00010207
> <4> [144.862315] RAX: fff0 RBX: 0003 RCX: 
> 
> 
> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10833
> Suggested-by: Chris Wilson 
> Signed-off-by: Jonathan Cavitt 

Reviewed-by: Andi Shyti 

Thanks,
Andi


Re: [PATCH v2 2/2] drm/i915: Fix gt reset with GuC submission is disabled

2024-04-23 Thread Andi Shyti
Hi Nirmoy,

On Mon, Apr 22, 2024 at 10:19:51PM +0200, Nirmoy Das wrote:
> Currently intel_gt_reset() kills the GuC and then resets requested
> engines. This is problematic because there is a dedicated CSB FIFO
> which only GuC can access and if that FIFO fills up, the hardware
> will block on the next context switch until there is space that means
> the system is effectively hung. If an engine is reset whilst actively
> executing a context, a CSB entry will be sent to say that the context
> has gone idle. Thus if reset happens on a very busy system then
> killing GuC before killing the engines will lead to deadlock because
> of filled up CSB FIFO.

is this a fix?

> To address this issue, the GuC should be killed only after resetting
> the requested engines and before calling intel_gt_init_hw().
> 
> v2: Improve commit message(John)
> 
> Cc: John Harrison 
> Signed-off-by: Nirmoy Das 
> ---
>  drivers/gpu/drm/i915/gt/intel_reset.c | 16 ++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c 
> b/drivers/gpu/drm/i915/gt/intel_reset.c
> index b1393863ca9b..6161f7a3ff70 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -879,8 +879,17 @@ static intel_engine_mask_t reset_prepare(struct intel_gt 
> *gt)
>   intel_engine_mask_t awake = 0;
>   enum intel_engine_id id;
>  
> - /* For GuC mode, ensure submission is disabled before stopping ring */
> - intel_uc_reset_prepare(&gt->uc);
> + /**
> +  * For GuC mode with submission enabled, ensure submission
> +  * is disabled before stopping ring.

nit: "stopping *the* ring"

> +  *
> +  * For GuC mode with submission disabled, ensure that GuC is not
> +  * sanitized, do that after engine reset. reset_prepare()
> +  * is followed by engine reset which in this mode requires GuC to
> +  * process any CSB FIFO entries generated by the resets.
> +  */
> + if (intel_uc_uses_guc_submission(&gt->uc))
> + intel_uc_reset_prepare(&gt->uc);
>  
>   for_each_engine(engine, gt, id) {
>   if (intel_engine_pm_get_if_awake(engine))
> @@ -1227,6 +1236,9 @@ void intel_gt_reset(struct intel_gt *gt,
>  
>   intel_overlay_reset(gt->i915);
>  
> + /* sanitize uC after engine reset */
> + if (!intel_uc_uses_guc_submission(&gt->uc))
> + intel_uc_reset_prepare(&gt->uc);

Reviewed-by: Andi Shyti 

Thanks,
Andi


Re: [PATCH v2 1/2] drm/i915: Refactor confusing __intel_gt_reset()

2024-04-23 Thread Andi Shyti
Hi Nirmoy,

On Mon, Apr 22, 2024 at 10:19:50PM +0200, Nirmoy Das wrote:
> __intel_gt_reset() is really for resetting engines though
> the name might suggest something else. So add a helper function
> to remove confusions with no functional changes.
> 
> v2: Move intel_gt_reset_all_engines() next to
> intel_gt_reset_engine() to make diff simple(John)
> 
> Cc: John Harrison 
> Signed-off-by: Nirmoy Das 

Reviewed-by: Andi Shyti 

Thanks,
Andi


[CI 2/2] drm/i915/gt: Force ccs_mode 4

2024-04-22 Thread Andi Shyti
Signed-off-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c 
b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
index 044219c5960a..d0f181a8e73e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
@@ -10,29 +10,33 @@
 
 void intel_gt_apply_ccs_mode(struct intel_gt *gt)
 {
+   unsigned long ccs_mask = CCS_MASK(gt);
int cslice;
u32 mode = 0;
-   int first_ccs = __ffs(CCS_MASK(gt));
+   int first_ccs = __ffs(ccs_mask);
 
if (!IS_DG2(gt->i915))
return;
 
/* Build the value for the fixed CCS load balancing */
for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
-   if (CCS_MASK(gt) & BIT(cslice))
+   if (CCS_MASK(gt) & BIT(cslice)) {
/*
 * If available, assign the cslice
 * to the first available engine...
 */
mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs);
-
-   else
+   first_ccs = find_next_bit(&ccs_mask,
+ I915_MAX_CCS,
+ first_ccs + 1);
+   } else {
/*
 * ... otherwise, mark the cslice as
 * unavailable if no CCS dispatches here
 */
mode |= XEHP_CCS_MODE_CSLICE(cslice,
 XEHP_CCS_MODE_CSLICE_MASK);
+   }
}
 
intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);
-- 
2.43.0



[CI 1/2] Revert "drm/i915/gt: Do not generate the command streamer for all the CCS"

2024-04-22 Thread Andi Shyti
This reverts commit ea315f98e5d6d3191b74beb0c3e5fc16081d517c.
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 17 -
 1 file changed, 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 8c44af1c3451..476651bd0a21 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -874,23 +874,6 @@ static intel_engine_mask_t init_engine_mask(struct 
intel_gt *gt)
info->engine_mask &= ~BIT(GSC0);
}
 
-   /*
-* Do not create the command streamer for CCS slices beyond the first.
-* All the workload submitted to the first engine will be shared among
-* all the slices.
-*
-* Once the user will be allowed to customize the CCS mode, then this
-* check needs to be removed.
-*/
-   if (IS_DG2(gt->i915)) {
-   u8 first_ccs = __ffs(CCS_MASK(gt));
-
-   /* Mask off all the CCS engine */
-   info->engine_mask &= ~GENMASK(CCS3, CCS0);
-   /* Put back in the first CCS engine */
-   info->engine_mask |= BIT(_CCS(first_ccs));
-   }
-
return info->engine_mask;
 }
 
-- 
2.43.0



[CI 0/2] Force CCS mode to the maximum

2024-04-22 Thread Andi Shyti
Hi,

There has been a regression apparently caused by the CCS mode
forced to be 1[*]. But, because I think the kernel approach is
correct and there might be something hardcoded in userspace, I
want to show that with this series we won't see the regression.

What this series does is to force CCS mode to 4 (or to the
maximum). This way we will understand whether the issue comes
because of disabling the automatic load balancing or by forcing
it to use only one CCS.

Thanks gnattu for your report, i will appreciate if you can give
this a try.

Andi

[*] https://gitlab.freedesktop.org/drm/intel/-/issues/10895

Andi Shyti (2):
  Revert "drm/i915/gt: Do not generate the command streamer for all the
CCS"
  drm/i915/gt: Force ccs_mode 4

 drivers/gpu/drm/i915/gt/intel_engine_cs.c   | 17 -
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 12 
 2 files changed, 8 insertions(+), 21 deletions(-)

-- 
2.43.0



Re: [PATCH] drm/i915/hwmon: Get rid of devm

2024-04-18 Thread Andi Shyti
Hi Ashutosh,

On Wed, Apr 17, 2024 at 07:56:46AM -0700, Ashutosh Dixit wrote:
> When both hwmon and hwmon drvdata (on which hwmon depends) are device
> managed resources, the expectation, on device unbind, is that hwmon will be
> released before drvdata. However, in i915 there are two separate code
> paths, which both release either drvdata or hwmon and either can be
> released before the other. These code paths (for device unbind) are as
> follows (see also the bug referenced below):
> 
> Call Trace:
> release_nodes+0x11/0x70
> devres_release_group+0xb2/0x110
> component_unbind_all+0x8d/0xa0
> component_del+0xa5/0x140
> intel_pxp_tee_component_fini+0x29/0x40 [i915]
> intel_pxp_fini+0x33/0x80 [i915]
> i915_driver_remove+0x4c/0x120 [i915]
> i915_pci_remove+0x19/0x30 [i915]
> pci_device_remove+0x32/0xa0
> device_release_driver_internal+0x19c/0x200
> unbind_store+0x9c/0xb0
> 
> and
> 
> Call Trace:
> release_nodes+0x11/0x70
> devres_release_all+0x8a/0xc0
> device_unbind_cleanup+0x9/0x70
> device_release_driver_internal+0x1c1/0x200
> unbind_store+0x9c/0xb0
> 
> This means that in i915, if use devm, we cannot gurantee that hwmon will
> always be released before drvdata. Which means that we have a uaf if hwmon
> sysfs is accessed when drvdata has been released but hwmon hasn't.
> 
> The only way out of this seems to be do get rid of devm_ and release/free
> everything explicitly during device unbind.
> 
> v2: Change commit message and other minor code changes
> v3: Cleanup from i915_hwmon_register on error (Armin Wolf)
> v4: Eliminate potential static analyzer warning (Rodrigo)
> Eliminate fetch_and_zero (Jani)
> v5: Restore previous logic for ddat_gt->hwmon_dev error return (Andi)

Thanks!

Reviewed-by: Andi Shyti 

Andi


Re: [PATCH v4] drm/i915/hwmon: Get rid of devm

2024-04-17 Thread Andi Shyti
Hi Ashutosh,

> @@ -839,16 +837,38 @@ void i915_hwmon_register(struct drm_i915_private *i915)
>   if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, 
> hwmon_energy_input, 0))
>   continue;
>  
> - hwmon_dev = devm_hwmon_device_register_with_info(dev, 
> ddat_gt->name,
> -  ddat_gt,
> -  
> &hwm_gt_chip_info,
> -  NULL);
> - if (!IS_ERR(hwmon_dev))
> - ddat_gt->hwmon_dev = hwmon_dev;
> + hwmon_dev = hwmon_device_register_with_info(dev, ddat_gt->name,
> + ddat_gt,
> + &hwm_gt_chip_info,
> + NULL);
> + if (IS_ERR(hwmon_dev))
> + goto err;

here the logic is changing, though. Before we were not leaving if
hwmon_device_register_with_info() was returning error.

Is this wanted? And why isn't it described in the log?

Thanks,
Andi

> +
> + ddat_gt->hwmon_dev = hwmon_dev;
>   }


[PATCH] drm/i915/gt: Refactor uabi engine class/instance list creation

2024-04-16 Thread Andi Shyti
For the upcoming changes we need a cleaner way to build the list
of uabi engines.

Suggested-by: Tvrtko Ursulin 
Signed-off-by: Andi Shyti 
---
Hi,

just sending this patch to unburden the coming series from this
single patch inherited from a previously sent series.

Andi

 drivers/gpu/drm/i915/gt/intel_engine_user.c | 29 -
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 833987015b8b..11cc06c0c785 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -203,7 +203,7 @@ static void engine_rename(struct intel_engine_cs *engine, 
const char *name, u16
 
 void intel_engines_driver_register(struct drm_i915_private *i915)
 {
-   u16 name_instance, other_instance = 0;
+   u16 class_instance[I915_LAST_UABI_ENGINE_CLASS + 2] = { };
struct legacy_ring ring = {};
struct list_head *it, *next;
struct rb_node **p, *prev;
@@ -214,6 +214,8 @@ void intel_engines_driver_register(struct drm_i915_private 
*i915)
prev = NULL;
p = &i915->uabi_engines.rb_node;
list_for_each_safe(it, next, &engines) {
+   u16 uabi_class;
+
struct intel_engine_cs *engine =
container_of(it, typeof(*engine), uabi_list);
 
@@ -222,15 +224,14 @@ void intel_engines_driver_register(struct 
drm_i915_private *i915)
 
GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
engine->uabi_class = uabi_classes[engine->class];
-   if (engine->uabi_class == I915_NO_UABI_CLASS) {
-   name_instance = other_instance++;
-   } else {
-   GEM_BUG_ON(engine->uabi_class >=
-  ARRAY_SIZE(i915->engine_uabi_class_count));
-   name_instance =
-   
i915->engine_uabi_class_count[engine->uabi_class]++;
-   }
-   engine->uabi_instance = name_instance;
+
+   if (engine->uabi_class == I915_NO_UABI_CLASS)
+   uabi_class = I915_LAST_UABI_ENGINE_CLASS + 1;
+   else
+   uabi_class = engine->uabi_class;
+
+   GEM_BUG_ON(uabi_class >= ARRAY_SIZE(class_instance));
+   engine->uabi_instance = class_instance[uabi_class]++;
 
/*
 * Replace the internal name with the final user and log facing
@@ -238,11 +239,15 @@ void intel_engines_driver_register(struct 
drm_i915_private *i915)
 */
engine_rename(engine,
  intel_engine_class_repr(engine->class),
- name_instance);
+ engine->uabi_instance);
 
-   if (engine->uabi_class == I915_NO_UABI_CLASS)
+   if (uabi_class > I915_LAST_UABI_ENGINE_CLASS)
continue;
 
+   GEM_BUG_ON(uabi_class >=
+  ARRAY_SIZE(i915->engine_uabi_class_count));
+   i915->engine_uabi_class_count[uabi_class]++;
+
rb_link_node(&engine->uabi_node, prev, p);
rb_insert_color(&engine->uabi_node, &i915->uabi_engines);
 
-- 
2.43.0



Re: [PATCH][next] drm/i915: remove redundant assignement to variable err

2024-04-15 Thread Andi Shyti
Hi Colin,

On Mon, Apr 15, 2024 at 10:56:59AM +0100, Colin Ian King wrote:
> The variable err is being assigned a value 0 that is never read, the
> break statement escapes a do-while loop and then the code returns
> without referencing err. The assignment is redundant and can be
> removed.
> 
> Cleans up clang scan build warning:
> drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c:1075:5: warning: Value
> stored to 'err' is never read [deadcode.DeadStores]
> 
> Signed-off-by: Colin Ian King 

Reviewed-by: Andi Shyti 

Thanks,
Andi


[PATCH v3 3/3] drm/i915/gem: Calculate object page offset for partial memory mapping

2024-04-11 Thread Andi Shyti
To enable partial memory mapping of GPU virtual memory, it's
necessary to introduce an offset to the object's memory
(obj->mm.pages) scatterlist. This adjustment compensates for
instances when userspace mappings do not start from the beginning
of the object.

Based on a patch by Chris Wilson.

Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c | 10 +++---
 drivers/gpu/drm/i915/i915_mm.c   | 12 +++-
 drivers/gpu/drm/i915/i915_mm.h   |  3 ++-
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 4e57844a9ebb..2241257a9ccf 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -252,6 +252,7 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
struct vm_area_struct *area = vmf->vma;
struct i915_mmap_offset *mmo = area->vm_private_data;
struct drm_i915_gem_object *obj = mmo->obj;
+   unsigned long obj_offset;
resource_size_t iomap;
int err;
 
@@ -273,10 +274,11 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
iomap -= obj->mm.region->region.start;
}
 
+   obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node);
/* PTEs are revoked in obj->ops->put_pages() */
err = remap_io_sg(area,
  area->vm_start, area->vm_end - area->vm_start,
- obj->mm.pages->sgl, iomap);
+ obj->mm.pages->sgl, obj_offset, iomap);
 
if (area->vm_flags & VM_WRITE) {
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
@@ -302,14 +304,16 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
bool write = area->vm_flags & VM_WRITE;
struct i915_gem_ww_ctx ww;
+   unsigned long obj_offset;
intel_wakeref_t wakeref;
struct i915_vma *vma;
pgoff_t page_offset;
int srcu;
int ret;
 
-   /* We don't use vmf->pgoff since that has the fake offset */
+   obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node);
page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
+   page_offset += obj_offset;
 
trace_i915_gem_object_fault(obj, page_offset, true, write);
 
@@ -404,7 +408,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
 
/* Finally, remap it using the new GTT offset */
ret = remap_io_mapping(area,
-  area->vm_start + (vma->gtt_view.partial.offset 
<< PAGE_SHIFT),
+  area->vm_start + ((vma->gtt_view.partial.offset 
- obj_offset) << PAGE_SHIFT),
   (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> 
PAGE_SHIFT,
   min_t(u64, vma->size, area->vm_end - 
area->vm_start),
   &ggtt->iomap);
diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c
index 7998bc74ab49..f5c97a620962 100644
--- a/drivers/gpu/drm/i915/i915_mm.c
+++ b/drivers/gpu/drm/i915/i915_mm.c
@@ -122,13 +122,15 @@ int remap_io_mapping(struct vm_area_struct *vma,
  * @addr: target user address to start at
  * @size: size of map area
  * @sgl: Start sg entry
+ * @offset: offset from the start of the page
  * @iobase: Use stored dma address offset by this address or pfn if -1
  *
  *  Note: this is only safe if the mm semaphore is held when called.
  */
 int remap_io_sg(struct vm_area_struct *vma,
unsigned long addr, unsigned long size,
-   struct scatterlist *sgl, resource_size_t iobase)
+   struct scatterlist *sgl, unsigned long offset,
+   resource_size_t iobase)
 {
struct remap_pfn r = {
.mm = vma->vm_mm,
@@ -141,6 +143,14 @@ int remap_io_sg(struct vm_area_struct *vma,
/* We rely on prevalidation of the io-mapping to skip track_pfn(). */
GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS);
 
+   while (offset >= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT) {
+   offset -= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT;
+   r.sgt = __sgt_iter(__sg_next(r.sgt.sgp), use_dma(iobase));
+   if (!r.sgt.sgp)
+   return -EINVAL;
+   }
+   r.sgt.curr = offset << PAGE_SHIFT;
+
if (!use_dma(iobase))
flush_cache_range(vma, addr, size);
 
diff --git a/drivers/gpu/drm/i915/i915_mm.h b/drivers/gpu/drm/i915/i915_mm.h
index 04c8974d822b..69f9351b1a1c 100644
--- a/drivers/gpu/drm/i915/i915_mm.h
+++ b/drivers/gpu/drm/i915/i915_mm.h
@@ -30,6 +30,7 @@ int remap_io_mapping(struct vm_area_struct *vma,
 
 int remap_io_sg(struct 

[PATCH v3 2/3] drm/i915/gem: Do not look for the exact address in node

2024-04-11 Thread Andi Shyti
In preparation for the upcoming partial memory mapping feature,
we want to make sure that when looking for a node we consider
also the offset and not just the starting address of the virtual
memory node.

Signed-off-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index ce10dd259812..4e57844a9ebb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -1030,9 +1030,9 @@ int i915_gem_mmap(struct file *filp, struct 
vm_area_struct *vma)
 
rcu_read_lock();
drm_vma_offset_lock_lookup(dev->vma_offset_manager);
-   node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager,
- vma->vm_pgoff,
- vma_pages(vma));
+   node = drm_vma_offset_lookup_locked(dev->vma_offset_manager,
+   vma->vm_pgoff,
+   vma_pages(vma));
if (node && drm_vma_node_is_allowed(node, priv)) {
/*
 * Skip 0-refcnted objects as it is in the process of being
-- 
2.43.0



[PATCH v3 1/3] drm/i915/gem: Increment vma offset when mapping fb objects

2024-04-11 Thread Andi Shyti
Until now the "vm_pgoff" was not used and there has been no need
to set its offset.

But now, because we want to support partial mappings with a given
offset, we need it to be set.

Suggested-by: Chris Wilson 
Signed-off-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index a2195e28b625..ce10dd259812 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -1084,6 +1084,8 @@ int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, 
struct vm_area_struct *vma
mmo = mmap_offset_attach(obj, mmap_type, NULL);
if (IS_ERR(mmo))
return PTR_ERR(mmo);
+
+   vma->vm_pgoff += drm_vma_node_start(&mmo->vma_node);
}
 
/*
-- 
2.43.0



[PATCH v3 0/3] Add support for partial mapping

2024-04-11 Thread Andi Shyti
Hi,

this series based on a previous work from Chris adds support for
partial mapping.

Two preparatory patches are needed:

 - set the vm_pgoff when mapping frame buffer objects
 - do not fail when the exact address of a VM node is not the
   same as the starting address due to the offset.

Indeed I was receiving a negative offset at first.

Igt tests have been sent to the igt mailing list.

Andi

Test-with: 20240412004255.288046-1-andi.shyti@linux.intel.com

Changelog:
==
v2 -> v3:
 - Add a patch in order to not fail when the exact address of a
   VM node is not the same as the starting address due to the
   offset.

v1 -> v2:
 - Enable support for CPU memory
 - Increment vm_pgoff for fb objects

Andi Shyti (3):
  drm/i915/gem: Increment vma offset when mapping fb objects
  drm/i915/gem: Do not look for the exact address in node
  drm/i915/gem: Calculate object page offset for partial memory mapping

 drivers/gpu/drm/i915/gem/i915_gem_mman.c | 18 --
 drivers/gpu/drm/i915/i915_mm.c   | 12 +++-
 drivers/gpu/drm/i915/i915_mm.h   |  3 ++-
 3 files changed, 25 insertions(+), 8 deletions(-)

-- 
2.43.0



[PATCH i-g-t] i915/gem_mmap_offset: Partial mmap and munmap

2024-04-11 Thread Andi Shyti
From: Chris Wilson 

Based on a test case developed by Lionel Landwerlin, this exercises
creation of partial mmaps using both direct methods of a partial mmap()
(where the mmap() only covers a portion of the object) and
munmap() to do the same.

Signed-off-by: Chris Wilson 
Signed-off-by: Andi Shyti 
---
 tests/intel/gem_mmap_offset.c | 84 +++
 1 file changed, 84 insertions(+)

diff --git a/tests/intel/gem_mmap_offset.c b/tests/intel/gem_mmap_offset.c
index 95d2158ca88f..0ba2f9591f85 100644
--- a/tests/intel/gem_mmap_offset.c
+++ b/tests/intel/gem_mmap_offset.c
@@ -56,6 +56,8 @@
  * SUBTEST: isolation
  * SUBTEST: oob-read
  * SUBTEST: open-flood
+ * SUBTEST: partial-mmap
+ * SUBTEST: partial-unmap
  * SUBTEST: perf
  * SUBTEST: pf-nonblock
  * SUBTEST: ptrace
@@ -874,6 +876,83 @@ static void blt_coherency(int i915)
igt_assert_f(compare_ok, "Problem with coherency, flush is too late\n");
 }
 
+static void partial_mmap(int i915)
+{
+   uint32_t handle;
+
+   handle = gem_create(i915, SZ_2M);
+
+   for_each_mmap_offset_type(i915, t) {
+   struct drm_i915_gem_mmap_offset arg = {
+   .handle = handle,
+   .flags = t->type,
+   };
+   uint32_t *ptr;
+
+   if (mmap_offset_ioctl(i915, &arg))
+   continue;
+
+   ptr = mmap(0, SZ_4K, PROT_WRITE, MAP_SHARED, i915, arg.offset);
+   if (ptr == MAP_FAILED)
+   continue;
+
+   memset(ptr, 0xcc, SZ_4K);
+   munmap(ptr, SZ_4K);
+
+   ptr = mmap(0, SZ_4K, PROT_READ, MAP_SHARED, i915, arg.offset + SZ_2M - SZ_4K);
+   igt_assert(ptr != MAP_FAILED);
+
+   for (uint32_t i = 0; i < SZ_4K / sizeof(uint32_t); i++)
+   igt_assert_eq_u32(ptr[i], 0);
+
+   munmap(ptr, SZ_4K);
+   }
+
+   gem_close(i915, handle);
+}
+
+static void partial_unmap(int i915)
+{
+   uint32_t handle;
+
+   handle = gem_create(i915, SZ_2M);
+
+   for_each_mmap_offset_type(i915, t) {
+   uint8_t *ptr_a, *ptr_b;
+
+   /* mmap the same GEM BO twice */
+   ptr_a = __mmap_offset(i915, handle, 0, SZ_2M,
+   PROT_READ | PROT_WRITE,
+   t->type);
+   if (!ptr_a)
+   continue;
+
+   ptr_b = __mmap_offset(i915, handle, 0, SZ_2M,
+   PROT_READ | PROT_WRITE,
+   t->type);
+   if (!ptr_b)
+   continue;
+
+   /* unmap the first mapping but the last 4k */
+   munmap(ptr_a, SZ_2M - SZ_4K);
+
+   /* memset that remaining 4k with 0xcc */
+   memset(ptr_a + SZ_2M - SZ_4K, 0xcc, SZ_4K);
+
+   /* memset the first page of the 2Mb with 0xdd */
+   memset(ptr_b, 0xdd, SZ_4K);
+
+   for (uint32_t i = 0; i < SZ_4K; i++)
+   igt_assert_eq_u32(ptr_a[SZ_2M - SZ_4K + i], 0xcc);
+
+   munmap(ptr_a + SZ_2M - SZ_4K, SZ_4K);
+   memset(ptr_b, 0, SZ_2M);
+   munmap(ptr_b, SZ_2M);
+   }
+
+   gem_close(i915, handle);
+}
+
 static int mmap_gtt_version(int i915)
 {
int gtt_version = -1;
@@ -931,6 +1010,11 @@ igt_main
igt_subtest_f("open-flood")
open_flood(i915, 20);
 
+   igt_subtest_f("partial-mmap")
+   partial_mmap(i915);
+   igt_subtest_f("partial-unmap")
+   partial_unmap(i915);
+
igt_subtest_with_dynamic("clear") {
for_each_memory_region(r, i915) {
igt_dynamic_f("%s", r->name)
-- 
2.43.0



Re: [PATCH 11/10] MAINTAINERS: update i915 and xe entries for include/drm/intel

2024-04-11 Thread Andi Shyti
Hi Jani,

On Thu, Apr 11, 2024 at 06:45:03PM +0300, Jani Nikula wrote:
> With all the Intel specific drm files under include/drm/intel, update
> the i915, xe, and the shared display entries. Do not discriminate based
> on file name pattern, just add the entire directory for all three
> entries.
> 
> Cc: Joonas Lahtinen 
> Cc: Lucas De Marchi 
> Cc: Oded Gabbay 
> Cc: Rodrigo Vivi 
> Cc: Thomas Hellström 
> Cc: Tvrtko Ursulin 
> Signed-off-by: Jani Nikula 

Reviewed-by: Andi Shyti 

Thanks,
Andi


Re: [PATCH v2 2/2] drm/i915/gem: Calculate object page offset for partial memory mapping

2024-04-11 Thread Andi Shyti
Hi Nirmoy,

On Thu, Apr 11, 2024 at 04:18:41PM +0200, Nirmoy Das wrote:
> Hi Andi,
> 
> On 3/29/2024 5:39 PM, Andi Shyti wrote:
> > To enable partial memory mapping of GPU virtual memory, it's
> > necessary to introduce an offset to the object's memory
> > (obj->mm.pages) scatterlist. This adjustment compensates for
> > instances when userspace mappings do not start from the beginning
> > of the object.
> 
> I quickly tried 
> https://gitlab.freedesktop.org/llandwerlin/igt-gpu-tools/-/tree/wip/gem_mmap_offset-partial-unmap?ref_type=heads
> that didn't work for GTT.
> 
> Please make sure a proper IGT test is available for this as this looks very
> risky change.

Yes, I have igt's ready and a new v3.

Thanks for trying this out.

Andi


Re: [PATCH v2] drm: move i915_drm.h under include/drm/intel

2024-04-11 Thread Andi Shyti
Hi Jani,

On Wed, Apr 10, 2024 at 01:26:15PM +0300, Jani Nikula wrote:
> Clean up the top level include/drm directory by grouping all the Intel
> specific files under a common subdirectory.
> 
> v2: Also fix comment in intel_pci_config.h (Ilpo)
> 
> Cc: Daniel Vetter 
> Cc: Dave Airlie 
> Cc: Lucas De Marchi 
> Cc: Bjorn Helgaas 
> Cc: Hans de Goede 
> Cc: Ilpo Järvinen 
> Signed-off-by: Jani Nikula 
> ---
>  arch/x86/kernel/early-quirks.c | 2 +-
>  drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 2 +-
>  drivers/gpu/drm/i915/gt/intel_ggtt.c   | 2 +-
>  drivers/gpu/drm/i915/gt/intel_rps.c| 2 +-
>  drivers/gpu/drm/i915/intel_pci_config.h| 2 +-
>  drivers/gpu/drm/i915/soc/intel_gmch.c  | 2 +-
>  drivers/gpu/drm/xe/xe_ggtt.c   | 2 +-
>  drivers/platform/x86/intel_ips.c   | 2 +-
>  include/drm/{ => intel}/i915_drm.h | 0
>  9 files changed, 8 insertions(+), 8 deletions(-)
>  rename include/drm/{ => intel}/i915_drm.h (100%)

Am I seeing wrong or are you missing a bunch of them, like
Documentation, MAINTAINERS and many more?

Andi


Re: [PATCH 05/10] drm: move intel_lpe_audio.h under include/drm/intel

2024-04-11 Thread Andi Shyti
Hi Jani,

On Wed, Apr 10, 2024 at 01:05:12PM +0300, Jani Nikula wrote:
> Clean up the top level include/drm directory by grouping all the Intel
> specific files under a common subdirectory.
> 
> Cc: Daniel Vetter 
> Cc: Dave Airlie 
> Cc: Lucas De Marchi 
> Cc: Jaroslav Kysela 
> Cc: Takashi Iwai 
> Signed-off-by: Jani Nikula 
> ---
>  drivers/gpu/drm/i915/display/intel_hdmi.c  | 2 +-
>  drivers/gpu/drm/i915/display/intel_lpe_audio.c | 2 +-
>  include/drm/{ => intel}/intel_lpe_audio.h  | 0

strange thing here is that we have two different
intel_lpe_audio.h. Can't they be merged?

And, perhaps, we could also think of dropping the intel_ prefix
for the files inside drm/intel/.

In any case,

Reviewed-by: Andi Shyti 

Thanks,
Andi


Re: [PATCH 04/10] drm: move i915_component.h under include/drm/intel

2024-04-11 Thread Andi Shyti
Hi Jani,

On Wed, Apr 10, 2024 at 01:05:11PM +0300, Jani Nikula wrote:
> Clean up the top level include/drm directory by grouping all the Intel
> specific files under a common subdirectory.
> 
> Cc: Daniel Vetter 
> Cc: Dave Airlie 
> Cc: Lucas De Marchi 
> Cc: Tomas Winkler 
> Cc: Jaroslav Kysela 
> Cc: Takashi Iwai 
> Signed-off-by: Jani Nikula 
> ---
>  drivers/gpu/drm/i915/display/intel_audio.c   | 2 +-
>  drivers/gpu/drm/i915/display/intel_hdcp.c| 2 +-
>  drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c | 2 +-
>  drivers/gpu/drm/i915/pxp/intel_pxp_tee.c | 2 +-
>  drivers/gpu/drm/xe/xe_gsc_proxy.c| 2 +-
>  drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c   | 2 +-
>  drivers/misc/mei/hdcp/mei_hdcp.c | 2 +-
>  drivers/misc/mei/pxp/mei_pxp.c   | 2 +-
>  include/drm/{ => intel}/i915_component.h | 0
>  include/sound/hdaudio.h  | 2 +-

Please update also Documentation/gpu/i915.rst.

Andi


Re: [PATCH 03/10] drm: move i915_gsc_proxy_mei_interface.h under include/drm/intel

2024-04-11 Thread Andi Shyti
Hi Jani,

On Wed, Apr 10, 2024 at 01:05:10PM +0300, Jani Nikula wrote:
> Clean up the top level include/drm directory by grouping all the Intel
> specific files under a common subdirectory.
> 
> Cc: Daniel Vetter 
> Cc: Dave Airlie 
> Cc: Lucas De Marchi 
> Cc: Tomas Winkler 
> Signed-off-by: Jani Nikula 

Reviewed-by: Andi Shyti 

Thanks,
Andi


Re: [PATCH 02/10] drm: move intel-gtt.h under include/drm/intel

2024-04-10 Thread Andi Shyti
On Wed, Apr 10, 2024 at 01:05:09PM +0300, Jani Nikula wrote:
> Clean up the top level include/drm directory by grouping all the Intel
> specific files under a common subdirectory.
> 
> Cc: Daniel Vetter 
> Cc: Dave Airlie 
> Cc: Lucas De Marchi 
> Signed-off-by: Jani Nikula 

Reviewed-by: Andi Shyti 

Thanks,
Andi


Re: [PATCH 01/10] drm/i915: use system include for drm headers

2024-04-10 Thread Andi Shyti
Hi Jani,

On Wed, Apr 10, 2024 at 01:05:08PM +0300, Jani Nikula wrote:
> Use <> instead of "" for including headers from include/, even if the
> file is in the same directory.
> 
> Signed-off-by: Jani Nikula 

Reviewed-by: Andi Shyti 

Thanks,
Andi


Re: [PATCH] drm/i915: Don't enable hwmon for selftests

2024-04-10 Thread Andi Shyti
Hi Ashutosh,

please use "git format-patch -v 3 ..." which generates subject
[PATCH v3] ...". Otherwise it gets confusing to see the patch
that needs to be reviewed.

On Tue, Apr 09, 2024 at 11:05:49PM -0700, Ashutosh Dixit wrote:
> There are no hwmon selftests so there is no need to enable hwmon for
> selftests. So enable hwmon only for real driver load.
> 
> v2: Move the logic inside i915_hwmon.c
> v3: Move is_i915_selftest definition to i915_selftest.h (Badal)
> 
> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10366
> Signed-off-by: Ashutosh Dixit 
> ---
>  drivers/gpu/drm/i915/i915_hwmon.c|  3 ++-
>  drivers/gpu/drm/i915/i915_selftest.h | 10 ++
>  2 files changed, 12 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
> b/drivers/gpu/drm/i915/i915_hwmon.c
> index 8c3f443c8347..cf1689333ebf 100644
> --- a/drivers/gpu/drm/i915/i915_hwmon.c
> +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> @@ -10,6 +10,7 @@
>  #include "i915_drv.h"
>  #include "i915_hwmon.h"
>  #include "i915_reg.h"
> +#include "i915_selftest.h"
>  #include "intel_mchbar_regs.h"
>  #include "intel_pcode.h"
>  #include "gt/intel_gt.h"
> @@ -789,7 +790,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
>   int i;
>  
>   /* hwmon is available only for dGfx */
> - if (!IS_DGFX(i915))
> + if (!IS_DGFX(i915) || is_i915_selftest())
>   return;

I wonder if this is the right place to put it or rather place it
in i915_driver.c and avoid calling i915_hwmon_register() at all.

In any case, it's good:

Reviewed-by: Andi Shyti 

Thanks,
Andi


Re: [PATCH] drm/i915/guc: Fix the fix for reset lock confusion

2024-04-03 Thread Andi Shyti
Hi John,

On Fri, Mar 29, 2024 at 04:53:05PM -0700, john.c.harri...@intel.com wrote:
> From: John Harrison 
> 
> The previous fix for the circular lock splat about the busyness
> worker wasn't quite complete. Even though the reset-in-progress flag
> is cleared at the start of intel_uc_reset_finish, the entire function
> is still inside the reset mutex lock. Not sure why the patch appeared
> to fix the issue both locally and in CI. However, it is now back
> again.
> 
> There is a further complication the wedge code path within
> intel_gt_reset() jumps around so much it results in nested
> reset_prepare/_finish calls. That is, the call sequence is:
>   intel_gt_reset
>   | reset_prepare
>   | __intel_gt_set_wedged
>   | | reset_prepare
>   | | reset_finish
>   | reset_finish
> 
> The nested finish means that even if the clear of the in-progress flag
> was moved to the end of _finish, it would still be clear for the
> entire second call. Surprisingly, this does not seem to be causing any
> other problems at present.
> 
> As an aside, a wedge on fini does not call the finish functions at
> all. The reset_in_progress flag is left set (twice).
> 
> So instead of trying to cancel the worker anywhere at all in the reset
> path, just add a cancel to intel_guc_submission_fini instead. Note
> that it is not a problem if the worker is still active during a reset.
> Either it will run before the reset path starts locking things and
> will simply block the reset code for a tiny amount of time. Or it will
> run after the locks have been acquired and will early exit due to the
> try-lock.
> 
> Also, do not use the reset-in-progress flag to decide whether a
> synchronous cancel is safe (from a lockdep perspective) or not.
> Instead, use the actual reset mutex state (both the genuine one and
> the custom rolled BACKOFF one).
> 
> Fixes: 0e00a8814eec ("drm/i915/guc: Avoid circular locking issue on busyness 
> flush")
> Signed-off-by: John Harrison 
> Cc: Zhanjun Dong 
> Cc: John Harrison 
> Cc: Andi Shyti 
> Cc: Daniel Vetter 
> Cc: Daniel Vetter 
> Cc: Rodrigo Vivi 
> Cc: Nirmoy Das 
> Cc: Tvrtko Ursulin 
> Cc: Umesh Nerlige Ramappa 
> Cc: Andrzej Hajda 
> Cc: Matt Roper 
> Cc: Jonathan Cavitt 
> Cc: Prathap Kumar Valsan 
> Cc: Alan Previn 
> Cc: Madhumitha Tolakanahalli Pradeep 
> 
> Cc: Daniele Ceraolo Spurio 
> Cc: Ashutosh Dixit 
> Cc: Dnyaneshwar Bhadane 

Looks good:

Reviewed-by: Andi Shyti 

Thanks,
Andi


Re: ✗ Fi.CI.IGT: failure for Disable automatic load CCS load balancing (rev14)

2024-03-29 Thread Andi Shyti
Hi,

On Sat, Mar 30, 2024 at 12:03:08AM -, Patchwork wrote:
> Patch Details
> 
> Series:  Disable automatic load CCS load balancing (rev14)
> URL: https://patchwork.freedesktop.org/series/129951/
> State:   failure
> Details: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_129951v14/
>  index.html
> 
> CI Bug Log - changes from CI_DRM_14506_full -> Patchwork_129951v14_full
> 
> Summary
> 
> FAILURE
> 
> Serious unknown changes coming with Patchwork_129951v14_full absolutely need 
> to
> be
> verified manually.
> 
> If you think the reported changes have nothing to do with the changes
> introduced in Patchwork_129951v14_full, please notify your bug team
> (i915-ci-in...@lists.freedesktop.org) to allow them
> to document this new failure mode, which will reduce false positives in CI.
> 
> Participating hosts (10 -> 9)
> 
> Missing (1): shard-snb-0
> 
> Possible new issues
> 
> Here are the unknown changes that may have been introduced in
> Patchwork_129951v14_full:
> 
> IGT changes
> 
> Possible regressions
> 
>   • igt@sysfs_heartbeat_interval@nopreempt@vcs0:
>   □ shard-dg2: NOTRUN -> INCOMPLETE

This looks unrelated. I also see from the previous shards tests
that I get some random failures.

I'm going ahead and merge this series.

Andi


Re: [PATCH v0 02/14] drm/amdgpu,drm/radeon: Make I2C terminology more inclusive

2024-03-29 Thread Andi Shyti
Hi,

On Fri, Mar 29, 2024 at 10:28:14AM -0700, Easwar Hariharan wrote:
> On 3/29/2024 10:16 AM, Andi Shyti wrote:
> > Hi Easwar,
> > 
> > On Fri, Mar 29, 2024 at 05:00:26PM +, Easwar Hariharan wrote:
> >> I2C v7, SMBus 3.2, and I3C specifications have replaced "master/slave"
> > 
> > I don't understand why we forget that i3c is 1.1.1 :-)
> 
> That's because it's a copy-paste error from Wolfram's cover letter. :) I'll 
> update
> next go-around.

not a binding comment, though. Just for completeness, because we
are giving the version to the i2c and smbus, but not i3c.

> >> with more appropriate terms. Inspired by and following on to Wolfram's
> >> series to fix drivers/i2c/[1], fix the terminology for users of
> >> I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
> >> in the specification.
> > 
> > The specification talks about:
> > 
> >  - master -> controller
> >  - slave -> target (and not client)
> > 
> > But both you and Wolfram have used client. I'd like to reach
> > some more consistency here.
> 
> I had the impression that remote targets (i.e external to the device) were to 
> be called clients,
> e.g. the QSFP FRUs in drivers/infiniband, and internal ones targets.
> I chose the terminology according to that understanding, but now I can't find 
> where I got that
> information.

The word "client" does not even appear in the documentation (only
one instance in the i3c document), so that the change is not
related to the document as stated in the commit log. Unless, of
course, I am missing something.

I'm OK with choosing a "customized" naming, but we need to reach
an agreement.

I raised the same question to Wolfram.

Thanks,
Andi


Re: [PATCH v0 02/14] drm/amdgpu,drm/radeon: Make I2C terminology more inclusive

2024-03-29 Thread Andi Shyti
Hi Easwar,

On Fri, Mar 29, 2024 at 05:00:26PM +, Easwar Hariharan wrote:
> I2C v7, SMBus 3.2, and I3C specifications have replaced "master/slave"

I don't understand why we forget that i3c is 1.1.1 :-)

> with more appropriate terms. Inspired by and following on to Wolfram's
> series to fix drivers/i2c/[1], fix the terminology for users of
> I2C_ALGOBIT bitbanging interface, now that the approved verbiage exists
> in the specification.

The specification talks about:

 - master -> controller
 - slave -> target (and not client)

But both you and Wolfram have used client. I'd like to reach
some more consistency here.

Thanks,
Andi


[PATCH v2 2/2] drm/i915/gem: Calculate object page offset for partial memory mapping

2024-03-29 Thread Andi Shyti
To enable partial memory mapping of GPU virtual memory, it's
necessary to introduce an offset to the object's memory
(obj->mm.pages) scatterlist. This adjustment compensates for
instances when userspace mappings do not start from the beginning
of the object.

Based on a patch by Chris Wilson.

Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c | 10 +++---
 drivers/gpu/drm/i915/i915_mm.c   | 12 +++-
 drivers/gpu/drm/i915/i915_mm.h   |  3 ++-
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index ce10dd259812..9bd2b4c2e501 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -252,6 +252,7 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
struct vm_area_struct *area = vmf->vma;
struct i915_mmap_offset *mmo = area->vm_private_data;
struct drm_i915_gem_object *obj = mmo->obj;
+   unsigned long obj_offset;
resource_size_t iomap;
int err;
 
@@ -273,10 +274,11 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
iomap -= obj->mm.region->region.start;
}
 
+   obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node);
/* PTEs are revoked in obj->ops->put_pages() */
err = remap_io_sg(area,
  area->vm_start, area->vm_end - area->vm_start,
- obj->mm.pages->sgl, iomap);
+ obj->mm.pages->sgl, obj_offset, iomap);
 
if (area->vm_flags & VM_WRITE) {
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
@@ -302,14 +304,16 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
bool write = area->vm_flags & VM_WRITE;
struct i915_gem_ww_ctx ww;
+   unsigned long obj_offset;
intel_wakeref_t wakeref;
struct i915_vma *vma;
pgoff_t page_offset;
int srcu;
int ret;
 
-   /* We don't use vmf->pgoff since that has the fake offset */
+   obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node);
page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
+   page_offset += obj_offset;
 
trace_i915_gem_object_fault(obj, page_offset, true, write);
 
@@ -404,7 +408,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
 
/* Finally, remap it using the new GTT offset */
ret = remap_io_mapping(area,
-  area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT),
+  area->vm_start + ((vma->gtt_view.partial.offset - obj_offset) << PAGE_SHIFT),
   (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT,
   min_t(u64, vma->size, area->vm_end - area->vm_start),
   &ggtt->iomap);
diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c
index 7998bc74ab49..f5c97a620962 100644
--- a/drivers/gpu/drm/i915/i915_mm.c
+++ b/drivers/gpu/drm/i915/i915_mm.c
@@ -122,13 +122,15 @@ int remap_io_mapping(struct vm_area_struct *vma,
  * @addr: target user address to start at
  * @size: size of map area
  * @sgl: Start sg entry
+ * @offset: offset from the start of the page
  * @iobase: Use stored dma address offset by this address or pfn if -1
  *
  *  Note: this is only safe if the mm semaphore is held when called.
  */
 int remap_io_sg(struct vm_area_struct *vma,
unsigned long addr, unsigned long size,
-   struct scatterlist *sgl, resource_size_t iobase)
+   struct scatterlist *sgl, unsigned long offset,
+   resource_size_t iobase)
 {
struct remap_pfn r = {
.mm = vma->vm_mm,
@@ -141,6 +143,14 @@ int remap_io_sg(struct vm_area_struct *vma,
/* We rely on prevalidation of the io-mapping to skip track_pfn(). */
GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS);
 
+   while (offset >= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT) {
+   offset -= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT;
+   r.sgt = __sgt_iter(__sg_next(r.sgt.sgp), use_dma(iobase));
+   if (!r.sgt.sgp)
+   return -EINVAL;
+   }
+   r.sgt.curr = offset << PAGE_SHIFT;
+
if (!use_dma(iobase))
flush_cache_range(vma, addr, size);
 
diff --git a/drivers/gpu/drm/i915/i915_mm.h b/drivers/gpu/drm/i915/i915_mm.h
index 04c8974d822b..69f9351b1a1c 100644
--- a/drivers/gpu/drm/i915/i915_mm.h
+++ b/drivers/gpu/drm/i915/i915_mm.h
@@ -30,6 +30,7 @@ int remap_io_mapping(struct vm_area_struct *vma,
 
 int remap_io_sg(struct 

[PATCH v2 1/2] drm/i915/gem: Increment vma offset when mapping fb objects

2024-03-29 Thread Andi Shyti
Until now the "vm_pgoff" was not used and there has been no need
to set its offset.

But now, because we want to support partial mappings with a given
offset, we need it to be set.

Suggested-by: Chris Wilson 
Signed-off-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index a2195e28b625..ce10dd259812 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -1084,6 +1084,8 @@ int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, 
struct vm_area_struct *vma
mmo = mmap_offset_attach(obj, mmap_type, NULL);
if (IS_ERR(mmo))
return PTR_ERR(mmo);
+
+   vma->vm_pgoff += drm_vma_node_start(&mmo->vma_node);
}
 
/*
-- 
2.43.0



[PATCH v2 0/2] Add support for partial mapping

2024-03-29 Thread Andi Shyti
Hi,

this series based on a previous work from Chris adds support for
partial mapping.

A preparatory patch was needed in order to set the vm_pgoff when
mapping frame buffer objects. Indeed I was receiving a negative
offset at first.

Andi

Changelog:
==
v1 -> v2:
 - Enable support for CPU memory
 - Increment vm_pgoff for fb objects

Andi Shyti (2):
  drm/i915/gem: Increment vma offset when mapping fb objects
  drm/i915/gem: Calculate object page offset for partial memory mapping

 drivers/gpu/drm/i915/gem/i915_gem_mman.c | 12 +---
 drivers/gpu/drm/i915/i915_mm.c   | 12 +++-
 drivers/gpu/drm/i915/i915_mm.h   |  3 ++-
 3 files changed, 22 insertions(+), 5 deletions(-)

-- 
2.43.0



Re: [PATCH] drm/i915/guc: Remove bogus null check

2024-03-29 Thread Andi Shyti
Hi Rodrigo,

On Thu, Mar 28, 2024 at 09:39:17PM -0400, Rodrigo Vivi wrote:
> On Thu, Mar 28, 2024 at 10:41:55PM +0100, Andi Shyti wrote:
> > On Thu, Mar 28, 2024 at 05:31:07PM -0400, Rodrigo Vivi wrote:
> > > This null check is bogus because we are already using 'ce' stuff
> > > in many places before this function is called.
> > > 
> > > Having this here is useless and confuses static analyzer tools
> > > that can see:
> > > 
> > > struct intel_engine_cs *engine = ce->engine;
> > > 
> > > before this check, in the same function.
> > > 
> > > Fixes: cec82816d0d0 ("drm/i915/guc: Use context hints for GT frequency")
> > 
> > there is no need to have the Fixes tag here.
> 
> why not? I imagine distros that have this commit cec82816d0d0 and use
> static analyzers would also want this patch ported to silent those, no?!

Still... it's not a bug. The tag "Fixes:" should be used when a
bug is fixed, but not for harmless static analyzer reports.

Besides, if we want to keep the Fixes tag we should also Cc
stable, i guess checkpatch.pl complains about it.

(BTW, Cc'ed in this mail we have the inventor of the tag and he
can confirm after having had his morning coffee :-) ).

> > > Reported-by: kernel test robot 
> > > Reported-by: Dan Carpenter 
> > > Closes: https://lore.kernel.org/r/202403101225.7ahejhzj-...@intel.com/
> > > Cc: Vinay Belgaumkar 
> > > Cc: John Harrison 
> > > Signed-off-by: Rodrigo Vivi 
> > > ---
> > >  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> > > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > index 01d0ec1b30f2..24a82616f844 100644
> > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > @@ -2677,7 +2677,7 @@ static int guc_context_policy_init_v70(struct 
> > > intel_context *ce, bool loop)
> > >   execution_quantum = engine->props.timeslice_duration_ms * 1000;
> > >   preemption_timeout = engine->props.preempt_timeout_ms * 1000;
> > >  
> > > - if (ce && (ce->flags & BIT(CONTEXT_LOW_LATENCY)))
> > > + if (ce->flags & BIT(CONTEXT_LOW_LATENCY))
> > 
> > We could keep the check but make it earlier.
> 
> yes, that's another alternative.
> 
> 
> -struct intel_engine_cs *engine = ce->engine;
> +struct intel_engine_cs *engine;
> 
> if (!ce)
>return;
> 
> engine = ce->engine.

this is what I meant...

> But looking to the 2 places where this function is getting called,
> we already have ce->something used.

... and I also checked where the function is called and how it's
called: I see that if we get here then for sure 'ce' is not NULL.
But for robustness we could still keep it.

Either way I think your patch is good except for the "Fixes:"
tag:

Reviewed-by: Andi Shyti 

Thanks,
Andi

> I can make the change to be like that if you believe that there's
> a possibility in the future that we change that, just to be on
> the safe side.
> 
> or anything else I might be missing?
> 
> Thanks for looking into this,
> Rodrigo.
> 
> > 
> > Thanks,
> > Andi
> > 
> > >   slpc_ctx_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
> > >  
> > >   __guc_context_policy_start_klv(&policy, ce->guc_id.id);
> > > -- 
> > > 2.44.0


Re: [PATCH] drm/i915/guc: Remove bogus null check

2024-03-28 Thread Andi Shyti
Hi Rodrigo,

On Thu, Mar 28, 2024 at 05:31:07PM -0400, Rodrigo Vivi wrote:
> This null check is bogus because we are already using 'ce' stuff
> in many places before this function is called.
> 
> Having this here is useless and confuses static analyzer tools
> that can see:
> 
> struct intel_engine_cs *engine = ce->engine;
> 
> before this check, in the same function.
> 
> Fixes: cec82816d0d0 ("drm/i915/guc: Use context hints for GT frequency")

there is no need to have the Fixes tag here.

> Reported-by: kernel test robot 
> Reported-by: Dan Carpenter 
> Closes: https://lore.kernel.org/r/202403101225.7ahejhzj-...@intel.com/
> Cc: Vinay Belgaumkar 
> Cc: John Harrison 
> Signed-off-by: Rodrigo Vivi 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 01d0ec1b30f2..24a82616f844 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -2677,7 +2677,7 @@ static int guc_context_policy_init_v70(struct 
> intel_context *ce, bool loop)
>   execution_quantum = engine->props.timeslice_duration_ms * 1000;
>   preemption_timeout = engine->props.preempt_timeout_ms * 1000;
>  
> - if (ce && (ce->flags & BIT(CONTEXT_LOW_LATENCY)))
> + if (ce->flags & BIT(CONTEXT_LOW_LATENCY))

We could keep the check but make it earlier.

Thanks,
Andi

>   slpc_ctx_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
>  
>   __guc_context_policy_start_klv(&policy, ce->guc_id.id);
> -- 
> 2.44.0


Re: [PATCHv2] drm/xe/display: check for error on drmm_mutex_init

2024-03-28 Thread Andi Shyti
Hi Arun,

> > On Thu, Mar 28, 2024 at 12:33:09PM +0200, Jani Nikula wrote:
> > > On Thu, 28 Mar 2024, Andi Shyti  wrote:
> > > >> -  drmm_mutex_init(&xe->drm, &xe->sb_lock);
> > > >> -  drmm_mutex_init(&xe->drm, &xe->display.backlight.lock);
> > > >> -  drmm_mutex_init(&xe->drm, &xe->display.audio.mutex);
> > > >> -  drmm_mutex_init(&xe->drm, &xe->display.wm.wm_mutex);
> > > >> -  drmm_mutex_init(&xe->drm, &xe->display.pps.mutex);
> > > >> -  drmm_mutex_init(&xe->drm, &xe->display.hdcp.hdcp_mutex);
> > > >> +  if (drmm_mutex_init(&xe->drm, &xe->sb_lock) ||
> > > >> +  drmm_mutex_init(&xe->drm, &xe->display.backlight.lock) ||
> > > >> +  drmm_mutex_init(&xe->drm, &xe->display.audio.mutex) ||
> > > >> +  drmm_mutex_init(&xe->drm, &xe->display.wm.wm_mutex) ||
> > > >> +  drmm_mutex_init(&xe->drm, &xe->display.pps.mutex) ||
> > > >> +  drmm_mutex_init(&xe->drm, &xe->display.hdcp.hdcp_mutex))
> > > >> +  return -ENOMEM;
> > > >
> > > > why not extract the value from drmm_mutex_init()? it would make the
> > > > code a bit more complex, but better than forcing a -ENOMEM return.
> > > >
> > > > err = drmm_mutex_init(...)
> > > > if (err)
> > > > return err;
> > > >
> > > > err = drmm_mutex_init(...)
> > > > if (err)
> > > > return err;
> > > >
> > > > err = drmm_mutex_init(...)
> > > > if (err)
> > > > return err;
> > > >
> > > > ...
> > > >
> > > > On the other hand drmm_mutex_init(), as of now returns only -ENOMEM,
> 
> The function is also returning -ENOMEM and there is no other error code 
> returned from the error.

yes, but it's wrong to assume this will always be true.

> > > > but it's a bad practice to assume it will always do. I'd rather
> > > > prefer not to check the error value at all.
> > >
> > > And round and round we go. This is exactly what v1 was [1], but it's
> > > not clear because the patch doesn't have a changelog.
> > 
> > ha! funny! I missed v1.
> > 
> > > This is all utterly ridiculous compared to *why* we even have or use
> > > drmm_mutex_init(). Managed initialization causes more trouble here
> > > than it gains us. Gah.
> > 
> > As I wrote here:
> > 
> > > > I'd rather prefer not to check the error value at all.
> > 
> > we could rather drop this patch. Checking the error value is always good, 
> > but
> > checking implausible errors with this price is not really worth it.
> 
> This is reported as error from Coverity. My suggestion was also to discard 
> this error from Coverity but the same API used in other places in our driver 
> is considering the return value.

Strictly speaking, coverity is right and if I have to choose, I'd
prefer your v1. v2, in my opinion, is wrong, even if it's more
nice and readable.

Thanks,
Andi


Re: [PATCH] drm/i915/gem: Calculate object page offset for partial memory mapping

2024-03-28 Thread Andi Shyti
Hi Nirmoy,

On Tue, Mar 26, 2024 at 01:05:37PM +0100, Nirmoy Das wrote:
> On 3/26/2024 12:12 PM, Andi Shyti wrote:

> > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
> > > > b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > > > index a2195e28b625..57a2dda2c3cc 100644
> > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > > > @@ -276,7 +276,7 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
> > > > /* PTEs are revoked in obj->ops->put_pages() */
> > > > err = remap_io_sg(area,
> > > >   area->vm_start, area->vm_end - area->vm_start,
> > > > - obj->mm.pages->sgl, iomap);
> > > > + obj->mm.pages->sgl, 0, iomap);
> > > Why don't we need partial mmap for CPU but only for GTT ?
> > As far as I understood we don't. I have a version with the CPU
> > offset as well in trybot[*]
> > 
> > But without support for segmented buffer objects, I don't know
> > how much this has any effect.
> 
> You confused me more :) Why segmented buffer object is needed for partial
> CPU mmap but not for GTT  ?

atually segmented bo's were introduced to support single dma
buffers instead of fragmented buffers. But this goes beyond the
scope of this patch.

> From high level,  GTT and CPU both should support partial mmap unless I
> missing something here.

But yes, we could take the patch I linked which adds some offset
to the cpu memory. I will add it in V2.

> > 
> > > Sounds like this also need to be cover by a IGT tests.
> > Yes, I it does need some igt work, working on it.
> > 
> > > Don't we need "Fixes" tag for this?
> > Why should we? I'm not fixing anything here,
> 
> If userspace  expects partial mmap to work then this is a bug/gap in i915 so
> we need to
> 
> backport this as far as possible. Need some information about the
> requirement about  why we need this patch suddenly?

But a gap is not a bug. Theoretically we are adding a feature.

On the other hand it would be a bug if the API promises to add
the offset but in reality it doesn't. I will check if this is the
case and it needs to be well described in the commit message.

Thanks,
Andi


Re: [PATCHv2] drm/xe/display: check for error on drmm_mutex_init

2024-03-28 Thread Andi Shyti
Hi Jani,

On Thu, Mar 28, 2024 at 12:33:09PM +0200, Jani Nikula wrote:
> On Thu, 28 Mar 2024, Andi Shyti  wrote:
> >> -  drmm_mutex_init(>drm, >sb_lock);
> >> -  drmm_mutex_init(>drm, >display.backlight.lock);
> >> -  drmm_mutex_init(>drm, >display.audio.mutex);
> >> -  drmm_mutex_init(>drm, >display.wm.wm_mutex);
> >> -  drmm_mutex_init(>drm, >display.pps.mutex);
> >> -  drmm_mutex_init(>drm, >display.hdcp.hdcp_mutex);
> >> +  if (drmm_mutex_init(>drm, >sb_lock) ||
> >> +  drmm_mutex_init(>drm, >display.backlight.lock) ||
> >> +  drmm_mutex_init(>drm, >display.audio.mutex) ||
> >> +  drmm_mutex_init(>drm, >display.wm.wm_mutex) ||
> >> +  drmm_mutex_init(>drm, >display.pps.mutex) ||
> >> +  drmm_mutex_init(>drm, >display.hdcp.hdcp_mutex))
> >> +  return -ENOMEM;
> >
> > why not extract the value from drmm_mutex_init()? it would make
> > the code a bit more complex, but better than forcing a -ENOMEM
> > return.
> >
> > err = drmm_mutex_init(...)
> > if (err)
> > return err;
> >
> > err = drmm_mutex_init(...)
> > if (err)
> > return err;
> >
> > err = drmm_mutex_init(...)
> > if (err)
> > return err;
> > 
> > ...
> >
> > On the other hand drmm_mutex_init(), as of now returns only
> > -ENOMEM, but it's a bad practice to assume it will always do. I'd
> > rather prefer not to check the error value at all.
> 
> And round and round we go. This is exactly what v1 was [1], but it's not
> clear because the patch doesn't have a changelog.

ha! funny! I missed v1.

> This is all utterly ridiculous compared to *why* we even have or use
> drmm_mutex_init(). Managed initialization causes more trouble here than
> it gains us. Gah.

As I wrote here:

> > I'd rather prefer not to check the error value at all.

we could rather drop this patch. Checking the error value is
always good, but checking implausible errors with this price is
not really worth it.

At the end drmm_mutex_init() should make our life easier.

Andi


Re: [PATCH] drm/i915/gt: Limit the reserved VM space to only the platforms that need it

2024-03-28 Thread Andi Shyti
Hi Nirmoy,

On Thu, Mar 28, 2024 at 09:54:12AM +0100, Nirmoy Das wrote:
> On 3/27/2024 9:05 PM, Andi Shyti wrote:
> > Commit 9bb66c179f50 ("drm/i915: Reserve some kernel space per
> > vm") reduces the available VM space of one page in order to apply
> > Wa_16018031267 and Wa_16018063123.
> > 
> > This page was reserved indiscrimitely in all platforms even when
> > not needed. Limit it to DG2 onwards.
> 
> I would use "Limit it to platforms that need WAs" as those WA are only

Thanks, will improve it.

> needed till 12.71,  otherwise

I easily get confused by the versioning... is 12.71 defined at
all in i915?

Thanks,
Andi


Re: [PATCH] drm/i915/gem: Replace dev_priv with i915

2024-03-28 Thread Andi Shyti
Hi,

On Thu, Mar 28, 2024 at 08:18:33AM +0100, Andi Shyti wrote:
> Anyone using 'dev_priv' instead of 'i915' in a cleaned-up area
> should be fined and required to do community service for a few
> days.

Not to scare people off, I would add another sentence in between:

"Using 'i915' instead of 'dev_priv' has been the preferred
practice over the past years and some effort has been spent to
replace 'dev_priv' with 'i915'. Therefore, 'dev_priv' should
almost never be used (unless it breaks some defines which are
dependent on the naming)."

> I thought I had cleaned up the 'gem/' directory in the past, but
> still, old aficionados of the 'dev_priv' name keep sneaking it
> in.
> 
> Signed-off-by: Andi Shyti 
> Cc: Jani Nikula 
> Cc: Joonas Lahtinen 
> Cc: Rodrigo Vivi 
> Cc: Tvrtko Ursulin 

Thanks,
Andi


Re: [PATCHv2] drm/xe/display: check for error on drmm_mutex_init

2024-03-28 Thread Andi Shyti
Hi Arun,

...

> - drmm_mutex_init(>drm, >sb_lock);
> - drmm_mutex_init(>drm, >display.backlight.lock);
> - drmm_mutex_init(>drm, >display.audio.mutex);
> - drmm_mutex_init(>drm, >display.wm.wm_mutex);
> - drmm_mutex_init(>drm, >display.pps.mutex);
> - drmm_mutex_init(>drm, >display.hdcp.hdcp_mutex);
> + if (drmm_mutex_init(>drm, >sb_lock) ||
> + drmm_mutex_init(>drm, >display.backlight.lock) ||
> + drmm_mutex_init(>drm, >display.audio.mutex) ||
> + drmm_mutex_init(>drm, >display.wm.wm_mutex) ||
> + drmm_mutex_init(>drm, >display.pps.mutex) ||
> + drmm_mutex_init(>drm, >display.hdcp.hdcp_mutex))
> + return -ENOMEM;

why not extract the value from drmm_mutex_init()? it would make
the code a bit more complex, but better than forcing a -ENOMEM
return.

err = drmm_mutex_init(...)
if (err)
return err;

err = drmm_mutex_init(...)
if (err)
return err;

err = drmm_mutex_init(...)
if (err)
return err;

...

On the other hand drmm_mutex_init(), as of now returns only
-ENOMEM, but it's a bad practice to assume it will always do. I'd
rather prefer not to check the error value at all.

Andi

>   xe->enabled_irq_mask = ~0;
>  
>   err = drmm_add_action_or_reset(>drm, display_destroy, NULL);
> -- 
> 2.25.1


[PATCH v8 3/3] drm/i915/gt: Enable only one CCS for compute workload

2024-03-28 Thread Andi Shyti
Enable only one CCS engine by default with all the compute slices
allocated to it.

While generating the list of UABI engines to be exposed to the
user, exclude any additional CCS engines beyond the first
instance.

This change can be tested with igt i915_query.

Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement")
Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Matt Roper 
Cc:  # v6.2+
Reviewed-by: Matt Roper 
Acked-by: Michal Mrozek 
---
 drivers/gpu/drm/i915/Makefile   |  1 +
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 39 +
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h | 13 +++
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  5 +++
 drivers/gpu/drm/i915/gt/intel_workarounds.c |  7 
 5 files changed, 65 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 3ef6ed41e62b..a6885a1d41a1 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -118,6 +118,7 @@ gt-y += \
gt/intel_ggtt_fencing.o \
gt/intel_gt.o \
gt/intel_gt_buffer_pool.o \
+   gt/intel_gt_ccs_mode.o \
gt/intel_gt_clock_utils.o \
gt/intel_gt_debugfs.o \
gt/intel_gt_engines_debugfs.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c 
b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
new file mode 100644
index ..044219c5960a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
+#include "intel_gt_regs.h"
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt)
+{
+   int cslice;
+   u32 mode = 0;
+   int first_ccs = __ffs(CCS_MASK(gt));
+
+   if (!IS_DG2(gt->i915))
+   return;
+
+   /* Build the value for the fixed CCS load balancing */
+   for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
+   if (CCS_MASK(gt) & BIT(cslice))
+   /*
+* If available, assign the cslice
+* to the first available engine...
+*/
+   mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs);
+
+   else
+   /*
+* ... otherwise, mark the cslice as
+* unavailable if no CCS dispatches here
+*/
+   mode |= XEHP_CCS_MODE_CSLICE(cslice,
+XEHP_CCS_MODE_CSLICE_MASK);
+   }
+
+   intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h 
b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
new file mode 100644
index ..9e5549caeb26
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_CCS_MODE_H__
+#define __INTEL_GT_CCS_MODE_H__
+
+struct intel_gt;
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt);
+
+#endif /* __INTEL_GT_CCS_MODE_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index f8829be40199..e42b3a5d4e63 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1427,6 +1427,11 @@
 #define   XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE   REG_BIT(1)
 #define   GEN12_RCU_MODE_CCS_ENABLEREG_BIT(0)
 
+#define XEHP_CCS_MODE  _MMIO(0x14804)
+#define   XEHP_CCS_MODE_CSLICE_MASKREG_GENMASK(2, 0) /* CCS0-3 + 
rsvd */
+#define   XEHP_CCS_MODE_CSLICE_WIDTH   ilog2(XEHP_CCS_MODE_CSLICE_MASK 
+ 1)
+#define   XEHP_CCS_MODE_CSLICE(cslice, ccs)(ccs << (cslice * 
XEHP_CCS_MODE_CSLICE_WIDTH))
+
 #define CHV_FUSE_GT_MMIO(VLV_GUNIT_BASE + 0x2168)
 #define   CHV_FGT_DISABLE_SS0  (1 << 10)
 #define   CHV_FGT_DISABLE_SS1  (1 << 11)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 74cf678b7589..68b6aa11bcf7 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -10,6 +10,7 @@
 #include "intel_engine_regs.h"
 #include "intel_gpu_commands.h"
 #include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
 #include "intel_gt_mcr.h"
 #include "intel_gt_print.h"
 #include "intel_gt_regs.h"
@@ -2713,6 +2714,12 @@ static void ccs_engine_wa_mode(struct intel_engine_cs 
*engine, struct i915_wa_li
 

[PATCH v8 2/3] drm/i915/gt: Do not generate the command streamer for all the CCS

2024-03-28 Thread Andi Shyti
We want a fixed load CCS balancing consisting in all slices
sharing one single user engine. For this reason do not create the
intel_engine_cs structure with its dedicated command streamer for
CCS slices beyond the first.

Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement")
Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Matt Roper 
Cc:  # v6.2+
Acked-by: Michal Mrozek 
Reviewed-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 476651bd0a21..8c44af1c3451 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -874,6 +874,23 @@ static intel_engine_mask_t init_engine_mask(struct 
intel_gt *gt)
info->engine_mask &= ~BIT(GSC0);
}
 
+   /*
+* Do not create the command streamer for CCS slices beyond the first.
+* All the workload submitted to the first engine will be shared among
+* all the slices.
+*
+* Once the user will be allowed to customize the CCS mode, then this
+* check needs to be removed.
+*/
+   if (IS_DG2(gt->i915)) {
+   u8 first_ccs = __ffs(CCS_MASK(gt));
+
+   /* Mask off all the CCS engine */
+   info->engine_mask &= ~GENMASK(CCS3, CCS0);
+   /* Put back in the first CCS engine */
+   info->engine_mask |= BIT(_CCS(first_ccs));
+   }
+
return info->engine_mask;
 }
 
-- 
2.43.0



[PATCH v8 1/3] drm/i915/gt: Disable HW load balancing for CCS

2024-03-28 Thread Andi Shyti
The hardware should not dynamically balance the load between CCS
engines. Wa_14019159160 recommends disabling it across all
platforms.

Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement")
Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Matt Roper 
Cc:  # v6.2+
Reviewed-by: Matt Roper 
Acked-by: Michal Mrozek 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  1 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 23 +++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 731ed3ecd9a6..f8829be40199 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1424,6 +1424,7 @@
 #define   ECOBITS_PPGTT_CACHE4B(0 << 8)
 
 #define GEN12_RCU_MODE _MMIO(0x14800)
+#define   XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE   REG_BIT(1)
 #define   GEN12_RCU_MODE_CCS_ENABLEREG_BIT(0)
 
 #define CHV_FUSE_GT_MMIO(VLV_GUNIT_BASE + 0x2168)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index de5fe26bb18e..74cf678b7589 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -51,7 +51,8 @@
  *   registers belonging to BCS, VCS or VECS should be implemented in
  *   xcs_engine_wa_init(). Workarounds for registers not belonging to a 
specific
  *   engine's MMIO range but that are part of of the common RCS/CCS reset 
domain
- *   should be implemented in general_render_compute_wa_init().
+ *   should be implemented in general_render_compute_wa_init(). The settings
+ *   about the CCS load balancing should be added in ccs_engine_wa_mode().
  *
  * - GT workarounds: the list of these WAs is applied whenever these registers
  *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
@@ -2698,6 +2699,22 @@ add_render_compute_tuning_settings(struct intel_gt *gt,
wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
 }
 
+static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct 
i915_wa_list *wal)
+{
+   struct intel_gt *gt = engine->gt;
+
+   if (!IS_DG2(gt->i915))
+   return;
+
+   /*
+* Wa_14019159160: This workaround, along with others, leads to
+* significant challenges in utilizing load balancing among the
+* CCS slices. Consequently, an architectural decision has been
+* made to completely disable automatic CCS load balancing.
+*/
+   wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);
+}
+
 /*
  * The workarounds in this function apply to shared registers in
  * the general render reset domain that aren't tied to a
@@ -2830,8 +2847,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, 
struct i915_wa_list *wal
 * to a single RCS/CCS engine's workaround list since
 * they're reset as part of the general render domain reset.
 */
-   if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
+   if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) {
general_render_compute_wa_init(engine, wal);
+   ccs_engine_wa_mode(engine, wal);
+   }
 
if (engine->class == COMPUTE_CLASS)
ccs_engine_wa_init(engine, wal);
-- 
2.43.0



[PATCH v8 0/3] Disable automatic load CCS load balancing

2024-03-28 Thread Andi Shyti
Hi,

I think we are at the end of it and hopefully this is the last
version. Thanks Matt for having followed this series until here.

This series does basically two things:

1. Disables automatic load balancing as adviced by the hardware
   workaround.

2. Assigns all the CCS slices to one single user engine. The user
   will then be able to query only one CCS engine

From v5 I have created a new file, gt/intel_gt_ccs_mode.c where
I added the intel_gt_apply_ccs_mode(). In the upcoming patches,
this file will contain the implementation for dynamic CCS mode
setting.

Thanks Tvrtko, Matt, John and Joonas for your reviews!

Andi

Changelog
=
v7 -> v8
 - Just used a different way for removing the first instance of
   the CCS from the info->engine_mask, as suggested by Matt.

v6 -> v7
 - find a more appropriate place where to remove the CCS engines:
   remove them in init_engine_mask() instead of
   intel_engines_init_mmio(). (Thanks, Matt)
 - Add Michal's ACK, thanks Michal!

v5 -> v6 (thanks Matt for the suggestions in v6)
 - Remove the refactoring and the for_each_available_engine()
   macro and instead do not create the intel_engine_cs structure
   at all.
 - In patch 1 just a trivial reordering of the bit definitions.

v4 -> v5
 - Use the workaround framework to do all the CCS balancing
   settings in order to always apply the modes also when the
   engine resets. Put everything in its own specific function to
   be executed for the first CCS engine encountered. (Thanks
   Matt)
 - Calculate the CCS ID for the CCS mode as the first available
   CCS among all the engines (Thanks Matt)
 - create the intel_gt_ccs_mode.c function to host the CCS
   configuration. We will have it ready for the next series.
 - Fix a selftest that was failing because could not set CCS2.
 - Add the for_each_available_engine() macro to exclude CCS1+ and
   start using it in the hangcheck selftest.

v3 -> v4
 - Reword correctly the comment in the workaround
 - Fix a buffer overflow (Thanks Joonas)
 - Handle properly the fused engines when setting the CCS mode.

v2 -> v3
 - Simplified the algorithm for creating the list of the exported
   uabi engines. (Patch 1) (Thanks, Tvrtko)
 - Consider the fused engines when creating the uabi engine list
   (Patch 2) (Thanks, Matt)
 - Patch 4 now uses a the refactoring from patch 1, in a cleaner
   outcome.

v1 -> v2
 - In Patch 1 use the correct workaround number (thanks Matt).
 - In Patch 2 do not add the extra CCS engines to the exposed
   UABI engine list and adapt the engine counting accordingly
   (thanks Tvrtko).
 - Reword the commit of Patch 2 (thanks John).

Andi Shyti (3):
  drm/i915/gt: Disable HW load balancing for CCS
  drm/i915/gt: Do not generate the command streamer for all the CCS
  drm/i915/gt: Enable only one CCS for compute workload

 drivers/gpu/drm/i915/Makefile   |  1 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c   | 17 +
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 39 +
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h | 13 +++
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  6 
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 30 ++--
 6 files changed, 104 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h

-- 
2.43.0



Re: [PATCH v7 2/3] drm/i915/gt: Do not generate the command streamer for all the CCS

2024-03-28 Thread Andi Shyti
Hi Matt,

> > +   /*
> > +* Do not create the command streamer for CCS slices beyond the first.
> > +* All the workload submitted to the first engine will be shared among
> > +* all the slices.
> > +*
> > +* Once the user will be allowed to customize the CCS mode, then this
> > +* check needs to be removed.
> > +*/
> > +   if (IS_DG2(gt->i915)) {
> > +   intel_engine_mask_t first_ccs = BIT((CCS0 + 
> > __ffs(CCS_MASK(gt;
> > +   intel_engine_mask_t all_ccs = CCS_MASK(gt) << CCS0;
> > +
> > +   info->engine_mask &= ~(all_ccs &= ~first_ccs);
> 
> Shouldn't the second "&=" just be an "&" since there's no need to modify
> the all_ccs variable that never gets used again?

yes, that's a leftover from me trying different ways of removing
all the non first CCS engines.

> In fact since this is DG2-specific, it seems like it might be more
> intuitive to just write the whole thing more directly as
> 
> if (IS_DG2(gt->i915)) {
> int first_ccs = __ffs(CCS_MASK(gt));
> 
> info->engine_mask &= ~GENMASK(CCS3, CCS0);
> info->engine_mask |= BIT(_CCS(first_ccs));
> }

yes, looks a bit simpler. Will use this way.

> But up to you; if you just want to remove the unnecessary "=" that's
> fine too.  Either way,
> 
> Reviewed-by: Matt Roper 

Thanks!

Andi


[PATCH] drm/i915/gem: Replace dev_priv with i915

2024-03-28 Thread Andi Shyti
Anyone using 'dev_priv' instead of 'i915' in a cleaned-up area
should be fined and required to do community service for a few
days.

I thought I had cleaned up the 'gem/' directory in the past, but
still, old aficionados of the 'dev_priv' name keep sneaking it
in.

Signed-off-by: Andi Shyti 
Cc: Jani Nikula 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c |  4 ++--
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  |  6 +++---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.h |  8 
 drivers/gpu/drm/i915/gem/i915_gem_tiling.c | 18 +-
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c|  6 +++---
 .../gpu/drm/i915/gem/selftests/huge_pages.c| 14 +++---
 6 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 3f20fe381199..42619fc05de4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2456,7 +2456,7 @@ static int eb_submit(struct i915_execbuffer *eb)
  * The engine index is returned.
  */
 static unsigned int
-gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
+gen8_dispatch_bsd_engine(struct drm_i915_private *i915,
 struct drm_file *file)
 {
struct drm_i915_file_private *file_priv = file->driver_priv;
@@ -2464,7 +2464,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private 
*dev_priv,
/* Check whether the file_priv has already selected one ring. */
if ((int)file_priv->bsd_engine < 0)
file_priv->bsd_engine =
-   
get_random_u32_below(dev_priv->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO]);
+   
get_random_u32_below(i915->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO]);
 
return file_priv->bsd_engine;
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 38b72d86560f..c5e1c718a6d2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -654,7 +654,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915,
 
 /* Allocate a new GEM object and fill it with the supplied data */
 struct drm_i915_gem_object *
-i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
+i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
   const void *data, resource_size_t size)
 {
struct drm_i915_gem_object *obj;
@@ -663,8 +663,8 @@ i915_gem_object_create_shmem_from_data(struct 
drm_i915_private *dev_priv,
resource_size_t offset;
int err;
 
-   GEM_WARN_ON(IS_DGFX(dev_priv));
-   obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
+   GEM_WARN_ON(IS_DGFX(i915));
+   obj = i915_gem_object_create_shmem(i915, round_up(size, PAGE_SIZE));
if (IS_ERR(obj))
return obj;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
index 258381d1c054..dfe0db8bb1b9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
@@ -14,14 +14,14 @@ struct drm_i915_gem_object;
 
 #define i915_stolen_fb drm_mm_node
 
-int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
+int i915_gem_stolen_insert_node(struct drm_i915_private *i915,
struct drm_mm_node *node, u64 size,
unsigned alignment);
-int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
+int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915,
 struct drm_mm_node *node, u64 size,
 unsigned alignment, u64 start,
 u64 end);
-void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
+void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
 struct drm_mm_node *node);
 struct intel_memory_region *
 i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type,
@@ -31,7 +31,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 
type,
   u16 instance);
 
 struct drm_i915_gem_object *
-i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
+i915_gem_object_create_stolen(struct drm_i915_private *i915,
  resource_size_t size);
 
 bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c 
b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index a049ca0b7980..d9eb84c1d2f1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -343,12 +343,12 @@ int
 i915_gem_set_tiling_io

Re: [PATCH] drm/i915/gt: Limit the reserved VM space to only the platforms that need it

2024-03-27 Thread Andi Shyti
Hi,

On Wed, Mar 27, 2024 at 09:05:46PM +0100, Andi Shyti wrote:
> Commit 9bb66c179f50 ("drm/i915: Reserve some kernel space per
> vm") reduces the available VM space of one page in order to apply
> Wa_16018031267 and Wa_16018063123.
> 
> This page was reserved indiscrimitely in all platforms even when
> not needed. Limit it to DG2 onwards.
> 
> Fixes: 9bb66c179f50 ("drm/i915: Reserve some kernel space per vm")
> Signed-off-by: Andi Shyti 
> Cc: Andrzej Hajda 
> Cc: Chris Wilson 
> Cc: Jonathan Cavitt 
> Cc: Nirmoy Das 

I forgot to add stable here:

Cc:  # v6.8+

Sorry about that!

Andi


[PATCH] drm/i915/gt: Limit the reserved VM space to only the platforms that need it

2024-03-27 Thread Andi Shyti
Commit 9bb66c179f50 ("drm/i915: Reserve some kernel space per
vm") reduces the available VM space by one page in order to apply
Wa_16018031267 and Wa_16018063123.

This page was reserved indiscriminately in all platforms even when
not needed. Limit it to DG2 onwards.

Fixes: 9bb66c179f50 ("drm/i915: Reserve some kernel space per vm")
Signed-off-by: Andi Shyti 
Cc: Andrzej Hajda 
Cc: Chris Wilson 
Cc: Jonathan Cavitt 
Cc: Nirmoy Das 
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 3 +++
 drivers/gpu/drm/i915/gt/intel_gt.c   | 6 ++
 drivers/gpu/drm/i915/gt/intel_gt.h   | 9 +
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 1bd0e041e15c..398d60a66410 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -961,6 +961,9 @@ static int gen8_init_rsvd(struct i915_address_space *vm)
struct i915_vma *vma;
int ret;
 
+   if (!intel_gt_needs_wa_16018031267(vm->gt))
+   return 0;
+
/* The memory will be used only by GPU. */
obj = i915_gem_object_create_lmem(i915, PAGE_SIZE,
  I915_BO_ALLOC_VOLATILE |
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 2c6d31b8fc1a..580b5141ce1e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -1024,6 +1024,12 @@ enum i915_map_type intel_gt_coherent_map_type(struct 
intel_gt *gt,
return I915_MAP_WC;
 }
 
+bool intel_gt_needs_wa_16018031267(struct intel_gt *gt)
+{
+   /* Wa_16018031267, Wa_16018063123 */
+   return IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 55), IP_VER(12, 71));
+}
+
 bool intel_gt_needs_wa_22016122933(struct intel_gt *gt)
 {
return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == 
GT_MEDIA;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
b/drivers/gpu/drm/i915/gt/intel_gt.h
index 6e7cab60834c..b5e114d284ad 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -82,17 +82,18 @@ struct drm_printer;
  ##__VA_ARGS__);   \
 } while (0)
 
-#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
-   IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 55), IP_VER(12, 71)) && \
-   engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
-
 static inline bool gt_is_root(struct intel_gt *gt)
 {
return !gt->info.id;
 }
 
+bool intel_gt_needs_wa_16018031267(struct intel_gt *gt);
 bool intel_gt_needs_wa_22016122933(struct intel_gt *gt);
 
+#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
+   intel_gt_needs_wa_16018031267(engine->gt) && \
+   engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
+
 static inline struct intel_gt *uc_to_gt(struct intel_uc *uc)
 {
return container_of(uc, struct intel_gt, uc);
-- 
2.43.0



[PATCH v7 3/3] drm/i915/gt: Enable only one CCS for compute workload

2024-03-27 Thread Andi Shyti
Enable only one CCS engine by default with all the compute slices
allocated to it.

While generating the list of UABI engines to be exposed to the
user, exclude any additional CCS engines beyond the first
instance.

This change can be tested with igt i915_query.

Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement")
Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Matt Roper 
Cc:  # v6.2+
Reviewed-by: Matt Roper 
Acked-by: Michal Mrozek 
---
 drivers/gpu/drm/i915/Makefile   |  1 +
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 39 +
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h | 13 +++
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  5 +++
 drivers/gpu/drm/i915/gt/intel_workarounds.c |  7 
 5 files changed, 65 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 3ef6ed41e62b..a6885a1d41a1 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -118,6 +118,7 @@ gt-y += \
gt/intel_ggtt_fencing.o \
gt/intel_gt.o \
gt/intel_gt_buffer_pool.o \
+   gt/intel_gt_ccs_mode.o \
gt/intel_gt_clock_utils.o \
gt/intel_gt_debugfs.o \
gt/intel_gt_engines_debugfs.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c 
b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
new file mode 100644
index ..044219c5960a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
+#include "intel_gt_regs.h"
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt)
+{
+   int cslice;
+   u32 mode = 0;
+   int first_ccs = __ffs(CCS_MASK(gt));
+
+   if (!IS_DG2(gt->i915))
+   return;
+
+   /* Build the value for the fixed CCS load balancing */
+   for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
+   if (CCS_MASK(gt) & BIT(cslice))
+   /*
+* If available, assign the cslice
+* to the first available engine...
+*/
+   mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs);
+
+   else
+   /*
+* ... otherwise, mark the cslice as
+* unavailable if no CCS dispatches here
+*/
+   mode |= XEHP_CCS_MODE_CSLICE(cslice,
+XEHP_CCS_MODE_CSLICE_MASK);
+   }
+
+   intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h 
b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
new file mode 100644
index ..9e5549caeb26
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_CCS_MODE_H__
+#define __INTEL_GT_CCS_MODE_H__
+
+struct intel_gt;
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt);
+
+#endif /* __INTEL_GT_CCS_MODE_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 31b102604e3d..743fe3566722 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1480,6 +1480,11 @@
 #define   XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE   REG_BIT(1)
 #define   GEN12_RCU_MODE_CCS_ENABLEREG_BIT(0)
 
+#define XEHP_CCS_MODE  _MMIO(0x14804)
+#define   XEHP_CCS_MODE_CSLICE_MASKREG_GENMASK(2, 0) /* CCS0-3 + 
rsvd */
+#define   XEHP_CCS_MODE_CSLICE_WIDTH   ilog2(XEHP_CCS_MODE_CSLICE_MASK 
+ 1)
+#define   XEHP_CCS_MODE_CSLICE(cslice, ccs)(ccs << (cslice * 
XEHP_CCS_MODE_CSLICE_WIDTH))
+
 #define CHV_FUSE_GT_MMIO(VLV_GUNIT_BASE + 0x2168)
 #define   CHV_FGT_DISABLE_SS0  (1 << 10)
 #define   CHV_FGT_DISABLE_SS1  (1 << 11)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 9963e5725ae5..8188c9f0b5ce 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -10,6 +10,7 @@
 #include "intel_engine_regs.h"
 #include "intel_gpu_commands.h"
 #include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
 #include "intel_gt_mcr.h"
 #include "intel_gt_print.h"
 #include "intel_gt_regs.h"
@@ -2869,6 +2870,12 @@ static void ccs_engine_wa_mode(struct intel_engine_cs 
*engine, struct i915_wa_li
 

[PATCH v7 2/3] drm/i915/gt: Do not generate the command streamer for all the CCS

2024-03-27 Thread Andi Shyti
We want a fixed CCS load balancing consisting of all slices
sharing one single user engine. For this reason do not create the
intel_engine_cs structure with its dedicated command streamer for
CCS slices beyond the first.

Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement")
Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Matt Roper 
Cc:  # v6.2+
Acked-by: Michal Mrozek 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f553cf4e6449..47c4a69e854c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -908,6 +908,21 @@ static intel_engine_mask_t init_engine_mask(struct 
intel_gt *gt)
info->engine_mask &= ~BIT(GSC0);
}
 
+   /*
+* Do not create the command streamer for CCS slices beyond the first.
+* All the workload submitted to the first engine will be shared among
+* all the slices.
+*
+* Once the user will be allowed to customize the CCS mode, then this
+* check needs to be removed.
+*/
+   if (IS_DG2(gt->i915)) {
+   intel_engine_mask_t first_ccs = BIT((CCS0 + __ffs(CCS_MASK(gt))));
+   intel_engine_mask_t all_ccs = CCS_MASK(gt) << CCS0;
+
+   info->engine_mask &= ~(all_ccs &= ~first_ccs);
+   }
+
return info->engine_mask;
 }
 
-- 
2.43.0



[PATCH v7 1/3] drm/i915/gt: Disable HW load balancing for CCS

2024-03-27 Thread Andi Shyti
The hardware should not dynamically balance the load between CCS
engines. Wa_14019159160 recommends disabling it across all
platforms.

Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement")
Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Matt Roper 
Cc:  # v6.2+
Reviewed-by: Matt Roper 
Acked-by: Michal Mrozek 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  1 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 23 +++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 50962cfd1353..31b102604e3d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1477,6 +1477,7 @@
 #define   ECOBITS_PPGTT_CACHE4B(0 << 8)
 
 #define GEN12_RCU_MODE _MMIO(0x14800)
+#define   XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE   REG_BIT(1)
 #define   GEN12_RCU_MODE_CCS_ENABLEREG_BIT(0)
 
 #define CHV_FUSE_GT_MMIO(VLV_GUNIT_BASE + 0x2168)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index b079cbbc1897..9963e5725ae5 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -51,7 +51,8 @@
  *   registers belonging to BCS, VCS or VECS should be implemented in
  *   xcs_engine_wa_init(). Workarounds for registers not belonging to a 
specific
  *   engine's MMIO range but that are part of of the common RCS/CCS reset 
domain
- *   should be implemented in general_render_compute_wa_init().
+ *   should be implemented in general_render_compute_wa_init(). The settings
+ *   about the CCS load balancing should be added in ccs_engine_wa_mode().
  *
  * - GT workarounds: the list of these WAs is applied whenever these registers
  *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
@@ -2854,6 +2855,22 @@ add_render_compute_tuning_settings(struct intel_gt *gt,
wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
 }
 
+static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct 
i915_wa_list *wal)
+{
+   struct intel_gt *gt = engine->gt;
+
+   if (!IS_DG2(gt->i915))
+   return;
+
+   /*
+* Wa_14019159160: This workaround, along with others, leads to
+* significant challenges in utilizing load balancing among the
+* CCS slices. Consequently, an architectural decision has been
+* made to completely disable automatic CCS load balancing.
+*/
+   wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);
+}
+
 /*
  * The workarounds in this function apply to shared registers in
  * the general render reset domain that aren't tied to a
@@ -3000,8 +3017,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, 
struct i915_wa_list *wal
 * to a single RCS/CCS engine's workaround list since
 * they're reset as part of the general render domain reset.
 */
-   if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
+   if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) {
general_render_compute_wa_init(engine, wal);
+   ccs_engine_wa_mode(engine, wal);
+   }
 
if (engine->class == COMPUTE_CLASS)
ccs_engine_wa_init(engine, wal);
-- 
2.43.0



[PATCH v7 0/3] Disable automatic load CCS load balancing

2024-03-27 Thread Andi Shyti
Hi,

this series does basically two things:

1. Disables automatic load balancing as advised by the hardware
   workaround.

2. Assigns all the CCS slices to one single user engine. The user
   will then be able to query only one CCS engine

From v5 I have created a new file, gt/intel_gt_ccs_mode.c where
I added the intel_gt_apply_ccs_mode(). In the upcoming patches,
this file will contain the implementation for dynamic CCS mode
setting.

Thanks Tvrtko, Matt, John and Joonas for your reviews!

Andi

Changelog
=
v6 -> v7
 - find a more appropriate place where to remove the CCS engines:
   remove them in init_engine_mask() instead of
   intel_engines_init_mmio(). (Thanks, Matt)
 - Add Michal's ACK, thanks Michal!

v5 -> v6 (thanks Matt for the suggestions in v6)
 - Remove the refactoring and the for_each_available_engine()
   macro and instead do not create the intel_engine_cs structure
   at all.
 - In patch 1 just a trivial reordering of the bit definitions.

v4 -> v5
 - Use the workaround framework to do all the CCS balancing
   settings in order to always apply the modes also when the
   engine resets. Put everything in its own specific function to
   be executed for the first CCS engine encountered. (Thanks
   Matt)
 - Calculate the CCS ID for the CCS mode as the first available
   CCS among all the engines (Thanks Matt)
 - create the intel_gt_ccs_mode.c function to host the CCS
   configuration. We will have it ready for the next series.
 - Fix a selftest that was failing because could not set CCS2.
 - Add the for_each_available_engine() macro to exclude CCS1+ and
   start using it in the hangcheck selftest.

v3 -> v4
 - Reword correctly the comment in the workaround
 - Fix a buffer overflow (Thanks Joonas)
 - Handle properly the fused engines when setting the CCS mode.

v2 -> v3
 - Simplified the algorithm for creating the list of the exported
   uabi engines. (Patch 1) (Thanks, Tvrtko)
 - Consider the fused engines when creating the uabi engine list
   (Patch 2) (Thanks, Matt)
 - Patch 4 now uses the refactoring from patch 1, in a cleaner
   outcome.

v1 -> v2
 - In Patch 1 use the correct workaround number (thanks Matt).
 - In Patch 2 do not add the extra CCS engines to the exposed
   UABI engine list and adapt the engine counting accordingly
   (thanks Tvrtko).
 - Reword the commit of Patch 2 (thanks John).

Andi Shyti (3):
  drm/i915/gt: Disable HW load balancing for CCS
  drm/i915/gt: Do not generate the command streamer for all the CCS
  drm/i915/gt: Enable only one CCS for compute workload

 drivers/gpu/drm/i915/Makefile   |  1 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c   | 15 
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 39 +
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h | 13 +++
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  6 
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 30 ++--
 6 files changed, 102 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h

-- 
2.43.0



Re: [PATCH v6 2/3] drm/i915/gt: Do not generate the command streamer for all the CCS

2024-03-26 Thread Andi Shyti
Hi Matt,

On Tue, Mar 26, 2024 at 02:30:33PM -0700, Matt Roper wrote:
> On Tue, Mar 26, 2024 at 07:42:34PM +0100, Andi Shyti wrote:
> > On Tue, Mar 26, 2024 at 09:03:10AM -0700, Matt Roper wrote:
> > > On Wed, Mar 13, 2024 at 09:19:50PM +0100, Andi Shyti wrote:
> > > > +   /*
> > > > +* Do not create the command streamer for CCS 
> > > > slices
> > > > +* beyond the first. All the workload submitted 
> > > > to the
> > > > +* first engine will be shared among all the 
> > > > slices.
> > > > +*
> > > > +* Once the user will be allowed to customize 
> > > > the CCS
> > > > +* mode, then this check needs to be removed.
> > > > +*/
> > > > +   if (IS_DG2(i915) &&
> > > > +   class == COMPUTE_CLASS &&
> > > > +   ccs_instance++)
> > > > +   continue;
> > > 
> > > Wouldn't it be more intuitive to drop the non-lowest CCS engines in
> > > init_engine_mask() since that's the function that's dedicated to
> > > building the list of engines we'll use?  Then we don't need to kill the
> > > assertion farther down either.
> > 
> > Because we don't check the result of init_engine_mask() while
> > creating the engine's structure. We check it only after and
> > indeed I removed the drm_WARN_ON() check.
> > 
> > I think the whole process of creating the engine's structure in
> > the intel_engines_init_mmio() can be simplified, but this goes
> > beyond the scope of the series.
> > 
> > Or am I missing something?
> 
> The important part of init_engine_mask isn't the return value, but
> rather that it's what sets up gt->info.engine_mask.  The HAS_ENGINE()
> check that intel_engines_init_mmio() uses is based on the value stored
> there, so updating that function will also ensure that we skip the
> engines we don't want in the loop.

Yes, can do like this, as well. After all this is done I'm going
to do some cleanup here, as well.

Thanks,
Andi


Re: [PATCH v6 2/3] drm/i915/gt: Do not generate the command streamer for all the CCS

2024-03-26 Thread Andi Shyti
Hi Matt,

On Tue, Mar 26, 2024 at 09:03:10AM -0700, Matt Roper wrote:
> On Wed, Mar 13, 2024 at 09:19:50PM +0100, Andi Shyti wrote:
> > +   /*
> > +* Do not create the command streamer for CCS slices
> > +* beyond the first. All the workload submitted to the
> > +* first engine will be shared among all the slices.
> > +*
> > +* Once the user will be allowed to customize the CCS
> > +* mode, then this check needs to be removed.
> > +*/
> > +   if (IS_DG2(i915) &&
> > +   class == COMPUTE_CLASS &&
> > +   ccs_instance++)
> > +   continue;
> 
> Wouldn't it be more intuitive to drop the non-lowest CCS engines in
> init_engine_mask() since that's the function that's dedicated to
> building the list of engines we'll use?  Then we don't need to kill the
> assertion farther down either.

Because we don't check the result of init_engine_mask() while
creating the engine's structure. We check it only after and
indeed I removed the drm_WARN_ON() check.

I think the whole process of creating the engine's structure in
the intel_engines_init_mmio() can be simplified, but this goes
beyond the scope of the series.

Or am I missing something?

Thanks,
Andi


Re: [PATCH v6 0/3] Disable automatic load CCS load balancing

2024-03-26 Thread Andi Shyti
Hi Michal, Mark,

can you please ack from your side this first batch of changes?

Thanks,
Andi

On Wed, Mar 13, 2024 at 09:19:48PM +0100, Andi Shyti wrote:
> Hi,
> 
> this series does basically two things:
> 
> 1. Disables automatic load balancing as advised by the hardware
>workaround.
> 
> 2. Assigns all the CCS slices to one single user engine. The user
>will then be able to query only one CCS engine
> 
> From v5 I have created a new file, gt/intel_gt_ccs_mode.c where
> I added the intel_gt_apply_ccs_mode(). In the upcoming patches,
> this file will contain the implementation for dynamic CCS mode
> setting.
> 
> Thanks Tvrtko, Matt, John and Joonas for your reviews!
> 
> Andi
> 
> Changelog
> =
> v5 -> v6 (thanks Matt for the suggestions in v6)
>  - Remove the refactoring and the for_each_available_engine()
>macro and instead do not create the intel_engine_cs structure
>at all.
>  - In patch 1 just a trivial reordering of the bit definitions.
> 
> v4 -> v5
>  - Use the workaround framework to do all the CCS balancing
>settings in order to always apply the modes also when the
>engine resets. Put everything in its own specific function to
>be executed for the first CCS engine encountered. (Thanks
>Matt)
>  - Calculate the CCS ID for the CCS mode as the first available
>CCS among all the engines (Thanks Matt)
>  - create the intel_gt_ccs_mode.c function to host the CCS
>configuration. We will have it ready for the next series.
>  - Fix a selftest that was failing because could not set CCS2.
>  - Add the for_each_available_engine() macro to exclude CCS1+ and
>start using it in the hangcheck selftest.
> 
> v3 -> v4
>  - Reword correctly the comment in the workaround
>  - Fix a buffer overflow (Thanks Joonas)
>  - Handle properly the fused engines when setting the CCS mode.
> 
> v2 -> v3
>  - Simplified the algorithm for creating the list of the exported
>uabi engines. (Patch 1) (Thanks, Tvrtko)
>  - Consider the fused engines when creating the uabi engine list
>(Patch 2) (Thanks, Matt)
>  - Patch 4 now uses the refactoring from patch 1, in a cleaner
>outcome.
> 
> v1 -> v2
>  - In Patch 1 use the correct workaround number (thanks Matt).
>  - In Patch 2 do not add the extra CCS engines to the exposed
>UABI engine list and adapt the engine counting accordingly
>(thanks Tvrtko).
>  - Reword the commit of Patch 2 (thanks John).
> 
> Andi Shyti (3):
>   drm/i915/gt: Disable HW load balancing for CCS
>   drm/i915/gt: Do not generate the command streamer for all the CCS
>   drm/i915/gt: Enable only one CCS for compute workload
> 
>  drivers/gpu/drm/i915/Makefile   |  1 +
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c   | 20 ---
>  drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 39 +
>  drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h | 13 +++
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h |  6 
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 30 ++--
>  6 files changed, 103 insertions(+), 6 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
>  create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
> 
> -- 
> 2.43.0


Re: [PATCH v6 0/3] Disable automatic load CCS load balancing

2024-03-26 Thread Andi Shyti
Joonas,

> 1. Disables automatic load balancing as advised by the hardware
>workaround.

do we need a documentation update here?

Andi


Re: [PATCH v7 0/3] drm/i915: Fix VMA UAF on destroy against deactivate race

2024-03-26 Thread Andi Shyti
Hi Janusz,

On Tue, Mar 05, 2024 at 03:35:05PM +0100, Janusz Krzysztofik wrote:
> Object debugging tools were sporadically reporting illegal attempts to
> free a still active i915 VMA object when parking a GT believed to be idle.
> 
> [161.359441] ODEBUG: free active (active state 0) object: 88811643b958 
> object type: i915_active hint: __i915_vma_active+0x0/0x50 [i915]
> [161.360082] WARNING: CPU: 5 PID: 276 at lib/debugobjects.c:514 
> debug_print_object+0x80/0xb0

...

pushded to drm-intel-gt-next.

Thanks,
Andi


Re: [PATCH] drm/i915/gt: Reset queue_priority_hint on parking

2024-03-26 Thread Andi Shyti
Hi Janusz and Chris,

> Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy")
> Closes: https://gitlab.freedesktop.org/drm/intel/issues/10154
> Signed-off-by: Chris Wilson 
> Cc: Mika Kuoppala 
> Signed-off-by: Janusz Krzysztofik 
> Cc: Chris Wilson 
> Cc:  # v5.4+

with the tags rearranged a bit, pushed to drm-intel-gt-next.

Andi


Re: [PATCH] drm/i915/gem: Calculate object page offset for partial memory mapping

2024-03-26 Thread Andi Shyti
Hi Nirmoy,

...

> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
> > b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > index a2195e28b625..57a2dda2c3cc 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > @@ -276,7 +276,7 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
> > /* PTEs are revoked in obj->ops->put_pages() */
> > err = remap_io_sg(area,
> >   area->vm_start, area->vm_end - area->vm_start,
> > - obj->mm.pages->sgl, iomap);
> > + obj->mm.pages->sgl, 0, iomap);
> 
> Why don't we need partial mmap for CPU but only for GTT ?

As far as I understood we don't. I have a version with the CPU
offset as well in trybot[*]

But without support for segmented buffer objects, I don't know
how much this has any effect.

> Sounds like this also need to be cover by a IGT tests.

Yes, it does need some igt work; I'm working on it.

> Don't we need "Fixes" tag for this?

Why should we? I'm not fixing anything here, I'm just
recalculating the mapping not starting from the beginning of the
scatter page.

Andi

[*] https://patchwork.freedesktop.org/patch/584474/?series=131539=2


[PATCH] drm/i915/gem: Calculate object page offset for partial memory mapping

2024-03-25 Thread Andi Shyti
To enable partial memory mapping of GPU virtual memory, it's
necessary to introduce an offset to the object's memory
(obj->mm.pages) scatterlist. This adjustment compensates for
instances when userspace mappings do not start from the beginning
of the object.

Based on a patch by Chris Wilson
.

Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c |  8 +---
 drivers/gpu/drm/i915/i915_mm.c   | 12 +++-
 drivers/gpu/drm/i915/i915_mm.h   |  3 ++-
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index a2195e28b625..57a2dda2c3cc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -276,7 +276,7 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
/* PTEs are revoked in obj->ops->put_pages() */
err = remap_io_sg(area,
  area->vm_start, area->vm_end - area->vm_start,
- obj->mm.pages->sgl, iomap);
+ obj->mm.pages->sgl, 0, iomap);
 
if (area->vm_flags & VM_WRITE) {
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
@@ -302,14 +302,16 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
bool write = area->vm_flags & VM_WRITE;
struct i915_gem_ww_ctx ww;
+   unsigned long obj_offset;
intel_wakeref_t wakeref;
struct i915_vma *vma;
pgoff_t page_offset;
int srcu;
int ret;
 
-   /* We don't use vmf->pgoff since that has the fake offset */
	obj_offset = area->vm_pgoff - drm_vma_node_start(&obj->base.vma_node);
page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
+   page_offset += obj_offset;
 
trace_i915_gem_object_fault(obj, page_offset, true, write);
 
@@ -404,7 +406,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
 
/* Finally, remap it using the new GTT offset */
ret = remap_io_mapping(area,
-  area->vm_start + (vma->gtt_view.partial.offset 
<< PAGE_SHIFT),
+  area->vm_start + ((vma->gtt_view.partial.offset 
- obj_offset) << PAGE_SHIFT),
   (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> 
PAGE_SHIFT,
   min_t(u64, vma->size, area->vm_end - 
area->vm_start),
   &ggtt->iomap);
diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c
index 7998bc74ab49..f5c97a620962 100644
--- a/drivers/gpu/drm/i915/i915_mm.c
+++ b/drivers/gpu/drm/i915/i915_mm.c
@@ -122,13 +122,15 @@ int remap_io_mapping(struct vm_area_struct *vma,
  * @addr: target user address to start at
  * @size: size of map area
  * @sgl: Start sg entry
+ * @offset: offset from the start of the page
  * @iobase: Use stored dma address offset by this address or pfn if -1
  *
  *  Note: this is only safe if the mm semaphore is held when called.
  */
 int remap_io_sg(struct vm_area_struct *vma,
unsigned long addr, unsigned long size,
-   struct scatterlist *sgl, resource_size_t iobase)
+   struct scatterlist *sgl, unsigned long offset,
+   resource_size_t iobase)
 {
struct remap_pfn r = {
.mm = vma->vm_mm,
@@ -141,6 +143,14 @@ int remap_io_sg(struct vm_area_struct *vma,
/* We rely on prevalidation of the io-mapping to skip track_pfn(). */
GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS);
 
+   while (offset >= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT) {
+   offset -= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT;
+   r.sgt = __sgt_iter(__sg_next(r.sgt.sgp), use_dma(iobase));
+   if (!r.sgt.sgp)
+   return -EINVAL;
+   }
+   r.sgt.curr = offset << PAGE_SHIFT;
+
if (!use_dma(iobase))
flush_cache_range(vma, addr, size);
 
diff --git a/drivers/gpu/drm/i915/i915_mm.h b/drivers/gpu/drm/i915/i915_mm.h
index 04c8974d822b..69f9351b1a1c 100644
--- a/drivers/gpu/drm/i915/i915_mm.h
+++ b/drivers/gpu/drm/i915/i915_mm.h
@@ -30,6 +30,7 @@ int remap_io_mapping(struct vm_area_struct *vma,
 
 int remap_io_sg(struct vm_area_struct *vma,
unsigned long addr, unsigned long size,
-   struct scatterlist *sgl, resource_size_t iobase);
+   struct scatterlist *sgl, unsigned long offset,
+   resource_size_t iobase);
 
 #endif /* __I915_MM_H__ */
-- 
2.43.0



[PATCH v2] drm/i915/gt: Report full vm address range

2024-03-21 Thread Andi Shyti
Commit 9bb66c179f50 ("drm/i915: Reserve some kernel space per
vm") has reserved an object for kernel space usage.

Userspace, though, needs to know the full address range.

In the former patch the reserved space was subtracted from the
total amount of the VM space. Add it back when the user requests
the GTT size through ioctl (I915_CONTEXT_PARAM_GTT_SIZE).

Fixes: 9bb66c179f50 ("drm/i915: Reserve some kernel space per vm")
Signed-off-by: Andi Shyti 
Cc: Andrzej Hajda 
Cc: Chris Wilson 
Cc: Lionel Landwerlin 
Cc: Michal Mrozek 
Cc: Nirmoy Das 
Cc:  # v6.2+
Acked-by: Michal Mrozek 
Acked-by: Lionel Landwerlin 
---
Hi,

Just proposing a different implementation that doesn't affect
i915 internally but provides the same result. Instead of not
subtracting the space during the reservation, I add it back
during the ioctl call.

All the "vm->rsvd.vma->node.size" looks a bit ugly, but that's
how it is. Maybe a comment can help to understand better why
there is this addition.

I kept the Ack from Michal and Lionel, because the outcome from
userspace perspective doesn't really change.

Andi

 drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 81f65cab1330..60d9e7fe33b3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -2454,7 +2454,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
case I915_CONTEXT_PARAM_GTT_SIZE:
args->size = 0;
vm = i915_gem_context_get_eb_vm(ctx);
-   args->value = vm->total;
+   args->value = vm->total + vm->rsvd.vma->node.size;
i915_vm_put(vm);
 
break;
-- 
2.43.0



Re: [PATCH] drm/i915/gt: Report full vm address range

2024-03-20 Thread Andi Shyti
Hi Michal,

On Mon, Mar 18, 2024 at 05:21:54AM +, Mrozek, Michal wrote:
> > > Lionel, Michal, thoughts?
> Compute UMD needs to know exact GTT total size.

the problem is that we cannot apply the workaround without
reserving one page from the GTT total size and we need to apply
the workaround.

If we provide the total GTT size we will have one page that will
be contended between kernel and userspace and, if userspace is
unaware that the page belongs to the kernel, we might step on
each other's toes.

The ask here from kernel side is to relax the check on the
maxNBitValue() in userspace and take what the kernel provides.

Thanks,
Andi


Re: [PATCH v6 0/3] Disable automatic load CCS load balancing

2024-03-20 Thread Andi Shyti
Hi Tvrtko,

On Wed, Mar 20, 2024 at 03:40:18PM +, Tvrtko Ursulin wrote:
> On 20/03/2024 15:06, Andi Shyti wrote:
> > Ping! Any thoughts here?
> 
> I only casually observed the discussion after I saw Matt suggested further
> simplifications. As I understood it, you will bring back the uabi engine
> games when adding the dynamic behaviour and that is fine by me.

yes, the refactoring suggested by you will come later.

Thanks,
Andi


Re: [PATCH v6 0/3] Disable automatic load CCS load balancing

2024-03-20 Thread Andi Shyti
Ping! Any thoughts here?

Andi

On Wed, Mar 13, 2024 at 09:19:48PM +0100, Andi Shyti wrote:
> Hi,
> 
> this series does basically two things:
> 
> 1. Disables automatic load balancing as advised by the hardware
>workaround.
> 
> 2. Assigns all the CCS slices to one single user engine. The user
>will then be able to query only one CCS engine
> 
> From v5 I have created a new file, gt/intel_gt_ccs_mode.c where
> I added the intel_gt_apply_ccs_mode(). In the upcoming patches,
> this file will contain the implementation for dynamic CCS mode
> setting.
> 
> Thanks Tvrtko, Matt, John and Joonas for your reviews!
> 
> Andi
> 
> Changelog
> =
> v5 -> v6 (thanks Matt for the suggestions in v6)
>  - Remove the refactoring and the for_each_available_engine()
>macro and instead do not create the intel_engine_cs structure
>at all.
>  - In patch 1 just a trivial reordering of the bit definitions.
> 
> v4 -> v5
>  - Use the workaround framework to do all the CCS balancing
>settings in order to always apply the modes also when the
>engine resets. Put everything in its own specific function to
>be executed for the first CCS engine encountered. (Thanks
>Matt)
>  - Calculate the CCS ID for the CCS mode as the first available
>CCS among all the engines (Thanks Matt)
>  - create the intel_gt_ccs_mode.c function to host the CCS
>configuration. We will have it ready for the next series.
>  - Fix a selftest that was failing because could not set CCS2.
>  - Add the for_each_available_engine() macro to exclude CCS1+ and
>start using it in the hangcheck selftest.
> 
> v3 -> v4
>  - Reword correctly the comment in the workaround
>  - Fix a buffer overflow (Thanks Joonas)
>  - Handle properly the fused engines when setting the CCS mode.
> 
> v2 -> v3
>  - Simplified the algorithm for creating the list of the exported
>uabi engines. (Patch 1) (Thanks, Tvrtko)
>  - Consider the fused engines when creating the uabi engine list
>(Patch 2) (Thanks, Matt)
>  - Patch 4 now uses the refactoring from patch 1, in a cleaner
>outcome.
> 
> v1 -> v2
>  - In Patch 1 use the correct workaround number (thanks Matt).
>  - In Patch 2 do not add the extra CCS engines to the exposed
>UABI engine list and adapt the engine counting accordingly
>(thanks Tvrtko).
>  - Reword the commit of Patch 2 (thanks John).
> 
> Andi Shyti (3):
>   drm/i915/gt: Disable HW load balancing for CCS
>   drm/i915/gt: Do not generate the command streamer for all the CCS
>   drm/i915/gt: Enable only one CCS for compute workload
> 
>  drivers/gpu/drm/i915/Makefile   |  1 +
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c   | 20 ---
>  drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 39 +
>  drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h | 13 +++
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h |  6 
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 30 ++--
>  6 files changed, 103 insertions(+), 6 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
>  create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
> 
> -- 
> 2.43.0


Re: [PATCH] drm/i915/gt: Reset queue_priority_hint on parking

2024-03-20 Thread Andi Shyti
Hi Janusz,

...

> Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy")
> Closes: https://gitlab.freedesktop.org/drm/intel/issues/10154
> Signed-off-by: Chris Wilson 
> Cc: Mika Kuoppala 
> Signed-off-by: Janusz Krzysztofik 
> Cc: Chris Wilson 
> Cc:  # v5.4+

this tag list is a bit confusing. Let's keep all Cc's together
and, besides, Cc'eing the author looks a bit redundant.

No need to resend also because I retriggered another round of
test.

Reviewed-by: Andi Shyti 

Thanks,
Andi


Re: [PATCH] drm/i915/gt: Report full vm address range

2024-03-15 Thread Andi Shyti
Hi Nirmoy,

> > In Mesa we've been relying on I915_CONTEXT_PARAM_GTT_SIZE so as long as
> > that is adjusted by the kernel
> 
> What do you mean by adjusted by, should it be a aligned size?
> 
> I915_CONTEXT_PARAM_GTT_SIZE ioctl is returning vm->total which is
> adjusted(reduced by a page).
> 
> This patch might cause silent error as it is not removing WABB which is
> using the reserved page to add dummy blt and if userspace is using that
> 
> page then it will be overwritten.

yes, I think this could happen, but there is no solution,
unfortunately. We need to fail at some point.

On the other hand, I think mesa is miscalculating the vm size. In
userspace the total size is derived by the bit size
(maxNBitValue()).

By doing so, I guess there will always be cases of
miscalculation.

There are two solutions here:

 1. we track two sizes, one the true available size and one the
total size. But this looks like a dirty hack to me.
 2. UMD fixes the size calculation by taking for granted what the
driver provides and we don't have anything to do in KMD.

Lionel, Michal, thoughts?

Andi


Re: [PATCH v2] drm/i915/gem: Execbuffer objects must have struct pages.

2024-03-14 Thread Andi Shyti
Hi Jonathan,

On Tue, Mar 12, 2024 at 07:55:06AM -0700, Jonathan Cavitt wrote:
> We cannot write requests to objects without struct pages, so escape
> early if the requests are bound to objects that lack them.
> 
> Signed-off-by: Jonathan Cavitt 

is this a fix? Do you need

Fixes: 544460c33821 ("drm/i915: Multi-BB execbuf")
Cc: Matthew Brost 
Cc:  # v5.16+

?

Andi

> ---
> 
> v2: s/vma-obj/vma->obj
> 
>  drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
> b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index d3a771afb083e..adb4f9e78cb49 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -3313,6 +3313,13 @@ eb_requests_create(struct i915_execbuffer *eb, struct 
> dma_fence *in_fence,
>   unsigned int i;
>  
>   for_each_batch_create_order(eb, i) {
> + /* Do not write requests to objects without struct pages. */
> + if (eb->batches[i]->vma &&
> + !i915_gem_object_has_struct_page(eb->batches[i]->vma->obj)) 
> {
> + out_fence = ERR_PTR(-EINVAL);
> + return out_fence;
> + }
> +
>   /* Allocate a request for this batch buffer nice and early. */
>   eb->requests[i] = i915_request_create(eb_find_context(eb, i));
>   if (IS_ERR(eb->requests[i])) {
> -- 
> 2.25.1


[PATCH v6 3/3] drm/i915/gt: Enable only one CCS for compute workload

2024-03-13 Thread Andi Shyti
Enable only one CCS engine by default with all the compute slices
allocated to it.

While generating the list of UABI engines to be exposed to the
user, exclude any additional CCS engines beyond the first
instance.

This change can be tested with igt i915_query.

Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement")
Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Matt Roper 
Cc:  # v6.2+
---
 drivers/gpu/drm/i915/Makefile   |  1 +
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 39 +
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h | 13 +++
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  5 +++
 drivers/gpu/drm/i915/gt/intel_workarounds.c |  7 
 5 files changed, 65 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 3ef6ed41e62b..a6885a1d41a1 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -118,6 +118,7 @@ gt-y += \
gt/intel_ggtt_fencing.o \
gt/intel_gt.o \
gt/intel_gt_buffer_pool.o \
+   gt/intel_gt_ccs_mode.o \
gt/intel_gt_clock_utils.o \
gt/intel_gt_debugfs.o \
gt/intel_gt_engines_debugfs.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c 
b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
new file mode 100644
index ..044219c5960a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
+#include "intel_gt_regs.h"
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt)
+{
+   int cslice;
+   u32 mode = 0;
+   int first_ccs = __ffs(CCS_MASK(gt));
+
+   if (!IS_DG2(gt->i915))
+   return;
+
+   /* Build the value for the fixed CCS load balancing */
+   for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
+   if (CCS_MASK(gt) & BIT(cslice))
+   /*
+* If available, assign the cslice
+* to the first available engine...
+*/
+   mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs);
+
+   else
+   /*
+* ... otherwise, mark the cslice as
+* unavailable if no CCS dispatches here
+*/
+   mode |= XEHP_CCS_MODE_CSLICE(cslice,
+XEHP_CCS_MODE_CSLICE_MASK);
+   }
+
+   intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h 
b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
new file mode 100644
index ..9e5549caeb26
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_CCS_MODE_H__
+#define __INTEL_GT_CCS_MODE_H__
+
+struct intel_gt;
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt);
+
+#endif /* __INTEL_GT_CCS_MODE_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 31b102604e3d..743fe3566722 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1480,6 +1480,11 @@
 #define   XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE   REG_BIT(1)
 #define   GEN12_RCU_MODE_CCS_ENABLEREG_BIT(0)
 
+#define XEHP_CCS_MODE  _MMIO(0x14804)
+#define   XEHP_CCS_MODE_CSLICE_MASKREG_GENMASK(2, 0) /* CCS0-3 + 
rsvd */
+#define   XEHP_CCS_MODE_CSLICE_WIDTH   ilog2(XEHP_CCS_MODE_CSLICE_MASK 
+ 1)
+#define   XEHP_CCS_MODE_CSLICE(cslice, ccs)(ccs << (cslice * 
XEHP_CCS_MODE_CSLICE_WIDTH))
+
 #define CHV_FUSE_GT_MMIO(VLV_GUNIT_BASE + 0x2168)
 #define   CHV_FGT_DISABLE_SS0  (1 << 10)
 #define   CHV_FGT_DISABLE_SS1  (1 << 11)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 9963e5725ae5..8188c9f0b5ce 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -10,6 +10,7 @@
 #include "intel_engine_regs.h"
 #include "intel_gpu_commands.h"
 #include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
 #include "intel_gt_mcr.h"
 #include "intel_gt_print.h"
 #include "intel_gt_regs.h"
@@ -2869,6 +2870,12 @@ static void ccs_engine_wa_mode(struct intel_engine_cs 
*engine, struct i915_wa_li
 * made to completely disable automatic CCS load balanc

[PATCH v6 2/3] drm/i915/gt: Do not generate the command streamer for all the CCS

2024-03-13 Thread Andi Shyti
We want a fixed load CCS balancing consisting in all slices
sharing one single user engine. For this reason do not create the
intel_engine_cs structure with its dedicated command streamer for
CCS slices beyond the first.

Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement")
Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Matt Roper 
Cc:  # v6.2+
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 20 
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f553cf4e6449..c4fb31bb6e72 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -966,6 +966,7 @@ int intel_engines_init_mmio(struct intel_gt *gt)
const unsigned int engine_mask = init_engine_mask(gt);
unsigned int mask = 0;
unsigned int i, class;
+   u8 ccs_instance = 0;
u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
int err;
 
@@ -986,6 +987,19 @@ int intel_engines_init_mmio(struct intel_gt *gt)
!HAS_ENGINE(gt, i))
continue;
 
+   /*
+* Do not create the command streamer for CCS slices
+* beyond the first. All the workload submitted to the
+* first engine will be shared among all the slices.
+*
+* Once the user will be allowed to customize the CCS
+* mode, then this check needs to be removed.
+*/
+   if (IS_DG2(i915) &&
+   class == COMPUTE_CLASS &&
+   ccs_instance++)
+   continue;
+
err = intel_engine_setup(gt, i,
 logical_ids[instance]);
if (err)
@@ -996,11 +1010,9 @@ int intel_engines_init_mmio(struct intel_gt *gt)
}
 
/*
-* Catch failures to update intel_engines table when the new engines
-* are added to the driver by a warning and disabling the forgotten
-* engines.
+* Update the intel_engines table.
 */
-   if (drm_WARN_ON(>drm, mask != engine_mask))
+   if (mask != engine_mask)
gt->info.engine_mask = mask;
 
gt->info.num_engines = hweight32(mask);
-- 
2.43.0



[PATCH v6 1/3] drm/i915/gt: Disable HW load balancing for CCS

2024-03-13 Thread Andi Shyti
The hardware should not dynamically balance the load between CCS
engines. Wa_14019159160 recommends disabling it across all
platforms.

Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement")
Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Matt Roper 
Cc:  # v6.2+
Reviewed-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  1 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 23 +++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 50962cfd1353..31b102604e3d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1477,6 +1477,7 @@
 #define   ECOBITS_PPGTT_CACHE4B(0 << 8)
 
 #define GEN12_RCU_MODE _MMIO(0x14800)
+#define   XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE   REG_BIT(1)
 #define   GEN12_RCU_MODE_CCS_ENABLEREG_BIT(0)
 
 #define CHV_FUSE_GT_MMIO(VLV_GUNIT_BASE + 0x2168)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index b079cbbc1897..9963e5725ae5 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -51,7 +51,8 @@
  *   registers belonging to BCS, VCS or VECS should be implemented in
  *   xcs_engine_wa_init(). Workarounds for registers not belonging to a 
specific
  *   engine's MMIO range but that are part of of the common RCS/CCS reset 
domain
- *   should be implemented in general_render_compute_wa_init().
+ *   should be implemented in general_render_compute_wa_init(). The settings
+ *   about the CCS load balancing should be added in ccs_engine_wa_mode().
  *
  * - GT workarounds: the list of these WAs is applied whenever these registers
  *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
@@ -2854,6 +2855,22 @@ add_render_compute_tuning_settings(struct intel_gt *gt,
wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
 }
 
+static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct 
i915_wa_list *wal)
+{
+   struct intel_gt *gt = engine->gt;
+
+   if (!IS_DG2(gt->i915))
+   return;
+
+   /*
+* Wa_14019159160: This workaround, along with others, leads to
+* significant challenges in utilizing load balancing among the
+* CCS slices. Consequently, an architectural decision has been
+* made to completely disable automatic CCS load balancing.
+*/
+   wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);
+}
+
 /*
  * The workarounds in this function apply to shared registers in
  * the general render reset domain that aren't tied to a
@@ -3000,8 +3017,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, 
struct i915_wa_list *wal
 * to a single RCS/CCS engine's workaround list since
 * they're reset as part of the general render domain reset.
 */
-   if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
+   if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) {
general_render_compute_wa_init(engine, wal);
+   ccs_engine_wa_mode(engine, wal);
+   }
 
if (engine->class == COMPUTE_CLASS)
ccs_engine_wa_init(engine, wal);
-- 
2.43.0



[PATCH v6 0/3] Disable automatic load CCS load balancing

2024-03-13 Thread Andi Shyti
Hi,

this series does basically two things:

1. Disables automatic load balancing as advised by the hardware
   workaround.

2. Assigns all the CCS slices to one single user engine. The user
   will then be able to query only one CCS engine

>From v5 I have created a new file, gt/intel_gt_ccs_mode.c where
I added the intel_gt_apply_ccs_mode(). In the upcoming patches,
this file will contain the implementation for dynamic CCS mode
setting.

Thanks Tvrtko, Matt, John and Joonas for your reviews!

Andi

Changelog
=
v5 -> v6 (thanks Matt for the suggestions in v6)
 - Remove the refactoring and the for_each_available_engine()
   macro and instead do not create the intel_engine_cs structure
   at all.
 - In patch 1 just a trivial reordering of the bit definitions.

v4 -> v5
 - Use the workaround framework to do all the CCS balancing
   settings in order to always apply the modes also when the
   engine resets. Put everything in its own specific function to
   be executed for the first CCS engine encountered. (Thanks
   Matt)
 - Calculate the CCS ID for the CCS mode as the first available
   CCS among all the engines (Thanks Matt)
 - create the intel_gt_ccs_mode.c function to host the CCS
   configuration. We will have it ready for the next series.
 - Fix a selftest that was failing because could not set CCS2.
 - Add the for_each_available_engine() macro to exclude CCS1+ and
   start using it in the hangcheck selftest.

v3 -> v4
 - Reword correctly the comment in the workaround
 - Fix a buffer overflow (Thanks Joonas)
 - Handle properly the fused engines when setting the CCS mode.

v2 -> v3
 - Simplified the algorithm for creating the list of the exported
   uabi engines. (Patch 1) (Thanks, Tvrtko)
 - Consider the fused engines when creating the uabi engine list
   (Patch 2) (Thanks, Matt)
 - Patch 4 now uses a the refactoring from patch 1, in a cleaner
   outcome.

v1 -> v2
 - In Patch 1 use the correct workaround number (thanks Matt).
 - In Patch 2 do not add the extra CCS engines to the exposed
   UABI engine list and adapt the engine counting accordingly
   (thanks Tvrtko).
 - Reword the commit of Patch 2 (thanks John).

Andi Shyti (3):
  drm/i915/gt: Disable HW load balancing for CCS
  drm/i915/gt: Do not generate the command streamer for all the CCS
  drm/i915/gt: Enable only one CCS for compute workload

 drivers/gpu/drm/i915/Makefile   |  1 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c   | 20 ---
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 39 +
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h | 13 +++
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  6 
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 30 ++--
 6 files changed, 103 insertions(+), 6 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h

-- 
2.43.0



[PATCH] drm/i915/gt: Report full vm address range

2024-03-13 Thread Andi Shyti
Commit 9bb66c179f50 ("drm/i915: Reserve some kernel space per
vm") has reserved an object for kernel space usage.

Userspace, though, needs to know the full address range.

Fixes: 9bb66c179f50 ("drm/i915: Reserve some kernel space per vm")
Signed-off-by: Andi Shyti 
Cc: Andrzej Hajda 
Cc: Chris Wilson 
Cc: Lionel Landwerlin 
Cc: Michal Mrozek 
Cc: Nirmoy Das 
Cc:  # v6.2+
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index fa46d2308b0e..d76831f50106 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -982,8 +982,9 @@ static int gen8_init_rsvd(struct i915_address_space *vm)
 
vm->rsvd.vma = i915_vma_make_unshrinkable(vma);
vm->rsvd.obj = obj;
-   vm->total -= vma->node.size;
+
return 0;
+
 unref:
i915_gem_object_put(obj);
return ret;
-- 
2.43.0



Re: [PATCH v2] drm/i915/hwmon: Fix locking inversion in sysfs getter

2024-03-13 Thread Andi Shyti
Hi Janusz,

On Mon, Mar 11, 2024 at 09:34:58PM +0100, Janusz Krzysztofik wrote:
> In i915 hwmon sysfs getter path we now take a hwmon_lock, then acquire an
> rpm wakeref.  That results in lock inversion:
> 
> <4> [197.079335] ==
> <4> [197.085473] WARNING: possible circular locking dependency detected
> <4> [197.091611] 6.8.0-rc7-Patchwork_129026v7-gc4dc92fb1152+ #1 Not tainted

...

> Fixes: c41b8bdcc297 ("drm/i915/hwmon: Show device level energy usage")
> Signed-off-by: Janusz Krzysztofik 
> Cc: Rodrigo Vivi 
> Cc: Guenter Roeck 
> Cc:  # v6.2+

With the "Fixes:" tag changed and the stable version updated,
pushed to drm-intel-next.

Thanks,
Andi


Re: [PATCH] drm/i915/selftests: Pick correct caching mode.

2024-03-13 Thread Andi Shyti
Hi Nirmoy,

On Tue, Mar 12, 2024 at 12:18:15PM +0100, Nirmoy Das wrote:
> Caching mode is HW dependent so pick a correct one using
> intel_gt_coherent_map_type().
> 
> Cc: Andi Shyti 
> Cc: Janusz Krzysztofik 
> Cc: Jonathan Cavitt 
> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10249
> Signed-off-by: Nirmoy Das 

pushed to drm-intel-gt-next.

Thanks,
Andi


Re: [PATCH v5 2/4] drm/i915/gt: Refactor uabi engine class/instance list creation

2024-03-12 Thread Andi Shyti
On Tue, Mar 12, 2024 at 10:08:33AM -0700, Matt Roper wrote:
> On Fri, Mar 08, 2024 at 09:22:17PM +0100, Andi Shyti wrote:
> > For the upcoming changes we need a cleaner way to build the list
> > of uabi engines.
> > 
> > Suggested-by: Tvrtko Ursulin 
> > Signed-off-by: Andi Shyti 
> > Cc:  # v6.2+
> 
> I don't really see why we need patches 2 & 3 in this series. 

For patch number '2' We had a round of review with Tvrtko and we
wanted to avoid the change I pasted at the bottom[*], which would
decrease something that was increased earlier.

> If we want
> to restrict the platform to a single CCS engine for now (and give that
> single engine access to all of the cslices), it would be much simpler to
> only create a single intel_engine_cs which which would then cause both
> i915 and userspace to only consider a single engine, even if more than
> one is physically present.  That could be done with a simple adjustment
> to engine_mask_apply_compute_fuses() to mask off extra bits from the
> engine mask such that only a single CCS can get returned rather than the
> mask of all CCSs that are present.
> 
> Managing all of the engines in the KMD but only exposing one (some) of
> them to userspace might be something we need if you want to add extra
> functionality down to road to "hotplug" extra engines, or to allow
> userspace to explicitly request multi-CCS mode.  But none of that seems
> necessary for this series, especially for something you're backporting
> to stable kernels.

It's true, it would even be easier to mask out all the CCS
engines after the first. I thought of this.

On the one hand, adding a for_each_available_engine() through
the stable path is a bit of an abuse, but it's functional to the
single CCS mode.

I was aiming for a longer term solution. If I add a patch to mask
off CCS engines, then I will need to revert it quite soon for
the stable release.

I'm not sure which one is better, though.

Thanks,
Andi

[*]
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 833987015b8b..7041acc77810 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -243,6 +243,15 @@  void intel_engines_driver_register(struct 
drm_i915_private *i915)
if (engine->uabi_class == I915_NO_UABI_CLASS)
continue;

+   /*
+* Do not list and do not count CCS engines other than the first
+*/
+   if (engine->uabi_class == I915_ENGINE_CLASS_COMPUTE &&
+   engine->uabi_instance > 0) {
+   i915->engine_uabi_class_count[engine->uabi_class]--;
+   continue;
+   }
+
rb_link_node(>uabi_node, prev, p);
rb_insert_color(>uabi_node, >uabi_engines);


Re: [PATCH v5 1/4] drm/i915/gt: Disable HW load balancing for CCS

2024-03-12 Thread Andi Shyti
Hi Matt,

...

> >  #define GEN12_RCU_MODE _MMIO(0x14800)
> >  #define   GEN12_RCU_MODE_CCS_ENABLEREG_BIT(0)
> > +#define   XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE   REG_BIT(1)
> 
> Nitpick: we usually order register bits in descending order.  Aside from
> that,

I can take care of it.

> Reviewed-by: Matt Roper 

Thanks!
Andi


Re: [PATCH v2] drm/i915/hwmon: Fix locking inversion in sysfs getter

2024-03-12 Thread Andi Shyti
Hi Janusz,

On Mon, Mar 11, 2024 at 09:34:58PM +0100, Janusz Krzysztofik wrote:
> In i915 hwmon sysfs getter path we now take a hwmon_lock, then acquire an
> rpm wakeref.  That results in lock inversion:
> 
> <4> [197.079335] ==
> <4> [197.085473] WARNING: possible circular locking dependency detected
> <4> [197.091611] 6.8.0-rc7-Patchwork_129026v7-gc4dc92fb1152+ #1 Not tainted
> <4> [197.098096] --
> <4> [197.104231] prometheus-node/839 is trying to acquire lock:
> <4> [197.109680] 82764d80 (fs_reclaim){+.+.}-{0:0}, at: 
> __kmalloc+0x9a/0x350
> <4> [197.116939]
> but task is already holding lock:
> <4> [197.122730] 88811b772a40 (>hwmon_lock){+.+.}-{3:3}, at: 
> hwm_energy+0x4b/0x100 [i915]
> <4> [197.131543]
> which lock already depends on the new lock.
> ...
> <4> [197.507922] Chain exists of:
>   fs_reclaim --> >reset.mutex --> >hwmon_lock
> <4> [197.518528]  Possible unsafe locking scenario:
> <4> [197.524411]CPU0CPU1
> <4> [197.528916]
> <4> [197.533418]   lock(>hwmon_lock);
> <4> [197.537237]lock(>reset.mutex);
> <4> [197.543376]lock(>hwmon_lock);
> <4> [197.549682]   lock(fs_reclaim);
> ...
> <4> [197.632548] Call Trace:
> <4> [197.634990]  
> <4> [197.637088]  dump_stack_lvl+0x64/0xb0
> <4> [197.640738]  check_noncircular+0x15e/0x180
> <4> [197.652968]  check_prev_add+0xe9/0xce0
> <4> [197.656705]  __lock_acquire+0x179f/0x2300
> <4> [197.660694]  lock_acquire+0xd8/0x2d0
> <4> [197.673009]  fs_reclaim_acquire+0xa1/0xd0
> <4> [197.680478]  __kmalloc+0x9a/0x350
> <4> [197.689063]  acpi_ns_internalize_name.part.0+0x4a/0xb0
> <4> [197.694170]  acpi_ns_get_node_unlocked+0x60/0xf0
> <4> [197.720608]  acpi_ns_get_node+0x3b/0x60
> <4> [197.724428]  acpi_get_handle+0x57/0xb0
> <4> [197.728164]  acpi_has_method+0x20/0x50
> <4> [197.731896]  acpi_pci_set_power_state+0x43/0x120
> <4> [197.736485]  pci_power_up+0x24/0x1c0
> <4> [197.740047]  pci_pm_default_resume_early+0x9/0x30
> <4> [197.744725]  pci_pm_runtime_resume+0x2d/0x90
> <4> [197.753911]  __rpm_callback+0x3c/0x110
> <4> [197.762586]  rpm_callback+0x58/0x70
> <4> [197.766064]  rpm_resume+0x51e/0x730
> <4> [197.769542]  rpm_resume+0x267/0x730
> <4> [197.773020]  rpm_resume+0x267/0x730
> <4> [197.776498]  rpm_resume+0x267/0x730
> <4> [197.779974]  __pm_runtime_resume+0x49/0x90
> <4> [197.784055]  __intel_runtime_pm_get+0x19/0xa0 [i915]
> <4> [197.789070]  hwm_energy+0x55/0x100 [i915]
> <4> [197.793183]  hwm_read+0x9a/0x310 [i915]
> <4> [197.797124]  hwmon_attr_show+0x36/0x120
> <4> [197.800946]  dev_attr_show+0x15/0x60
> <4> [197.804509]  sysfs_kf_seq_show+0xb5/0x100
> 
> Acquire the wakeref before the lock and hold it as long as the lock is
> also held.  Follow that pattern across the whole source file where similar
> lock inversion can happen.
> 
> v2: Keep hardware read under the lock so the whole operation of updating
> energy from hardware is still atomic (Guenter),
>   - instead, acquire the rpm wakeref before the lock and hold it as long
> as the lock is held,
>   - use the same aproach for other similar places across the i915_hwmon.c
> source file (Rodrigo).
> 
> Fixes: c41b8bdcc297 ("drm/i915/hwmon: Show device level energy usage")
> Signed-off-by: Janusz Krzysztofik 
> Cc: Rodrigo Vivi 
> Cc: Guenter Roeck 
> Cc:  # v6.2+

Reviewed-by: Andi Shyti 

If you want I can change the Fixes tag as suggested by Ashutosh
before applying the patch before pushing the change.

Andi


Re: [PATCH] drm/i915/selftests: Pick correct caching mode.

2024-03-12 Thread Andi Shyti
Hi Nirmoy,

On Tue, Mar 12, 2024 at 12:18:15PM +0100, Nirmoy Das wrote:
> Caching mode is HW dependent so pick a correct one using
> intel_gt_coherent_map_type().
> 
> Cc: Andi Shyti 
> Cc: Janusz Krzysztofik 
> Cc: Jonathan Cavitt 
> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10249
> Signed-off-by: Nirmoy Das 

I think it's a good choice not to have the Fixes tag here either.

Reviewed-by: Andi Shyti 

Thanks,
Andi


[PATCH v5 4/4] drm/i915/gt: Enable only one CCS for compute workload

2024-03-08 Thread Andi Shyti
Enable only one CCS engine by default with all the compute slices
allocated to it.

While generating the list of UABI engines to be exposed to the
user, exclude any additional CCS engines beyond the first
instance.

This change can be tested with igt i915_query.

Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement")
Requires: 075e003a9e22 ("drm/i915/gt: Refactor uabi engine class/instance list 
creation")
Requires: 58b935268238 ("drm/i915/gt: Disable tests for CCS engines beyond the 
first")
Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Matt Roper 
Cc:  # v6.2+
---
 drivers/gpu/drm/i915/Makefile   |  1 +
 drivers/gpu/drm/i915/gt/intel_engine_user.c | 11 ++
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 39 +
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h | 13 +++
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  5 +++
 drivers/gpu/drm/i915/gt/intel_workarounds.c |  7 
 6 files changed, 76 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 3ef6ed41e62b..a6885a1d41a1 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -118,6 +118,7 @@ gt-y += \
gt/intel_ggtt_fencing.o \
gt/intel_gt.o \
gt/intel_gt_buffer_pool.o \
+   gt/intel_gt_ccs_mode.o \
gt/intel_gt_clock_utils.o \
gt/intel_gt_debugfs.o \
gt/intel_gt_engines_debugfs.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 11cc06c0c785..9ef1c4ce252d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -208,6 +208,7 @@ void intel_engines_driver_register(struct drm_i915_private 
*i915)
struct list_head *it, *next;
struct rb_node **p, *prev;
LIST_HEAD(engines);
+   u16 uabi_ccs = 0;
 
sort_engines(i915, );
 
@@ -244,6 +245,16 @@ void intel_engines_driver_register(struct drm_i915_private 
*i915)
if (uabi_class > I915_LAST_UABI_ENGINE_CLASS)
continue;
 
+   /*
+* The load is balanced among all the available compute
+* slices. Expose only the first instance of the compute
+* engine.
+*/
+   if (IS_DG2(i915) &&
+   uabi_class == I915_ENGINE_CLASS_COMPUTE &&
+   uabi_ccs++)
+   continue;
+
GEM_BUG_ON(uabi_class >=
   ARRAY_SIZE(i915->engine_uabi_class_count));
i915->engine_uabi_class_count[uabi_class]++;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c 
b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
new file mode 100644
index ..044219c5960a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
+#include "intel_gt_regs.h"
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt)
+{
+   int cslice;
+   u32 mode = 0;
+   int first_ccs = __ffs(CCS_MASK(gt));
+
+   if (!IS_DG2(gt->i915))
+   return;
+
+   /* Build the value for the fixed CCS load balancing */
+   for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
+   if (CCS_MASK(gt) & BIT(cslice))
+   /*
+* If available, assign the cslice
+* to the first available engine...
+*/
+   mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs);
+
+   else
+   /*
+* ... otherwise, mark the cslice as
+* unavailable if no CCS dispatches here
+*/
+   mode |= XEHP_CCS_MODE_CSLICE(cslice,
+XEHP_CCS_MODE_CSLICE_MASK);
+   }
+
+   intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h 
b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
new file mode 100644
index ..9e5549caeb26
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_CCS_MODE_H__
+#define __INTEL_GT_CCS_MODE_H__
+
+struct intel_gt;
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt);
+
+#endif /* __INTEL_GT_CCS_MODE_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
inde

[PATCH v5 3/4] drm/i915/gt: Disable tests for CCS engines beyond the first

2024-03-08 Thread Andi Shyti
In anticipation of the upcoming commit that will operate with
only one CCS stream, when more than one CCS slice is present,
create a new for_each_available_engine() that excludes CCS
engines beyond the first. Begin using it in the hangcheck
selftest.

Signed-off-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/intel_gt.h   | 13 
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 22 ++--
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
b/drivers/gpu/drm/i915/gt/intel_gt.h
index 6e7cab60834c..d3ee7aee9c7c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -188,6 +188,19 @@ void intel_gt_release_all(struct drm_i915_private *i915);
 (id__)++) \
for_each_if ((engine__) = (gt__)->engine[(id__)])
 
+/*
+ * Simple iterator over all initialised engines that
+ * takes into account CCS fixed mode load balancing
+ */
+#define for_each_available_engine(engine__, gt__, id__) \
+   for ((id__) = 0; \
+(id__) < I915_NUM_ENGINES; \
+(id__)++) \
+   for_each_if \
+   (((engine__) = (gt__)->engine[(id__)]) && \
+!(engine__->class == COMPUTE_CLASS && \
+  engine__->instance))
+
 /* Iterator over subset of engines selected by mask */
 #define for_each_engine_masked(engine__, gt__, mask__, tmp__) \
for ((tmp__) = (mask__) & (gt__)->info.engine_mask; \
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c 
b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 9ce8ff1c04fe..f1e684987ddb 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -296,7 +296,7 @@ static int igt_hang_sanitycheck(void *arg)
if (err)
return err;
 
-   for_each_engine(engine, gt, id) {
+   for_each_available_engine(engine, gt, id) {
struct intel_wedge_me w;
long timeout;
 
@@ -360,7 +360,7 @@ static int igt_reset_nop(void *arg)
reset_count = i915_reset_count(global);
count = 0;
do {
-   for_each_engine(engine, gt, id) {
+   for_each_available_engine(engine, gt, id) {
struct intel_context *ce;
int i;
 
@@ -433,7 +433,7 @@ static int igt_reset_nop_engine(void *arg)
if (!intel_has_reset_engine(gt))
return 0;
 
-   for_each_engine(engine, gt, id) {
+   for_each_available_engine(engine, gt, id) {
unsigned int reset_count, reset_engine_count, count;
struct intel_context *ce;
IGT_TIMEOUT(end_time);
@@ -553,7 +553,7 @@ static int igt_reset_fail_engine(void *arg)
if (!intel_has_reset_engine(gt))
return 0;
 
-   for_each_engine(engine, gt, id) {
+   for_each_available_engine(engine, gt, id) {
unsigned int count;
struct intel_context *ce;
IGT_TIMEOUT(end_time);
@@ -700,7 +700,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool 
active)
return err;
}
 
-   for_each_engine(engine, gt, id) {
+   for_each_available_engine(engine, gt, id) {
unsigned int reset_count, reset_engine_count;
unsigned long count;
bool using_guc = intel_engine_uses_guc(engine);
@@ -990,7 +990,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
if (!threads)
return -ENOMEM;
 
-   for_each_engine(engine, gt, id) {
+   for_each_available_engine(engine, gt, id) {
unsigned long device = i915_reset_count(global);
unsigned long count = 0, reported;
bool using_guc = intel_engine_uses_guc(engine);
@@ -1010,7 +1010,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
}
 
memset(threads, 0, sizeof(*threads) * I915_NUM_ENGINES);
-   for_each_engine(other, gt, tmp) {
+   for_each_available_engine(other, gt, tmp) {
struct kthread_worker *worker;
 
threads[tmp].resets =
@@ -1185,7 +1185,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
}
 
 unwind:
-   for_each_engine(other, gt, tmp) {
+   for_each_available_engine(other, gt, tmp) {
int ret;
 
if (!threads[tmp].worker)
@@ -1621,7 +1621,7 @@ static int wait_for_others(struct intel_gt *gt,
struct intel_engine_cs *engine;
enum intel_engine_id id;
 
-   for_each_engine(engine, gt, id) {
+   for_each_available_engine(engine, gt, id) {
if (engine == exclude)
continue;
 
@@ -1649,7 +1649,7 @@ static int igt_reset_queue(void *arg)
  

[PATCH v5 2/4] drm/i915/gt: Refactor uabi engine class/instance list creation

2024-03-08 Thread Andi Shyti
For the upcoming changes we need a cleaner way to build the list
of uabi engines.

Suggested-by: Tvrtko Ursulin 
Signed-off-by: Andi Shyti 
Cc:  # v6.2+
---
 drivers/gpu/drm/i915/gt/intel_engine_user.c | 29 -
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 833987015b8b..11cc06c0c785 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -203,7 +203,7 @@ static void engine_rename(struct intel_engine_cs *engine, 
const char *name, u16
 
 void intel_engines_driver_register(struct drm_i915_private *i915)
 {
-   u16 name_instance, other_instance = 0;
+   u16 class_instance[I915_LAST_UABI_ENGINE_CLASS + 2] = { };
struct legacy_ring ring = {};
struct list_head *it, *next;
struct rb_node **p, *prev;
@@ -214,6 +214,8 @@ void intel_engines_driver_register(struct drm_i915_private 
*i915)
prev = NULL;
p = >uabi_engines.rb_node;
list_for_each_safe(it, next, ) {
+   u16 uabi_class;
+
struct intel_engine_cs *engine =
container_of(it, typeof(*engine), uabi_list);
 
@@ -222,15 +224,14 @@ void intel_engines_driver_register(struct 
drm_i915_private *i915)
 
GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
engine->uabi_class = uabi_classes[engine->class];
-   if (engine->uabi_class == I915_NO_UABI_CLASS) {
-   name_instance = other_instance++;
-   } else {
-   GEM_BUG_ON(engine->uabi_class >=
-  ARRAY_SIZE(i915->engine_uabi_class_count));
-   name_instance =
-   
i915->engine_uabi_class_count[engine->uabi_class]++;
-   }
-   engine->uabi_instance = name_instance;
+
+   if (engine->uabi_class == I915_NO_UABI_CLASS)
+   uabi_class = I915_LAST_UABI_ENGINE_CLASS + 1;
+   else
+   uabi_class = engine->uabi_class;
+
+   GEM_BUG_ON(uabi_class >= ARRAY_SIZE(class_instance));
+   engine->uabi_instance = class_instance[uabi_class]++;
 
/*
 * Replace the internal name with the final user and log facing
@@ -238,11 +239,15 @@ void intel_engines_driver_register(struct 
drm_i915_private *i915)
 */
engine_rename(engine,
  intel_engine_class_repr(engine->class),
- name_instance);
+ engine->uabi_instance);
 
-   if (engine->uabi_class == I915_NO_UABI_CLASS)
+   if (uabi_class > I915_LAST_UABI_ENGINE_CLASS)
continue;
 
+   GEM_BUG_ON(uabi_class >=
+  ARRAY_SIZE(i915->engine_uabi_class_count));
+   i915->engine_uabi_class_count[uabi_class]++;
+
rb_link_node(>uabi_node, prev, p);
rb_insert_color(>uabi_node, >uabi_engines);
 
-- 
2.43.0



[PATCH v5 1/4] drm/i915/gt: Disable HW load balancing for CCS

2024-03-08 Thread Andi Shyti
The hardware should not dynamically balance the load between CCS
engines. Wa_14019159160 recommends disabling it across all
platforms.

Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement")
Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Matt Roper 
Cc:  # v6.2+
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  1 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 23 +++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 50962cfd1353..cf709f6c05ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1478,6 +1478,7 @@
 
 #define GEN12_RCU_MODE _MMIO(0x14800)
 #define   GEN12_RCU_MODE_CCS_ENABLEREG_BIT(0)
+#define   XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE   REG_BIT(1)
 
 #define CHV_FUSE_GT_MMIO(VLV_GUNIT_BASE + 0x2168)
 #define   CHV_FGT_DISABLE_SS0  (1 << 10)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 25413809b9dc..4865eb5ca9c9 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -51,7 +51,8 @@
  *   registers belonging to BCS, VCS or VECS should be implemented in
  *   xcs_engine_wa_init(). Workarounds for registers not belonging to a 
specific
  *   engine's MMIO range but that are part of of the common RCS/CCS reset 
domain
- *   should be implemented in general_render_compute_wa_init().
+ *   should be implemented in general_render_compute_wa_init(). The settings
+ *   about the CCS load balancing should be added in ccs_engine_wa_mode().
  *
  * - GT workarounds: the list of these WAs is applied whenever these registers
  *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
@@ -2854,6 +2855,22 @@ add_render_compute_tuning_settings(struct intel_gt *gt,
wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
 }
 
+static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct 
i915_wa_list *wal)
+{
+   struct intel_gt *gt = engine->gt;
+
+   if (!IS_DG2(gt->i915))
+   return;
+
+   /*
+* Wa_14019159160: This workaround, along with others, leads to
+* significant challenges in utilizing load balancing among the
+* CCS slices. Consequently, an architectural decision has been
+* made to completely disable automatic CCS load balancing.
+*/
+   wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);
+}
+
 /*
  * The workarounds in this function apply to shared registers in
  * the general render reset domain that aren't tied to a
@@ -3004,8 +3021,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, 
struct i915_wa_list *wal
 * to a single RCS/CCS engine's workaround list since
 * they're reset as part of the general render domain reset.
 */
-   if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
+   if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) {
general_render_compute_wa_init(engine, wal);
+   ccs_engine_wa_mode(engine, wal);
+   }
 
if (engine->class == COMPUTE_CLASS)
ccs_engine_wa_init(engine, wal);
-- 
2.43.0



[PATCH v5 0/4] Disable automatic CCS load balancing

2024-03-08 Thread Andi Shyti
Hi,

this series does basically two things:

1. Disables automatic load balancing as advised by the hardware
   workaround.

2. Assigns all the CCS slices to one single user engine. The user
   will then be able to query only one CCS engine

In this v5 I have created a new file, gt/intel_gt_ccs_mode.c
where I added the intel_gt_apply_ccs_mode(). In the upcoming
patches, this file will contain the implementation for dynamic
CCS mode setting.

I also saw it necessary to create a new mechanism for looping
through engines in order to exclude the CCS's that are merged
into one single stream. It's called for_each_available_engine()
and I started using it in the hangcheck selftest. I might still
need to iterate a few CI runs in order to cover more cases when
this call is needed.

I'm using here the "Requires: " tag, but I'm not sure the commit
id will be valid, on the other hand, I don't know what commit id
I should use.

Thanks Tvrtko, Matt, John and Joonas for your reviews!

Andi

Changelog
=
v4 -> v5
 - Use the workaround framework to do all the CCS balancing
   settings in order to always apply the modes also when the
   engine resets. Put everything in its own specific function to
   be executed for the first CCS engine encountered. (Thanks
   Matt)
 - Calculate the CCS ID for the CCS mode as the first available
   CCS among all the engines (Thanks Matt)
 - create the intel_gt_ccs_mode.c function to host the CCS
   configuration. We will have it ready for the next series.
 - Fix a selftest that was failing because could not set CCS2.
 - Add the for_each_available_engine() macro to exclude CCS1+ and
   start using it in the hangcheck selftest.

v3 -> v4
 - Reword correctly the comment in the workaround
 - Fix a buffer overflow (Thanks Joonas)
 - Handle properly the fused engines when setting the CCS mode.

v2 -> v3
 - Simplified the algorithm for creating the list of the exported
   uabi engines. (Patch 1) (Thanks, Tvrtko)
 - Consider the fused engines when creating the uabi engine list
   (Patch 2) (Thanks, Matt)
 - Patch 4 now uses the refactoring from patch 1, in a cleaner
   outcome.

v1 -> v2
 - In Patch 1 use the correct workaround number (thanks Matt).
 - In Patch 2 do not add the extra CCS engines to the exposed
   UABI engine list and adapt the engine counting accordingly
   (thanks Tvrtko).
 - Reword the commit of Patch 2 (thanks John).

Andi Shyti (4):
  drm/i915/gt: Disable HW load balancing for CCS
  drm/i915/gt: Refactor uabi engine class/instance list creation
  drm/i915/gt: Disable tests for CCS engines beyond the first
  drm/i915/gt: Enable only one CCS for compute workload

 drivers/gpu/drm/i915/Makefile|  1 +
 drivers/gpu/drm/i915/gt/intel_engine_user.c  | 40 ++--
 drivers/gpu/drm/i915/gt/intel_gt.h   | 13 +++
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c  | 39 +++
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h  | 13 +++
 drivers/gpu/drm/i915/gt/intel_gt_regs.h  |  6 +++
 drivers/gpu/drm/i915/gt/intel_workarounds.c  | 30 ++-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 22 +--
 8 files changed, 139 insertions(+), 25 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h

-- 
2.43.0



Re: [PATCH v4 1/3] drm/i915/gt: Disable HW load balancing for CCS

2024-03-07 Thread Andi Shyti
Hi John,

...

> > > > +
> > > > +   /*
> > > > +* Wa_14019159160: disable the automatic CCS load 
> > > > balancing
> > > I'm still a bit concerned that this doesn't really match what this
> > > specific workaround is asking us to do.  There seems to be an agreement
> > > on various internal email threads that we need to disable load
> > > balancing, but there's no single specific workaround that officially
> > > documents that decision.
> > > 
> > > This specific workaround asks us to do a bunch of different things, and
> > > the third item it asks for is to disable load balancing in very specific
> > > cases (i.e., while the RCS is active at the same time as one or more CCS
> > > engines).  Taking this workaround in isolation, it would be valid to
> > > keep load balancing active if you were just using the CCS engines and
> > > leaving the RCS idle, or if balancing was turned on/off by the GuC
> > > scheduler according to engine use at the moment, as the documented
> > > workaround seems to assume will be the case.
> > > 
> > > So in general I think we do need to disable load balancing based on
> > > other offline discussion, but blaming that entire change on
> > > Wa_14019159160 seems a bit questionable since it's not really what this
> > > specific workaround is asking us to do and someone may come back and try
> > > to "correct" the implementation of this workaround in the future without
> > > realizing there are other factors too.  It would be great if we could
> > > get hardware teams to properly document this expectation somewhere
> > > (either in a separate dedicated workaround, or in the MMIO tuning guide)
> > > so that we'll have a more direct and authoritative source for such a
> > > large behavioral change.
> > On one hand I think you are right, on the other hand I think this
> > workaround has not properly developed into what we have been
> > describing later.
> I think it is not so much that the w/a is 'not properly developed'. It's
> more that this w/a plus others when taken in combination plus knowledge of
> future directions has led to an architectural decision that is beyond the
> scope of the w/a.
> 
> As such, I think Matt is definitely correct. Tagging a code change with a
> w/a number when that change does something very different to what is
> described in the w/a is wrong and a maintenance issue waiting to happen.
> 
> At the very least, you should just put in a comment explaining the
> situation. E.g.:
> 
>  /*
>  * Wa_14019159160: This w/a plus others cause significant issues with the use 
> of
>  * load balancing. Hence an architectural level decision was taking to simply
>  * disable automatic CCS load balancing completely.
>  */
Good suggestion! I will anyway check tomorrow with Joonas if it's
worth the effort to set up a new "software" workaround.

Thanks,
Andi


Re: [PATCH v4 3/3] drm/i915/gt: Enable only one CCS for compute workload

2024-03-07 Thread Andi Shyti
Hi Matt,

> > +static void intel_gt_apply_ccs_mode(struct intel_gt *gt)
> > +{
> > +   u32 mode;
> > +   int cslice;
> > +
> > +   if (!IS_DG2(gt->i915))
> > +   return;
> > +
> > +   /* Set '0' as a default CCS id to all the cslices */
> > +   mode = 0;
> > +
> > +   for (cslice = 0; cslice < hweight32(CCS_MASK(gt)); cslice++)
> > +   /* Write 0x7 if no CCS context dispatches to this cslice */
> > +   if (!(CCS_MASK(gt) & BIT(cslice)))
> > +   mode |= XEHP_CCS_MODE_CSLICE(cslice,
> > +XEHP_CCS_MODE_CSLICE_MASK);
> > +
> > +   intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);
> 
> This is still going to hook all available cslices up to hardware engine
> ccs0.  But what you actually want is to hook them all up to what
> userspace sees as CCS0 (i.e., the first CCS engine that wasn't fused
> off).  Hardware's engine numbering and userspace's numbering aren't the
> same.

Yes, correct... we had so many discussions and I forgot about it :-)

> Also, if you have a part that only has hardware ccs1/cslice1 for
> example, you're not going to set cslices 2 & 3 to 0x7 properly.

Good point also here, the XEHP_CCS_MODE register is indeed
generic to all platforms.

> So probably what you want is something like this (untested):
> 
> static void intel_gt_apply_ccs_mode(struct intel_gt *gt)
> {
> u32 mode = 0;
> int first_ccs = __ffs(CCS_MASK(gt));
> 
> /*
>  * Re-assign every present cslice to the first available CCS
>  * engine; mark unavailable cslices as unused.
>  */
> for (int cslice = 0; cslice < 4; cslice++) {
> if (CCS_MASK(gt) & BIT(cslice))
> mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs);
> else
> mode |= XEHP_CCS_MODE_CSLICE(cslice,
>  
> XEHP_CCS_MODE_CSLICE_MASK);
> }
> 
> intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);
> }
> 
> > +}
> > +
> >  int intel_gt_init_hw(struct intel_gt *gt)
> >  {
> > struct drm_i915_private *i915 = gt->i915;
> > @@ -195,6 +215,9 @@ int intel_gt_init_hw(struct intel_gt *gt)
> >  
> > intel_gt_init_swizzling(gt);
> >  
> > +   /* Configure CCS mode */
> > +   intel_gt_apply_ccs_mode(gt);
> 
> This is only setting this once during init.  The value gets lost on
> every RCS/CCS reset, so we need to make sure it gets reapplied when
> necessary.  That means you either need to add this to the GuC regset, or
> you need to implement the programming as a "fake workaround" so that the
> workaround framework will take care of the re-application for you.

OK, I'll hook everything up in the ccs_engine_wa_init().

Thanks,
Andi


Re: [PATCH v4 1/3] drm/i915/gt: Disable HW load balancing for CCS

2024-03-07 Thread Andi Shyti
Hi Matt,

On Wed, Mar 06, 2024 at 03:46:09PM -0800, Matt Roper wrote:
> On Wed, Mar 06, 2024 at 02:22:45AM +0100, Andi Shyti wrote:
> > The hardware should not dynamically balance the load between CCS
> > engines. Wa_14019159160 recommends disabling it across all
> > platforms.
> > 
> > Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement")
> > Signed-off-by: Andi Shyti 
> > Cc: Chris Wilson 
> > Cc: Joonas Lahtinen 
> > Cc: Matt Roper 
> > Cc:  # v6.2+
> > ---
> >  drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 +
> >  drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +
> >  2 files changed, 6 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> > b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > index 50962cfd1353..cf709f6c05ae 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > @@ -1478,6 +1478,7 @@
> >  
> >  #define GEN12_RCU_MODE _MMIO(0x14800)
> >  #define   GEN12_RCU_MODE_CCS_ENABLEREG_BIT(0)
> > +#define   XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE   REG_BIT(1)
> >  
> >  #define CHV_FUSE_GT_MMIO(VLV_GUNIT_BASE + 
> > 0x2168)
> >  #define   CHV_FGT_DISABLE_SS0  (1 << 10)
> > diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> > b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > index d67d44611c28..a2e78cf0b5f5 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > @@ -2945,6 +2945,11 @@ general_render_compute_wa_init(struct 
> > intel_engine_cs *engine, struct i915_wa_li
> >  
> > /* Wa_18028616096 */
> > wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, 
> > UGM_FRAGMENT_THRESHOLD_TO_3);
> > +
> > +   /*
> > +* Wa_14019159160: disable the automatic CCS load balancing
> 
> I'm still a bit concerned that this doesn't really match what this
> specific workaround is asking us to do.  There seems to be an agreement
> on various internal email threads that we need to disable load
> balancing, but there's no single specific workaround that officially
> documents that decision.
> 
> This specific workaround asks us to do a bunch of different things, and
> the third item it asks for is to disable load balancing in very specific
> cases (i.e., while the RCS is active at the same time as one or more CCS
> engines).  Taking this workaround in isolation, it would be valid to
> keep load balancing active if you were just using the CCS engines and
> leaving the RCS idle, or if balancing was turned on/off by the GuC
> scheduler according to engine use at the moment, as the documented
> workaround seems to assume will be the case.
> 
> So in general I think we do need to disable load balancing based on
> other offline discussion, but blaming that entire change on
> Wa_14019159160 seems a bit questionable since it's not really what this
> specific workaround is asking us to do and someone may come back and try
> to "correct" the implementation of this workaround in the future without
> realizing there are other factors too.  It would be great if we could
> get hardware teams to properly document this expectation somewhere
> (either in a separate dedicated workaround, or in the MMIO tuning guide)
> so that we'll have a more direct and authoritative source for such a
> large behavioral change.

On one hand I think you are right, on the other hand I think this
workaround has not properly developed into what we have been
describing later.

Perhaps, one solution would be to create a new generic workaround
for all platforms with more than one CCS and put everyone at
peace. But I don't know the process.

Are you able to help here? Or Joonas?

Thanks, Matt!
Andi


[PATCH v4 3/3] drm/i915/gt: Enable only one CCS for compute workload

2024-03-05 Thread Andi Shyti
Enable only one CCS engine by default with all the compute slices
allocated to it.

While generating the list of UABI engines to be exposed to the
user, exclude any additional CCS engines beyond the first
instance.

This change can be tested with igt i915_query.

Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement")
Requires: 97aba5e46038 ("drm/i915/gt: Refactor uabi engine class/instance list 
creation")
Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Matt Roper 
Cc:  # v6.2+
---
 drivers/gpu/drm/i915/gt/intel_engine_user.c | 11 ++
 drivers/gpu/drm/i915/gt/intel_gt.c  | 23 +
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  5 +
 3 files changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 11cc06c0c785..9ef1c4ce252d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -208,6 +208,7 @@ void intel_engines_driver_register(struct drm_i915_private 
*i915)
struct list_head *it, *next;
struct rb_node **p, *prev;
LIST_HEAD(engines);
+   u16 uabi_ccs = 0;
 
sort_engines(i915, );
 
@@ -244,6 +245,16 @@ void intel_engines_driver_register(struct drm_i915_private 
*i915)
if (uabi_class > I915_LAST_UABI_ENGINE_CLASS)
continue;
 
+   /*
+* The load is balanced among all the available compute
+* slices. Expose only the first instance of the compute
+* engine.
+*/
+   if (IS_DG2(i915) &&
+   uabi_class == I915_ENGINE_CLASS_COMPUTE &&
+   uabi_ccs++)
+   continue;
+
GEM_BUG_ON(uabi_class >=
   ARRAY_SIZE(i915->engine_uabi_class_count));
i915->engine_uabi_class_count[uabi_class]++;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index a425db5ed3a2..0aac97439552 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -168,6 +168,26 @@ static void init_unused_rings(struct intel_gt *gt)
}
 }
 
+static void intel_gt_apply_ccs_mode(struct intel_gt *gt)
+{
+   u32 mode;
+   int cslice;
+
+   if (!IS_DG2(gt->i915))
+   return;
+
+   /* Set '0' as a default CCS id to all the cslices */
+   mode = 0;
+
+   for (cslice = 0; cslice < hweight32(CCS_MASK(gt)); cslice++)
+   /* Write 0x7 if no CCS context dispatches to this cslice */
+   if (!(CCS_MASK(gt) & BIT(cslice)))
+   mode |= XEHP_CCS_MODE_CSLICE(cslice,
+XEHP_CCS_MODE_CSLICE_MASK);
+
+   intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);
+}
+
 int intel_gt_init_hw(struct intel_gt *gt)
 {
struct drm_i915_private *i915 = gt->i915;
@@ -195,6 +215,9 @@ int intel_gt_init_hw(struct intel_gt *gt)
 
intel_gt_init_swizzling(gt);
 
+   /* Configure CCS mode */
+   intel_gt_apply_ccs_mode(gt);
+
/*
 * At least 830 can leave some of the unused rings
 * "active" (ie. head != tail) after resume which
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index cf709f6c05ae..8224dd99c7d7 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1480,6 +1480,11 @@
 #define   GEN12_RCU_MODE_CCS_ENABLEREG_BIT(0)
 #define   XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE   REG_BIT(1)
 
+#define XEHP_CCS_MODE  _MMIO(0x14804)
+#define   XEHP_CCS_MODE_CSLICE_MASKREG_GENMASK(2, 0) /* CCS0-3 + 
rsvd */
+#define   XEHP_CCS_MODE_CSLICE_WIDTH   ilog2(XEHP_CCS_MODE_CSLICE_MASK 
+ 1)
+#define   XEHP_CCS_MODE_CSLICE(cslice, ccs)(ccs << (cslice * 
XEHP_CCS_MODE_CSLICE_WIDTH))
+
 #define CHV_FUSE_GT_MMIO(VLV_GUNIT_BASE + 0x2168)
 #define   CHV_FGT_DISABLE_SS0  (1 << 10)
 #define   CHV_FGT_DISABLE_SS1  (1 << 11)
-- 
2.43.0



[PATCH v4 2/3] drm/i915/gt: Refactor uabi engine class/instance list creation

2024-03-05 Thread Andi Shyti
For the upcoming changes we need a cleaner way to build the list
of uabi engines.

Suggested-by: Tvrtko Ursulin 
Signed-off-by: Andi Shyti 
Cc:  # v6.2+
---
 drivers/gpu/drm/i915/gt/intel_engine_user.c | 29 -
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 833987015b8b..11cc06c0c785 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -203,7 +203,7 @@ static void engine_rename(struct intel_engine_cs *engine, 
const char *name, u16
 
 void intel_engines_driver_register(struct drm_i915_private *i915)
 {
-   u16 name_instance, other_instance = 0;
+   u16 class_instance[I915_LAST_UABI_ENGINE_CLASS + 2] = { };
struct legacy_ring ring = {};
struct list_head *it, *next;
struct rb_node **p, *prev;
@@ -214,6 +214,8 @@ void intel_engines_driver_register(struct drm_i915_private 
*i915)
prev = NULL;
p = >uabi_engines.rb_node;
list_for_each_safe(it, next, ) {
+   u16 uabi_class;
+
struct intel_engine_cs *engine =
container_of(it, typeof(*engine), uabi_list);
 
@@ -222,15 +224,14 @@ void intel_engines_driver_register(struct 
drm_i915_private *i915)
 
GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
engine->uabi_class = uabi_classes[engine->class];
-   if (engine->uabi_class == I915_NO_UABI_CLASS) {
-   name_instance = other_instance++;
-   } else {
-   GEM_BUG_ON(engine->uabi_class >=
-  ARRAY_SIZE(i915->engine_uabi_class_count));
-   name_instance =
-   
i915->engine_uabi_class_count[engine->uabi_class]++;
-   }
-   engine->uabi_instance = name_instance;
+
+   if (engine->uabi_class == I915_NO_UABI_CLASS)
+   uabi_class = I915_LAST_UABI_ENGINE_CLASS + 1;
+   else
+   uabi_class = engine->uabi_class;
+
+   GEM_BUG_ON(uabi_class >= ARRAY_SIZE(class_instance));
+   engine->uabi_instance = class_instance[uabi_class]++;
 
/*
 * Replace the internal name with the final user and log facing
@@ -238,11 +239,15 @@ void intel_engines_driver_register(struct 
drm_i915_private *i915)
 */
engine_rename(engine,
  intel_engine_class_repr(engine->class),
- name_instance);
+ engine->uabi_instance);
 
-   if (engine->uabi_class == I915_NO_UABI_CLASS)
+   if (uabi_class > I915_LAST_UABI_ENGINE_CLASS)
continue;
 
+   GEM_BUG_ON(uabi_class >=
+  ARRAY_SIZE(i915->engine_uabi_class_count));
+   i915->engine_uabi_class_count[uabi_class]++;
+
rb_link_node(>uabi_node, prev, p);
rb_insert_color(>uabi_node, >uabi_engines);
 
-- 
2.43.0



[PATCH v4 1/3] drm/i915/gt: Disable HW load balancing for CCS

2024-03-05 Thread Andi Shyti
The hardware should not dynamically balance the load between CCS
engines. Wa_14019159160 recommends disabling it across all
platforms.

Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement")
Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Matt Roper 
Cc:  # v6.2+
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 50962cfd1353..cf709f6c05ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1478,6 +1478,7 @@
 
 #define GEN12_RCU_MODE _MMIO(0x14800)
 #define   GEN12_RCU_MODE_CCS_ENABLEREG_BIT(0)
+#define   XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE   REG_BIT(1)
 
 #define CHV_FUSE_GT_MMIO(VLV_GUNIT_BASE + 0x2168)
 #define   CHV_FGT_DISABLE_SS0  (1 << 10)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index d67d44611c28..a2e78cf0b5f5 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2945,6 +2945,11 @@ general_render_compute_wa_init(struct intel_engine_cs 
*engine, struct i915_wa_li
 
/* Wa_18028616096 */
wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, 
UGM_FRAGMENT_THRESHOLD_TO_3);
+
+   /*
+* Wa_14019159160: disable the automatic CCS load balancing
+*/
+   wa_masked_en(wal, GEN12_RCU_MODE, 
XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);
}
 
if (IS_DG2_G11(i915)) {
-- 
2.43.0



[PATCH v4 0/3] Disable automatic CCS load balancing

2024-03-05 Thread Andi Shyti
Hi,

I have to admit that v3 was a lazy attempt. This one should be on
the right path.

this series does basically two things:

1. Disables automatic load balancing as advised by the hardware
   workaround.

2. Assigns all the CCS slices to one single user engine. The user
   will then be able to query only one CCS engine

I'm using here the "Requires: " tag, but I'm not sure the commit
id will be valid, on the other hand, I don't know what commit id
I should use.

Thanks Tvrtko, Matt, John and Joonas for your reviews!

Andi

Changelog
=
v3 -> v4
- Reword correctly the comment in the workaround
- Fix a buffer overflow (Thanks Joonas)
- Handle properly the fused engines when setting the CCS mode.

v2 -> v3
- Simplified the algorithm for creating the list of the exported
  uabi engines. (Patch 1) (Thanks, Tvrtko)
- Consider the fused engines when creating the uabi engine list
  (Patch 2) (Thanks, Matt)
- Patch 4 now uses the refactoring from patch 1, in a cleaner
  outcome.

v1 -> v2
- In Patch 1 use the correct workaround number (thanks Matt).
- In Patch 2 do not add the extra CCS engines to the exposed UABI
  engine list and adapt the engine counting accordingly (thanks
  Tvrtko).
- Reword the commit of Patch 2 (thanks John).


Andi Shyti (3):
  drm/i915/gt: Disable HW load balancing for CCS
  drm/i915/gt: Refactor uabi engine class/instance list creation
  drm/i915/gt: Enable only one CCS for compute workload

 drivers/gpu/drm/i915/gt/intel_engine_user.c | 40 ++---
 drivers/gpu/drm/i915/gt/intel_gt.c  | 23 
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  6 
 drivers/gpu/drm/i915/gt/intel_workarounds.c |  5 +++
 4 files changed, 62 insertions(+), 12 deletions(-)

-- 
2.43.0



Re: [PATCH] drm/i915/selftests: Fix dependency of some timeouts on HZ

2024-03-05 Thread Andi Shyti
Hi Janusz,

On Thu, Feb 22, 2024 at 12:32:40PM +0100, Janusz Krzysztofik wrote:
> Third argument of i915_request_wait() accepts a timeout value in jiffies.
> Most users pass either a simple HZ based expression, or a result of
> msecs_to_jiffies(), or MAX_SCHEDULE_TIMEOUT, or a very small number not
> exceeding 4 if applicable as that value.  However, there is one user --
> intel_selftest_wait_for_rq() -- that passes a WAIT_FOR_RESET_TIME symbol,
> defined as a large constant value that most probably represents a desired
> timeout in ms.  While that usage results in the intended value of timeout
> on usual x86_64 kernel configurations, it is not portable across different
> architectures and custom kernel configs.
> 
> Rename the symbol to clearly indicate intended units and convert it to
> jiffies before use.
> 
> Fixes: 3a4bfa091c46 ("drm/i915/selftest: Fix workarounds selftest for GuC 
> submission")
> Signed-off-by: Janusz Krzysztofik 
> Cc: Rahul Kumar Singh 
> Cc: John Harrison 
> Cc: Matthew Brost 

pushed in drm-intel-gt-next.

Thank you,
Andi


Re: [PATCH] drm/i915/selftest_hangcheck: Check sanity with more patience

2024-03-05 Thread Andi Shyti
Hi Janusz,

On Wed, Feb 28, 2024 at 04:24:41PM +0100, Janusz Krzysztofik wrote:
> While trying to reproduce some other issues reported by CI for i915
> hangcheck live selftest, I found them hidden behind timeout failures
> reported by igt_hang_sanitycheck -- the very first hangcheck test case
> executed.
> 
> Feb 22 19:49:06 DUT1394ACMR kernel: calling  mei_gsc_driver_init+0x0/0xff0 
> [mei_gsc] @ 121074
> Feb 22 19:49:06 DUT1394ACMR kernel: i915 :03:00.0: [drm] DRM_I915_DEBUG 
> enabled
> Feb 22 19:49:06 DUT1394ACMR kernel: i915 :03:00.0: [drm] Cannot find any 
> crtc or sizes
> Feb 22 19:49:06 DUT1394ACMR kernel: probe of i915.mei-gsc.768 returned 0 
> after 1475 usecs
> Feb 22 19:49:06 DUT1394ACMR kernel: probe of i915.mei-gscfi.768 returned 0 
> after 1441 usecs
> Feb 22 19:49:06 DUT1394ACMR kernel: initcall mei_gsc_driver_init+0x0/0xff0 
> [mei_gsc] returned 0 after 3010 usecs
> Feb 22 19:49:06 DUT1394ACMR kernel: i915 :03:00.0: [drm] 
> DRM_I915_DEBUG_GEM enabled
> Feb 22 19:49:06 DUT1394ACMR kernel: i915 :03:00.0: [drm] 
> DRM_I915_DEBUG_RUNTIME_PM enabled
> Feb 22 19:49:06 DUT1394ACMR kernel: i915: Performing live selftests with 
> st_random_seed=0x4c26c048 st_timeout=500
> Feb 22 19:49:07 DUT1394ACMR kernel: i915: Running hangcheck
> Feb 22 19:49:07 DUT1394ACMR kernel: calling  mei_hdcp_driver_init+0x0/0xff0 
> [mei_hdcp] @ 121074
> Feb 22 19:49:07 DUT1394ACMR kernel: i915: Running 
> intel_hangcheck_live_selftests/igt_hang_sanitycheck
> Feb 22 19:49:07 DUT1394ACMR kernel: probe of 
> :00:16.0-b638ab7e-94e2-4ea2-a552-d1c54b627f04 returned 0 after 1398 usecs
> Feb 22 19:49:07 DUT1394ACMR kernel: probe of 
> i915.mei-gsc.768-b638ab7e-94e2-4ea2-a552-d1c54b627f04 returned 0 after 97 
> usecs
> Feb 22 19:49:07 DUT1394ACMR kernel: initcall mei_hdcp_driver_init+0x0/0xff0 
> [mei_hdcp] returned 0 after 101960 usecs
> Feb 22 19:49:07 DUT1394ACMR kernel: calling  mei_pxp_driver_init+0x0/0xff0 
> [mei_pxp] @ 121094
> Feb 22 19:49:07 DUT1394ACMR kernel: probe of 
> :00:16.0-fbf6fcf1-96cf-4e2e-a6a6-1bab8cbe36b1 returned 0 after 435 usecs
> Feb 22 19:49:07 DUT1394ACMR kernel: mei_pxp 
> i915.mei-gsc.768-fbf6fcf1-96cf-4e2e-a6a6-1bab8cbe36b1: bound :03:00.0 
> (ops i915_pxp_tee_component_ops [i915])
> Feb 22 19:49:07 DUT1394ACMR kernel: 100ms wait for request failed on rcs0, 
> err=-62
> Feb 22 19:49:07 DUT1394ACMR kernel: probe of 
> i915.mei-gsc.768-fbf6fcf1-96cf-4e2e-a6a6-1bab8cbe36b1 returned 0 after 158425 
> usecs
> Feb 22 19:49:07 DUT1394ACMR kernel: initcall mei_pxp_driver_init+0x0/0xff0 
> [mei_pxp] returned 0 after 224159 usecs
> Feb 22 19:49:07 DUT1394ACMR kernel: i915/intel_hangcheck_live_selftests: 
> igt_hang_sanitycheck failed with error -5
> Feb 22 19:49:07 DUT1394ACMR kernel: i915: probe of :03:00.0 failed with 
> error -5
> 
> Those request waits, once timed out after 100ms, have never been
> confirmed to still persist over another 100ms, always being able to
> complete within the originally requested wait time doubled.
> 
> Taking into account potentially significant additional concurrent workload
> generated by new auxiliary drivers that didn't exist before and now are
> loaded in parallel with the i915 module also when loaded in selftest mode,
> relax our expectations on time consumed by the sanity check request before
> it completes.
> 
> Signed-off-by: Janusz Krzysztofik 

pushed to drm-intel-gt-next.

Thank you,
Andi


Re: [PATCH v3 1/4] drm/i915/gt: Refactor uabi engine class/instance list creation

2024-03-05 Thread Andi Shyti
Hi Joonas,

...

> >  void intel_engines_driver_register(struct drm_i915_private *i915)
> >  {
> > -   u16 name_instance, other_instance = 0;
> > +   u16 class_instance[I915_LAST_UABI_ENGINE_CLASS + 1] = { };
> 
> Do you mean this to be size I915_LAST_UABI_ENGINE_CLASS + 2? Because ...

Yes, this is an oversight. I was playing around with indexes to
optimize the code a bit more and I forgot to restore this back.

Thanks,
Andi

> 
> 
> > @@ -222,15 +224,14 @@ void intel_engines_driver_register(struct 
> > drm_i915_private *i915)
> >  
> > GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
> > engine->uabi_class = uabi_classes[engine->class];
> > -   if (engine->uabi_class == I915_NO_UABI_CLASS) {
> > -   name_instance = other_instance++;
> > -   } else {
> > -   GEM_BUG_ON(engine->uabi_class >=
> > -  
> > ARRAY_SIZE(i915->engine_uabi_class_count));
> > -   name_instance =
> > -   
> > i915->engine_uabi_class_count[engine->uabi_class]++;
> > -   }
> > -   engine->uabi_instance = name_instance;
> > +
> > +   if (engine->uabi_class == I915_NO_UABI_CLASS)
> > +   uabi_class = I915_LAST_UABI_ENGINE_CLASS + 1;
> 
> .. otherwise this ...
> 
> > +   else
> > +   uabi_class = engine->uabi_class;
> > +
> > +   GEM_BUG_ON(uabi_class >= ARRAY_SIZE(class_instance));
> 
> .. will trigger this assertion?
> 
> Regards, Joonas


Re: [PATCH] drm/i915/selftest_hangcheck: Check sanity with more patience

2024-02-29 Thread Andi Shyti
Hi Janusz,

On Wed, Feb 28, 2024 at 04:24:41PM +0100, Janusz Krzysztofik wrote:
> While trying to reproduce some other issues reported by CI for i915
> hangcheck live selftest, I found them hidden behind timeout failures
> reported by igt_hang_sanitycheck -- the very first hangcheck test case
> executed.
> 
> Feb 22 19:49:06 DUT1394ACMR kernel: calling  mei_gsc_driver_init+0x0/0xff0 
> [mei_gsc] @ 121074
> Feb 22 19:49:06 DUT1394ACMR kernel: i915 :03:00.0: [drm] DRM_I915_DEBUG 
> enabled
> Feb 22 19:49:06 DUT1394ACMR kernel: i915 :03:00.0: [drm] Cannot find any 
> crtc or sizes
> Feb 22 19:49:06 DUT1394ACMR kernel: probe of i915.mei-gsc.768 returned 0 
> after 1475 usecs
> Feb 22 19:49:06 DUT1394ACMR kernel: probe of i915.mei-gscfi.768 returned 0 
> after 1441 usecs
> Feb 22 19:49:06 DUT1394ACMR kernel: initcall mei_gsc_driver_init+0x0/0xff0 
> [mei_gsc] returned 0 after 3010 usecs
> Feb 22 19:49:06 DUT1394ACMR kernel: i915 :03:00.0: [drm] 
> DRM_I915_DEBUG_GEM enabled
> Feb 22 19:49:06 DUT1394ACMR kernel: i915 :03:00.0: [drm] 
> DRM_I915_DEBUG_RUNTIME_PM enabled
> Feb 22 19:49:06 DUT1394ACMR kernel: i915: Performing live selftests with 
> st_random_seed=0x4c26c048 st_timeout=500
> Feb 22 19:49:07 DUT1394ACMR kernel: i915: Running hangcheck
> Feb 22 19:49:07 DUT1394ACMR kernel: calling  mei_hdcp_driver_init+0x0/0xff0 
> [mei_hdcp] @ 121074
> Feb 22 19:49:07 DUT1394ACMR kernel: i915: Running 
> intel_hangcheck_live_selftests/igt_hang_sanitycheck
> Feb 22 19:49:07 DUT1394ACMR kernel: probe of 
> :00:16.0-b638ab7e-94e2-4ea2-a552-d1c54b627f04 returned 0 after 1398 usecs
> Feb 22 19:49:07 DUT1394ACMR kernel: probe of 
> i915.mei-gsc.768-b638ab7e-94e2-4ea2-a552-d1c54b627f04 returned 0 after 97 
> usecs
> Feb 22 19:49:07 DUT1394ACMR kernel: initcall mei_hdcp_driver_init+0x0/0xff0 
> [mei_hdcp] returned 0 after 101960 usecs
> Feb 22 19:49:07 DUT1394ACMR kernel: calling  mei_pxp_driver_init+0x0/0xff0 
> [mei_pxp] @ 121094
> Feb 22 19:49:07 DUT1394ACMR kernel: probe of 
> :00:16.0-fbf6fcf1-96cf-4e2e-a6a6-1bab8cbe36b1 returned 0 after 435 usecs
> Feb 22 19:49:07 DUT1394ACMR kernel: mei_pxp 
> i915.mei-gsc.768-fbf6fcf1-96cf-4e2e-a6a6-1bab8cbe36b1: bound :03:00.0 
> (ops i915_pxp_tee_component_ops [i915])
> Feb 22 19:49:07 DUT1394ACMR kernel: 100ms wait for request failed on rcs0, 
> err=-62
> Feb 22 19:49:07 DUT1394ACMR kernel: probe of 
> i915.mei-gsc.768-fbf6fcf1-96cf-4e2e-a6a6-1bab8cbe36b1 returned 0 after 158425 
> usecs
> Feb 22 19:49:07 DUT1394ACMR kernel: initcall mei_pxp_driver_init+0x0/0xff0 
> [mei_pxp] returned 0 after 224159 usecs
> Feb 22 19:49:07 DUT1394ACMR kernel: i915/intel_hangcheck_live_selftests: 
> igt_hang_sanitycheck failed with error -5
> Feb 22 19:49:07 DUT1394ACMR kernel: i915: probe of :03:00.0 failed with 
> error -5
> 
> Those request waits, once timed out after 100ms, have never been
> confirmed to still persist over another 100ms, always being able to
> complete within the originally requested wait time doubled.
> 
> Taking into account potentially significant additional concurrent workload
> generated by new auxiliary drivers that didn't exist before and now are
> loaded in parallel with the i915 module also when loaded in selftest mode,
> relax our expectations on time consumed by the sanity check request before
> it completes.
> 
> Signed-off-by: Janusz Krzysztofik 

I'm OK with it...

Reviewed-by: Andi Shyti 

Thanks,
Andi


Re: [PATCH] drm/i915/selftests: Fix dependency of some timeouts on HZ

2024-02-29 Thread Andi Shyti
Hi Janusz,

On Thu, Feb 22, 2024 at 12:32:40PM +0100, Janusz Krzysztofik wrote:
> Third argument of i915_request_wait() accepts a timeout value in jiffies.
> Most users pass either a simple HZ based expression, or a result of
> msecs_to_jiffies(), or MAX_SCHEDULE_TIMEOUT, or a very small number not
> exceeding 4 if applicable as that value.  However, there is one user --
> intel_selftest_wait_for_rq() -- that passes a WAIT_FOR_RESET_TIME symbol,
> defined as a large constant value that most probably represents a desired
> timeout in ms.  While that usage results in the intended value of timeout
> on usual x86_64 kernel configurations, it is not portable across different
> architectures and custom kernel configs.
> 
> Rename the symbol to clearly indicate intended units and convert it to
> jiffies before use.
> 
> Fixes: 3a4bfa091c46 ("drm/i915/selftest: Fix workarounds selftest for GuC 
> submission")
> Signed-off-by: Janusz Krzysztofik 
> Cc: Rahul Kumar Singh 
> Cc: John Harrison 
> Cc: Matthew Brost 

Reviewed-by: Andi Shyti 

Thanks,
Andi


[PATCH v3 4/4] drm/i915/gt: Enable only one CCS for compute workload

2024-02-29 Thread Andi Shyti
Enable only one CCS engine by default with all the compute slices
allocated to it.

While generating the list of UABI engines to be exposed to the
user, exclude any additional CCS engines beyond the first
instance.

This change can be tested with igt i915_query.

Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement")
Requires: 4e4f77d74878 ("drm/i915/gt: Refactor uabi engine class/instance list 
creation")
Signed-off-by: Andi Shyti 
Cc: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Matt Roper 
Cc:  # v6.2+
---
 drivers/gpu/drm/i915/gt/intel_engine_user.c | 11 +++
 drivers/gpu/drm/i915/gt/intel_gt.c  | 11 +++
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  2 ++
 3 files changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index ec5bcd1c1ec4..6d6ef11f55e5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -208,6 +208,7 @@ void intel_engines_driver_register(struct drm_i915_private 
*i915)
struct list_head *it, *next;
struct rb_node **p, *prev;
LIST_HEAD(engines);
+   u16 uabi_ccs = 0;
 
sort_engines(i915, &engines);
 
@@ -256,6 +257,16 @@ void intel_engines_driver_register(struct drm_i915_private 
*i915)
  BIT(_CCS(engine->uabi_instance
continue;
 
+   /*
+* The load is balanced among all the available compute
+* slices. Expose only the first instance of the compute
+* engine.
+*/
+   if (IS_DG2(i915) &&
+   uabi_class == I915_ENGINE_CLASS_COMPUTE &&
+   uabi_ccs++)
+   continue;
+
GEM_BUG_ON(uabi_class >=
   ARRAY_SIZE(i915->engine_uabi_class_count));
i915->engine_uabi_class_count[uabi_class]++;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index a425db5ed3a2..e19df4ef47f6 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -168,6 +168,14 @@ static void init_unused_rings(struct intel_gt *gt)
}
 }
 
+static void intel_gt_apply_ccs_mode(struct intel_gt *gt)
+{
+   if (!IS_DG2(gt->i915))
+   return;
+
+   intel_uncore_write(gt->uncore, XEHP_CCS_MODE, 0);
+}
+
 int intel_gt_init_hw(struct intel_gt *gt)
 {
struct drm_i915_private *i915 = gt->i915;
@@ -195,6 +203,9 @@ int intel_gt_init_hw(struct intel_gt *gt)
 
intel_gt_init_swizzling(gt);
 
+   /* Configure CCS mode */
+   intel_gt_apply_ccs_mode(gt);
+
/*
 * At least 830 can leave some of the unused rings
 * "active" (ie. head != tail) after resume which
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index cf709f6c05ae..c148113770ea 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1605,6 +1605,8 @@
 #define   GEN12_VOLTAGE_MASK   REG_GENMASK(10, 0)
 #define   GEN12_CAGF_MASK  REG_GENMASK(19, 11)
 
+#define XEHP_CCS_MODE  _MMIO(0x14804)
+
 #define GEN11_GT_INTR_DW(x)_MMIO(0x190018 + ((x) * 4))
 #define   GEN11_CSME   (31)
 #define   GEN12_HECI_2 (30)
-- 
2.43.0



  1   2   3   4   5   6   7   8   9   10   >