[PATCH v3 03/14] drm/i915: Use managed allocations for extra uncore objects

2022-09-06 Thread Matt Roper
We're slowly transitioning the init-time kzalloc's of the driver over to
DRM-managed allocations; let's make sure the uncore objects allocated
for non-root GTs are thus allocated.

Signed-off-by: Matt Roper 
Reviewed-by: Radhakrishna Sripada 
---
 drivers/gpu/drm/i915/gt/intel_gt.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index a82b5e2e0d83..cf7aab7adb30 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -783,7 +783,7 @@ static int intel_gt_tile_setup(struct intel_gt *gt, 
phys_addr_t phys_addr)
if (!gt_is_root(gt)) {
struct intel_uncore *uncore;
 
-   uncore = kzalloc(sizeof(*uncore), GFP_KERNEL);
+   uncore = drmm_kzalloc(>->i915->drm, sizeof(*uncore), 
GFP_KERNEL);
if (!uncore)
return -ENOMEM;
 
@@ -808,10 +808,8 @@ intel_gt_tile_cleanup(struct intel_gt *gt)
 {
intel_uncore_cleanup_mmio(gt->uncore);
 
-   if (!gt_is_root(gt)) {
-   kfree(gt->uncore);
+   if (!gt_is_root(gt))
kfree(gt);
-   }
 }
 
 int intel_gt_probe_all(struct drm_i915_private *i915)
-- 
2.37.2



[PATCH v3 01/14] drm/i915: Move locking and unclaimed check into mmio_debug_{suspend, resume}

2022-09-06 Thread Matt Roper
Moving the locking for MMIO debug (and the final check for unclaimed
accesses when resuming debug after a userspace-initiated forcewake) will
make it simpler to completely skip MMIO debug handling on uncores that
don't support it in future patches.

Signed-off-by: Matt Roper 
Reviewed-by: Radhakrishna Sripada 
---
 drivers/gpu/drm/i915/intel_uncore.c | 41 +++--
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
b/drivers/gpu/drm/i915/intel_uncore.c
index 9b81b2543ce2..e717ea55484a 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -50,23 +50,33 @@ intel_uncore_mmio_debug_init_early(struct 
intel_uncore_mmio_debug *mmio_debug)
mmio_debug->unclaimed_mmio_check = 1;
 }
 
-static void mmio_debug_suspend(struct intel_uncore_mmio_debug *mmio_debug)
+static void mmio_debug_suspend(struct intel_uncore *uncore)
 {
-   lockdep_assert_held(&mmio_debug->lock);
+   spin_lock(&uncore->debug->lock);
 
/* Save and disable mmio debugging for the user bypass */
-   if (!mmio_debug->suspend_count++) {
-   mmio_debug->saved_mmio_check = mmio_debug->unclaimed_mmio_check;
-   mmio_debug->unclaimed_mmio_check = 0;
+   if (!uncore->debug->suspend_count++) {
+   uncore->debug->saved_mmio_check = 
uncore->debug->unclaimed_mmio_check;
+   uncore->debug->unclaimed_mmio_check = 0;
}
+
+   spin_unlock(&uncore->debug->lock);
 }
 
-static void mmio_debug_resume(struct intel_uncore_mmio_debug *mmio_debug)
+static bool check_for_unclaimed_mmio(struct intel_uncore *uncore);
+
+static void mmio_debug_resume(struct intel_uncore *uncore)
 {
-   lockdep_assert_held(&mmio_debug->lock);
+   spin_lock(&uncore->debug->lock);
+
+   if (!--uncore->debug->suspend_count)
+   uncore->debug->unclaimed_mmio_check = 
uncore->debug->saved_mmio_check;
 
-   if (!--mmio_debug->suspend_count)
-   mmio_debug->unclaimed_mmio_check = mmio_debug->saved_mmio_check;
+   if (check_for_unclaimed_mmio(uncore))
+   drm_info(&uncore->i915->drm,
+"Invalid mmio detected during user access\n");
+
+   spin_unlock(&uncore->debug->lock);
 }
 
 static const char * const forcewake_domain_names[] = {
@@ -677,9 +687,7 @@ void intel_uncore_forcewake_user_get(struct intel_uncore 
*uncore)
spin_lock_irq(&uncore->lock);
if (!uncore->user_forcewake_count++) {
intel_uncore_forcewake_get__locked(uncore, FORCEWAKE_ALL);
-   spin_lock(&uncore->debug->lock);
-   mmio_debug_suspend(uncore->debug);
-   spin_unlock(&uncore->debug->lock);
+   mmio_debug_suspend(uncore);
}
spin_unlock_irq(&uncore->lock);
 }
@@ -695,14 +703,7 @@ void intel_uncore_forcewake_user_put(struct intel_uncore 
*uncore)
 {
spin_lock_irq(&uncore->lock);
if (!--uncore->user_forcewake_count) {
-   spin_lock(&uncore->debug->lock);
-   mmio_debug_resume(uncore->debug);
-
-   if (check_for_unclaimed_mmio(uncore))
-   drm_info(&uncore->i915->drm,
-"Invalid mmio detected during user access\n");
-   spin_unlock(&uncore->debug->lock);
-
+   mmio_debug_resume(uncore);
intel_uncore_forcewake_put__locked(uncore, FORCEWAKE_ALL);
}
spin_unlock_irq(&uncore->lock);
-- 
2.37.2



[PATCH v3 07/14] drm/i915: Use a DRM-managed action to release the PCI bridge device

2022-09-06 Thread Matt Roper
As we start supporting multiple uncore structures in future patches, the
MMIO cleanup (which make also get called mid-init if there's a failure)
will become more complicated.  Moving to DRM-managed actions will help
keep things simple.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/i915_driver.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_driver.c 
b/drivers/gpu/drm/i915/i915_driver.c
index 18acba1bc3b0..1f46dd1ffaf7 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -105,6 +105,12 @@ static const char irst_name[] = "INT3392";
 
 static const struct drm_driver i915_drm_driver;
 
+static void i915_release_bridge_dev(struct drm_device *dev,
+   void *bridge)
+{
+   pci_dev_put(bridge);
+}
+
 static int i915_get_bridge_dev(struct drm_i915_private *dev_priv)
 {
int domain = pci_domain_nr(to_pci_dev(dev_priv->drm.dev)->bus);
@@ -115,7 +121,9 @@ static int i915_get_bridge_dev(struct drm_i915_private 
*dev_priv)
drm_err(&dev_priv->drm, "bridge device not found\n");
return -EIO;
}
-   return 0;
+
+   return drmm_add_action_or_reset(&dev_priv->drm, i915_release_bridge_dev,
+   dev_priv->bridge_dev);
 }
 
 /* Allocate space for the MCH regs if needed, return nonzero on error */
@@ -452,7 +460,6 @@ static int i915_driver_mmio_probe(struct drm_i915_private 
*dev_priv)
 err_uncore:
intel_teardown_mchbar(dev_priv);
intel_uncore_fini_mmio(&dev_priv->uncore);
-   pci_dev_put(dev_priv->bridge_dev);
 
return ret;
 }
@@ -465,7 +472,6 @@ static void i915_driver_mmio_release(struct 
drm_i915_private *dev_priv)
 {
intel_teardown_mchbar(dev_priv);
intel_uncore_fini_mmio(&dev_priv->uncore);
-   pci_dev_put(dev_priv->bridge_dev);
 }
 
 /**
-- 
2.37.2



[PATCH v3 00/14] i915: Add "standalone media" support for MTL

2022-09-06 Thread Matt Roper
Starting with MTL, media functionality has moved into a new, second GT
at the hardware level.  This new GT, referred to as "standalone media"
in the spec, has its own GuC, power management/forcewake, etc.  The
general non-engine GT registers for standalone media start at 0x38,
but otherwise use the same MMIO offsets as the primary GT.

Standalone media has a lot of similarity to the remote tiles
present on platforms like xehpsdv and pvc, and our i915 implementation
can share much of the general "multi GT" infrastructure between the two
types of platforms.  However there are a few notable differences
we must deal with:
 - The 0x38 offset only applies to the non-engine GT registers
   (which the specs refer to as "GSI" registers).  The engine registers
   remain at their usual locations (e.g., 0x1C for VCS0).
 - Unlike platforms with remote tiles, all interrupt handling for
   standalone media still happens via the primary GT.


v2:
 - Added new patches to ensure each GT, not just the primary, is
   handled properly during various init/suspend/resume/teardown flows.
   (Daniele)
 - Simplified GSI offset handling and split it into its own patch.
 - Correct gt->irq_lock assignment for media GT.  (Daniele)
 - Fix jump target for intel_root_gt_init_early() errors.  (Daniele)

v3:
 - Move intel_gt_definition struct to intel_gt_types.h.  (Jani)
 - Drop gtdef->setup() and just switch() on type.  (Jani)
 - Honor GSI offset during AUX table invalidation.  (Aravind)
 - Drop intel_gt_tile_cleanup() through more intelligent use
   of DRM-managed actions.  This also fixes the fault-injection
   failures reported by CI.

Cc: Radhakrishna Sripada 
Cc: Daniele Ceraolo Spurio 
Cc: Aravind Iddamsetty 
Cc: Jani Nikula 

Matt Roper (12):
  drm/i915: Move locking and unclaimed check into
mmio_debug_{suspend,resume}
  drm/i915: Only hook up uncore->debug for primary uncore
  drm/i915: Use managed allocations for extra uncore objects
  drm/i915: Drop intel_gt_tile_cleanup()
  drm/i915: Prepare more multi-GT initialization
  drm/i915: Rename and expose common GT early init routine
  drm/i915: Use a DRM-managed action to release the PCI bridge device
  drm/i915: Initialize MMIO access for each GT
  drm/i915: Handle each GT on init/release and suspend/resume
  drm/i915/uncore: Add GSI offset to uncore
  drm/i915/mtl: Add gsi_offset when emitting aux table invalidation
  drm/i915/xelpmp: Expose media as another GT
  drm/i915/mtl: Use primary GT's irq lock for media GT
  drm/i915/mtl: Hook up interrupts for standalone media

 drivers/gpu/drm/i915/Makefile |   1 +
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c  |  15 ++-
 drivers/gpu/drm/i915/gt/gen8_engine_cs.h  |   3 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c |  10 +-
 drivers/gpu/drm/i915/gt/intel_gt.c| 108 +-
 drivers/gpu/drm/i915/gt/intel_gt.h|   4 +-
 drivers/gpu/drm/i915/gt/intel_gt_irq.c|  35 --
 drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c |   8 +-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  10 ++
 drivers/gpu/drm/i915/gt/intel_gt_types.h  |  19 ++-
 drivers/gpu/drm/i915/gt/intel_lrc.c   |   9 +-
 drivers/gpu/drm/i915/gt/intel_rps.c   |  26 ++---
 drivers/gpu/drm/i915/gt/intel_sa_media.c  |  47 
 drivers/gpu/drm/i915/gt/intel_sa_media.h  |  15 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|  24 ++--
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |   4 +-
 drivers/gpu/drm/i915/i915_driver.c| 105 -
 drivers/gpu/drm/i915/i915_drv.h   |   5 +
 drivers/gpu/drm/i915/i915_irq.c   |   4 +-
 drivers/gpu/drm/i915/i915_pci.c   |  14 +++
 drivers/gpu/drm/i915/intel_device_info.h  |   3 +
 drivers/gpu/drm/i915/intel_uncore.c   |  92 +--
 drivers/gpu/drm/i915/intel_uncore.h   |  28 -
 drivers/gpu/drm/i915/pxp/intel_pxp.c  |   4 +-
 drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c  |   4 +-
 drivers/gpu/drm/i915/pxp/intel_pxp_irq.c  |  14 +--
 drivers/gpu/drm/i915/pxp/intel_pxp_session.c  |   4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   1 +
 29 files changed, 449 insertions(+), 171 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.h

-- 
2.37.2



Re: [Intel-gfx] [PATCH v2 09/12] drm/i915/uncore: Add GSI offset to uncore

2022-09-06 Thread Matt Roper
On Tue, Sep 06, 2022 at 04:14:21PM +0530, Iddamsetty, Aravind wrote:
> 
> 
> On 03-09-2022 05:02, Matt Roper wrote:
> > GT non-engine registers (referred to as "GSI" registers by the spec)
> > have the same relative offsets on standalone media as they do on the
> > primary GT, just with an additional "GSI offset" added to their MMIO
> > address.  If we store this GSI offset in the standalone media's
> > intel_uncore structure, it can be automatically applied to all GSI reg
> > reads/writes that happen on that GT, allowing us to re-use our existing
> > GT code with minimal changes.
> > 
> > Forcewake and shadowed register tables for the media GT (which will be
> > added in a future patch) are listed as final addresses that already
> > include the GSI offset, so we also need to add the GSI offset before
> > doing lookups of registers in one of those tables.
> > 
> > Cc: Daniele Ceraolo Spurio 
> > Signed-off-by: Matt Roper 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_gt.c   | 17 ++---
> >  drivers/gpu/drm/i915/intel_device_info.h |  4 +++-
> >  drivers/gpu/drm/i915/intel_uncore.c  | 10 --
> >  drivers/gpu/drm/i915/intel_uncore.h  | 22 --
> >  4 files changed, 45 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
> > b/drivers/gpu/drm/i915/gt/intel_gt.c
> > index fbb5e32979a4..a6ed11b933eb 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> > @@ -776,10 +776,20 @@ void intel_gt_driver_late_release_all(struct 
> > drm_i915_private *i915)
> > }
> >  }
> >  
> > -static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
> > +/*
> > + * Note: the gsi_offset parameter here isn't used, but we want to keep the
> > + * function signature equivalent to gtdef->setup() so that it can be 
> > plugged
> > + * in when we enabled remote tiles in the future.
> > + */
> > +static int intel_gt_tile_setup(struct intel_gt *gt,
> > +  phys_addr_t phys_addr,
> > +  u32 gsi_offset)
> >  {
> > int ret;
> >  
> > +   /* GSI offset is only applicable for media GTs */
> > +   drm_WARN_ON(>->i915->drm, gsi_offset);
> > +
> > if (!gt_is_root(gt)) {
> > struct intel_uncore *uncore;
> >  
> > @@ -832,7 +842,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
> > gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;
> >  
> > drm_dbg(&i915->drm, "Setting up %s\n", gt->name);
> > -   ret = intel_gt_tile_setup(gt, phys_addr);
> > +   ret = intel_gt_tile_setup(gt, phys_addr, 0);
> > if (ret)
> > return ret;
> >  
> > @@ -865,7 +875,8 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
> > goto err;
> > }
> >  
> > -   ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base);
> > +   ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base,
> > +  gtdef->gsi_offset);
> > if (ret)
> > goto err;
> >  
> > diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
> > b/drivers/gpu/drm/i915/intel_device_info.h
> > index b408ce384cd7..85e0ef0e91b1 100644
> > --- a/drivers/gpu/drm/i915/intel_device_info.h
> > +++ b/drivers/gpu/drm/i915/intel_device_info.h
> > @@ -254,8 +254,10 @@ struct intel_gt_definition {
> > enum intel_gt_type type;
> > char *name;
> > int (*setup)(struct intel_gt *gt,
> > -phys_addr_t phys_addr);
> > +phys_addr_t phys_addr,
> > +u32 gsi_offset);
> > u32 mapping_base;
> > +   u32 gsi_offset;
> > intel_engine_mask_t engine_mask;
> >  };
> >  
> > diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
> > b/drivers/gpu/drm/i915/intel_uncore.c
> > index 33bdcbc77ab2..ecb02421502d 100644
> > --- a/drivers/gpu/drm/i915/intel_uncore.c
> > +++ b/drivers/gpu/drm/i915/intel_uncore.c
> > @@ -927,6 +927,9 @@ find_fw_domain(struct intel_uncore *uncore, u32 offset)
> >  {
> > const struct intel_forcewake_range *entry;
> >  
> > +   if (IS_GSI_REG(offset))
> > +   offset += uncore->gsi_offset;
> > +
> > entry = BSEARCH(offset,
> > uncore->fw_domains_table,
> >   

Re: [PATCH v2 01/12] drm/i915: Move locking and unclaimed check into mmio_debug_{suspend, resume}

2022-09-06 Thread Matt Roper
On Tue, Sep 06, 2022 at 06:39:14AM -0700, Ruhl, Michael J wrote:
> >-Original Message-
> >From: dri-devel  On Behalf Of
> >Matt Roper
> >Sent: Friday, September 2, 2022 7:33 PM
> >To: intel-...@lists.freedesktop.org
> >Cc: dri-devel@lists.freedesktop.org; Sripada, Radhakrishna
> >
> >Subject: [PATCH v2 01/12] drm/i915: Move locking and unclaimed check into
> >mmio_debug_{suspend, resume}
> >
> >Moving the locking for MMIO debug (and the final check for unclaimed
> >accesses when resuming debug after a userspace-initiated forcewake) will
> >make it simpler to completely skip MMIO debug handling on uncores that
> >don't support it in future patches.
> >
> >Signed-off-by: Matt Roper 
> >Reviewed-by: Radhakrishna Sripada 
> >---
> > drivers/gpu/drm/i915/intel_uncore.c | 41 +++--
> > 1 file changed, 21 insertions(+), 20 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/intel_uncore.c
> >b/drivers/gpu/drm/i915/intel_uncore.c
> >index 9b81b2543ce2..e717ea55484a 100644
> >--- a/drivers/gpu/drm/i915/intel_uncore.c
> >+++ b/drivers/gpu/drm/i915/intel_uncore.c
> >@@ -50,23 +50,33 @@ intel_uncore_mmio_debug_init_early(struct
> >intel_uncore_mmio_debug *mmio_debug)
> > mmio_debug->unclaimed_mmio_check = 1;
> > }
> >
> >-static void mmio_debug_suspend(struct intel_uncore_mmio_debug
> >*mmio_debug)
> >+static void mmio_debug_suspend(struct intel_uncore *uncore)
> 
> /bike-shedding...
> 
> It seems like there has been a tend to name functions with the
> 
> _unlocked
> 
> postfix when the lock is being taken within the function.
> 
> Would this be a reasonable name update for these changes?

I think foo_unlocked() naming is usually used when there's also a
separate foo() that can be called if/when locks are already held (or
sometimes it's foo() and foo_locked() if the situation is the other way
around).  In this case we still only have one version of the function,
and it's only called from a single place in the code
(intel_uncore_forcewake_user_get) so I don't think the special naming is
necessary.  It might actually add confusion here since there's a
different lock (the general uncore lock) that is still held by the
caller.  It's just the mmio_debug-specific lock that's been moved into
the mmio-debug specific function here.


Matt

> 
> M
> 
> > {
> >-lockdep_assert_held(&mmio_debug->lock);
> >+spin_lock(&uncore->debug->lock);
> >
> > /* Save and disable mmio debugging for the user bypass */
> >-if (!mmio_debug->suspend_count++) {
> >-mmio_debug->saved_mmio_check = mmio_debug-
> >>unclaimed_mmio_check;
> >-mmio_debug->unclaimed_mmio_check = 0;
> >+if (!uncore->debug->suspend_count++) {
> >+uncore->debug->saved_mmio_check = uncore->debug-
> >>unclaimed_mmio_check;
> >+uncore->debug->unclaimed_mmio_check = 0;
> > }
> >+
> >+spin_unlock(&uncore->debug->lock);
> > }
> >
> >-static void mmio_debug_resume(struct intel_uncore_mmio_debug
> >*mmio_debug)
> >+static bool check_for_unclaimed_mmio(struct intel_uncore *uncore);
> >+
> >+static void mmio_debug_resume(struct intel_uncore *uncore)
> > {
> >-lockdep_assert_held(&mmio_debug->lock);
> >+spin_lock(&uncore->debug->lock);
> >+
> >+if (!--uncore->debug->suspend_count)
> >+uncore->debug->unclaimed_mmio_check = uncore->debug-
> >>saved_mmio_check;
> >
> >-if (!--mmio_debug->suspend_count)
> >-mmio_debug->unclaimed_mmio_check = mmio_debug-
> >>saved_mmio_check;
> >+if (check_for_unclaimed_mmio(uncore))
> >+drm_info(&uncore->i915->drm,
> >+ "Invalid mmio detected during user access\n");
> >+
> >+spin_unlock(&uncore->debug->lock);
> > }
> >
> > static const char * const forcewake_domain_names[] = {
> >@@ -677,9 +687,7 @@ void intel_uncore_forcewake_user_get(struct
> >intel_uncore *uncore)
> > spin_lock_irq(&uncore->lock);
> > if (!uncore->user_forcewake_count++) {
> > intel_uncore_forcewake_get__locked(uncore,
> >FORCEWAKE_ALL);
> >-spin_lock(&uncore->debug->lock);
> >-mmio_debug_suspend(uncore->debug);
> >-spin_unlock(&uncore->debug->lock);
> >+mmio_debug_suspend

[PATCH v2 08/12] drm/i915: Handle each GT on init/release and suspend/resume

2022-09-02 Thread Matt Roper
In preparation for enabling a second GT, there are a number of GT/uncore
operations that happen during initialization or suspend flows that need
to be performed on each GT, not just the primary,

Cc: Daniele Ceraolo Spurio 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/i915_driver.c | 59 +-
 1 file changed, 42 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_driver.c 
b/drivers/gpu/drm/i915/i915_driver.c
index bb9ba1aed1bb..e5c3cf5045d4 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -310,8 +310,13 @@ static void intel_detect_preproduction_hw(struct 
drm_i915_private *dev_priv)
 
 static void sanitize_gpu(struct drm_i915_private *i915)
 {
-   if (!INTEL_INFO(i915)->gpu_reset_clobbers_display)
-   __intel_gt_reset(to_gt(i915), ALL_ENGINES);
+   if (!INTEL_INFO(i915)->gpu_reset_clobbers_display) {
+   struct intel_gt *gt;
+   unsigned int i;
+
+   for_each_gt(gt, i915, i)
+   __intel_gt_reset(gt, ALL_ENGINES);
+   }
 }
 
 /**
@@ -730,6 +735,8 @@ static void i915_driver_hw_remove(struct drm_i915_private 
*dev_priv)
 static void i915_driver_register(struct drm_i915_private *dev_priv)
 {
struct drm_device *dev = &dev_priv->drm;
+   struct intel_gt *gt;
+   unsigned int i;
 
i915_gem_driver_register(dev_priv);
i915_pmu_register(dev_priv);
@@ -749,7 +756,8 @@ static void i915_driver_register(struct drm_i915_private 
*dev_priv)
/* Depends on sysfs having been initialized */
i915_perf_register(dev_priv);
 
-   intel_gt_driver_register(to_gt(dev_priv));
+   for_each_gt(gt, dev_priv, i)
+   intel_gt_driver_register(gt);
 
intel_display_driver_register(dev_priv);
 
@@ -768,6 +776,9 @@ static void i915_driver_register(struct drm_i915_private 
*dev_priv)
  */
 static void i915_driver_unregister(struct drm_i915_private *dev_priv)
 {
+   struct intel_gt *gt;
+   unsigned int i;
+
i915_switcheroo_unregister(dev_priv);
 
intel_unregister_dsm_handler();
@@ -777,7 +788,8 @@ static void i915_driver_unregister(struct drm_i915_private 
*dev_priv)
 
intel_display_driver_unregister(dev_priv);
 
-   intel_gt_driver_unregister(to_gt(dev_priv));
+   for_each_gt(gt, dev_priv, i)
+   intel_gt_driver_unregister(gt);
 
i915_perf_unregister(dev_priv);
i915_pmu_unregister(dev_priv);
@@ -799,6 +811,8 @@ static void i915_welcome_messages(struct drm_i915_private 
*dev_priv)
 {
if (drm_debug_enabled(DRM_UT_DRIVER)) {
struct drm_printer p = drm_debug_printer("i915 device info:");
+   struct intel_gt *gt;
+   unsigned int i;
 
drm_printf(&p, "pciid=0x%04x rev=0x%02x platform=%s 
(subplatform=0x%x) gen=%i\n",
   INTEL_DEVID(dev_priv),
@@ -811,7 +825,8 @@ static void i915_welcome_messages(struct drm_i915_private 
*dev_priv)
intel_device_info_print(INTEL_INFO(dev_priv),
RUNTIME_INFO(dev_priv), &p);
i915_print_iommu_status(dev_priv, &p);
-   intel_gt_info_print(&to_gt(dev_priv)->info, &p);
+   for_each_gt(gt, dev_priv, i)
+   intel_gt_info_print(>->info, &p);
}
 
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG))
@@ -1230,13 +1245,15 @@ static int i915_drm_suspend_late(struct drm_device 
*dev, bool hibernation)
struct drm_i915_private *dev_priv = to_i915(dev);
struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
-   int ret;
+   struct intel_gt *gt;
+   int ret, i;
 
disable_rpm_wakeref_asserts(rpm);
 
i915_gem_suspend_late(dev_priv);
 
-   intel_uncore_suspend(&dev_priv->uncore);
+   for_each_gt(gt, dev_priv, i)
+   intel_uncore_suspend(gt->uncore);
 
intel_power_domains_suspend(dev_priv,
get_suspend_mode(dev_priv, hibernation));
@@ -1368,7 +1385,8 @@ static int i915_drm_resume_early(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = to_i915(dev);
struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
-   int ret;
+   struct intel_gt *gt;
+   int ret, i;
 
/*
 * We have a resume ordering issue with the snd-hda driver also
@@ -1422,9 +1440,10 @@ static int i915_drm_resume_early(struct drm_device *dev)
drm_err(&dev_priv->drm,
"Resume prepare failed: %d, continuing anyway\n", ret);
 
-   intel_uncore_resume_early(&dev_priv->uncore);
-
-   intel_gt_check_and_clear_faults(to_gt(dev_priv));
+   for_each_gt(gt, dev_pri

[PATCH v2 07/12] drm/i915: Initialize MMIO access for each GT

2022-09-02 Thread Matt Roper
In a multi-GT system we need to initialize MMIO access for each GT, not
just the primary GT.

Cc: Daniele Ceraolo Spurio 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/i915_driver.c  | 27 ++-
 drivers/gpu/drm/i915/intel_uncore.c |  5 -
 drivers/gpu/drm/i915/intel_uncore.h |  3 ++-
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_driver.c 
b/drivers/gpu/drm/i915/i915_driver.c
index 1f46dd1ffaf7..bb9ba1aed1bb 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -431,7 +431,8 @@ static void i915_driver_late_release(struct 
drm_i915_private *dev_priv)
  */
 static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv)
 {
-   int ret;
+   struct intel_gt *gt;
+   int ret, i;
 
if (i915_inject_probe_failure(dev_priv))
return -ENODEV;
@@ -440,17 +441,27 @@ static int i915_driver_mmio_probe(struct drm_i915_private 
*dev_priv)
if (ret < 0)
return ret;
 
-   ret = intel_uncore_init_mmio(&dev_priv->uncore);
-   if (ret)
-   return ret;
+   for_each_gt(gt, dev_priv, i) {
+   ret = intel_uncore_init_mmio(gt->uncore);
+   if (ret)
+   return ret;
+
+   ret = drmm_add_action_or_reset(&dev_priv->drm,
+  intel_uncore_fini_mmio,
+  gt->uncore);
+   if (ret)
+   return ret;
+   }
 
/* Try to make sure MCHBAR is enabled before poking at it */
intel_setup_mchbar(dev_priv);
intel_device_info_runtime_init(dev_priv);
 
-   ret = intel_gt_init_mmio(to_gt(dev_priv));
-   if (ret)
-   goto err_uncore;
+   for_each_gt(gt, dev_priv, i) {
+   ret = intel_gt_init_mmio(gt);
+   if (ret)
+   goto err_uncore;
+   }
 
/* As early as possible, scrub existing GPU state before clobbering */
sanitize_gpu(dev_priv);
@@ -459,7 +470,6 @@ static int i915_driver_mmio_probe(struct drm_i915_private 
*dev_priv)
 
 err_uncore:
intel_teardown_mchbar(dev_priv);
-   intel_uncore_fini_mmio(&dev_priv->uncore);
 
return ret;
 }
@@ -471,7 +481,6 @@ static int i915_driver_mmio_probe(struct drm_i915_private 
*dev_priv)
 static void i915_driver_mmio_release(struct drm_i915_private *dev_priv)
 {
intel_teardown_mchbar(dev_priv);
-   intel_uncore_fini_mmio(&dev_priv->uncore);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
b/drivers/gpu/drm/i915/intel_uncore.c
index 6841f76533f9..33bdcbc77ab2 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -2454,8 +2454,11 @@ void intel_uncore_prune_engine_fw_domains(struct 
intel_uncore *uncore,
}
 }
 
-void intel_uncore_fini_mmio(struct intel_uncore *uncore)
+/* Called via drm-managed action */
+void intel_uncore_fini_mmio(struct drm_device *dev, void *data)
 {
+   struct intel_uncore *uncore = data;
+
if (intel_uncore_has_forcewake(uncore)) {
iosf_mbi_punit_acquire();
iosf_mbi_unregister_pmic_bus_access_notifier_unlocked(
diff --git a/drivers/gpu/drm/i915/intel_uncore.h 
b/drivers/gpu/drm/i915/intel_uncore.h
index 6100d0f4498a..4acb78a03233 100644
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -33,6 +33,7 @@
 
 #include "i915_reg_defs.h"
 
+struct drm_device;
 struct drm_i915_private;
 struct intel_runtime_pm;
 struct intel_uncore;
@@ -220,7 +221,7 @@ void intel_uncore_prune_engine_fw_domains(struct 
intel_uncore *uncore,
 bool intel_uncore_unclaimed_mmio(struct intel_uncore *uncore);
 bool intel_uncore_arm_unclaimed_mmio_detection(struct intel_uncore *uncore);
 void intel_uncore_cleanup_mmio(struct intel_uncore *uncore);
-void intel_uncore_fini_mmio(struct intel_uncore *uncore);
+void intel_uncore_fini_mmio(struct drm_device *dev, void *data);
 void intel_uncore_suspend(struct intel_uncore *uncore);
 void intel_uncore_resume_early(struct intel_uncore *uncore);
 void intel_uncore_runtime_resume(struct intel_uncore *uncore);
-- 
2.37.2



[PATCH v2 11/12] drm/i915/mtl: Use primary GT's irq lock for media GT

2022-09-02 Thread Matt Roper
When we hook up interrupts (in the next patch), interrupts for the media
GT are still processed as part of the primary GT's interrupt flow.  As
such, we should share the same IRQ lock with the primary GT.  Let's
convert gt->irq_lock into a pointer and just point the media GT's
instance at the same lock the primary GT is using.

v2:
 - Point media's gt->irq_lock at the primary GT lock properly.  (Daniele)
 - Fix jump target for intel_root_gt_init_early errors.  (Daniele)

Cc: Daniele Ceraolo Spurio 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c |  8 +++---
 drivers/gpu/drm/i915/gt/intel_gt.c| 15 +--
 drivers/gpu/drm/i915/gt/intel_gt.h|  2 +-
 drivers/gpu/drm/i915/gt/intel_gt_irq.c| 16 ++--
 drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c |  8 +++---
 drivers/gpu/drm/i915/gt/intel_gt_types.h  |  2 +-
 drivers/gpu/drm/i915/gt/intel_rps.c   | 26 +--
 drivers/gpu/drm/i915/gt/intel_sa_media.c  |  1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.c| 24 -
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  4 +--
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |  4 +--
 drivers/gpu/drm/i915/i915_driver.c|  5 +++-
 drivers/gpu/drm/i915/i915_irq.c   |  4 +--
 drivers/gpu/drm/i915/pxp/intel_pxp.c  |  4 +--
 drivers/gpu/drm/i915/pxp/intel_pxp_irq.c  | 14 +-
 drivers/gpu/drm/i915/pxp/intel_pxp_session.c  |  4 +--
 16 files changed, 78 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 41acc285e8bf..6e0122b3dca2 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1688,9 +1688,9 @@ bool intel_engine_irq_enable(struct intel_engine_cs 
*engine)
return false;
 
/* Caller disables interrupts */
-   spin_lock(&engine->gt->irq_lock);
+   spin_lock(engine->gt->irq_lock);
engine->irq_enable(engine);
-   spin_unlock(&engine->gt->irq_lock);
+   spin_unlock(engine->gt->irq_lock);
 
return true;
 }
@@ -1701,9 +1701,9 @@ void intel_engine_irq_disable(struct intel_engine_cs 
*engine)
return;
 
/* Caller disables interrupts */
-   spin_lock(&engine->gt->irq_lock);
+   spin_lock(engine->gt->irq_lock);
engine->irq_disable(engine);
-   spin_unlock(&engine->gt->irq_lock);
+   spin_unlock(engine->gt->irq_lock);
 }
 
 void intel_engines_reset_default_submission(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index a6ed11b933eb..b5c187eb9c2e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -37,7 +37,7 @@
 
 void intel_gt_common_init_early(struct intel_gt *gt)
 {
-   spin_lock_init(>->irq_lock);
+   spin_lock_init(gt->irq_lock);
 
INIT_LIST_HEAD(>->closed_vma);
spin_lock_init(>->closed_lock);
@@ -58,14 +58,19 @@ void intel_gt_common_init_early(struct intel_gt *gt)
 }
 
 /* Preliminary initialization of Tile 0 */
-void intel_root_gt_init_early(struct drm_i915_private *i915)
+int intel_root_gt_init_early(struct drm_i915_private *i915)
 {
struct intel_gt *gt = to_gt(i915);
 
gt->i915 = i915;
gt->uncore = &i915->uncore;
+   gt->irq_lock = drmm_kzalloc(&i915->drm, sizeof(*gt->irq_lock), 
GFP_KERNEL);
+   if (!gt->irq_lock)
+   return -ENOMEM;
 
intel_gt_common_init_early(gt);
+
+   return 0;
 }
 
 static int intel_gt_probe_lmem(struct intel_gt *gt)
@@ -792,12 +797,18 @@ static int intel_gt_tile_setup(struct intel_gt *gt,
 
if (!gt_is_root(gt)) {
struct intel_uncore *uncore;
+   spinlock_t *irq_lock;
 
uncore = drmm_kzalloc(>->i915->drm, sizeof(*uncore), 
GFP_KERNEL);
if (!uncore)
return -ENOMEM;
 
+   irq_lock = drmm_kzalloc(>->i915->drm, sizeof(*irq_lock), 
GFP_KERNEL);
+   if (!irq_lock)
+   return -ENOMEM;
+
gt->uncore = uncore;
+   gt->irq_lock = irq_lock;
 
intel_gt_common_init_early(gt);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
b/drivers/gpu/drm/i915/gt/intel_gt.h
index c9a359f35d0f..2ee582e287c8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -45,7 +45,7 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc 
*gsc)
 }
 
 void intel_gt_common_init_early(struct intel_gt *gt);
-void intel_root_gt_init_early(struct drm_i915_private *i915);
+int intel_root_gt_init_early(struct drm_i915_private *i915);
 int intel_gt_assign_ggtt(struct intel_gt *gt);
 in

[PATCH v2 10/12] drm/i915/xelpmp: Expose media as another GT

2022-09-02 Thread Matt Roper
Xe_LPM+ platforms have "standalone media."  I.e., the media unit is
designed as an additional GT with its own engine list, GuC, forcewake,
etc.  Let's allow platforms to include media GTs in their device info.

v2:
 - Simplify GSI register handling and split it out to a separate patch
   for ease of review.  (Daniele)

Cc: Aravind Iddamsetty 
Cc: Daniele Ceraolo Spurio 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/Makefile|  1 +
 drivers/gpu/drm/i915/gt/intel_gt_regs.h  |  8 +
 drivers/gpu/drm/i915/gt/intel_sa_media.c | 39 
 drivers/gpu/drm/i915/gt/intel_sa_media.h | 15 +
 drivers/gpu/drm/i915/i915_pci.c  | 15 +
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 drivers/gpu/drm/i915/intel_uncore.c  |  4 +++
 7 files changed, 83 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 522ef9b4aff3..e83e4cd46968 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -123,6 +123,7 @@ gt-y += \
gt/intel_ring.o \
gt/intel_ring_submission.o \
gt/intel_rps.o \
+   gt/intel_sa_media.o \
gt/intel_sseu.o \
gt/intel_sseu_debugfs.o \
gt/intel_timeline.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index d414785003cc..fb2c56777480 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1578,4 +1578,12 @@
 
 #define GEN12_SFC_DONE(n)  _MMIO(0x1cc000 + (n) * 0x1000)
 
+/*
+ * Standalone Media's non-engine GT registers are located at their regular GT
+ * offsets plus 0x38.  This extra offset is stored inside the intel_uncore
+ * structure so that the existing code can be used for both GTs without
+ * modification.
+ */
+#define MTL_MEDIA_GSI_BASE 0x38
+
 #endif /* __INTEL_GT_REGS__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c 
b/drivers/gpu/drm/i915/gt/intel_sa_media.c
new file mode 100644
index ..8c5c519457cc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include 
+
+#include "i915_drv.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_sa_media.h"
+
+int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr,
+  u32 gsi_offset)
+{
+   struct drm_i915_private *i915 = gt->i915;
+   struct intel_uncore *uncore;
+
+   uncore = drmm_kzalloc(&i915->drm, sizeof(*uncore), GFP_KERNEL);
+   if (!uncore)
+   return -ENOMEM;
+
+   uncore->gsi_offset = gsi_offset;
+
+   intel_gt_common_init_early(gt);
+   intel_uncore_init_early(uncore, gt);
+
+   /*
+* Standalone media shares the general MMIO space with the primary
+* GT.  We'll re-use the primary GT's mapping.
+*/
+   uncore->regs = i915->uncore.regs;
+   if (drm_WARN_ON(&i915->drm, uncore->regs == NULL))
+   return -EIO;
+
+   gt->uncore = uncore;
+   gt->phys_addr = phys_addr;
+
+   return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.h 
b/drivers/gpu/drm/i915/gt/intel_sa_media.h
new file mode 100644
index ..3afb310de932
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_sa_media.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+#ifndef __INTEL_SA_MEDIA__
+#define __INTEL_SA_MEDIA__
+
+#include 
+
+struct intel_gt;
+
+int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr,
+  u32 gsi_offset);
+
+#endif /* __INTEL_SA_MEDIA_H__ */
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 26b25d9434d6..18d3722331e4 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -26,6 +26,9 @@
 #include 
 #include 
 
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_sa_media.h"
+
 #include "i915_driver.h"
 #include "i915_drv.h"
 #include "i915_pci.h"
@@ -1115,6 +1118,17 @@ static const struct intel_device_info pvc_info = {
.display.has_cdclk_crawl = 1, \
.__runtime.fbc_mask = BIT(INTEL_FBC_A) | BIT(INTEL_FBC_B)
 
+static const struct intel_gt_definition xelpmp_extra_gt[] = {
+   {
+   .type = GT_MEDIA,
+   .name = "Standalone Media GT",
+   .setup = intel_sa_mediagt_setup,
+   .gsi_offset = MTL_MEDIA_GSI_BASE,
+   .engine_mask = BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
+   },
+   {}
+};
+
 __maybe_unused
 static const struct intel_device_info mtl_info = {

[PATCH v2 04/12] drm/i915: Prepare more multi-GT initialization

2022-09-02 Thread Matt Roper
We're going to introduce an additional intel_gt for MTL's media unit
soon.  Let's provide a bit more multi-GT initialization framework in
preparation for that.  The initialization will pull the list of GTs for
a platform from the device info structure.  Although necessary for the
immediate MTL media enabling, this same framework will also be used
farther down the road when we enable remote tiles on xehpsdv and pvc.

v2:
 - Re-add missing test for !HAS_EXTRA_GT_LIST in intel_gt_probe_all().

Cc: Aravind Iddamsetty 
Signed-off-by: Matt Roper 
Reviewed-by: Radhakrishna Sripada 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c |  2 +-
 drivers/gpu/drm/i915/gt/intel_gt.c| 51 +--
 drivers/gpu/drm/i915/gt/intel_gt.h|  1 -
 drivers/gpu/drm/i915/gt/intel_gt_types.h  |  3 ++
 drivers/gpu/drm/i915/i915_drv.h   |  2 +
 drivers/gpu/drm/i915/intel_device_info.h  | 16 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
 7 files changed, 70 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 275ad72940c1..41acc285e8bf 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -736,7 +736,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt 
*gt)
u16 vdbox_mask;
u16 vebox_mask;
 
-   info->engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;
+   GEM_BUG_ON(!info->engine_mask);
 
if (GRAPHICS_VER(i915) < 11)
return info->engine_mask;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index cf7aab7adb30..7b880dbed6ce 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -807,17 +807,16 @@ static void
 intel_gt_tile_cleanup(struct intel_gt *gt)
 {
intel_uncore_cleanup_mmio(gt->uncore);
-
-   if (!gt_is_root(gt))
-   kfree(gt);
 }
 
 int intel_gt_probe_all(struct drm_i915_private *i915)
 {
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
struct intel_gt *gt = &i915->gt0;
+   const struct intel_gt_definition *gtdef;
phys_addr_t phys_addr;
unsigned int mmio_bar;
+   unsigned int i;
int ret;
 
mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR;
@@ -828,14 +827,58 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
 * and it has been already initialized early during probe
 * in i915_driver_probe()
 */
+   gt->i915 = i915;
+   gt->name = "Primary GT";
+   gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;
+
+   drm_dbg(&i915->drm, "Setting up %s\n", gt->name);
ret = intel_gt_tile_setup(gt, phys_addr);
if (ret)
return ret;
 
i915->gt[0] = gt;
 
-   /* TODO: add more tiles */
+   if (!HAS_EXTRA_GT_LIST(i915))
+   return 0;
+
+   for (i = 1, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1];
+gtdef->setup != NULL;
+i++, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]) {
+   gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL);
+   if (!gt) {
+   ret = -ENOMEM;
+   goto err;
+   }
+
+   gt->i915 = i915;
+   gt->name = gtdef->name;
+   gt->type = gtdef->type;
+   gt->info.engine_mask = gtdef->engine_mask;
+   gt->info.id = i;
+
+   drm_dbg(&i915->drm, "Setting up %s\n", gt->name);
+   if (GEM_WARN_ON(range_overflows_t(resource_size_t,
+ gtdef->mapping_base,
+ SZ_16M,
+ pci_resource_len(pdev, 
mmio_bar {
+   ret = -ENODEV;
+   goto err;
+   }
+
+   ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base);
+   if (ret)
+   goto err;
+
+   i915->gt[i] = gt;
+   }
+
return 0;
+
+err:
+   i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, 
ret);
+   intel_gt_release_all(i915);
+
+   return ret;
 }
 
 int intel_gt_tiles_init(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
b/drivers/gpu/drm/i915/gt/intel_gt.h
index 40b06adf509a..4d8779529cc2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -54,7 +54,6 @@ void intel_gt_driver_register(struct intel_gt *gt);
 void intel_gt_driver_unregister(struct intel_gt *gt);
 void intel_gt_driver_remove(struct intel_gt *gt);
 void i

[PATCH v2 12/12] drm/i915/mtl: Hook up interrupts for standalone media

2022-09-02 Thread Matt Roper
Top-level handling of standalone media interrupts will be processed as
part of the primary GT's interrupt handler (since primary and media GTs
share an MMIO space, unlike remote tile setups).  When we get down to
the point of handling engine interrupts, we need to take care to lookup
VCS and VECS engines in the media GT rather than the primary.

There are also a couple of additional "other" instance bits that
correspond to the media GT's GuC and media GT's power management
interrupts; we need to direct those to the media GT instance as well.

Bspec: 45605
Cc: Anusha Srivatsa 
Signed-off-by: Matt Roper 
Reviewed-by: Daniele Ceraolo Spurio 
---
 drivers/gpu/drm/i915/gt/intel_gt_irq.c   | 19 +++
 drivers/gpu/drm/i915/gt/intel_gt_regs.h  |  2 ++
 drivers/gpu/drm/i915/gt/intel_sa_media.c |  7 +++
 drivers/gpu/drm/i915/i915_drv.h  |  3 +++
 4 files changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c 
b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 0dfd0c42d00d..f26882fdc24c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -59,11 +59,17 @@ static void
 gen11_other_irq_handler(struct intel_gt *gt, const u8 instance,
const u16 iir)
 {
+   struct intel_gt *media_gt = gt->i915->media_gt;
+
if (instance == OTHER_GUC_INSTANCE)
return guc_irq_handler(>->uc.guc, iir);
+   if (instance == OTHER_MEDIA_GUC_INSTANCE && media_gt)
+   return guc_irq_handler(&media_gt->uc.guc, iir);
 
if (instance == OTHER_GTPM_INSTANCE)
return gen11_rps_irq_handler(>->rps, iir);
+   if (instance == OTHER_MEDIA_GTPM_INSTANCE && media_gt)
+   return gen11_rps_irq_handler(&media_gt->rps, iir);
 
if (instance == OTHER_KCR_INSTANCE)
return intel_pxp_irq_handler(>->pxp, iir);
@@ -81,6 +87,18 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
 {
struct intel_engine_cs *engine;
 
+   /*
+* Platforms with standalone media have their media engines in another
+* GT.
+*/
+   if (MEDIA_VER(gt->i915) >= 13 &&
+   (class == VIDEO_DECODE_CLASS || class == VIDEO_ENHANCEMENT_CLASS)) {
+   if (!gt->i915->media_gt)
+   goto err;
+
+   gt = gt->i915->media_gt;
+   }
+
if (instance <= MAX_ENGINE_INSTANCE)
engine = gt->engine_class[class][instance];
else
@@ -89,6 +107,7 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
if (likely(engine))
return intel_engine_cs_irq(engine, iir);
 
+err:
WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n",
  class, instance);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index fb2c56777480..2275ee47da95 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1554,6 +1554,8 @@
 #define   OTHER_GTPM_INSTANCE  1
 #define   OTHER_KCR_INSTANCE   4
 #define   OTHER_GSC_INSTANCE   6
+#define   OTHER_MEDIA_GUC_INSTANCE 16
+#define   OTHER_MEDIA_GTPM_INSTANCE17
 
 #define GEN11_IIR_REG_SELECTOR(x)  _MMIO(0x190070 + ((x) * 4))
 
diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c 
b/drivers/gpu/drm/i915/gt/intel_sa_media.c
index 5516e9c363a4..e8f3d18c12b8 100644
--- a/drivers/gpu/drm/i915/gt/intel_sa_media.c
+++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c
@@ -36,5 +36,12 @@ int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t 
phys_addr,
gt->uncore = uncore;
gt->phys_addr = phys_addr;
 
+   /*
+* For current platforms we can assume there's only a single
+* media GT and cache it for quick lookup.
+*/
+   drm_WARN_ON(&i915->drm, i915->media_gt);
+   i915->media_gt = gt;
+
return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d4b45c7e931d..5a21242a6706 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -365,6 +365,9 @@ struct drm_i915_private {
 
struct kobject *sysfs_gt;
 
+   /* Quick lookup of media GT (current platforms only have one) */
+   struct intel_gt *media_gt;
+
struct {
struct i915_gem_contexts {
spinlock_t lock; /* locks list */
-- 
2.37.2



[PATCH v2 09/12] drm/i915/uncore: Add GSI offset to uncore

2022-09-02 Thread Matt Roper
GT non-engine registers (referred to as "GSI" registers by the spec)
have the same relative offsets on standalone media as they do on the
primary GT, just with an additional "GSI offset" added to their MMIO
address.  If we store this GSI offset in the standalone media's
intel_uncore structure, it can be automatically applied to all GSI reg
reads/writes that happen on that GT, allowing us to re-use our existing
GT code with minimal changes.

Forcewake and shadowed register tables for the media GT (which will be
added in a future patch) are listed as final addresses that already
include the GSI offset, so we also need to add the GSI offset before
doing lookups of registers in one of those tables.

Cc: Daniele Ceraolo Spurio 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt.c   | 17 ++---
 drivers/gpu/drm/i915/intel_device_info.h |  4 +++-
 drivers/gpu/drm/i915/intel_uncore.c  | 10 --
 drivers/gpu/drm/i915/intel_uncore.h  | 22 --
 4 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index fbb5e32979a4..a6ed11b933eb 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -776,10 +776,20 @@ void intel_gt_driver_late_release_all(struct 
drm_i915_private *i915)
}
 }
 
-static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
+/*
+ * Note: the gsi_offset parameter here isn't used, but we want to keep the
+ * function signature equivalent to gtdef->setup() so that it can be plugged
+ * in when we enabled remote tiles in the future.
+ */
+static int intel_gt_tile_setup(struct intel_gt *gt,
+  phys_addr_t phys_addr,
+  u32 gsi_offset)
 {
int ret;
 
+   /* GSI offset is only applicable for media GTs */
+   drm_WARN_ON(>->i915->drm, gsi_offset);
+
if (!gt_is_root(gt)) {
struct intel_uncore *uncore;
 
@@ -832,7 +842,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;
 
drm_dbg(&i915->drm, "Setting up %s\n", gt->name);
-   ret = intel_gt_tile_setup(gt, phys_addr);
+   ret = intel_gt_tile_setup(gt, phys_addr, 0);
if (ret)
return ret;
 
@@ -865,7 +875,8 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
goto err;
}
 
-   ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base);
+   ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base,
+  gtdef->gsi_offset);
if (ret)
goto err;
 
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index b408ce384cd7..85e0ef0e91b1 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -254,8 +254,10 @@ struct intel_gt_definition {
enum intel_gt_type type;
char *name;
int (*setup)(struct intel_gt *gt,
-phys_addr_t phys_addr);
+phys_addr_t phys_addr,
+u32 gsi_offset);
u32 mapping_base;
+   u32 gsi_offset;
intel_engine_mask_t engine_mask;
 };
 
diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
b/drivers/gpu/drm/i915/intel_uncore.c
index 33bdcbc77ab2..ecb02421502d 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -927,6 +927,9 @@ find_fw_domain(struct intel_uncore *uncore, u32 offset)
 {
const struct intel_forcewake_range *entry;
 
+   if (IS_GSI_REG(offset))
+   offset += uncore->gsi_offset;
+
entry = BSEARCH(offset,
uncore->fw_domains_table,
uncore->fw_domains_table_entries,
@@ -1142,6 +1145,9 @@ static bool is_shadowed(struct intel_uncore *uncore, u32 
offset)
if (drm_WARN_ON(&uncore->i915->drm, !uncore->shadowed_reg_table))
return false;
 
+   if (IS_GSI_REG(offset))
+   offset += uncore->gsi_offset;
+
return BSEARCH(offset,
   uncore->shadowed_reg_table,
   uncore->shadowed_reg_table_entries,
@@ -1994,8 +2000,8 @@ static int __fw_domain_init(struct intel_uncore *uncore,
 
d->uncore = uncore;
d->wake_count = 0;
-   d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set);
-   d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack);
+   d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set) + 
uncore->gsi_offset;
+   d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack) + 
uncore->gsi_offset;
 
d->id = domain_id;
 
d

[PATCH v2 05/12] drm/i915: Rename and expose common GT early init routine

2022-09-02 Thread Matt Roper
The common early GT init is needed for initialization of all GT types
(root/primary, remote tile, standalone media).  Since standalone media
(coming in the next patch) will be implemented in a separate file,
rename and expose the function for use.

Signed-off-by: Matt Roper 
Reviewed-by: Radhakrishna Sripada 
---
 drivers/gpu/drm/i915/gt/intel_gt.c | 6 +++---
 drivers/gpu/drm/i915/gt/intel_gt.h | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 7b880dbed6ce..fbb5e32979a4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -35,7 +35,7 @@
 #include "intel_uncore.h"
 #include "shmem_utils.h"
 
-static void __intel_gt_init_early(struct intel_gt *gt)
+void intel_gt_common_init_early(struct intel_gt *gt)
 {
spin_lock_init(>->irq_lock);
 
@@ -65,7 +65,7 @@ void intel_root_gt_init_early(struct drm_i915_private *i915)
gt->i915 = i915;
gt->uncore = &i915->uncore;
 
-   __intel_gt_init_early(gt);
+   intel_gt_common_init_early(gt);
 }
 
 static int intel_gt_probe_lmem(struct intel_gt *gt)
@@ -789,7 +789,7 @@ static int intel_gt_tile_setup(struct intel_gt *gt, 
phys_addr_t phys_addr)
 
gt->uncore = uncore;
 
-   __intel_gt_init_early(gt);
+   intel_gt_common_init_early(gt);
}
 
intel_uncore_init_early(gt->uncore, gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
b/drivers/gpu/drm/i915/gt/intel_gt.h
index 4d8779529cc2..c9a359f35d0f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -44,6 +44,7 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc 
*gsc)
return container_of(gsc, struct intel_gt, gsc);
 }
 
+void intel_gt_common_init_early(struct intel_gt *gt);
 void intel_root_gt_init_early(struct drm_i915_private *i915);
 int intel_gt_assign_ggtt(struct intel_gt *gt);
 int intel_gt_init_mmio(struct intel_gt *gt);
-- 
2.37.2



[PATCH v2 01/12] drm/i915: Move locking and unclaimed check into mmio_debug_{suspend, resume}

2022-09-02 Thread Matt Roper
Moving the locking for MMIO debug (and the final check for unclaimed
accesses when resuming debug after a userspace-initiated forcewake) will
make it simpler to completely skip MMIO debug handling on uncores that
don't support it in future patches.

Signed-off-by: Matt Roper 
Reviewed-by: Radhakrishna Sripada 
---
 drivers/gpu/drm/i915/intel_uncore.c | 41 +++--
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
b/drivers/gpu/drm/i915/intel_uncore.c
index 9b81b2543ce2..e717ea55484a 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -50,23 +50,33 @@ intel_uncore_mmio_debug_init_early(struct 
intel_uncore_mmio_debug *mmio_debug)
mmio_debug->unclaimed_mmio_check = 1;
 }
 
-static void mmio_debug_suspend(struct intel_uncore_mmio_debug *mmio_debug)
+static void mmio_debug_suspend(struct intel_uncore *uncore)
 {
-   lockdep_assert_held(&mmio_debug->lock);
+   spin_lock(&uncore->debug->lock);
 
/* Save and disable mmio debugging for the user bypass */
-   if (!mmio_debug->suspend_count++) {
-   mmio_debug->saved_mmio_check = mmio_debug->unclaimed_mmio_check;
-   mmio_debug->unclaimed_mmio_check = 0;
+   if (!uncore->debug->suspend_count++) {
+   uncore->debug->saved_mmio_check = 
uncore->debug->unclaimed_mmio_check;
+   uncore->debug->unclaimed_mmio_check = 0;
}
+
+   spin_unlock(&uncore->debug->lock);
 }
 
-static void mmio_debug_resume(struct intel_uncore_mmio_debug *mmio_debug)
+static bool check_for_unclaimed_mmio(struct intel_uncore *uncore);
+
+static void mmio_debug_resume(struct intel_uncore *uncore)
 {
-   lockdep_assert_held(&mmio_debug->lock);
+   spin_lock(&uncore->debug->lock);
+
+   if (!--uncore->debug->suspend_count)
+   uncore->debug->unclaimed_mmio_check = 
uncore->debug->saved_mmio_check;
 
-   if (!--mmio_debug->suspend_count)
-   mmio_debug->unclaimed_mmio_check = mmio_debug->saved_mmio_check;
+   if (check_for_unclaimed_mmio(uncore))
+   drm_info(&uncore->i915->drm,
+"Invalid mmio detected during user access\n");
+
+   spin_unlock(&uncore->debug->lock);
 }
 
 static const char * const forcewake_domain_names[] = {
@@ -677,9 +687,7 @@ void intel_uncore_forcewake_user_get(struct intel_uncore 
*uncore)
spin_lock_irq(&uncore->lock);
if (!uncore->user_forcewake_count++) {
intel_uncore_forcewake_get__locked(uncore, FORCEWAKE_ALL);
-   spin_lock(&uncore->debug->lock);
-   mmio_debug_suspend(uncore->debug);
-   spin_unlock(&uncore->debug->lock);
+   mmio_debug_suspend(uncore);
}
spin_unlock_irq(&uncore->lock);
 }
@@ -695,14 +703,7 @@ void intel_uncore_forcewake_user_put(struct intel_uncore 
*uncore)
 {
spin_lock_irq(&uncore->lock);
if (!--uncore->user_forcewake_count) {
-   spin_lock(&uncore->debug->lock);
-   mmio_debug_resume(uncore->debug);
-
-   if (check_for_unclaimed_mmio(uncore))
-   drm_info(&uncore->i915->drm,
-"Invalid mmio detected during user access\n");
-   spin_unlock(&uncore->debug->lock);
-
+   mmio_debug_resume(uncore);
intel_uncore_forcewake_put__locked(uncore, FORCEWAKE_ALL);
}
spin_unlock_irq(&uncore->lock);
-- 
2.37.2



[PATCH v2 02/12] drm/i915: Only hook up uncore->debug for primary uncore

2022-09-02 Thread Matt Roper
The original intent of intel_uncore_mmio_debug as described in commit
0a9b26306d6a ("drm/i915: split out uncore_mmio_debug") was to be a
singleton structure that could be shared between multiple GTs' uncore
objects in a multi-tile system.  Somehow we went off track and
started allocating separate instances of this structure for each GT,
which defeats that original goal.

But in reality, there isn't even a need to share the mmio_debug between
multiple GTs; on all modern platforms (i.e., everything after gen7)
unclaimed register accesses are something that can only be detected for
display registers.  There's no point in grabbing the debug spinlock and
checking for unclaimed accesses on an uncore used by an xehpsdv or pvc
remote tile GT, or the uncore used by a mtl standalone media GT since
all of the display accesses go through the primary intel_uncore.

The simplest solution is to simply leave uncore->debug NULL on all
intel_uncore instances except for the primary one.  This will allow us
to avoid the pointless debug spinlock acquisition we've been doing on
MMIO accesses coming in through these intel_uncores.

Signed-off-by: Matt Roper 
Reviewed-by: Radhakrishna Sripada 
---
 drivers/gpu/drm/i915/gt/intel_gt.c  |  9 -
 drivers/gpu/drm/i915/i915_driver.c  |  2 +-
 drivers/gpu/drm/i915/intel_uncore.c | 23 ++-
 drivers/gpu/drm/i915/intel_uncore.h |  3 +--
 4 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index e4bac2431e41..a82b5e2e0d83 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -781,21 +781,13 @@ static int intel_gt_tile_setup(struct intel_gt *gt, 
phys_addr_t phys_addr)
int ret;
 
if (!gt_is_root(gt)) {
-   struct intel_uncore_mmio_debug *mmio_debug;
struct intel_uncore *uncore;
 
uncore = kzalloc(sizeof(*uncore), GFP_KERNEL);
if (!uncore)
return -ENOMEM;
 
-   mmio_debug = kzalloc(sizeof(*mmio_debug), GFP_KERNEL);
-   if (!mmio_debug) {
-   kfree(uncore);
-   return -ENOMEM;
-   }
-
gt->uncore = uncore;
-   gt->uncore->debug = mmio_debug;
 
__intel_gt_init_early(gt);
}
@@ -817,7 +809,6 @@ intel_gt_tile_cleanup(struct intel_gt *gt)
intel_uncore_cleanup_mmio(gt->uncore);
 
if (!gt_is_root(gt)) {
-   kfree(gt->uncore->debug);
kfree(gt->uncore);
kfree(gt);
}
diff --git a/drivers/gpu/drm/i915/i915_driver.c 
b/drivers/gpu/drm/i915/i915_driver.c
index 56a2bcddb2af..18acba1bc3b0 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -326,7 +326,7 @@ static int i915_driver_early_probe(struct drm_i915_private 
*dev_priv)
intel_device_info_subplatform_init(dev_priv);
intel_step_init(dev_priv);
 
-   intel_uncore_mmio_debug_init_early(&dev_priv->mmio_debug);
+   intel_uncore_mmio_debug_init_early(dev_priv);
 
spin_lock_init(&dev_priv->irq_lock);
spin_lock_init(&dev_priv->gpu_error.lock);
diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
b/drivers/gpu/drm/i915/intel_uncore.c
index e717ea55484a..6841f76533f9 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -44,14 +44,19 @@ fw_domains_get(struct intel_uncore *uncore, enum 
forcewake_domains fw_domains)
 }
 
 void
-intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug)
+intel_uncore_mmio_debug_init_early(struct drm_i915_private *i915)
 {
-   spin_lock_init(&mmio_debug->lock);
-   mmio_debug->unclaimed_mmio_check = 1;
+   spin_lock_init(&i915->mmio_debug.lock);
+   i915->mmio_debug.unclaimed_mmio_check = 1;
+
+   i915->uncore.debug = &i915->mmio_debug;
 }
 
 static void mmio_debug_suspend(struct intel_uncore *uncore)
 {
+   if (!uncore->debug)
+   return;
+
spin_lock(&uncore->debug->lock);
 
/* Save and disable mmio debugging for the user bypass */
@@ -67,6 +72,9 @@ static bool check_for_unclaimed_mmio(struct intel_uncore 
*uncore);
 
 static void mmio_debug_resume(struct intel_uncore *uncore)
 {
+   if (!uncore->debug)
+   return;
+
spin_lock(&uncore->debug->lock);
 
if (!--uncore->debug->suspend_count)
@@ -1705,7 +1713,7 @@ unclaimed_reg_debug(struct intel_uncore *uncore,
const bool read,
const bool before)
 {
-   if (likely(!uncore->i915->params.mmio_debug))
+   if (likely(!uncore->i915->params.mmio_debug) || !uncore->debug)
return;
 
/* interrupts are disabled and

[PATCH v2 03/12] drm/i915: Use managed allocations for extra uncore objects

2022-09-02 Thread Matt Roper
We're slowly transitioning the init-time kzalloc's of the driver over to
DRM-managed allocations; let's make sure the uncore objects allocated
for non-root GTs are thus allocated.

Signed-off-by: Matt Roper 
Reviewed-by: Radhakrishna Sripada 
---
 drivers/gpu/drm/i915/gt/intel_gt.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index a82b5e2e0d83..cf7aab7adb30 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -783,7 +783,7 @@ static int intel_gt_tile_setup(struct intel_gt *gt, 
phys_addr_t phys_addr)
if (!gt_is_root(gt)) {
struct intel_uncore *uncore;
 
-   uncore = kzalloc(sizeof(*uncore), GFP_KERNEL);
+   uncore = drmm_kzalloc(>->i915->drm, sizeof(*uncore), 
GFP_KERNEL);
if (!uncore)
return -ENOMEM;
 
@@ -808,10 +808,8 @@ intel_gt_tile_cleanup(struct intel_gt *gt)
 {
intel_uncore_cleanup_mmio(gt->uncore);
 
-   if (!gt_is_root(gt)) {
-   kfree(gt->uncore);
+   if (!gt_is_root(gt))
kfree(gt);
-   }
 }
 
 int intel_gt_probe_all(struct drm_i915_private *i915)
-- 
2.37.2



[PATCH v2 00/12] i915: Add "standalone media" support for MTL

2022-09-02 Thread Matt Roper
Starting with MTL, media functionality has moved into a new, second GT
at the hardware level.  This new GT, referred to as "standalone media"
in the spec, has its own GuC, power management/forcewake, etc.  The
general non-engine GT registers for standalone media start at 0x38,
but otherwise use the same MMIO offsets as the primary GT.

Standalone media has a lot of similarity to the remote tiles
present on platforms like xehpsdv and pvc, and our i915 implementation
can share much of the general "multi GT" infrastructure between the two
types of platforms.  However there are a few notable differences
we must deal with:
 - The 0x38 offset only applies to the non-engine GT registers
   (which the specs refer to as "GSI" registers).  The engine registers
   remain at their usual locations (e.g., 0x1C for VCS0).
 - Unlike platforms with remote tiles, all interrupt handling for
   standalone media still happens via the primary GT.


v2:
 - Added new patches to ensure each GT, not just the primary, is
   handled properly during various init/suspend/resume/teardown flows.
 - Simplified GSI offset handling and split it into its own patch.
 - Correct gt->irq_lock assignment for media GT.
 - Fix jump target for intel_root_gt_init_early() errors.

Cc: Radhakrishna Sripada 
Cc: Daniele Ceraolo Spurio 
Cc: Aravind Iddamsetty 

Matt Roper (12):
  drm/i915: Move locking and unclaimed check into
mmio_debug_{suspend,resume}
  drm/i915: Only hook up uncore->debug for primary uncore
  drm/i915: Use managed allocations for extra uncore objects
  drm/i915: Prepare more multi-GT initialization
  drm/i915: Rename and expose common GT early init routine
  drm/i915: Use a DRM-managed action to release the PCI bridge device
  drm/i915: Initialize MMIO access for each GT
  drm/i915: Handle each GT on init/release and suspend/resume
  drm/i915/uncore: Add GSI offset to uncore
  drm/i915/xelpmp: Expose media as another GT
  drm/i915/mtl: Use primary GT's irq lock for media GT
  drm/i915/mtl: Hook up interrupts for standalone media

 drivers/gpu/drm/i915/Makefile |   1 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c |  10 +-
 drivers/gpu/drm/i915/gt/intel_gt.c|  96 
 drivers/gpu/drm/i915/gt/intel_gt.h|   4 +-
 drivers/gpu/drm/i915/gt/intel_gt_irq.c|  35 --
 drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c |   8 +-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  10 ++
 drivers/gpu/drm/i915/gt/intel_gt_types.h  |   5 +-
 drivers/gpu/drm/i915/gt/intel_rps.c   |  26 ++---
 drivers/gpu/drm/i915/gt/intel_sa_media.c  |  47 
 drivers/gpu/drm/i915/gt/intel_sa_media.h  |  15 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|  24 ++--
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |   4 +-
 drivers/gpu/drm/i915/i915_driver.c| 105 --
 drivers/gpu/drm/i915/i915_drv.h   |   5 +
 drivers/gpu/drm/i915/i915_irq.c   |   4 +-
 drivers/gpu/drm/i915/i915_pci.c   |  15 +++
 drivers/gpu/drm/i915/intel_device_info.h  |  19 
 drivers/gpu/drm/i915/intel_uncore.c   |  83 +-
 drivers/gpu/drm/i915/intel_uncore.h   |  28 -
 drivers/gpu/drm/i915/pxp/intel_pxp.c  |   4 +-
 drivers/gpu/drm/i915/pxp/intel_pxp_irq.c  |  14 +--
 drivers/gpu/drm/i915/pxp/intel_pxp_session.c  |   4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   1 +
 25 files changed, 424 insertions(+), 147 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.h

-- 
2.37.2



[PATCH v2 06/12] drm/i915: Use a DRM-managed action to release the PCI bridge device

2022-09-02 Thread Matt Roper
As we start supporting multiple uncore structures in future patches, the
MMIO cleanup (which make also get called mid-init if there's a failure)
will become more complicated.  Moving to DRM-managed actions will help
keep things simple.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/i915_driver.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_driver.c 
b/drivers/gpu/drm/i915/i915_driver.c
index 18acba1bc3b0..1f46dd1ffaf7 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -105,6 +105,12 @@ static const char irst_name[] = "INT3392";
 
 static const struct drm_driver i915_drm_driver;
 
+static void i915_release_bridge_dev(struct drm_device *dev,
+   void *bridge)
+{
+   pci_dev_put(bridge);
+}
+
 static int i915_get_bridge_dev(struct drm_i915_private *dev_priv)
 {
int domain = pci_domain_nr(to_pci_dev(dev_priv->drm.dev)->bus);
@@ -115,7 +121,9 @@ static int i915_get_bridge_dev(struct drm_i915_private 
*dev_priv)
drm_err(&dev_priv->drm, "bridge device not found\n");
return -EIO;
}
-   return 0;
+
+   return drmm_add_action_or_reset(&dev_priv->drm, i915_release_bridge_dev,
+   dev_priv->bridge_dev);
 }
 
 /* Allocate space for the MCH regs if needed, return nonzero on error */
@@ -452,7 +460,6 @@ static int i915_driver_mmio_probe(struct drm_i915_private 
*dev_priv)
 err_uncore:
intel_teardown_mchbar(dev_priv);
intel_uncore_fini_mmio(&dev_priv->uncore);
-   pci_dev_put(dev_priv->bridge_dev);
 
return ret;
 }
@@ -465,7 +472,6 @@ static void i915_driver_mmio_release(struct 
drm_i915_private *dev_priv)
 {
intel_teardown_mchbar(dev_priv);
intel_uncore_fini_mmio(&dev_priv->uncore);
-   pci_dev_put(dev_priv->bridge_dev);
 }
 
 /**
-- 
2.37.2



[PATCH 3/7] drm/i915/gt: Drop a few unused register definitions

2022-09-01 Thread Matt Roper
Let's drop a few register definitions that are unused anywhere in the
driver today.  Since the referenced offsets are part of what is now
considered a multicast register region, the current definitions would
not be correct for use on any future platform.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 17 -
 1 file changed, 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 6d4d15964b58..bf5f21d334f3 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -447,13 +447,6 @@
 #define   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC   REG_BIT(11)
 #define   GEN12_DISABLE_CPS_AWARE_COLOR_PIPE   REG_BIT(9)
 
-/* GEN9 chicken */
-#define SLICE_ECO_CHICKEN0 _MMIO(0x7308)
-#define   PIXEL_MASK_CAMMING_DISABLE   (1 << 14)
-
-#define GEN9_SLICE_COMMON_ECO_CHICKEN0 _MMIO(0x7308)
-#define   DISABLE_PIXEL_MASK_CAMMING   (1 << 14)
-
 #define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
 #define XEHP_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
 #define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE   REG_BIT(14)
@@ -958,11 +951,6 @@
 #define GEN7_L3LOG(slice, i)   _MMIO(0xb070 + (slice) * 0x200 
+ (i) * 4)
 #define   GEN7_L3LOG_SIZE  0x80
 
-#define GEN10_SCRATCH_LNCF2_MMIO(0xb0a0)
-#define   PMFLUSHDONE_LNICRSDROP   (1 << 20)
-#define   PMFLUSH_GAPL3UNBLOCK (1 << 21)
-#define   PMFLUSHDONE_LNEBLK   (1 << 22)
-
 #define XEHP_L3NODEARBCFG  _MMIO(0xb0b4)
 #define   XEHP_LNESPAREREG_BIT(19)
 
@@ -977,9 +965,6 @@
 #define   L3_HIGH_PRIO_CREDITS(x)  (((x) >> 1) << 14)
 #define   L3_PRIO_CREDITS_MASK ((0x1f << 19) | (0x1f << 14))
 
-#define GEN10_L3_CHICKEN_MODE_REGISTER _MMIO(0xb114)
-#define   GEN11_I2M_WRITE_DISABLE  (1 << 28)
-
 #define GEN8_L3SQCREG4 _MMIO(0xb118)
 #define   GEN11_LQSC_CLEAN_EVICT_DISABLE   (1 << 6)
 #define   GEN8_LQSC_RO_PERF_DIS(1 << 27)
@@ -1174,8 +1159,6 @@
 #define SARB_CHICKEN1  _MMIO(0xe90c)
 #define   COMP_CKN_IN  REG_GENMASK(30, 29)
 
-#define GEN7_HALF_SLICE_CHICKEN1_GT2   _MMIO(0xf100)
-
 #define GEN7_ROW_CHICKEN2_GT2  _MMIO(0xf4f4)
 #define   DOP_CLOCK_GATING_DISABLE (1 << 0)
 #define   PUSH_CONSTANT_DEREF_DISABLE  (1 << 8)
-- 
2.37.2



[PATCH 5/7] drm/i915: Define MCR registers explicitly

2022-09-01 Thread Matt Roper
Rather than using the same _MMIO() macro to define MCR registers as
singleton registers, let's use a new MCR_REG() macro to make it clear
that these registers are special and should be handled accordingly.  For
now MCR_REG() will still generate an i915_reg_t with the given offset,
but we'll change that in future patches.

Bspec: 66673, 66696, 66534, 67609
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 118 
 1 file changed, 60 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 94a1f964eaba..736fc01fa74f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -8,6 +8,8 @@
 
 #include "i915_reg_defs.h"
 
+#define MCR_REG(offset)_MMIO(offset)
+
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0_MMIO(0xd00)
 #define   GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT3
@@ -326,12 +328,12 @@
 #define GEN7_TLB_RD_ADDR   _MMIO(0x4700)
 
 #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4)
-#define XEHP_PAT_INDEX(index)  _MMIO(0x4800 + (index) * 4)
+#define XEHP_PAT_INDEX(index)  MCR_REG(0x4800 + (index) * 4)
 
-#define XEHP_TILE0_ADDR_RANGE  _MMIO(0x4900)
+#define XEHP_TILE0_ADDR_RANGE  MCR_REG(0x4900)
 #define   XEHP_TILE_LMEM_RANGE_SHIFT   8
 
-#define XEHP_FLAT_CCS_BASE_ADDR_MMIO(0x4910)
+#define XEHP_FLAT_CCS_BASE_ADDRMCR_REG(0x4910)
 #define   XEHP_CCS_BASE_SHIFT  8
 
 #define GAMTARBMODE_MMIO(0x4a08)
@@ -381,18 +383,18 @@
 #define CHICKEN_RASTER_2   _MMIO(0x6208)
 #define   TBIMR_FAST_CLIP  REG_BIT(5)
 
-#define VFLSKPD_MMIO(0x62a8)
+#define VFLSKPDMCR_REG(0x62a8)
 #define   DIS_OVER_FETCH_CACHE REG_BIT(1)
 #define   DIS_MULT_MISS_RD_SQUASH  REG_BIT(0)
 
 #define GEN12_FF_MODE2 _MMIO(0x6604)
-#define XEHP_FF_MODE2  _MMIO(0x6604)
+#define XEHP_FF_MODE2  MCR_REG(0x6604)
 #define   FF_MODE2_GS_TIMER_MASK   REG_GENMASK(31, 24)
 #define   FF_MODE2_GS_TIMER_224
REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
 #define   FF_MODE2_TDS_TIMER_MASK  REG_GENMASK(23, 16)
 #define   FF_MODE2_TDS_TIMER_128   
REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
 
-#define XEHPG_INSTDONE_GEOM_SVG_MMIO(0x666c)
+#define XEHPG_INSTDONE_GEOM_SVGMCR_REG(0x666c)
 
 #define CACHE_MODE_0_GEN7  _MMIO(0x7000) /* IVB+ */
 #define   RC_OP_FLUSH_ENABLE   (1 << 0)
@@ -441,14 +443,14 @@
 #define GEN8_HDC_CHICKEN1  _MMIO(0x7304)
 
 #define GEN11_COMMON_SLICE_CHICKEN3_MMIO(0x7304)
-#define XEHP_COMMON_SLICE_CHICKEN3 _MMIO(0x7304)
+#define XEHP_COMMON_SLICE_CHICKEN3 MCR_REG(0x7304)
 #define   DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12)
 #define   XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLEREG_BIT(12)
 #define   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC   REG_BIT(11)
 #define   GEN12_DISABLE_CPS_AWARE_COLOR_PIPE   REG_BIT(9)
 
 #define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
-#define XEHP_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
+#define XEHP_SLICE_COMMON_ECO_CHICKEN1 MCR_REG(0x731c)
 #define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE   REG_BIT(14)
 #define   GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11)
 
@@ -479,7 +481,7 @@
 
 #define GEN8_RC6_CTX_INFO  _MMIO(0x8504)
 
-#define XEHP_SQCM  _MMIO(0x8724)
+#define XEHP_SQCM  MCR_REG(0x8724)
 #define   EN_32B_ACCESSREG_BIT(30)
 
 #define HSW_IDICR  _MMIO(0x9008)
@@ -640,7 +642,7 @@
 #define GEN7_MISCCPCTL _MMIO(0x9424)
 #define   GEN7_DOP_CLOCK_GATE_ENABLE   (1 << 0)
 
-#define GEN8_MISCCPCTL _MMIO(0x9424)
+#define GEN8_MISCCPCTL MCR_REG(0x9424)
 #define   GEN8_DOP_CLOCK_GATE_ENABLE   REG_BIT(0)
 #define   GEN12_DOP_CLOCK_GATE_RENDER_ENABLE   REG_BIT(1)
 #define   GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE (1 << 2)
@@ -696,7 +698,7 @@
 #define   LTCDD_CLKGATE_DISREG_BIT(10)
 
 #define GEN11_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4)
-#define XEHP_SLICE_UNIT_LEVEL_CLKGATE  _MMIO(0x94d4)
+#define XEHP_SLICE_UNIT_LEVEL_CLKGATE  MCR_REG(0x94d4)
 #define   SARBUNIT_CLKGATE_DIS (1 << 5)
 #define   RCCUNIT_CLKGATE_DIS  (1 << 7)

[PATCH 4/7] drm/i915/gt: Correct prefix on a few registers

2022-09-01 Thread Matt Roper
We have a few registers that have existed for several hardware
generations, but are only used by the driver on Xe_HP and beyond.  In
cases where the Xe_HP version of the register is now replicated and uses
multicast behavior, but earlier generations were singleton, let's change
the register prefix to "XEHP_" to help clarify that we're using the
newer multicast form of the register.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  8 
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 10 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index bf5f21d334f3..94a1f964eaba 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -479,7 +479,7 @@
 
 #define GEN8_RC6_CTX_INFO  _MMIO(0x8504)
 
-#define GEN12_SQCM _MMIO(0x8724)
+#define XEHP_SQCM  _MMIO(0x8724)
 #define   EN_32B_ACCESSREG_BIT(30)
 
 #define HSW_IDICR  _MMIO(0x9008)
@@ -980,7 +980,7 @@
 #define GEN11_SCRATCH2 _MMIO(0xb140)
 #define   GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE(1 << 19)
 
-#define GEN11_L3SQCREG5_MMIO(0xb158)
+#define XEHP_L3SQCREG5 _MMIO(0xb158)
 #define   L3_PWM_TIMER_INIT_VAL_MASK   REG_GENMASK(9, 0)
 
 #define MLTICTXCTL _MMIO(0xb170)
@@ -1036,7 +1036,7 @@
 #define GEN12_BLT_TLB_INV_CR   _MMIO(0xcee4)
 #define GEN12_COMPCTX_TLB_INV_CR   _MMIO(0xcf04)
 
-#define GEN12_MERT_MOD_CTRL_MMIO(0xcf28)
+#define XEHP_MERT_MOD_CTRL _MMIO(0xcf28)
 #define RENDER_MOD_CTRL_MMIO(0xcf2c)
 #define COMP_MOD_CTRL  _MMIO(0xcf30)
 #define VDBX_MOD_CTRL  _MMIO(0xcf34)
@@ -1138,7 +1138,7 @@
 #define EU_PERF_CNTL1  _MMIO(0xe558)
 #define EU_PERF_CNTL5  _MMIO(0xe55c)
 
-#define GEN12_HDC_CHICKEN0 _MMIO(0xe5f0)
+#define XEHP_HDC_CHICKEN0  _MMIO(0xe5f0)
 #define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK   REG_GENMASK(13, 
11)
 #define ICL_HDC_MODE   _MMIO(0xe5f4)
 
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index eff5f4f85374..3d48189021ad 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -569,7 +569,7 @@ static void dg2_ctx_gt_tuning_init(struct intel_engine_cs 
*engine,
   struct i915_wa_list *wal)
 {
wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
-   wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+   wa_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
wa_add(wal,
   XEHP_FF_MODE2,
@@ -1504,7 +1504,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
 * recommended tuning settings documented in the bspec's
 * performance guide section.
 */
-   wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS);
+   wa_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
 
/* Wa_14015795083 */
wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
@@ -2163,7 +2163,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct 
i915_wa_list *wal)
 * Wa_22010960976:dg2
 * Wa_14013347512:dg2
 */
-   wa_masked_dis(wal, GEN12_HDC_CHICKEN0,
+   wa_masked_dis(wal, XEHP_HDC_CHICKEN0,
  LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
}
 
@@ -2216,7 +2216,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct 
i915_wa_list *wal)
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
/* Wa_14012362059:dg2 */
-   wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
}
 
if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
@@ -2757,7 +2757,7 @@ general_render_compute_wa_init(struct intel_engine_cs 
*engine, struct i915_wa_li
}
 
/* Wa_14012362059:xehpsdv */
-   wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
 
/* Wa_14014368820:xehpsdv */
wa_write_or(wal, GEN12_GAMCNTRL_CTRL, 
INVALIDATION_BROADCAST_MODE_DIS |
-- 
2.37.2



[PATCH 6/7] drm/i915/gt: Always use MCR functions on multicast registers

2022-09-01 Thread Matt Roper
Rather than relying on the implicit behavior of intel_uncore_*()
functions, let's always use the intel_gt_mcr_*() functions to operate on
multicast/replicated registers.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  4 +-
 drivers/gpu/drm/i915/gt/intel_gtt.c   | 49 ---
 drivers/gpu/drm/i915/gt/intel_gtt.h   |  2 +-
 drivers/gpu/drm/i915/gt/intel_mocs.c  | 13 +++---
 drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 12 --
 drivers/gpu/drm/i915/intel_pm.c   | 20 +
 6 files changed, 55 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 30cf5c3369d9..e2620f41d8b2 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -986,7 +986,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 
ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
 
-   setup_private_pat(ggtt->vm.gt->uncore);
+   setup_private_pat(ggtt->vm.gt);
 
return ggtt_probe_common(ggtt, size);
 }
@@ -1302,7 +1302,7 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
wbinvd_on_all_cpus();
 
if (GRAPHICS_VER(ggtt->vm.i915) >= 8)
-   setup_private_pat(ggtt->vm.gt->uncore);
+   setup_private_pat(ggtt->vm.gt);
 
intel_ggtt_restore_fences(ggtt);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c 
b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 601d89b4feb1..6f61c8da0b61 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -15,6 +15,7 @@
 #include "i915_trace.h"
 #include "i915_utils.h"
 #include "intel_gt.h"
+#include "intel_gt_mcr.h"
 #include "intel_gt_regs.h"
 #include "intel_gtt.h"
 
@@ -477,27 +478,27 @@ void gtt_write_workarounds(struct intel_gt *gt)
}
 }
 
-static void tgl_setup_private_ppat(struct intel_uncore *uncore)
+static void tgl_setup_private_ppat(struct intel_gt *gt)
 {
-   if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 50)) {
-   intel_uncore_write(uncore, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
-   intel_uncore_write(uncore, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
-   intel_uncore_write(uncore, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
-   intel_uncore_write(uncore, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
-   intel_uncore_write(uncore, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
-   intel_uncore_write(uncore, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
-   intel_uncore_write(uncore, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
-   intel_uncore_write(uncore, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
+   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) {
+   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0), 
GEN8_PPAT_WB);
+   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1), 
GEN8_PPAT_WC);
+   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2), 
GEN8_PPAT_WT);
+   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3), 
GEN8_PPAT_UC);
+   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4), 
GEN8_PPAT_WB);
+   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(5), 
GEN8_PPAT_WB);
+   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(6), 
GEN8_PPAT_WB);
+   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(7), 
GEN8_PPAT_WB);
} else {
/* TGL doesn't support LLC or AGE settings */
-   intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
-   intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
-   intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
-   intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
-   intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
-   intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
-   intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
-   intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
+   intel_uncore_write(gt->uncore, GEN12_PAT_INDEX(0), 
GEN8_PPAT_WB);
+   intel_uncore_write(gt->uncore, GEN12_PAT_INDEX(1), 
GEN8_PPAT_WC);
+   intel_uncore_write(gt->uncore, GEN12_PAT_INDEX(2), 
GEN8_PPAT_WT);
+   intel_uncore_write(gt->uncore, GEN12_PAT_INDEX(3), 
GEN8_PPAT_UC);
+   intel_uncore_write(gt->uncore, GEN12_PAT_INDEX(4), 
GEN8_PPAT_WB);
+   intel_uncore_write(gt->uncore, GEN12_PAT_INDEX(5), 
GEN8_PPAT_WB);
+   intel_uncore_write(gt->uncore, GEN12_PAT_INDEX(6), 
GEN8_PPAT_WB);
+   intel_uncore_write(gt->uncore, GEN12_PAT_INDEX(7), 
GEN8_PPAT_WB);
}
 }
 
@@ -593,20 +594,20 @@ static void chv_setup_private_ppat(struct intel_uncore 
*uncore)

[PATCH 7/7] drm/i915/gt: Add MCR-specific workaround initializers

2022-09-01 Thread Matt Roper
Let's be more explicit about which of our workarounds are updating MCR
registers.  This will also allow us to record whether a workaround
register has MCR behavior or not so that we'll only need to do a
steering lookup for the registers that truly need it.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c   | 414 +++---
 .../gpu/drm/i915/gt/intel_workarounds_types.h |   4 +-
 2 files changed, 252 insertions(+), 166 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 3d48189021ad..c13abac19b06 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -166,12 +166,33 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t 
reg,
_wa_add(wal, &wa);
 }
 
+static void wa_mcr_add(struct i915_wa_list *wal, i915_reg_t reg,
+  u32 clear, u32 set, u32 read_mask, bool masked_reg)
+{
+   struct i915_wa wa = {
+   .reg  = reg,
+   .clr  = clear,
+   .set  = set,
+   .read = read_mask,
+   .masked_reg = masked_reg,
+   .is_mcr = 1,
+   };
+
+   _wa_add(wal, &wa);
+}
+
 static void
 wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
 {
wa_add(wal, reg, clear, set, clear, false);
 }
 
+static void
+wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 
set)
+{
+   wa_mcr_add(wal, reg, clear, set, clear, false);
+}
+
 static void
 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
 {
@@ -184,12 +205,24 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 
set)
wa_write_clr_set(wal, reg, set, set);
 }
 
+static void
+wa_mcr_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
+{
+   wa_mcr_write_clr_set(wal, reg, set, set);
+}
+
 static void
 wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
 {
wa_write_clr_set(wal, reg, clr, 0);
 }
 
+static void
+wa_mcr_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
+{
+   wa_mcr_write_clr_set(wal, reg, clr, 0);
+}
+
 /*
  * WA operations on "masked register". A masked register has the upper 16 bits
  * documented as "masked" in b-spec. Its purpose is to allow writing to just a
@@ -207,12 +240,24 @@ wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, 
u32 val)
wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
 }
 
+static void
+wa_mcr_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+{
+   wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
+}
+
 static void
 wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 {
wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
 }
 
+static void
+wa_mcr_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+{
+   wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
+}
+
 static void
 wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
u32 mask, u32 val)
@@ -220,6 +265,13 @@ wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t 
reg,
wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
 }
 
+static void
+wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
+   u32 mask, u32 val)
+{
+   wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
+}
+
 static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
  struct i915_wa_list *wal)
 {
@@ -241,7 +293,7 @@ static void gen8_ctx_workarounds_init(struct 
intel_engine_cs *engine,
wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), 
ASYNC_FLIP_PERF_DISABLE);
 
/* WaDisablePartialInstShootdown:bdw,chv */
-   wa_masked_en(wal, GEN8_ROW_CHICKEN,
+   wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
/* Use Force Non-Coherent whenever executing a 3D context. This is a
@@ -288,18 +340,18 @@ static void bdw_ctx_workarounds_init(struct 
intel_engine_cs *engine,
gen8_ctx_workarounds_init(engine, wal);
 
/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
-   wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+   wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 
/* WaDisableDopClockGating:bdw
 *
 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
 * to disable EUTC clock gating.
 */
-   wa_masked_en(wal, GEN8_ROW_CHICKEN2,
-DOP_CLOCK_GATING_DISABLE);
+   wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
+DOP_CLOCK_GATING_DISABLE);
 
-   wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
-GEN8_SAMPLER_POWER_BYPASS_DIS);
+   wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
+   

[PATCH 2/7] drm/i915/xehp: Create separate reg definitions for new MCR registers

2022-09-01 Thread Matt Roper
Starting in Xe_HP, several registers our driver works with have been
converted from singleton registers into replicated registers with
multicast behavior.  Although the registers are still located at the
same MMIO offsets as on previous platforms, let's duplicate the register
definitions in preparation for upcoming patches that will handle
multicast registers in a special manner.

The registers that are now replicated on Xe_HP are:
 * PAT_INDEX (mslice replication)
 * FF_MODE2 (gslice replication)
 * COMMON_SLICE_CHICKEN3 (gslice replication)
 * SLICE_COMMON_ECO_CHICKEN1 (gslice replication)
 * SLICE_UNIT_LEVEL_CLKGATE (gslice replication)
 * LNCFCMOCS (lncf replication)

The *_TLB_INV_CR registers are also replicated (mslice replication), but
I'm skipping those for now because I think that code might need more
work in general for multicast behavior (e.g., do we need to wait for
the invalidation to report as completed on every mslice?).

Bspec: 66534
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 18 -
 drivers/gpu/drm/i915/gt/intel_gtt.c | 29 ++---
 drivers/gpu/drm/i915/gt/intel_mocs.c|  5 +++-
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 24 -
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c  |  7 +++--
 5 files changed, 52 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index ad8e2b52ff64..6d4d15964b58 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -326,6 +326,7 @@
 #define GEN7_TLB_RD_ADDR   _MMIO(0x4700)
 
 #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4)
+#define XEHP_PAT_INDEX(index)  _MMIO(0x4800 + (index) * 4)
 
 #define XEHP_TILE0_ADDR_RANGE  _MMIO(0x4900)
 #define   XEHP_TILE_LMEM_RANGE_SHIFT   8
@@ -384,7 +385,8 @@
 #define   DIS_OVER_FETCH_CACHE REG_BIT(1)
 #define   DIS_MULT_MISS_RD_SQUASH  REG_BIT(0)
 
-#define FF_MODE2   _MMIO(0x6604)
+#define GEN12_FF_MODE2 _MMIO(0x6604)
+#define XEHP_FF_MODE2  _MMIO(0x6604)
 #define   FF_MODE2_GS_TIMER_MASK   REG_GENMASK(31, 24)
 #define   FF_MODE2_GS_TIMER_224
REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
 #define   FF_MODE2_TDS_TIMER_MASK  REG_GENMASK(23, 16)
@@ -439,6 +441,7 @@
 #define GEN8_HDC_CHICKEN1  _MMIO(0x7304)
 
 #define GEN11_COMMON_SLICE_CHICKEN3_MMIO(0x7304)
+#define XEHP_COMMON_SLICE_CHICKEN3 _MMIO(0x7304)
 #define   DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12)
 #define   XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLEREG_BIT(12)
 #define   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC   REG_BIT(11)
@@ -452,10 +455,9 @@
 #define   DISABLE_PIXEL_MASK_CAMMING   (1 << 14)
 
 #define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
-#define   GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11)
-
-#define SLICE_COMMON_ECO_CHICKEN1  _MMIO(0x731c)
+#define XEHP_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
 #define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE   REG_BIT(14)
+#define   GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11)
 
 #define GEN9_SLICE_PGCTL_ACK(slice)_MMIO(0x804c + (slice) * 0x4)
 #define GEN10_SLICE_PGCTL_ACK(slice)   _MMIO(0x804c + ((slice) / 3) * 
0x34 + \
@@ -700,7 +702,8 @@
 #define   GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16)
 #define   LTCDD_CLKGATE_DISREG_BIT(10)
 
-#define SLICE_UNIT_LEVEL_CLKGATE   _MMIO(0x94d4)
+#define GEN11_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4)
+#define XEHP_SLICE_UNIT_LEVEL_CLKGATE  _MMIO(0x94d4)
 #define   SARBUNIT_CLKGATE_DIS (1 << 5)
 #define   RCCUNIT_CLKGATE_DIS  (1 << 7)
 #define   MSCUNIT_CLKGATE_DIS  (1 << 10)
@@ -715,7 +718,7 @@
 #define   VSUNIT_CLKGATE_DIS_TGL   REG_BIT(19)
 #define   PSDUNIT_CLKGATE_DIS  REG_BIT(5)
 
-#define SUBSLICE_UNIT_LEVEL_CLKGATE_MMIO(0x9524)
+#define GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE  _MMIO(0x9524)
 #define   DSS_ROUTER_CLKGATE_DIS   REG_BIT(28)
 #define   GWUNIT_CLKGATE_DIS   REG_BIT(16)
 
@@ -938,7 +941,8 @@
 
 /* MOCS (Memory Object Control State) registers */
 #define GEN9_LNCFCMOCS(i)  _MMIO(0xb020 + (i) * 4) /* L3 
Cache Control */
-#define GEN9_LNCFCMOCS_REG_COUNT   32
+#define XEHP_LNCFCMOCS(i)  _MMIO(0xb020 + (i) * 4) /* L3 
Cache Control */
+#define LNCFCMOCS_REG_COUNT32
 
 #define GEN7_L3CNTLREG3_MMIO(0xb024)
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c 
b/drivers/gpu/drm/i915/gt/intel_gtt.c
index b67831833c9a..

[PATCH 1/7] drm/i915/gen8: Create separate reg definitions for new MCR registers

2022-09-01 Thread Matt Roper
Gen8 was the first time our hardware had multicast registers (or at
least the first time the multicast nature was exposed and MMIO accesses
could be steered).  There are some registers that transitioned from
singleton behavior to multicast during the gen7 -> gen8 transition;
let's duplicate the register definitions for those registers in
preparation for upcoming patches that will handle MCR registers in a
special manner.

The registers adjusted are:
 * MISCCPCTL
 * SAMPLER_INSTDONE
 * ROW_INSTDONE
 * ROW_CHICKEN2
 * HALF_SLICE_CHICKEN1
 * HALF_SLICE_CHICKEN3

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c |  4 ++--
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 11 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c   | 22 +--
 .../gpu/drm/i915/gt/uc/intel_guc_capture.c|  4 ++--
 drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c |  2 +-
 drivers/gpu/drm/i915/gvt/handlers.c   |  2 +-
 drivers/gpu/drm/i915/gvt/mmio_context.c   |  2 +-
 drivers/gpu/drm/i915/intel_gvt_mmio_table.c   |  2 +-
 drivers/gpu/drm/i915/intel_pm.c   | 10 -
 9 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 275ad72940c1..29980bff9c10 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1544,11 +1544,11 @@ void intel_engine_get_instdone(const struct 
intel_engine_cs *engine,
for_each_ss_steering(iter, engine->gt, slice, subslice) {
instdone->sampler[slice][subslice] =
intel_gt_mcr_read(engine->gt,
- GEN7_SAMPLER_INSTDONE,
+ GEN8_SAMPLER_INSTDONE,
  slice, subslice);
instdone->row[slice][subslice] =
intel_gt_mcr_read(engine->gt,
- GEN7_ROW_INSTDONE,
+ GEN8_ROW_INSTDONE,
  slice, subslice);
}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index d414785003cc..ad8e2b52ff64 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -644,6 +644,9 @@
 
 #define GEN7_MISCCPCTL _MMIO(0x9424)
 #define   GEN7_DOP_CLOCK_GATE_ENABLE   (1 << 0)
+
+#define GEN8_MISCCPCTL _MMIO(0x9424)
+#define   GEN8_DOP_CLOCK_GATE_ENABLE   REG_BIT(0)
 #define   GEN12_DOP_CLOCK_GATE_RENDER_ENABLE   REG_BIT(1)
 #define   GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE (1 << 2)
 #define   GEN8_DOP_CLOCK_GATE_GUC_ENABLE   (1 << 4)
@@ -1063,18 +1066,22 @@
 #define GEN12_GAM_DONE _MMIO(0xcf68)
 
 #define GEN7_HALF_SLICE_CHICKEN1   _MMIO(0xe100) /* IVB GT1 + VLV 
*/
+#define GEN8_HALF_SLICE_CHICKEN1   _MMIO(0xe100)
 #define   GEN7_MAX_PS_THREAD_DEP   (8 << 12)
 #define   GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE  (1 << 10)
 #define   GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE  (1 << 4)
 #define   GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE (1 << 3)
 
 #define GEN7_SAMPLER_INSTDONE  _MMIO(0xe160)
+#define GEN8_SAMPLER_INSTDONE  _MMIO(0xe160)
 #define GEN7_ROW_INSTDONE  _MMIO(0xe164)
+#define GEN8_ROW_INSTDONE  _MMIO(0xe164)
 
 #define HALF_SLICE_CHICKEN2_MMIO(0xe180)
 #define   GEN8_ST_PO_DISABLE   (1 << 13)
 
-#define HALF_SLICE_CHICKEN3_MMIO(0xe184)
+#define HSW_HALF_SLICE_CHICKEN3_MMIO(0xe184)
+#define GEN8_HALF_SLICE_CHICKEN3   _MMIO(0xe184)
 #define   HSW_SAMPLE_C_PERFORMANCE (1 << 9)
 #define   GEN8_CENTROID_PIXEL_OPT_DIS  (1 << 8)
 #define   GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC  (1 << 5)
@@ -1127,6 +1134,8 @@
 #define   DISABLE_EARLY_EOTREG_BIT(1)
 
 #define GEN7_ROW_CHICKEN2  _MMIO(0xe4f4)
+
+#define GEN8_ROW_CHICKEN2  _MMIO(0xe4f4)
 #define   GEN12_DISABLE_READ_SUPPRESSION   REG_BIT(15)
 #define   GEN12_DISABLE_EARLY_READ REG_BIT(14)
 #define   GEN12_ENABLE_LARGE_GRF_MODE  REG_BIT(12)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 6d2003d598e6..e382cb4c6971 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -295,10 +295,10 @@ static void bdw_ctx_workarounds_init(struct 
intel_engine_cs *engine,
 * Also see the

[PATCH 0/7] i915: Prep work for explicit MCR handling

2022-09-01 Thread Matt Roper
Steering of multicast/replicated registers becomes a bit more
complicated on Meteor Lake.  Whereas previously the control register we
used to manage the steering was only used by our driver[*], software's
control of steering has now been consolidated with the controls for
various other hardware/firmware agents into a single register.  We can
no longer utilize pre-programmed implicit steering since other firmware
agents may change the steering target and not restore it afterward;
we'll need to explicitly steer all types of MCR registers (including the
GSLICE/COMPUTE/DSS ranges that have been handled implicitly in the
past).  Furthermore, since multiple agents will now be sharing a single
steering control register, races are possible.  To address this, the
hardware adds a new MCR semaphore register which is supposed to be used
to temporarily lock the steering while performing MCR operations.

It's going to become important for us to handle accesses of multicast
registers very explicitly going forward.  This series provides some prep
work for that by updating our register definitions to clearly define
registers as either MCR or non-MCR and ensure that we're using the
intel_gt_mcr_*() functions rather than intel_uncore_*() when operating
on MCR registers.  In a future series we plan to change the MCR_REG()
register definitions to actually declare MCR registers as a new C type
(i.e., not an i915_reg_t) so that the compiler will be able to help us
find any mistakes where non-MCR functions are used on MCR registers and
vice-versa.  Introduction of the MTL steering tables and introduction of
the steering semaphore support will also arrive in future patch series.


[*] This is a bit of an oversimplification; there are some hardware and
software debug tools that use the same MCR_SELECTOR register that i915
does and which could potentially re-steer MCR accesses behind our back.
E.g., simply using IGT's "intel_reg" tool to write the MCR_SELECTOR
register at the wrong time could interfere with driver operation.  But
given that these debug facilities require root privileges to run and are
only used by people intentionally debugging the driver or hardware, we
can ignore such races for real-world usage.


Matt Roper (7):
  drm/i915/gen8: Create separate reg definitions for new MCR registers
  drm/i915/xehp: Create separate reg definitions for new MCR registers
  drm/i915/gt: Drop a few unused register definitions
  drm/i915/gt: Correct prefix on a few registers
  drm/i915: Define MCR registers explicitly
  drm/i915/gt: Always use MCR functions on multicast registers
  drm/i915/gt: Add MCR-specific workaround initializers

 drivers/gpu/drm/i915/gt/intel_engine_cs.c |   4 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |   4 +-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 138 +++---
 drivers/gpu/drm/i915/gt/intel_gtt.c   |  44 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   |   2 +-
 drivers/gpu/drm/i915/gt/intel_mocs.c  |  12 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c   | 426 +++---
 .../gpu/drm/i915/gt/intel_workarounds_types.h |   4 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c|   7 +-
 .../gpu/drm/i915/gt/uc/intel_guc_capture.c|   4 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c |  12 +-
 drivers/gpu/drm/i915/gvt/handlers.c   |   2 +-
 drivers/gpu/drm/i915/gvt/mmio_context.c   |   2 +-
 drivers/gpu/drm/i915/intel_gvt_mmio_table.c   |   2 +-
 drivers/gpu/drm/i915/intel_pm.c   |  20 +-
 15 files changed, 397 insertions(+), 286 deletions(-)

-- 
2.37.2



[PATCH v2 4/8] drm/i915: Prepare more multi-GT initialization

2022-08-29 Thread Matt Roper
We're going to introduce an additional intel_gt for MTL's media unit
soon.  Let's provide a bit more multi-GT initialization framework in
preparation for that.  The initialization will pull the list of GTs for
a platform from the device info structure.  Although necessary for the
immediate MTL media enabling, this same framework will also be used
farther down the road when we enable remote tiles on xehpsdv and pvc.

v2:
 - Re-add missing test for !HAS_EXTRA_GT_LIST in intel_gt_probe_all().

Cc: Aravind Iddamsetty 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c |  2 +-
 drivers/gpu/drm/i915/gt/intel_gt.c| 51 +--
 drivers/gpu/drm/i915/gt/intel_gt.h|  1 -
 drivers/gpu/drm/i915/gt/intel_gt_types.h  |  3 ++
 drivers/gpu/drm/i915/i915_drv.h   |  2 +
 drivers/gpu/drm/i915/intel_device_info.h  | 16 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
 7 files changed, 70 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 275ad72940c1..41acc285e8bf 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -736,7 +736,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt 
*gt)
u16 vdbox_mask;
u16 vebox_mask;
 
-   info->engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;
+   GEM_BUG_ON(!info->engine_mask);
 
if (GRAPHICS_VER(i915) < 11)
return info->engine_mask;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index cf7aab7adb30..7b880dbed6ce 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -807,17 +807,16 @@ static void
 intel_gt_tile_cleanup(struct intel_gt *gt)
 {
intel_uncore_cleanup_mmio(gt->uncore);
-
-   if (!gt_is_root(gt))
-   kfree(gt);
 }
 
 int intel_gt_probe_all(struct drm_i915_private *i915)
 {
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
struct intel_gt *gt = &i915->gt0;
+   const struct intel_gt_definition *gtdef;
phys_addr_t phys_addr;
unsigned int mmio_bar;
+   unsigned int i;
int ret;
 
mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR;
@@ -828,14 +827,58 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
 * and it has been already initialized early during probe
 * in i915_driver_probe()
 */
+   gt->i915 = i915;
+   gt->name = "Primary GT";
+   gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;
+
+   drm_dbg(&i915->drm, "Setting up %s\n", gt->name);
ret = intel_gt_tile_setup(gt, phys_addr);
if (ret)
return ret;
 
i915->gt[0] = gt;
 
-   /* TODO: add more tiles */
+   if (!HAS_EXTRA_GT_LIST(i915))
+   return 0;
+
+   for (i = 1, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1];
+gtdef->setup != NULL;
+i++, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]) {
+   gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL);
+   if (!gt) {
+   ret = -ENOMEM;
+   goto err;
+   }
+
+   gt->i915 = i915;
+   gt->name = gtdef->name;
+   gt->type = gtdef->type;
+   gt->info.engine_mask = gtdef->engine_mask;
+   gt->info.id = i;
+
+   drm_dbg(&i915->drm, "Setting up %s\n", gt->name);
+   if (GEM_WARN_ON(range_overflows_t(resource_size_t,
+ gtdef->mapping_base,
+ SZ_16M,
+ pci_resource_len(pdev, 
mmio_bar {
+   ret = -ENODEV;
+   goto err;
+   }
+
+   ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base);
+   if (ret)
+   goto err;
+
+   i915->gt[i] = gt;
+   }
+
return 0;
+
+err:
+   i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, 
ret);
+   intel_gt_release_all(i915);
+
+   return ret;
 }
 
 int intel_gt_tiles_init(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
b/drivers/gpu/drm/i915/gt/intel_gt.h
index 40b06adf509a..4d8779529cc2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -54,7 +54,6 @@ void intel_gt_driver_register(struct intel_gt *gt);
 void intel_gt_driver_unregister(struct intel_gt *gt);
 void intel_gt_driver_remove(struct intel_gt *gt);
 void intel_gt_driver_release(struct i

[PATCH 1/8] drm/i915: Move locking and unclaimed check into mmio_debug_{suspend, resume}

2022-08-29 Thread Matt Roper
Moving the locking for MMIO debug (and the final check for unclaimed
accesses when resuming debug after a userspace-initiated forcewake) will
make it simpler to completely skip MMIO debug handling on uncores that
don't support it in future patches.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/intel_uncore.c | 41 +++--
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
b/drivers/gpu/drm/i915/intel_uncore.c
index 9b81b2543ce2..e717ea55484a 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -50,23 +50,33 @@ intel_uncore_mmio_debug_init_early(struct 
intel_uncore_mmio_debug *mmio_debug)
mmio_debug->unclaimed_mmio_check = 1;
 }
 
-static void mmio_debug_suspend(struct intel_uncore_mmio_debug *mmio_debug)
+static void mmio_debug_suspend(struct intel_uncore *uncore)
 {
-   lockdep_assert_held(&mmio_debug->lock);
+   spin_lock(&uncore->debug->lock);
 
/* Save and disable mmio debugging for the user bypass */
-   if (!mmio_debug->suspend_count++) {
-   mmio_debug->saved_mmio_check = mmio_debug->unclaimed_mmio_check;
-   mmio_debug->unclaimed_mmio_check = 0;
+   if (!uncore->debug->suspend_count++) {
+   uncore->debug->saved_mmio_check = 
uncore->debug->unclaimed_mmio_check;
+   uncore->debug->unclaimed_mmio_check = 0;
}
+
+   spin_unlock(&uncore->debug->lock);
 }
 
-static void mmio_debug_resume(struct intel_uncore_mmio_debug *mmio_debug)
+static bool check_for_unclaimed_mmio(struct intel_uncore *uncore);
+
+static void mmio_debug_resume(struct intel_uncore *uncore)
 {
-   lockdep_assert_held(&mmio_debug->lock);
+   spin_lock(&uncore->debug->lock);
+
+   if (!--uncore->debug->suspend_count)
+   uncore->debug->unclaimed_mmio_check = 
uncore->debug->saved_mmio_check;
 
-   if (!--mmio_debug->suspend_count)
-   mmio_debug->unclaimed_mmio_check = mmio_debug->saved_mmio_check;
+   if (check_for_unclaimed_mmio(uncore))
+   drm_info(&uncore->i915->drm,
+"Invalid mmio detected during user access\n");
+
+   spin_unlock(&uncore->debug->lock);
 }
 
 static const char * const forcewake_domain_names[] = {
@@ -677,9 +687,7 @@ void intel_uncore_forcewake_user_get(struct intel_uncore 
*uncore)
spin_lock_irq(&uncore->lock);
if (!uncore->user_forcewake_count++) {
intel_uncore_forcewake_get__locked(uncore, FORCEWAKE_ALL);
-   spin_lock(&uncore->debug->lock);
-   mmio_debug_suspend(uncore->debug);
-   spin_unlock(&uncore->debug->lock);
+   mmio_debug_suspend(uncore);
}
spin_unlock_irq(&uncore->lock);
 }
@@ -695,14 +703,7 @@ void intel_uncore_forcewake_user_put(struct intel_uncore 
*uncore)
 {
spin_lock_irq(&uncore->lock);
if (!--uncore->user_forcewake_count) {
-   spin_lock(&uncore->debug->lock);
-   mmio_debug_resume(uncore->debug);
-
-   if (check_for_unclaimed_mmio(uncore))
-   drm_info(&uncore->i915->drm,
-"Invalid mmio detected during user access\n");
-   spin_unlock(&uncore->debug->lock);
-
+   mmio_debug_resume(uncore);
intel_uncore_forcewake_put__locked(uncore, FORCEWAKE_ALL);
}
spin_unlock_irq(&uncore->lock);
-- 
2.37.2



[PATCH 6/8] drm/i915/xelpmp: Expose media as another GT

2022-08-29 Thread Matt Roper
Xe_LPM+ platforms have "standalone media."  I.e., the media unit is
designed as an additional GT with its own engine list, GuC, forcewake,
etc.  Let's allow platforms to include media GTs in their device info.

Cc: Aravind Iddamsetty 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/Makefile|  1 +
 drivers/gpu/drm/i915/gt/intel_gt.c   | 12 ++--
 drivers/gpu/drm/i915/gt/intel_gt_regs.h  |  8 +
 drivers/gpu/drm/i915/gt/intel_sa_media.c | 39 
 drivers/gpu/drm/i915/gt/intel_sa_media.h | 15 +
 drivers/gpu/drm/i915/i915_pci.c  | 15 +
 drivers/gpu/drm/i915/intel_device_info.h |  5 ++-
 drivers/gpu/drm/i915/intel_uncore.c  | 16 --
 drivers/gpu/drm/i915/intel_uncore.h  | 20 ++--
 9 files changed, 123 insertions(+), 8 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 522ef9b4aff3..e83e4cd46968 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -123,6 +123,7 @@ gt-y += \
gt/intel_ring.o \
gt/intel_ring_submission.o \
gt/intel_rps.o \
+   gt/intel_sa_media.o \
gt/intel_sseu.o \
gt/intel_sseu_debugfs.o \
gt/intel_timeline.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index d21ec11346a5..2a29502289cb 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -776,10 +776,15 @@ void intel_gt_driver_late_release_all(struct 
drm_i915_private *i915)
}
 }
 
-static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
+static int intel_gt_tile_setup(struct intel_gt *gt,
+  phys_addr_t phys_addr,
+  u32 gsi_offset)
 {
int ret;
 
+   /* GSI offset is only applicable for media GTs */
+   drm_WARN_ON(>->i915->drm, gsi_offset);
+
if (!gt_is_root(gt)) {
struct intel_uncore *uncore;
 
@@ -832,7 +837,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;
 
drm_dbg(&i915->drm, "Setting up %s\n", gt->name);
-   ret = intel_gt_tile_setup(gt, phys_addr);
+   ret = intel_gt_tile_setup(gt, phys_addr, 0);
if (ret)
return ret;
 
@@ -862,7 +867,8 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
goto err;
}
 
-   ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base);
+   ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base,
+  gtdef->gsi_offset);
if (ret)
goto err;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 94f9ddcfb3a5..05a40ef33258 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1576,4 +1576,12 @@
 
 #define GEN12_SFC_DONE(n)  _MMIO(0x1cc000 + (n) * 0x1000)
 
+/*
+ * Standalone Media's non-engine GT registers are located at their regular GT
+ * offsets plus 0x38.  This extra offset is stored inside the intel_uncore
+ * structure so that the existing code can be used for both GTs without
+ * modification.
+ */
+#define MTL_MEDIA_GSI_BASE 0x38
+
 #endif /* __INTEL_GT_REGS__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c 
b/drivers/gpu/drm/i915/gt/intel_sa_media.c
new file mode 100644
index ..8c5c519457cc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include 
+
+#include "i915_drv.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_sa_media.h"
+
+int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr,
+  u32 gsi_offset)
+{
+   struct drm_i915_private *i915 = gt->i915;
+   struct intel_uncore *uncore;
+
+   uncore = drmm_kzalloc(&i915->drm, sizeof(*uncore), GFP_KERNEL);
+   if (!uncore)
+   return -ENOMEM;
+
+   uncore->gsi_offset = gsi_offset;
+
+   intel_gt_common_init_early(gt);
+   intel_uncore_init_early(uncore, gt);
+
+   /*
+* Standalone media shares the general MMIO space with the primary
+* GT.  We'll re-use the primary GT's mapping.
+*/
+   uncore->regs = i915->uncore.regs;
+   if (drm_WARN_ON(&i915->drm, uncore->regs == NULL))
+   return -EIO;
+
+   gt->uncore = uncore;
+   gt->phys_addr = phys_addr;
+
+   return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_sa_m

[PATCH 3/8] drm/i915: Use managed allocations for extra uncore objects

2022-08-29 Thread Matt Roper
We're slowly transitioning the init-time kzalloc's of the driver over to
DRM-managed allocations; let's make sure the uncore objects allocated
for non-root GTs are thus allocated.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index a82b5e2e0d83..cf7aab7adb30 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -783,7 +783,7 @@ static int intel_gt_tile_setup(struct intel_gt *gt, 
phys_addr_t phys_addr)
if (!gt_is_root(gt)) {
struct intel_uncore *uncore;
 
-   uncore = kzalloc(sizeof(*uncore), GFP_KERNEL);
+   uncore = drmm_kzalloc(>->i915->drm, sizeof(*uncore), 
GFP_KERNEL);
if (!uncore)
return -ENOMEM;
 
@@ -808,10 +808,8 @@ intel_gt_tile_cleanup(struct intel_gt *gt)
 {
intel_uncore_cleanup_mmio(gt->uncore);
 
-   if (!gt_is_root(gt)) {
-   kfree(gt->uncore);
+   if (!gt_is_root(gt))
kfree(gt);
-   }
 }
 
 int intel_gt_probe_all(struct drm_i915_private *i915)
-- 
2.37.2



[PATCH 7/8] drm/i915/mtl: Use primary GT's irq lock for media GT

2022-08-29 Thread Matt Roper
When we hook up interrupts (in the next patch), interrupts for the media
GT are still processed as part of the primary GT's interrupt flow.  As
such, we should share the same IRQ lock with the primary GT.  Let's
convert gt->irq_lock into a pointer and just point the media GT's
instance at the same lock the primary GT is using.

Cc: Daniele Ceraolo Spurio 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c |  8 +++---
 drivers/gpu/drm/i915/gt/intel_gt.c| 15 +--
 drivers/gpu/drm/i915/gt/intel_gt.h|  2 +-
 drivers/gpu/drm/i915/gt/intel_gt_irq.c| 16 ++--
 drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c |  8 +++---
 drivers/gpu/drm/i915/gt/intel_gt_types.h  |  2 +-
 drivers/gpu/drm/i915/gt/intel_rps.c   | 26 +--
 drivers/gpu/drm/i915/gt/intel_sa_media.c  |  1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.c| 24 -
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  4 +--
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |  4 +--
 drivers/gpu/drm/i915/i915_driver.c|  4 ++-
 drivers/gpu/drm/i915/i915_irq.c   |  4 +--
 drivers/gpu/drm/i915/pxp/intel_pxp.c  |  4 +--
 drivers/gpu/drm/i915/pxp/intel_pxp_irq.c  | 14 +-
 drivers/gpu/drm/i915/pxp/intel_pxp_session.c  |  4 +--
 16 files changed, 77 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 41acc285e8bf..6e0122b3dca2 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1688,9 +1688,9 @@ bool intel_engine_irq_enable(struct intel_engine_cs 
*engine)
return false;
 
/* Caller disables interrupts */
-   spin_lock(&engine->gt->irq_lock);
+   spin_lock(engine->gt->irq_lock);
engine->irq_enable(engine);
-   spin_unlock(&engine->gt->irq_lock);
+   spin_unlock(engine->gt->irq_lock);
 
return true;
 }
@@ -1701,9 +1701,9 @@ void intel_engine_irq_disable(struct intel_engine_cs 
*engine)
return;
 
/* Caller disables interrupts */
-   spin_lock(&engine->gt->irq_lock);
+   spin_lock(engine->gt->irq_lock);
engine->irq_disable(engine);
-   spin_unlock(&engine->gt->irq_lock);
+   spin_unlock(engine->gt->irq_lock);
 }
 
 void intel_engines_reset_default_submission(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 2a29502289cb..b974a6d23281 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -37,7 +37,7 @@
 
 void intel_gt_common_init_early(struct intel_gt *gt)
 {
-   spin_lock_init(>->irq_lock);
+   spin_lock_init(gt->irq_lock);
 
INIT_LIST_HEAD(>->closed_vma);
spin_lock_init(>->closed_lock);
@@ -58,14 +58,19 @@ void intel_gt_common_init_early(struct intel_gt *gt)
 }
 
 /* Preliminary initialization of Tile 0 */
-void intel_root_gt_init_early(struct drm_i915_private *i915)
+int intel_root_gt_init_early(struct drm_i915_private *i915)
 {
struct intel_gt *gt = to_gt(i915);
 
gt->i915 = i915;
gt->uncore = &i915->uncore;
+   gt->irq_lock = drmm_kzalloc(&i915->drm, sizeof(*gt->irq_lock), 
GFP_KERNEL);
+   if (!gt->irq_lock)
+   return -ENOMEM;
 
intel_gt_common_init_early(gt);
+
+   return 0;
 }
 
 static int intel_gt_probe_lmem(struct intel_gt *gt)
@@ -787,12 +792,18 @@ static int intel_gt_tile_setup(struct intel_gt *gt,
 
if (!gt_is_root(gt)) {
struct intel_uncore *uncore;
+   spinlock_t *irq_lock;
 
uncore = drmm_kzalloc(>->i915->drm, sizeof(*uncore), 
GFP_KERNEL);
if (!uncore)
return -ENOMEM;
 
+   irq_lock = drmm_kzalloc(>->i915->drm, sizeof(*irq_lock), 
GFP_KERNEL);
+   if (!irq_lock)
+   return -ENOMEM;
+
gt->uncore = uncore;
+   gt->irq_lock = irq_lock;
 
intel_gt_common_init_early(gt);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
b/drivers/gpu/drm/i915/gt/intel_gt.h
index c9a359f35d0f..2ee582e287c8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -45,7 +45,7 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc 
*gsc)
 }
 
 void intel_gt_common_init_early(struct intel_gt *gt);
-void intel_root_gt_init_early(struct drm_i915_private *i915);
+int intel_root_gt_init_early(struct drm_i915_private *i915);
 int intel_gt_assign_ggtt(struct intel_gt *gt);
 int intel_gt_init_mmio(struct intel_gt *gt);
 int __must_check intel_gt_init_hw(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_

[PATCH 8/8] drm/i915/mtl: Hook up interrupts for standalone media

2022-08-29 Thread Matt Roper
Top-level handling of standalone media interrupts will be processed as
part of the primary GT's interrupt handler (since primary and media GTs
share an MMIO space, unlike remote tile setups).  When we get down to
the point of handling engine interrupts, we need to take care to lookup
VCS and VECS engines in the media GT rather than the primary.

There are also a couple of additional "other" instance bits that
correspond to the media GT's GuC and media GT's power management
interrupts; we need to direct those to the media GT instance as well.

Bspec: 45605
Cc: Anusha Srivatsa 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_irq.c   | 19 +++
 drivers/gpu/drm/i915/gt/intel_gt_regs.h  |  2 ++
 drivers/gpu/drm/i915/gt/intel_sa_media.c |  7 +++
 drivers/gpu/drm/i915/i915_drv.h  |  3 +++
 4 files changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c 
b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 0dfd0c42d00d..f26882fdc24c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -59,11 +59,17 @@ static void
 gen11_other_irq_handler(struct intel_gt *gt, const u8 instance,
const u16 iir)
 {
+   struct intel_gt *media_gt = gt->i915->media_gt;
+
if (instance == OTHER_GUC_INSTANCE)
return guc_irq_handler(>->uc.guc, iir);
+   if (instance == OTHER_MEDIA_GUC_INSTANCE && media_gt)
+   return guc_irq_handler(&media_gt->uc.guc, iir);
 
if (instance == OTHER_GTPM_INSTANCE)
return gen11_rps_irq_handler(>->rps, iir);
+   if (instance == OTHER_MEDIA_GTPM_INSTANCE && media_gt)
+   return gen11_rps_irq_handler(&media_gt->rps, iir);
 
if (instance == OTHER_KCR_INSTANCE)
return intel_pxp_irq_handler(>->pxp, iir);
@@ -81,6 +87,18 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
 {
struct intel_engine_cs *engine;
 
+   /*
+* Platforms with standalone media have their media engines in another
+* GT.
+*/
+   if (MEDIA_VER(gt->i915) >= 13 &&
+   (class == VIDEO_DECODE_CLASS || class == VIDEO_ENHANCEMENT_CLASS)) {
+   if (!gt->i915->media_gt)
+   goto err;
+
+   gt = gt->i915->media_gt;
+   }
+
if (instance <= MAX_ENGINE_INSTANCE)
engine = gt->engine_class[class][instance];
else
@@ -89,6 +107,7 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
if (likely(engine))
return intel_engine_cs_irq(engine, iir);
 
+err:
WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n",
  class, instance);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 05a40ef33258..21c7a225157f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1552,6 +1552,8 @@
 #define   OTHER_GTPM_INSTANCE  1
 #define   OTHER_KCR_INSTANCE   4
 #define   OTHER_GSC_INSTANCE   6
+#define   OTHER_MEDIA_GUC_INSTANCE 16
+#define   OTHER_MEDIA_GTPM_INSTANCE17
 
 #define GEN11_IIR_REG_SELECTOR(x)  _MMIO(0x190070 + ((x) * 4))
 
diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c 
b/drivers/gpu/drm/i915/gt/intel_sa_media.c
index cf3053710bbf..41c270f103cf 100644
--- a/drivers/gpu/drm/i915/gt/intel_sa_media.c
+++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c
@@ -36,5 +36,12 @@ int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t 
phys_addr,
gt->uncore = uncore;
gt->phys_addr = phys_addr;
 
+   /*
+* For current platforms we can assume there's only a single
+* media GT and cache it for quick lookup.
+*/
+   drm_WARN_ON(&i915->drm, i915->media_gt);
+   i915->media_gt = gt;
+
return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d45dca70bfa6..917958d42805 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -497,6 +497,9 @@ struct drm_i915_private {
 
struct kobject *sysfs_gt;
 
+   /* Quick lookup of media GT (current platforms only have one) */
+   struct intel_gt *media_gt;
+
struct {
struct i915_gem_contexts {
spinlock_t lock; /* locks list */
-- 
2.37.2



[PATCH 4/8] drm/i915: Prepare more multi-GT initialization

2022-08-29 Thread Matt Roper
We're going to introduce an additional intel_gt for MTL's media unit
soon.  Let's provide a bit more multi-GT initialization framework in
preparation for that.  The initialization will pull the list of GTs for
a platform from the device info structure.  Although necessary for the
immediate MTL media enabling, this same framework will also be used
farther down the road when we enable remote tiles on xehpsdv and pvc.

Cc: Aravind Iddamsetty 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c |  2 +-
 drivers/gpu/drm/i915/gt/intel_gt.c| 48 +--
 drivers/gpu/drm/i915/gt/intel_gt.h|  1 -
 drivers/gpu/drm/i915/gt/intel_gt_types.h  |  3 ++
 drivers/gpu/drm/i915/i915_drv.h   |  2 +
 drivers/gpu/drm/i915/intel_device_info.h  | 16 +++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
 7 files changed, 67 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 275ad72940c1..41acc285e8bf 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -736,7 +736,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt 
*gt)
u16 vdbox_mask;
u16 vebox_mask;
 
-   info->engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;
+   GEM_BUG_ON(!info->engine_mask);
 
if (GRAPHICS_VER(i915) < 11)
return info->engine_mask;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index cf7aab7adb30..7c0525e96155 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -807,17 +807,16 @@ static void
 intel_gt_tile_cleanup(struct intel_gt *gt)
 {
intel_uncore_cleanup_mmio(gt->uncore);
-
-   if (!gt_is_root(gt))
-   kfree(gt);
 }
 
 int intel_gt_probe_all(struct drm_i915_private *i915)
 {
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
struct intel_gt *gt = &i915->gt0;
+   const struct intel_gt_definition *gtdef;
phys_addr_t phys_addr;
unsigned int mmio_bar;
+   unsigned int i;
int ret;
 
mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR;
@@ -828,14 +827,55 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
 * and it has been already initialized early during probe
 * in i915_driver_probe()
 */
+   gt->i915 = i915;
+   gt->name = "Primary GT";
+   gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;
+
+   drm_dbg(&i915->drm, "Setting up %s\n", gt->name);
ret = intel_gt_tile_setup(gt, phys_addr);
if (ret)
return ret;
 
i915->gt[0] = gt;
 
-   /* TODO: add more tiles */
+   for (i = 1, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1];
+gtdef->setup != NULL;
+i++, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]) {
+   gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL);
+   if (!gt) {
+   ret = -ENOMEM;
+   goto err;
+   }
+
+   gt->i915 = i915;
+   gt->name = gtdef->name;
+   gt->type = gtdef->type;
+   gt->info.engine_mask = gtdef->engine_mask;
+   gt->info.id = i;
+
+   drm_dbg(&i915->drm, "Setting up %s\n", gt->name);
+   if (GEM_WARN_ON(range_overflows_t(resource_size_t,
+ gtdef->mapping_base,
+ SZ_16M,
+ pci_resource_len(pdev, 
mmio_bar {
+   ret = -ENODEV;
+   goto err;
+   }
+
+   ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base);
+   if (ret)
+   goto err;
+
+   i915->gt[i] = gt;
+   }
+
return 0;
+
+err:
+   i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, 
ret);
+   intel_gt_release_all(i915);
+
+   return ret;
 }
 
 int intel_gt_tiles_init(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
b/drivers/gpu/drm/i915/gt/intel_gt.h
index 40b06adf509a..4d8779529cc2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -54,7 +54,6 @@ void intel_gt_driver_register(struct intel_gt *gt);
 void intel_gt_driver_unregister(struct intel_gt *gt);
 void intel_gt_driver_remove(struct intel_gt *gt);
 void intel_gt_driver_release(struct intel_gt *gt);
-
 void intel_gt_driver_late_release_all(struct drm_i915_private *i915);
 
 int intel_gt_wait_for_idle(struct intel_gt *gt, l

[PATCH 5/8] drm/i915: Rename and expose common GT early init routine

2022-08-29 Thread Matt Roper
The common early GT init is needed for initialization of all GT types
(root/primary, remote tile, standalone media).  Since standalone media
(coming in the next patch) will be implemented in a separate file,
rename and expose the function for use.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt.c | 6 +++---
 drivers/gpu/drm/i915/gt/intel_gt.h | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 7c0525e96155..d21ec11346a5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -35,7 +35,7 @@
 #include "intel_uncore.h"
 #include "shmem_utils.h"
 
-static void __intel_gt_init_early(struct intel_gt *gt)
+void intel_gt_common_init_early(struct intel_gt *gt)
 {
spin_lock_init(>->irq_lock);
 
@@ -65,7 +65,7 @@ void intel_root_gt_init_early(struct drm_i915_private *i915)
gt->i915 = i915;
gt->uncore = &i915->uncore;
 
-   __intel_gt_init_early(gt);
+   intel_gt_common_init_early(gt);
 }
 
 static int intel_gt_probe_lmem(struct intel_gt *gt)
@@ -789,7 +789,7 @@ static int intel_gt_tile_setup(struct intel_gt *gt, 
phys_addr_t phys_addr)
 
gt->uncore = uncore;
 
-   __intel_gt_init_early(gt);
+   intel_gt_common_init_early(gt);
}
 
intel_uncore_init_early(gt->uncore, gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
b/drivers/gpu/drm/i915/gt/intel_gt.h
index 4d8779529cc2..c9a359f35d0f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -44,6 +44,7 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc 
*gsc)
return container_of(gsc, struct intel_gt, gsc);
 }
 
+void intel_gt_common_init_early(struct intel_gt *gt);
 void intel_root_gt_init_early(struct drm_i915_private *i915);
 int intel_gt_assign_ggtt(struct intel_gt *gt);
 int intel_gt_init_mmio(struct intel_gt *gt);
-- 
2.37.2



[PATCH 2/8] drm/i915: Only hook up uncore->debug for primary uncore

2022-08-29 Thread Matt Roper
The original intent of intel_uncore_mmio_debug as described in commit
0a9b26306d6a ("drm/i915: split out uncore_mmio_debug") was to be a
singleton structure that could be shared between multiple GTs' uncore
objects in a multi-tile system.  Somehow we went off track and
started allocating separate instances of this structure for each GT,
which defeats that original goal.

But in reality, there isn't even a need to share the mmio_debug between
multiple GTs; on all modern platforms (i.e., everything after gen7)
unclaimed register accesses are something that can only be detected for
display registers.  There's no point in grabbing the debug spinlock and
checking for unclaimed accesses on an uncore used by an xehpsdv or pvc
remote tile GT, or the uncore used by a mtl standalone media GT since
all of the display accesses go through the primary intel_uncore.

The simplest solution is to simply leave uncore->debug NULL on all
intel_uncore instances except for the primary one.  This will allow us
to avoid the pointless debug spinlock acquisition we've been doing on
MMIO accesses coming in through these intel_uncores.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt.c  |  9 -
 drivers/gpu/drm/i915/i915_driver.c  |  2 +-
 drivers/gpu/drm/i915/intel_uncore.c | 23 ++-
 drivers/gpu/drm/i915/intel_uncore.h |  3 +--
 4 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index e4bac2431e41..a82b5e2e0d83 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -781,21 +781,13 @@ static int intel_gt_tile_setup(struct intel_gt *gt, 
phys_addr_t phys_addr)
int ret;
 
if (!gt_is_root(gt)) {
-   struct intel_uncore_mmio_debug *mmio_debug;
struct intel_uncore *uncore;
 
uncore = kzalloc(sizeof(*uncore), GFP_KERNEL);
if (!uncore)
return -ENOMEM;
 
-   mmio_debug = kzalloc(sizeof(*mmio_debug), GFP_KERNEL);
-   if (!mmio_debug) {
-   kfree(uncore);
-   return -ENOMEM;
-   }
-
gt->uncore = uncore;
-   gt->uncore->debug = mmio_debug;
 
__intel_gt_init_early(gt);
}
@@ -817,7 +809,6 @@ intel_gt_tile_cleanup(struct intel_gt *gt)
intel_uncore_cleanup_mmio(gt->uncore);
 
if (!gt_is_root(gt)) {
-   kfree(gt->uncore->debug);
kfree(gt->uncore);
kfree(gt);
}
diff --git a/drivers/gpu/drm/i915/i915_driver.c 
b/drivers/gpu/drm/i915/i915_driver.c
index 053a7dab5506..de9020771836 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -326,7 +326,7 @@ static int i915_driver_early_probe(struct drm_i915_private 
*dev_priv)
intel_device_info_subplatform_init(dev_priv);
intel_step_init(dev_priv);
 
-   intel_uncore_mmio_debug_init_early(&dev_priv->mmio_debug);
+   intel_uncore_mmio_debug_init_early(dev_priv);
 
spin_lock_init(&dev_priv->irq_lock);
spin_lock_init(&dev_priv->gpu_error.lock);
diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
b/drivers/gpu/drm/i915/intel_uncore.c
index e717ea55484a..6841f76533f9 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -44,14 +44,19 @@ fw_domains_get(struct intel_uncore *uncore, enum 
forcewake_domains fw_domains)
 }
 
 void
-intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug)
+intel_uncore_mmio_debug_init_early(struct drm_i915_private *i915)
 {
-   spin_lock_init(&mmio_debug->lock);
-   mmio_debug->unclaimed_mmio_check = 1;
+   spin_lock_init(&i915->mmio_debug.lock);
+   i915->mmio_debug.unclaimed_mmio_check = 1;
+
+   i915->uncore.debug = &i915->mmio_debug;
 }
 
 static void mmio_debug_suspend(struct intel_uncore *uncore)
 {
+   if (!uncore->debug)
+   return;
+
spin_lock(&uncore->debug->lock);
 
/* Save and disable mmio debugging for the user bypass */
@@ -67,6 +72,9 @@ static bool check_for_unclaimed_mmio(struct intel_uncore 
*uncore);
 
 static void mmio_debug_resume(struct intel_uncore *uncore)
 {
+   if (!uncore->debug)
+   return;
+
spin_lock(&uncore->debug->lock);
 
if (!--uncore->debug->suspend_count)
@@ -1705,7 +1713,7 @@ unclaimed_reg_debug(struct intel_uncore *uncore,
const bool read,
const bool before)
 {
-   if (likely(!uncore->i915->params.mmio_debug))
+   if (likely(!uncore->i915->params.mmio_debug) || !uncore->debug)
return;
 
/* interrupts are disabled and re-enabled around uncore->lock usage 

[PATCH 0/8] i915: Add "standalone media" support for MTL

2022-08-29 Thread Matt Roper
Starting with MTL, media functionality has moved into a new, second GT
at the hardware level.  This new GT, referred to as "standalone media"
in the spec, has its own GuC, power management/forcewake, etc.  The
general non-engine GT registers for standalone media start at 0x38,
but otherwise use the same MMIO offsets as the primary GT.

Standalone media has a lot of similarity to the remote tiles
present on platforms like xehpsdv and pvc, and our i915 implementation 
can share much of the general "multi GT" infrastructure between the two
types of platforms.  However there are a few notable differences
we must deal with:
 - The 0x38 offset only applies to the non-engine GT registers
   (which the specs refer to as "GSI" registers).  The engine registers
   remain at their usual locations (e.g., 0x1C for VCS0).
 - Unlike platforms with remote tiles, all interrupt handling for
   standalone media still happens via the primary GT.


Matt Roper (8):
  drm/i915: Move locking and unclaimed check into
mmio_debug_{suspend,resume}
  drm/i915: Only hook up uncore->debug for primary uncore
  drm/i915: Use managed allocations for extra uncore objects
  drm/i915: Prepare more multi-GT initialization
  drm/i915: Rename and expose common GT early init routine
  drm/i915/xelpmp: Expose media as another GT
  drm/i915/mtl: Use primary GT's irq lock for media GT
  drm/i915/mtl: Hook up interrupts for standalone media

 drivers/gpu/drm/i915/Makefile |  1 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 10 +--
 drivers/gpu/drm/i915/gt/intel_gt.c| 88 ++-
 drivers/gpu/drm/i915/gt/intel_gt.h|  4 +-
 drivers/gpu/drm/i915/gt/intel_gt_irq.c| 35 ++--
 drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c |  8 +-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 10 +++
 drivers/gpu/drm/i915/gt/intel_gt_types.h  |  5 +-
 drivers/gpu/drm/i915/gt/intel_rps.c   | 26 +++---
 drivers/gpu/drm/i915/gt/intel_sa_media.c  | 47 ++
 drivers/gpu/drm/i915/gt/intel_sa_media.h  | 15 
 drivers/gpu/drm/i915/gt/uc/intel_guc.c| 24 ++---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |  4 +-
 drivers/gpu/drm/i915/i915_driver.c|  6 +-
 drivers/gpu/drm/i915/i915_drv.h   |  5 ++
 drivers/gpu/drm/i915/i915_irq.c   |  4 +-
 drivers/gpu/drm/i915/i915_pci.c   | 15 
 drivers/gpu/drm/i915/intel_device_info.h  | 19 
 drivers/gpu/drm/i915/intel_uncore.c   | 80 +++--
 drivers/gpu/drm/i915/intel_uncore.h   | 23 -
 drivers/gpu/drm/i915/pxp/intel_pxp.c  |  4 +-
 drivers/gpu/drm/i915/pxp/intel_pxp_irq.c  | 14 +--
 drivers/gpu/drm/i915/pxp/intel_pxp_session.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
 25 files changed, 340 insertions(+), 116 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.h

-- 
2.37.2



Re: [PATCH v2 15/21] drm/i915/mtl: Obtain SAGV values from MMIO instead of GT pcode mailbox

2022-08-26 Thread Matt Roper
On Thu, Aug 18, 2022 at 04:41:56PM -0700, Radhakrishna Sripada wrote:
> From Meteorlake, Latency Level, SAGV bloack time are read from
> LATENCY_SAGV register instead of the GT driver pcode mailbox. DDR type
> and QGV information are also to be read from Mem SS registers.
> 
> v2:
>  - Simplify MTL_MEM_SS_INFO_QGV_POINT macro(MattR)
>  - Nit: Rearrange the bit def's from higher to lower(MattR)
>  - Restore platform definition for ADL-P(MattR)
>  - Move back intel_qgv_point def to intel_bw.c(Jani)
> Bspec: 64636, 64608
> 
> Cc: Matt Roper 
> Cc: Jani Nikula 
> Original Author: Caz Yokoyama
> Signed-off-by: José Roberto de Souza 
> Signed-off-by: Radhakrishna Sripada 
> ---
>  drivers/gpu/drm/i915/display/intel_bw.c | 42 ++---
>  drivers/gpu/drm/i915/i915_reg.h | 16 ++
>  drivers/gpu/drm/i915/intel_dram.c   | 41 +++-
>  drivers/gpu/drm/i915/intel_pm.c |  8 -
>  4 files changed, 100 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_bw.c 
> b/drivers/gpu/drm/i915/display/intel_bw.c
> index 79269d2c476b..46b63afd536a 100644
> --- a/drivers/gpu/drm/i915/display/intel_bw.c
> +++ b/drivers/gpu/drm/i915/display/intel_bw.c
> @@ -137,6 +137,42 @@ int icl_pcode_restrict_qgv_points(struct 
> drm_i915_private *dev_priv,
>   return 0;
>  }
>  
> +static int mtl_read_qgv_point_info(struct drm_i915_private *dev_priv,
> +struct intel_qgv_point *sp, int point)
> +{
> + u32 val, val2;
> + u16 dclk;
> +
> + val = intel_uncore_read(&dev_priv->uncore,
> + MTL_MEM_SS_INFO_QGV_POINT_LOW(point));
> + val2 = intel_uncore_read(&dev_priv->uncore,
> +  MTL_MEM_SS_INFO_QGV_POINT_HIGH(point));
> + dclk = REG_FIELD_GET(MTL_DCLK_MASK, val);
> + sp->dclk = DIV_ROUND_UP((16667 * dclk), 1000);
> + sp->t_rp = REG_FIELD_GET(MTL_TRP_MASK, val);
> + sp->t_rcd = REG_FIELD_GET(MTL_TRCD_MASK, val);
> +
> + sp->t_rdpre = REG_FIELD_GET(MTL_TRDPRE_MASK, val2);
> + sp->t_ras = REG_FIELD_GET(MTL_TRAS_MASK, val2);
> +
> + sp->t_rc = sp->t_rp + sp->t_ras;
> +
> + return 0;
> +}
> +
> +static int
> +intel_read_qgv_point_info(struct drm_i915_private *dev_priv,
> +   struct intel_qgv_point *sp,
> +   int point)
> +{
> + if (DISPLAY_VER(dev_priv) >= 14)
> + return mtl_read_qgv_point_info(dev_priv, sp, point);
> + else if (IS_DG1(dev_priv))
> + return dg1_mchbar_read_qgv_point_info(dev_priv, sp, point);
> + else
> + return icl_pcode_read_qgv_point_info(dev_priv, sp, point);
> +}
> +
>  static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> struct intel_qgv_info *qi,
> bool is_y_tile)
> @@ -193,11 +229,7 @@ static int icl_get_qgv_points(struct drm_i915_private 
> *dev_priv,
>   for (i = 0; i < qi->num_points; i++) {
>   struct intel_qgv_point *sp = &qi->points[i];
>  
> - if (IS_DG1(dev_priv))
> - ret = dg1_mchbar_read_qgv_point_info(dev_priv, sp, i);
> - else
> - ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> -
> + ret = intel_read_qgv_point_info(dev_priv, sp, i);
>   if (ret)
>   return ret;
>  
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index b2d5e1230c25..5245af8d0ea8 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8397,4 +8397,20 @@ enum skl_power_gate {
>  #define  MTL_LATENCY_LEVEL0_2_4_MASK REG_GENMASK(12, 0)
>  #define  MTL_LATENCY_LEVEL1_3_5_MASK REG_GENMASK(28, 16)
>  
> +#define MTL_LATENCY_SAGV _MMIO(0x4578c)
> +#define  MTL_LATENCY_QCLK_SAGV   REG_GENMASK(12, 0)

Minor nitpick:  we usually have two additional spaces (for a total of
three) between the 'define' and the field name.

> +
> +#define MTL_MEM_SS_INFO_GLOBAL   _MMIO(0x45700)
> +#define  MTL_DDR_TYPE_MASK   REG_GENMASK(3, 0)
> +#define  MTL_N_OF_POPULATED_CH_MASK  REG_GENMASK(7, 4)
> +#define  MTL_N_OF_ENABLED_QGV_POINTS_MASKREG_GENMASK(11, 8)

Another nitpick:  we usually order fields from low to high (which is
also usually how the spec orders them).

> +
> +#define MTL_MEM_SS_INFO_QGV_POINT_LOW(point)  _MMIO(0x45710 + (point) * 2)
> +#define MTL_MEM_SS_INFO_QGV_POINT_HIGH(point) _MMIO(0x45714 + 
> (point) * 2)
> +#def

[PATCH] drm/i915/ats-m: Add thread execution tuning setting

2022-08-26 Thread Matt Roper
On client DG2 platforms, optimal performance is achieved with the
hardware's default "age based" thread execution setting.  However on
ATS-M, switching this to "round robin after dependencies" provides
better performance.  We'll add a new "tuning" feature flag to the ATS-M
device info to enable/disable this setting.

Bspec: 68331
Cc: Lucas De Marchi 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 2 ++
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +
 drivers/gpu/drm/i915/i915_pci.c | 1 +
 drivers/gpu/drm/i915/intel_device_info.h| 1 +
 4 files changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 94f9ddcfb3a5..d414785003cc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1110,6 +1110,8 @@
 #define   GEN12_DISABLE_TDL_PUSH   REG_BIT(9)
 #define   GEN11_DIS_PICK_2ND_EUREG_BIT(7)
 #define   GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX  REG_BIT(4)
+#define   THREAD_EX_ARB_MODE   REG_GENMASK(3, 2)
+#define   THREAD_EX_ARB_MODE_RR_AFTER_DEP  
REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
 
 #define HSW_ROW_CHICKEN3   _MMIO(0xe49c)
 #define   HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE   (1 << 6)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 3cdb8294e13f..ff8c3735abc9 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2700,6 +2700,15 @@ add_render_compute_tuning_settings(struct 
drm_i915_private *i915,
   0 /* write-only, so skip validation */,
   true);
}
+
+   /*
+* This tuning setting proves beneficial only on ATS-M designs; the
+* default "age based" setting is optimal on regular DG2 and other
+* platforms.
+*/
+   if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
+   wa_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
+   THREAD_EX_ARB_MODE_RR_AFTER_DEP);
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 857e8bb6865c..26b25d9434d6 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1080,6 +1080,7 @@ static const struct intel_device_info ats_m_info = {
DG2_FEATURES,
.display = { 0 },
.require_force_probe = 1,
+   .tuning_thread_rr_after_dep = 1,
 };
 
 #define XE_HPC_FEATURES \
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index 0ccde94b225f..6904ad03ca19 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -171,6 +171,7 @@ enum intel_ppgtt_type {
func(has_runtime_pm); \
func(has_snoop); \
func(has_coherent_ggtt); \
+   func(tuning_thread_rr_after_dep); \
func(unfenced_needs_alignment); \
func(hws_needs_physical);
 
-- 
2.37.2



[PATCH] Revert "drm/i915/dg2: Add preemption changes for Wa_14015141709"

2022-08-26 Thread Matt Roper
This reverts commit ca6920811aa5428270dd78af0a7a36b10119065a.

The intent of Wa_14015141709 was to inform us that userspace can no
longer control object-level preemption as it has on past platforms
(i.e., by twiddling register bit CS_CHICKEN1[0]).  The description of
the workaround in the spec wasn't terribly well-written, and when we
requested clarification from the hardware teams we were told that on the
kernel side we should also probably stop setting
FF_SLICE_CS_CHICKEN1[14], which is the register bit that directs the
hardware to honor the settings in per-context register CS_CHICKEN1.  It
turns out that this guidance about FF_SLICE_CS_CHICKEN1[14] was a
mistake; even though CS_CHICKEN1[0] is non-operational and useless to
userspace, there are other bits in the register that do still work and
might need to be adjusted by userspace in the future (e.g., to implement
other workarounds that show up).  If we don't set
FF_SLICE_CS_CHICKEN1[14] in i915, then those future workarounds would
not take effect.

This miscommunication came to light because another workaround
(Wa_16013994831) has now shown up that requires userspace to adjust the
value of CS_CHICKEN[10] in certain circumstances.  To ensure userspace's
updates to this chicken bit are handled properly by the hardware, we
need to make sure that FF_SLICE_CS_CHICKEN1[14] is once again set by the
kernel.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 +-
 drivers/gpu/drm/i915/i915_drv.h | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 3cdb8294e13f..69a0c6a74474 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2389,7 +2389,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct 
i915_wa_list *wal)
 FF_DOP_CLOCK_GATE_DISABLE);
}
 
-   if (HAS_PERCTX_PREEMPT_CTRL(i915)) {
+   if (IS_GRAPHICS_VER(i915, 9, 12)) {
/* 
FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
wa_masked_en(wal,
 GEN7_FF_SLICE_CS_CHICKEN1,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2b00ef3626db..d6a1ab6f65de 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1352,9 +1352,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define HAS_GUC_DEPRIVILEGE(dev_priv) \
(INTEL_INFO(dev_priv)->has_guc_deprivilege)
 
-#define HAS_PERCTX_PREEMPT_CTRL(i915) \
-   ((GRAPHICS_VER(i915) >= 9) &&  GRAPHICS_VER_FULL(i915) < IP_VER(12, 55))
-
 #define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \
  IS_ALDERLAKE_S(dev_priv))
 
-- 
2.37.2



[PATCH v2 1/2] drm/i915/gt: Add dedicated function for non-ctx register tuning settings

2022-08-24 Thread Matt Roper
The bspec performance tuning section gives recommended settings that the
driver should program for various MMIO registers.  Although these
settings aren't "workarounds" we use the workaround infrastructure to do
this programming to make sure it is handled at the appropriate places
and doesn't conflict with any real workarounds.

Since more of these are starting to show up on recent platforms, it's a
good time to create a dedicated function to hold them so that there's
less ambiguity about how/where to implement new ones.

Cc: Lucas De Marchi 
Signed-off-by: Matt Roper 
Reviewed-by: Lucas De Marchi 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 42 ++---
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 59cf28baa472..a68d279b01f0 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2102,13 +2102,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, 
struct i915_wa_list *wal)
/* Wa_1509235366:dg2 */
wa_write_or(wal, GEN12_GAMCNTRL_CTRL, 
INVALIDATION_BROADCAST_MODE_DIS |
GLOBAL_INVALIDATION_MODE);
-
-   /*
-* The following are not actually "workarounds" but rather
-* recommended tuning settings documented in the bspec's
-* performance guide section.
-*/
-   wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
}
 
if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
@@ -2676,6 +2669,32 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, 
struct i915_wa_list *wal)
}
 }
 
+/*
+ * The bspec performance guide has recommended MMIO tuning settings.  These
+ * aren't truly "workarounds" but we want to program them with the same
+ * workaround infrastructure to ensure that they're automatically added to
+ * the GuC save/restore lists, re-applied at the right times, and checked for
+ * any conflicting programming requested by real workarounds.
+ *
+ * Programming settings should be added here only if their registers are not
+ * part of an engine's register state context.  If a register is part of a
+ * context, then any tuning settings should be programmed in an appropriate
+ * function invoked by __intel_engine_init_ctx_wa().
+ */
+static void
+add_render_compute_tuning_settings(struct drm_i915_private *i915,
+  struct i915_wa_list *wal)
+{
+   if (IS_PONTEVECCHIO(i915)) {
+   wa_write(wal, XEHPC_L3SCRUB,
+SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
+   }
+
+   if (IS_DG2(i915)) {
+   wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+   }
+}
+
 /*
  * The workarounds in this function apply to shared registers in
  * the general render reset domain that aren't tied to a
@@ -2690,14 +2709,9 @@ general_render_compute_wa_init(struct intel_engine_cs 
*engine, struct i915_wa_li
 {
struct drm_i915_private *i915 = engine->i915;
 
-   if (IS_PONTEVECCHIO(i915)) {
-   /*
-* The following is not actually a "workaround" but rather
-* a recommended tuning setting documented in the bspec's
-* performance guide section.
-*/
-   wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | 
SCRUB_RATE_4B_PER_CLK);
+   add_render_compute_tuning_settings(i915, wal);
 
+   if (IS_PONTEVECCHIO(i915)) {
/* Wa_16016694945 */
wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC);
}
-- 
2.37.1



[PATCH] drm/i915/dg2: Incorporate Wa_16014892111 into DRAW_WATERMARK tuning

2022-08-23 Thread Matt Roper
Although register tuning settings are generally implemented via the
workaround infrastructure, it turns out that the DRAW_WATERMARK register
is not properly saved/restored by hardware around power events (i.e.,
RC6 entry) so updates to the value cannot be applied in the usual
manner.  New workaround Wa_16014892111 informs us that any tuning
updates to this register must instead be applied via an INDIRECT_CTX
batch buffer.  This will ensure that the necessary value is re-applied
when a context begins running, even if an RC6 entry had wiped the
register back to hardware defaults since the last context ran.

Fixes: 6dc85721df74 ("drm/i915/dg2: Add additional tuning settings")
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6642
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 21 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c |  2 --
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index eec73c66406c..070cec4ff8a4 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1242,6 +1242,23 @@ dg2_emit_rcs_hang_wabb(const struct intel_context *ce, 
u32 *cs)
return cs;
 }
 
+/*
+ * The bspec's tuning guide asks us to program a vertical watermark value of
+ * 0x3FF.  However this register is not saved/restored properly by the
+ * hardware, so we're required to apply the desired value via INDIRECT_CTX
+ * batch buffer to ensure the value takes effect properly.  All other bits
+ * in this register should remain at 0 (the hardware default).
+ */
+static u32 *
+dg2_emit_draw_watermark_setting(u32 *cs)
+{
+   *cs++ = MI_LOAD_REGISTER_IMM(1);
+   *cs++ = i915_mmio_reg_offset(DRAW_WATERMARK);
+   *cs++ = REG_FIELD_PREP(VERT_WM_VAL, 0x3FF);
+
+   return cs;
+}
+
 static u32 *
 gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
 {
@@ -1263,6 +1280,10 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context 
*ce, u32 *cs)
if (!HAS_FLAT_CCS(ce->engine->i915))
cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV);
 
+   /* Wa_16014892111 */
+   if (IS_DG2(ce->engine->i915))
+   cs = dg2_emit_draw_watermark_setting(cs);
+
return cs;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 31e129329fb0..3cdb8294e13f 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2685,8 +2685,6 @@ add_render_compute_tuning_settings(struct 
drm_i915_private *i915,
if (IS_DG2(i915)) {
wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
-   wa_write_clr_set(wal, DRAW_WATERMARK, VERT_WM_VAL,
-REG_FIELD_PREP(VERT_WM_VAL, 0x3FF));
 
/*
 * This is also listed as Wa_22012654132 for certain DG2
-- 
2.37.2



Re: [Intel-gfx] [PATCH v2 21/21] drm/i915/mtl: Do not update GV point, mask value

2022-08-19 Thread Matt Roper
On Thu, Aug 18, 2022 at 04:42:02PM -0700, Radhakrishna Sripada wrote:
> No need to update mask value/restrict because
> "Pcode only wants to use GV bandwidth value, not the mask value."
> for Display version greater than 14.

While the code changes might be correct, I can't decipher what the
commit message here is trying to tell us.  I'm not sure what the source
or context of the quote is, but the description in the commit message
should be more focused on why it's correct for our driver to skip these
operations.  I assume it has something to do with the new pm_demand
interfaces we'll be using to program this information into the hardware
in a future series?  If so, maybe this patch (with a modified commit
message) is better suited for inclusion in that future series where the
context makes more sense.

> 
> Bspec: 646365

This page number seems to be incorrect too.


Matt

> Cc: Matt Roper 
> Original Author: Caz Yokoyama
> Signed-off-by: Radhakrishna Sripada 
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 18 ++
>  1 file changed, 18 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index d09e9e5f4481..47869fe964ba 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3924,6 +3924,14 @@ void intel_sagv_pre_plane_update(struct 
> intel_atomic_state *state)
>  {
>   struct drm_i915_private *i915 = to_i915(state->base.dev);
>  
> + /*
> +  * No need to update mask value/restrict because
> +  * "Pcode only wants to use GV bandwidth value, not the mask value."
> +  * for DISPLAY_VER() >= 14.
> +  */
> + if (DISPLAY_VER(i915) >= 14)
> + return;
> +
>   /*
>* Just return if we can't control SAGV or don't have it.
>* This is different from situation when we have SAGV but just can't
> @@ -3944,6 +3952,16 @@ void intel_sagv_post_plane_update(struct 
> intel_atomic_state *state)
>  {
>   struct drm_i915_private *i915 = to_i915(state->base.dev);
>  
> + /*
> +  * No need to update mask value/restrict because
> +  * "Pcode only wants to use GV bandwidth value, not the mask value."
> +  * for DISPLAY_VER() >= 14.
> +  *
> +  * GV bandwidth will be set by intel_pmdemand_post_plane_update()
> +  */
> + if (DISPLAY_VER(i915) >= 14)
> + return;
> +
>   /*
>* Just return if we can't control SAGV or don't have it.
>* This is different from situation when we have SAGV but just can't
> -- 
> 2.25.1
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [PATCH v2 14/21] drm/i915/mtl: memory latency data from LATENCY_LPX_LPY for WM

2022-08-19 Thread Matt Roper
On Thu, Aug 18, 2022 at 04:41:55PM -0700, Radhakrishna Sripada wrote:
> Since Xe LPD+, Memory latency data are in LATENCY_LPX_LPY registers
> instead of GT driver mailbox.
> 
> v2: Use the extracted wm latency adjustment function(Matt)
> 
> Bspec: 64608
> 
> Cc: Matt Roper 
> Original Author: Caz Yokoyama
> Signed-off-by: Radhakrishna Sripada 
> ---
>  drivers/gpu/drm/i915/i915_reg.h |  7 +++
>  drivers/gpu/drm/i915/intel_pm.c | 21 ++---
>  2 files changed, 25 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 04a269fa8717..b2d5e1230c25 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8390,4 +8390,11 @@ enum skl_power_gate {
>  #define GEN12_STATE_ACK_DEBUG_MMIO(0x20BC)
>  
>  #define MTL_MEDIA_GSI_BASE   0x38
> +
> +#define MTL_LATENCY_LP0_LP1  _MMIO(0x45780)
> +#define MTL_LATENCY_LP2_LP3  _MMIO(0x45784)
> +#define MTL_LATENCY_LP4_LP5  _MMIO(0x45788)
> +#define  MTL_LATENCY_LEVEL0_2_4_MASK REG_GENMASK(12, 0)
> +#define  MTL_LATENCY_LEVEL1_3_5_MASK REG_GENMASK(28, 16)

You might consider "_{EVEN,ODD}_LEVEL_MASK" naming here, just in case
future IP versions add additional levels beyond LP5.

Otherwise,

Reviewed-by: Matt Roper 

> +
>  #endif /* _I915_REG_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 898e56d2eaf7..fac565d23d57 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2908,13 +2908,28 @@ static void intel_read_wm_latency(struct 
> drm_i915_private *dev_priv,
> u16 wm[])
>  {
>   struct intel_uncore *uncore = &dev_priv->uncore;
> + int max_level = ilk_wm_max_level(dev_priv);
>  
> - if (DISPLAY_VER(dev_priv) >= 9) {
> + if (DISPLAY_VER(dev_priv) >= 14) {
> + u32 val;
> +
> + val = intel_uncore_read(uncore, MTL_LATENCY_LP0_LP1);
> + wm[0] = REG_FIELD_GET(MTL_LATENCY_LEVEL0_2_4_MASK, val);
> + wm[1] = REG_FIELD_GET(MTL_LATENCY_LEVEL1_3_5_MASK, val);
> + val = intel_uncore_read(uncore, MTL_LATENCY_LP2_LP3);
> + wm[2] = REG_FIELD_GET(MTL_LATENCY_LEVEL0_2_4_MASK, val);
> + wm[3] = REG_FIELD_GET(MTL_LATENCY_LEVEL1_3_5_MASK, val);
> + val = intel_uncore_read(uncore, MTL_LATENCY_LP4_LP5);
> + wm[4] = REG_FIELD_GET(MTL_LATENCY_LEVEL0_2_4_MASK, val);
> + wm[5] = REG_FIELD_GET(MTL_LATENCY_LEVEL1_3_5_MASK, val);
> +
> + adjust_wm_latency(wm, max_level, 6,
> +   dev_priv->dram_info.wm_lv_0_adjust_needed);
> + } else if (DISPLAY_VER(dev_priv) >= 9) {
>   int read_latency = DISPLAY_VER(dev_priv) >= 12 ? 3 : 2;
> + int mult = IS_DG2(dev_priv) ? 2 : 1;
>   u32 val;
>   int ret;
> - int max_level = ilk_wm_max_level(dev_priv);
> - int mult = IS_DG2(dev_priv) ? 2 : 1;
>  
>   /* read the first set of memory latencies[0:3] */
>   val = 0; /* data0 to be programmed to 0 for first set */
> -- 
> 2.25.1
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [PATCH v2 13/21] drm/i915: Extract wm latency adjustment to its own function

2022-08-19 Thread Matt Roper
On Thu, Aug 18, 2022 at 04:41:54PM -0700, Radhakrishna Sripada wrote:
> Watermark latency is adjusted in cases when latency is 0us for level
> greater than 1, the subsequent levels are disabled. Extract this logic
> into its own function.
> 
> Suggested-by: Matt Roper 
> Signed-off-by: Radhakrishna Sripada 
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 88 ++---
>  1 file changed, 48 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index ef7553b494ea..898e56d2eaf7 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2861,15 +2861,59 @@ static void ilk_compute_wm_level(const struct 
> drm_i915_private *dev_priv,
>   result->enable = true;
>  }
>  
> +static void
> +adjust_wm_latency(u16 wm[], int max_level, int read_latency,
> +   bool wm_lv_0_adjust_needed)
> +{
> + int i, level;
> +
> + /*
> +  * If a level n (n > 1) has a 0us latency, all levels m (m >= n)
> +  * need to be disabled. We make sure to sanitize the values out
> +  * of the punit to satisfy this requirement.
> +  */
> + for (level = 1; level <= max_level; level++) {
> + if (wm[level] == 0) {
> + for (i = level + 1; i <= max_level; i++)
> + wm[i] = 0;
> +
> + max_level = level - 1;
> + break;
> + }
> + }
> +
> + /*
> +  * WaWmMemoryReadLatency
> +  *
> +  * punit doesn't take into account the read latency so we need
> +  * to add proper adjustement to each valid level we retrieve
> +  * from the punit when level 0 response data is 0us.
> +  */
> + if (wm[0] == 0) {
> + for (level = 0; level <= max_level; level++)
> + wm[level] += read_latency;
> + }
> +
> + /*
> +  * WA Level-0 adjustment for 16GB DIMMs: SKL+
> +  * If we could not get dimm info enable this WA to prevent from
> +  * any underrun. If not able to get Dimm info assume 16GB dimm
> +  * to avoid any underrun.
> +  */
> + if (wm_lv_0_adjust_needed)
> + wm[0] += 1;
> +}
> +
>  static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
> u16 wm[])
>  {
>   struct intel_uncore *uncore = &dev_priv->uncore;
>  
>   if (DISPLAY_VER(dev_priv) >= 9) {
> + int read_latency = DISPLAY_VER(dev_priv) >= 12 ? 3 : 2;
>   u32 val;
> - int ret, i;
> - int level, max_level = ilk_wm_max_level(dev_priv);
> + int ret;
> + int max_level = ilk_wm_max_level(dev_priv);
>   int mult = IS_DG2(dev_priv) ? 2 : 1;
>  
>   /* read the first set of memory latencies[0:3] */
> @@ -2909,44 +2953,8 @@ static void intel_read_wm_latency(struct 
> drm_i915_private *dev_priv,
>   wm[7] = ((val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
>   GEN9_MEM_LATENCY_LEVEL_MASK) * mult;
>  
> - /*
> -  * If a level n (n > 1) has a 0us latency, all levels m (m >= n)
> -  * need to be disabled. We make sure to sanitize the values out
> -  * of the punit to satisfy this requirement.
> -  */
> - for (level = 1; level <= max_level; level++) {
> - if (wm[level] == 0) {
> - for (i = level + 1; i <= max_level; i++)
> - wm[i] = 0;
> -
> - max_level = level - 1;
> -
> - break;
> - }
> - }
> -
> - /*
> -  * WaWmMemoryReadLatency
> -  *
> -  * punit doesn't take into account the read latency so we need
> -  * to add proper adjustement to each valid level we retrieve
> -  * from the punit when level 0 response data is 0us.
> -  */
> - if (wm[0] == 0) {
> - u8 adjust = DISPLAY_VER(dev_priv) >= 12 ? 3 : 2;
> -
> - for (level = 0; level <= max_level; level++)
> - wm[level] += adjust;
> - }
> -
> - /*
> -  * WA Level-0 adjustment for 16GB DIMMs: SKL+
> -  * If we could not get dimm info enable this WA to prevent from
> -  * any underrun. If not able to get Dimm info assume 16GB dimm
> -  * to 

Re: [PATCH v2 07/21] drm/i915/mtl: Add gmbus and gpio support

2022-08-19 Thread Matt Roper
On Thu, Aug 18, 2022 at 04:41:48PM -0700, Radhakrishna Sripada wrote:
> Add tables to map the GMBUS pin pairs to GPIO registers and port to DDC.
> From spec we have registers GPIO_CTL[1-5] mapped to native display phys and
> GPIO_CTL[9-14] are mapped to TC ports.
> 
> BSpec: 49306
> 
> Original Author: Brian J Lovin
> Signed-off-by: Radhakrishna Sripada 
> ---
>  drivers/gpu/drm/i915/display/intel_gmbus.c | 17 +
>  drivers/gpu/drm/i915/display/intel_gmbus.h |  1 +
>  2 files changed, 18 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c 
> b/drivers/gpu/drm/i915/display/intel_gmbus.c
> index a6ba7fb72339..542b8b2654be 100644
> --- a/drivers/gpu/drm/i915/display/intel_gmbus.c
> +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c
> @@ -116,6 +116,20 @@ static const struct gmbus_pin gmbus_pins_dg2[] = {
>   [GMBUS_PIN_9_TC1_ICP] = { "tc1", GPIOJ },
>  };
>  
> +static const struct gmbus_pin gmbus_pins_mtp[] = {
> + [GMBUS_PIN_1_BXT] = { "dpa", GPIOB },
> + [GMBUS_PIN_2_BXT] = { "dpb", GPIOC },
> + [GMBUS_PIN_3_BXT] = { "dpc", GPIOD },
> + [GMBUS_PIN_4_CNP] = { "dpd", GPIOE },
> + [GMBUS_PIN_5_MTP] = { "dpe", GPIOF },
> + [GMBUS_PIN_9_TC1_ICP] = { "tc1", GPIOJ },
> + [GMBUS_PIN_10_TC2_ICP] = { "tc2", GPIOK },
> + [GMBUS_PIN_11_TC3_ICP] = { "tc3", GPIOL },
> + [GMBUS_PIN_12_TC4_ICP] = { "tc4", GPIOM },
> + [GMBUS_PIN_13_TC5_TGP] = { "tc5", GPION },
> + [GMBUS_PIN_14_TC6_TGP] = { "tc6", GPIOO },

There's no GPIO_CTL registers for pin 13 or pin 14 on on MTP so these
last two entries shouldn't be here (and the commit message should be
fixed too).


Matt

> +};
> +
>  static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *i915,
>unsigned int pin)
>  {
> @@ -128,6 +142,9 @@ static const struct gmbus_pin *get_gmbus_pin(struct 
> drm_i915_private *i915,
>   } else if (INTEL_PCH_TYPE(i915) >= PCH_DG1) {
>   pins = gmbus_pins_dg1;
>   size = ARRAY_SIZE(gmbus_pins_dg1);
> + } else if (INTEL_PCH_TYPE(i915) >= PCH_MTP) {
> + pins = gmbus_pins_mtp;
> + size = ARRAY_SIZE(gmbus_pins_mtp);
>   } else if (INTEL_PCH_TYPE(i915) >= PCH_ICP) {
>   pins = gmbus_pins_icp;
>   size = ARRAY_SIZE(gmbus_pins_icp);
> diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.h 
> b/drivers/gpu/drm/i915/display/intel_gmbus.h
> index 8edc2e99cf53..20f704bd4e70 100644
> --- a/drivers/gpu/drm/i915/display/intel_gmbus.h
> +++ b/drivers/gpu/drm/i915/display/intel_gmbus.h
> @@ -24,6 +24,7 @@ struct i2c_adapter;
>  #define GMBUS_PIN_2_BXT  2
>  #define GMBUS_PIN_3_BXT  3
>  #define GMBUS_PIN_4_CNP  4
> +#define GMBUS_PIN_5_MTP  5
>  #define GMBUS_PIN_9_TC1_ICP  9
>  #define GMBUS_PIN_10_TC2_ICP 10
>  #define GMBUS_PIN_11_TC3_ICP 11
> -- 
> 2.25.1
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [Intel-gfx] [PATCH v2 01/21] drm/i915: Read graphics/media/display arch version from hw

2022-08-19 Thread Matt Roper
On Thu, Aug 18, 2022 at 04:41:42PM -0700, Radhakrishna Sripada wrote:
> From: Matt Roper 
> 
> Going forward, the hardware teams no longer consider new platforms to
> have a "generation" in the way we've defined it for past platforms.
> Instead, each IP block (graphics, media, display) will have their own
> architecture major.minor versions and stepping ID's which should be read
> directly from a register in the MMIO space.  New hardware programming
> styles, features, and workarounds should be conditional solely on the
> architecture version, and should no longer be derived from the PCI
> device ID, revision ID, or platform-specific feature flags.
> 
> v1.1: Fix build error

As Jani noted on the previous version, this patch needs to be split into
three patches (and/or be based on top of the other series that Jani has
in flight).  Also the giant macro is no longer necessary on current
drm-tip now that we the version values stored consistently in
structures; we can just use a regular function and pass pointers to the
structures.

Bala also had feedback on the previous version that hasn't been
incorporated here yet either.


Matt

> 
> Bspec: 63361, 64111
> 
> Signed-off-by: Matt Roper 
> Signed-off-by: Rodrigo Vivi 
> Signed-off-by: Radhakrishna Sripada 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  2 +
>  drivers/gpu/drm/i915/i915_driver.c| 80 ++-
>  drivers/gpu/drm/i915/i915_drv.h   | 16 ++--
>  drivers/gpu/drm/i915/i915_pci.c   |  1 +
>  drivers/gpu/drm/i915/i915_reg.h   |  6 ++
>  drivers/gpu/drm/i915/intel_device_info.c  | 32 
>  drivers/gpu/drm/i915/intel_device_info.h  | 14 
>  .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
>  8 files changed, 128 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 94f9ddcfb3a5..a053493dae24 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -39,6 +39,8 @@
>  #define FORCEWAKE_ACK_RENDER_GEN9_MMIO(0xd84)
>  #define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0xd88)
>  
> +#define GMD_ID_GRAPHICS  _MMIO(0xd8c)
> +
>  #define MCFG_MCR_SELECTOR_MMIO(0xfd0)
>  #define SF_MCR_SELECTOR  _MMIO(0xfd8)
>  #define GEN8_MCR_SELECTOR_MMIO(0xfdc)
> diff --git a/drivers/gpu/drm/i915/i915_driver.c 
> b/drivers/gpu/drm/i915/i915_driver.c
> index deb8a8b76965..33566f6e9546 100644
> --- a/drivers/gpu/drm/i915/i915_driver.c
> +++ b/drivers/gpu/drm/i915/i915_driver.c
> @@ -70,6 +70,7 @@
>  #include "gem/i915_gem_pm.h"
>  #include "gt/intel_gt.h"
>  #include "gt/intel_gt_pm.h"
> +#include "gt/intel_gt_regs.h"
>  #include "gt/intel_rc6.h"
>  
>  #include "pxp/intel_pxp_pm.h"
> @@ -306,15 +307,83 @@ static void sanitize_gpu(struct drm_i915_private *i915)
>   __intel_gt_reset(to_gt(i915), ALL_ENGINES);
>  }
>  
> +#define IP_VER_READ(offset, ri_prefix) \
> + addr = pci_iomap_range(pdev, 0, offset, sizeof(u32)); \
> + if (drm_WARN_ON(&i915->drm, !addr)) { \
> + /* Fall back to whatever was in the device info */ \
> + RUNTIME_INFO(i915)->ri_prefix.ver = 
> INTEL_INFO(i915)->ri_prefix.ver; \
> + RUNTIME_INFO(i915)->ri_prefix.rel = 
> INTEL_INFO(i915)->ri_prefix.rel; \
> + goto ri_prefix##done; \
> + } \
> + \
> + ver = ioread32(addr); \
> + pci_iounmap(pdev, addr); \
> + \
> + RUNTIME_INFO(i915)->ri_prefix.ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, 
> ver); \
> + RUNTIME_INFO(i915)->ri_prefix.rel = REG_FIELD_GET(GMD_ID_RELEASE_MASK, 
> ver); \
> + RUNTIME_INFO(i915)->ri_prefix.step = REG_FIELD_GET(GMD_ID_STEP, ver); \
> + \
> + /* Sanity check against expected versions from device info */ \
> + if (RUNTIME_INFO(i915)->ri_prefix.ver != 
> INTEL_INFO(i915)->ri_prefix.ver || \
> + RUNTIME_INFO(i915)->ri_prefix.rel > 
> INTEL_INFO(i915)->ri_prefix.rel) \
> + drm_dbg(&i915->drm, \
> + "Hardware reports " #ri_prefix " IP version %u.%u but 
> minimum expected is %u.%u\n", \
> + RUNTIME_INFO(i915)->ri_prefix.ver, \
> + RUNTIME_INFO(i915)->ri_prefix.rel, \
> + INTEL_INFO(i915)->ri_prefix.ver, \
> + INTEL_INFO(i915)->ri_prefix.rel); \
> +ri_prefix##done:
> +
> +/**
> + * intel_ip

[PATCH v2 2/2] drm/i915/dg2: Add additional tuning settings

2022-08-16 Thread Matt Roper
Some additional MMIO tuning settings have appeared in the bspec's
performance tuning guide section.

One of the tuning settings here is also documented as formal workaround
Wa_22012654132 for some steppings of DG2.  However the tuning setting
applies to all DG2 variants and steppings, making it a superset of the
workaround.

v2:
 - Move DRAW_WATERMARK to engine workaround section.  It only moves into
   the engine context on future platforms.  (Lucas)
 - CHICKEN_RASTER_2 needs to be handled as a masked register.  (Lucas)

Bspec: 68331
Cc: Lucas De Marchi 
Cc: Lionel Landwerlin 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  8 ++
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 27 ++---
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index b3b49f6d6d1c..f64fafe28f72 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -259,6 +259,9 @@
 #define   GEN9_PREEMPT_GPGPU_COMMAND_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(1, 0)
 #define   GEN9_PREEMPT_GPGPU_LEVEL_MASK
GEN9_PREEMPT_GPGPU_LEVEL(1, 1)
 
+#define DRAW_WATERMARK _MMIO(0x26c0)
+#define   VERT_WM_VAL  REG_GENMASK(9, 0)
+
 #define GEN12_GLOBAL_MOCS(i)   _MMIO(0x4000 + (i) * 4) /* 
Global MOCS regs */
 
 #define RENDER_HWS_PGA_GEN7_MMIO(0x4080)
@@ -374,6 +377,9 @@
 #define CHICKEN_RASTER_1   _MMIO(0x6204)
 #define   DIS_SF_ROUND_NEAREST_EVENREG_BIT(8)
 
+#define CHICKEN_RASTER_2   _MMIO(0x6208)
+#define   TBIMR_FAST_CLIP  REG_BIT(5)
+
 #define VFLSKPD_MMIO(0x62a8)
 #define   DIS_OVER_FETCH_CACHE REG_BIT(1)
 #define   DIS_MULT_MISS_RD_SQUASH  REG_BIT(0)
@@ -1124,6 +1130,8 @@
 
 #define RT_CTRL_MMIO(0xe530)
 #define   DIS_NULL_QUERY   REG_BIT(10)
+#define   STACKID_CTRL REG_GENMASK(6, 5)
+#define   STACKID_CTRL_512 REG_FIELD_PREP(STACKID_CTRL, 
0x2)
 
 #define EU_PERF_CNTL1  _MMIO(0xe558)
 #define EU_PERF_CNTL5  _MMIO(0xe55c)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index a68d279b01f0..31e129329fb0 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -568,6 +568,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs 
*engine,
 static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
   struct i915_wa_list *wal)
 {
+   wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
wa_add(wal,
@@ -2195,15 +2196,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, 
struct i915_wa_list *wal)
wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
}
 
-   if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) ||
-   IS_DG2_G11(i915)) {
-   /* Wa_22012654132:dg2 */
-   wa_add(wal, GEN10_CACHE_MODE_SS, 0,
-  _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
-  0 /* write-only, so skip validation */,
-  true);
-   }
-
/* Wa_14013202645:dg2 */
if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
@@ -2692,6 +2684,23 @@ add_render_compute_tuning_settings(struct 
drm_i915_private *i915,
 
if (IS_DG2(i915)) {
wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+   wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
+   wa_write_clr_set(wal, DRAW_WATERMARK, VERT_WM_VAL,
+REG_FIELD_PREP(VERT_WM_VAL, 0x3FF));
+
+   /*
+* This is also listed as Wa_22012654132 for certain DG2
+* steppings, but the tuning setting programming is a superset
+* since it applies to all DG2 variants and steppings.
+*
+* Note that register 0xE420 is write-only and cannot be read
+* back for verification on DG2 (due to Wa_14012342262), so
+* we need to explicitly skip the readback.
+*/
+   wa_add(wal, GEN10_CACHE_MODE_SS, 0,
+  _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
+  0 /* write-only, so skip validation */,
+  true);
}
 }
 
-- 
2.37.1



[PATCH v2 2/2] drm/i915/dg2: Add additional tuning settings

2022-08-15 Thread Matt Roper
Some additional MMIO tuning settings have appeared in the bspec's
performance tuning guide section.

One of the tuning settings here is also documented as formal workaround
Wa_22012654132 for some steppings of DG2.  However the tuning setting
applies to all DG2 variants and steppings, making it a superset of the
workaround.

v2:
 - Move DRAW_WATERMARK to engine workaround section.  It only moves into
   the engine context on future platforms.  (Lucas)
 - CHICKEN_RASTER_2 needs to be handled as a masked register.  (Lucas)

Bspec: 68331
Cc: Lucas De Marchi 
Cc: Lionel Landwerlin 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  8 ++
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 27 ++---
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index b3b49f6d6d1c..f64fafe28f72 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -259,6 +259,9 @@
 #define   GEN9_PREEMPT_GPGPU_COMMAND_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(1, 0)
 #define   GEN9_PREEMPT_GPGPU_LEVEL_MASK
GEN9_PREEMPT_GPGPU_LEVEL(1, 1)
 
+#define DRAW_WATERMARK _MMIO(0x26c0)
+#define   VERT_WM_VAL  REG_GENMASK(9, 0)
+
 #define GEN12_GLOBAL_MOCS(i)   _MMIO(0x4000 + (i) * 4) /* 
Global MOCS regs */
 
 #define RENDER_HWS_PGA_GEN7_MMIO(0x4080)
@@ -374,6 +377,9 @@
 #define CHICKEN_RASTER_1   _MMIO(0x6204)
 #define   DIS_SF_ROUND_NEAREST_EVENREG_BIT(8)
 
+#define CHICKEN_RASTER_2   _MMIO(0x6208)
+#define   TBIMR_FAST_CLIP  REG_BIT(5)
+
 #define VFLSKPD_MMIO(0x62a8)
 #define   DIS_OVER_FETCH_CACHE REG_BIT(1)
 #define   DIS_MULT_MISS_RD_SQUASH  REG_BIT(0)
@@ -1124,6 +1130,8 @@
 
 #define RT_CTRL_MMIO(0xe530)
 #define   DIS_NULL_QUERY   REG_BIT(10)
+#define   STACKID_CTRL REG_GENMASK(6, 5)
+#define   STACKID_CTRL_512 REG_FIELD_PREP(STACKID_CTRL, 
0x2)
 
 #define EU_PERF_CNTL1  _MMIO(0xe558)
 #define EU_PERF_CNTL5  _MMIO(0xe55c)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index a68d279b01f0..31e129329fb0 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -568,6 +568,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs 
*engine,
 static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
   struct i915_wa_list *wal)
 {
+   wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
wa_add(wal,
@@ -2195,15 +2196,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, 
struct i915_wa_list *wal)
wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
}
 
-   if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) ||
-   IS_DG2_G11(i915)) {
-   /* Wa_22012654132:dg2 */
-   wa_add(wal, GEN10_CACHE_MODE_SS, 0,
-  _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
-  0 /* write-only, so skip validation */,
-  true);
-   }
-
/* Wa_14013202645:dg2 */
if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
@@ -2692,6 +2684,23 @@ add_render_compute_tuning_settings(struct 
drm_i915_private *i915,
 
if (IS_DG2(i915)) {
wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+   wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
+   wa_write_clr_set(wal, DRAW_WATERMARK, VERT_WM_VAL,
+REG_FIELD_PREP(VERT_WM_VAL, 0x3FF));
+
+   /*
+* This is also listed as Wa_22012654132 for certain DG2
+* steppings, but the tuning setting programming is a superset
+* since it applies to all DG2 variants and steppings.
+*
+* Note that register 0xE420 is write-only and cannot be read
+* back for verification on DG2 (due to Wa_14012342262), so
+* we need to explicitly skip the readback.
+*/
+   wa_add(wal, GEN10_CACHE_MODE_SS, 0,
+  _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
+  0 /* write-only, so skip validation */,
+  true);
}
 }
 
-- 
2.37.1



[PATCH] drm/i915: Correct ss -> steering calculation for pre-Xe_HP platforms

2022-07-12 Thread Matt Roper
Accidental use of a "SLICE" macro where a "SUBSLICE" macro was intended
causes the group ID for steering to be calculated incorrectly on
pre-Xe_HP platforms.

Fixes: 9a92732f040a ("drm/i915/gt: Add general DSS steering iterator to 
intel_gt_mcr")
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index f8c64ab9d3ca..e79405a45312 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -515,7 +515,7 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, 
unsigned int dss,
*group = dss / GEN_DSS_PER_GSLICE;
*instance = dss % GEN_DSS_PER_GSLICE;
} else {
-   *group = dss / GEN_MAX_HSW_SLICES;
+   *group = dss / GEN_MAX_SS_PER_HSW_SLICE;
*instance = dss % GEN_MAX_SS_PER_HSW_SLICE;
return;
}
-- 
2.36.1



Re: [PATCH] drm/i915/display: clean up comments

2022-07-01 Thread Matt Roper
On Fri, Jul 01, 2022 at 04:32:36PM -0400, Tom Rix wrote:
> spelling changes
> resoluition -> resolution
> dont-> don't
> commmit -> commit
> Invalidade  -> Invalidate
> 
> Signed-off-by: Tom Rix 

Reviewed-by: Matt Roper 

and applied to drm-intel-next.  Thanks for the patch.


Matt

> ---
>  drivers/gpu/drm/i915/display/intel_psr.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_psr.c 
> b/drivers/gpu/drm/i915/display/intel_psr.c
> index 7d61c55184e5..e6a870641cd2 100644
> --- a/drivers/gpu/drm/i915/display/intel_psr.c
> +++ b/drivers/gpu/drm/i915/display/intel_psr.c
> @@ -555,7 +555,7 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp)
>   /*
>* TODO: 7 lines of IO_BUFFER_WAKE and FAST_WAKE are default
>* values from BSpec. In order to setting an optimal power
> -  * consumption, lower than 4k resoluition mode needs to decrese
> +  * consumption, lower than 4k resolution mode needs to decrease
>* IO_BUFFER_WAKE and FAST_WAKE. And higher than 4K resolution
>* mode needs to increase IO_BUFFER_WAKE and FAST_WAKE.
>*/
> @@ -959,7 +959,7 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
>   int psr_setup_time;
>  
>   /*
> -  * Current PSR panels dont work reliably with VRR enabled
> +  * Current PSR panels don't work reliably with VRR enabled
>* So if VRR is enabled, do not enable PSR.
>*/
>   if (crtc_state->vrr.enable)
> @@ -1664,7 +1664,7 @@ static void intel_psr2_sel_fetch_pipe_alignment(const 
> struct intel_crtc_state *c
>   *
>   * Plane scaling and rotation is not supported by selective fetch and both
>   * properties can change without a modeset, so need to be check at every
> - * atomic commmit.
> + * atomic commit.
>   */
>  static bool psr2_sel_fetch_plane_state_supported(const struct 
> intel_plane_state *plane_state)
>  {
> @@ -2203,7 +2203,7 @@ static void _psr_invalidate_handle(struct intel_dp 
> *intel_dp)
>  }
>  
>  /**
> - * intel_psr_invalidate - Invalidade PSR
> + * intel_psr_invalidate - Invalidate PSR
>   * @dev_priv: i915 device
>   * @frontbuffer_bits: frontbuffer plane tracking bits
>   * @origin: which operation caused the invalidate
> -- 
> 2.27.0
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [Intel-gfx] [PATCH v8 0/3] drm/doc/rfc: i915 VM_BIND feature design + uapi

2022-07-01 Thread Matt Roper
On Thu, Jun 30, 2022 at 05:31:07PM -0700, Niranjana Vishwanathapura wrote:
> This is the i915 driver VM_BIND feature design RFC patch series along
> with the required uapi definition and description of intended use cases.
> 
> v2: Reduce the scope to simple Mesa use case.
> Remove all compute related uapi, vm_bind/unbind queue support and
> only support a timeline out fence instead of an in/out timeline
> fence array.
> v3: Expand documentation on dma-resv usage, TLB flushing, execbuf3 and
> VM_UNBIND. Add FENCE_VALID and TLB_FLUSH flags.
> v4: Remove I915_GEM_VM_BIND_TLB_FLUSH flag and add additional
> uapi documentation for vm_bind/unbind.
> v5: Update TLB flushing documentation.
> Add version support to stage implementation.
> v6: Define and use drm_i915_gem_timeline_fence structure for
> execbuf3 and vm_bind/unbind timeline fences.
> v7: Rename I915_PARAM_HAS_VM_BIND to I915_PARAM_VM_BIND_VERSION.
> Update documentation on async vm_bind/unbind and versioning.
> Remove redundant vm_bind/unbind FENCE_VALID flag, execbuf3
> batch_count field and I915_EXEC3_SECURE flag.
> v8: Remove I915_GEM_VM_BIND_READONLY and minor documentation
> updates.
> 
> Signed-off-by: Niranjana Vishwanathapura 
> Reviewed-by: Daniel Vetter 
> Acked-by: Paulo Zanoni 

Series applied to drm-intel-gt-next.  Thanks for the patches and
reviews.


Matt

> 
> Niranjana Vishwanathapura (3):
>   drm/doc/rfc: VM_BIND feature design document
>   drm/i915: Update i915 uapi documentation
>   drm/doc/rfc: VM_BIND uapi definition
> 
>  Documentation/gpu/rfc/i915_vm_bind.h   | 291 +
>  Documentation/gpu/rfc/i915_vm_bind.rst | 245 +
>  Documentation/gpu/rfc/index.rst|   4 +
>  include/uapi/drm/i915_drm.h| 205 +
>  4 files changed, 700 insertions(+), 45 deletions(-)
>  create mode 100644 Documentation/gpu/rfc/i915_vm_bind.h
>  create mode 100644 Documentation/gpu/rfc/i915_vm_bind.rst
> 
> -- 
> 2.21.0.rc0.32.g243a4c7e27
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [PATCH 2/2] drm/i915: DG2 and ATS-M device ID updates

2022-07-01 Thread Matt Roper
On Fri, Jul 01, 2022 at 09:59:50AM -0700, Lucas De Marchi wrote:
> On Fri, Jul 01, 2022 at 08:22:31AM -0700, Matt Roper wrote:
> > Small BAR support has now landed, which allows us to add the PCI IDs
> > that correspond to add-in card designs of DG2 and ATS-M.  There's also
> > one additional MB-down PCI ID that recently appeared (0x5698) so we add
> > it too.
> > 
> > Cc: Lucas De Marchi 
> > Signed-off-by: Matt Roper 
> 
> 
> Reviewed-by: Lucas De Marchi 

Thanks.  Applied to drm-intel-gt-next (since that's the branch that has
the small BAR patches that were a pre-req to adding these IDs) and
dropped the equivalent patch from topic/core-from-CI.


Matt

> 
> Lucas De Marchi

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


[PATCH] drm/i915/gt: Add general DSS steering iterator to intel_gt_mcr

2022-07-01 Thread Matt Roper
Although all DSS belong to a single pool on Xe_HP platforms (i.e.,
they're not organized into slices from a topology point of view), we do
still need to pass 'group' and 'instance' targets when steering register
accesses to a specific instance of a per-DSS multicast register.  The
rules for how to determine group and instance IDs (which previously used
legacy terms "slice" and "subslice") varies by platform.  Some platforms
determine steering by gslice membership, some platforms by cslice
membership, and future platforms may have other rules.

Since looping over each DSS and performing steered unicast register
accesses is a relatively common pattern, let's add a dedicated iteration
macro to handle this (and replace the platform-specific "instdone" loop
we were using previously.  This will avoid the calling code needing to
figure out the details about how to obtain steering IDs for a specific
DSS.

Most of the places where we use this new loop are in the GPU errorstate
code at the moment, but we do have some additional features coming in
the future that will also need to loop over each DSS and steer some
register accesses accordingly.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 34 ++-
 drivers/gpu/drm/i915/gt/intel_engine_types.h  | 22 
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c| 25 ++
 drivers/gpu/drm/i915/gt/intel_gt_mcr.h| 24 +
 .../gpu/drm/i915/gt/uc/intel_guc_capture.c| 13 ---
 drivers/gpu/drm/i915/i915_gpu_error.c | 32 ++---
 6 files changed, 75 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 283870c65991..37fa813af766 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1517,7 +1517,6 @@ void intel_engine_get_instdone(const struct 
intel_engine_cs *engine,
   struct intel_instdone *instdone)
 {
struct drm_i915_private *i915 = engine->i915;
-   const struct sseu_dev_info *sseu = &engine->gt->info.sseu;
struct intel_uncore *uncore = engine->uncore;
u32 mmio_base = engine->mmio_base;
int slice;
@@ -1542,32 +1541,19 @@ void intel_engine_get_instdone(const struct 
intel_engine_cs *engine,
intel_uncore_read(uncore, 
GEN12_SC_INSTDONE_EXTRA2);
}
 
-   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
-   for_each_instdone_gslice_dss_xehp(i915, sseu, iter, 
slice, subslice) {
-   instdone->sampler[slice][subslice] =
-   intel_gt_mcr_read(engine->gt,
- GEN7_SAMPLER_INSTDONE,
- slice, subslice);
-   instdone->row[slice][subslice] =
-   intel_gt_mcr_read(engine->gt,
- GEN7_ROW_INSTDONE,
- slice, subslice);
-   }
-   } else {
-   for_each_instdone_slice_subslice(i915, sseu, slice, 
subslice) {
-   instdone->sampler[slice][subslice] =
-   intel_gt_mcr_read(engine->gt,
- GEN7_SAMPLER_INSTDONE,
- slice, subslice);
-   instdone->row[slice][subslice] =
-   intel_gt_mcr_read(engine->gt,
- GEN7_ROW_INSTDONE,
- slice, subslice);
-   }
+   for_each_ss_steering(iter, engine->gt, slice, subslice) {
+   instdone->sampler[slice][subslice] =
+   intel_gt_mcr_read(engine->gt,
+ GEN7_SAMPLER_INSTDONE,
+ slice, subslice);
+   instdone->row[slice][subslice] =
+   intel_gt_mcr_read(engine->gt,
+ GEN7_ROW_INSTDONE,
+ slice, subslice);
}
 
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) {
-   for_each_instdone_gslice_dss_xehp(i915, sseu, iter, 
slice, subslice)
+   for_each_ss_steering(iter, engine->gt, slice, subslice)
   

[PATCH 1/2] Revert "topic/core-for-CI: Add remaining DG2 and ATS-M device IDs"

2022-07-01 Thread Matt Roper
This reverts commit f7d7dddaab81eeae4508197b5f38f0b974d97b8c.

In reality we'll just rebase this patch out of the topic/core-for-CI
branch after landing the "real" copy in drm-intel.  But to prevent
pre-merge CI from getting confused, we send it as an explicit revert on
the mailing list.

Cc: Lucas De Marchi 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/i915_pci.c  |  2 +-
 drivers/gpu/drm/i915/intel_device_info.c |  2 --
 include/drm/i915_pciids.h| 25 +++-
 3 files changed, 4 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 0cdd6513fbb7..5edc8fbf1dff 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1075,6 +1075,7 @@ static const struct intel_device_info dg2_info = {
.require_force_probe = 1,
 };
 
+__maybe_unused
 static const struct intel_device_info ats_m_info = {
DG2_FEATURES,
.display = { 0 },
@@ -1188,7 +1189,6 @@ static const struct pci_device_id pciidlist[] = {
INTEL_RPLS_IDS(&adl_s_info),
INTEL_RPLP_IDS(&adl_p_info),
INTEL_DG2_IDS(&dg2_info),
-   INTEL_ATS_M_IDS(&ats_m_info),
{0, 0, 0}
 };
 MODULE_DEVICE_TABLE(pci, pciidlist);
diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
b/drivers/gpu/drm/i915/intel_device_info.c
index f0bf23726ed8..7eb893666595 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -189,12 +189,10 @@ static const u16 subplatform_rpl_ids[] = {
 
 static const u16 subplatform_g10_ids[] = {
INTEL_DG2_G10_IDS(0),
-   INTEL_ATS_M150_IDS(0),
 };
 
 static const u16 subplatform_g11_ids[] = {
INTEL_DG2_G11_IDS(0),
-   INTEL_ATS_M75_IDS(0),
 };
 
 static const u16 subplatform_g12_ids[] = {
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 4585fed4e41e..283dadfbb4db 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -696,41 +696,22 @@
 #define INTEL_DG2_G10_IDS(info) \
INTEL_VGA_DEVICE(0x5690, info), \
INTEL_VGA_DEVICE(0x5691, info), \
-   INTEL_VGA_DEVICE(0x5692, info), \
-   INTEL_VGA_DEVICE(0x56A0, info), \
-   INTEL_VGA_DEVICE(0x56A1, info), \
-   INTEL_VGA_DEVICE(0x56A2, info)
+   INTEL_VGA_DEVICE(0x5692, info)
 
 #define INTEL_DG2_G11_IDS(info) \
INTEL_VGA_DEVICE(0x5693, info), \
INTEL_VGA_DEVICE(0x5694, info), \
INTEL_VGA_DEVICE(0x5695, info), \
-   INTEL_VGA_DEVICE(0x56A5, info), \
-   INTEL_VGA_DEVICE(0x56A6, info), \
-   INTEL_VGA_DEVICE(0x56B0, info), \
-   INTEL_VGA_DEVICE(0x56B1, info)
+   INTEL_VGA_DEVICE(0x56B0, info)
 
 #define INTEL_DG2_G12_IDS(info) \
INTEL_VGA_DEVICE(0x5696, info), \
INTEL_VGA_DEVICE(0x5697, info), \
-   INTEL_VGA_DEVICE(0x56A3, info), \
-   INTEL_VGA_DEVICE(0x56A4, info), \
-   INTEL_VGA_DEVICE(0x56B2, info), \
-   INTEL_VGA_DEVICE(0x56B3, info)
+   INTEL_VGA_DEVICE(0x56B2, info)
 
 #define INTEL_DG2_IDS(info) \
INTEL_DG2_G10_IDS(info), \
INTEL_DG2_G11_IDS(info), \
INTEL_DG2_G12_IDS(info)
 
-#define INTEL_ATS_M150_IDS(info) \
-   INTEL_VGA_DEVICE(0x56C0, info)
-
-#define INTEL_ATS_M75_IDS(info) \
-   INTEL_VGA_DEVICE(0x56C1, info)
-
-#define INTEL_ATS_M_IDS(info) \
-   INTEL_ATS_M150_IDS(info), \
-   INTEL_ATS_M75_IDS(info)
-
 #endif /* _I915_PCIIDS_H */
-- 
2.36.1



[PATCH 2/2] drm/i915: DG2 and ATS-M device ID updates

2022-07-01 Thread Matt Roper
Small BAR support has now landed, which allows us to add the PCI IDs
that correspond to add-in card designs of DG2 and ATS-M.  There's also
one additional MB-down PCI ID that recently appeared (0x5698) so we add
it too.

Cc: Lucas De Marchi 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/i915_pci.c  |  2 +-
 drivers/gpu/drm/i915/intel_device_info.c |  2 ++
 include/drm/i915_pciids.h| 26 +---
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 5edc8fbf1dff..0cdd6513fbb7 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1075,7 +1075,6 @@ static const struct intel_device_info dg2_info = {
.require_force_probe = 1,
 };
 
-__maybe_unused
 static const struct intel_device_info ats_m_info = {
DG2_FEATURES,
.display = { 0 },
@@ -1189,6 +1188,7 @@ static const struct pci_device_id pciidlist[] = {
INTEL_RPLS_IDS(&adl_s_info),
INTEL_RPLP_IDS(&adl_p_info),
INTEL_DG2_IDS(&dg2_info),
+   INTEL_ATS_M_IDS(&ats_m_info),
{0, 0, 0}
 };
 MODULE_DEVICE_TABLE(pci, pciidlist);
diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
b/drivers/gpu/drm/i915/intel_device_info.c
index 7eb893666595..f0bf23726ed8 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -189,10 +189,12 @@ static const u16 subplatform_rpl_ids[] = {
 
 static const u16 subplatform_g10_ids[] = {
INTEL_DG2_G10_IDS(0),
+   INTEL_ATS_M150_IDS(0),
 };
 
 static const u16 subplatform_g11_ids[] = {
INTEL_DG2_G11_IDS(0),
+   INTEL_ATS_M75_IDS(0),
 };
 
 static const u16 subplatform_g12_ids[] = {
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 283dadfbb4db..1bd0420a213d 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -696,22 +696,42 @@
 #define INTEL_DG2_G10_IDS(info) \
INTEL_VGA_DEVICE(0x5690, info), \
INTEL_VGA_DEVICE(0x5691, info), \
-   INTEL_VGA_DEVICE(0x5692, info)
+   INTEL_VGA_DEVICE(0x5692, info), \
+   INTEL_VGA_DEVICE(0x56A0, info), \
+   INTEL_VGA_DEVICE(0x56A1, info), \
+   INTEL_VGA_DEVICE(0x56A2, info)
 
 #define INTEL_DG2_G11_IDS(info) \
INTEL_VGA_DEVICE(0x5693, info), \
INTEL_VGA_DEVICE(0x5694, info), \
INTEL_VGA_DEVICE(0x5695, info), \
-   INTEL_VGA_DEVICE(0x56B0, info)
+   INTEL_VGA_DEVICE(0x5698, info), \
+   INTEL_VGA_DEVICE(0x56A5, info), \
+   INTEL_VGA_DEVICE(0x56A6, info), \
+   INTEL_VGA_DEVICE(0x56B0, info), \
+   INTEL_VGA_DEVICE(0x56B1, info)
 
 #define INTEL_DG2_G12_IDS(info) \
INTEL_VGA_DEVICE(0x5696, info), \
INTEL_VGA_DEVICE(0x5697, info), \
-   INTEL_VGA_DEVICE(0x56B2, info)
+   INTEL_VGA_DEVICE(0x56A3, info), \
+   INTEL_VGA_DEVICE(0x56A4, info), \
+   INTEL_VGA_DEVICE(0x56B2, info), \
+   INTEL_VGA_DEVICE(0x56B3, info)
 
 #define INTEL_DG2_IDS(info) \
INTEL_DG2_G10_IDS(info), \
INTEL_DG2_G11_IDS(info), \
INTEL_DG2_G12_IDS(info)
 
+#define INTEL_ATS_M150_IDS(info) \
+   INTEL_VGA_DEVICE(0x56C0, info)
+
+#define INTEL_ATS_M75_IDS(info) \
+   INTEL_VGA_DEVICE(0x56C1, info)
+
+#define INTEL_ATS_M_IDS(info) \
+   INTEL_ATS_M150_IDS(info), \
+   INTEL_ATS_M75_IDS(info)
+
 #endif /* _I915_PCIIDS_H */
-- 
2.36.1



[PATCH 2/2] drm/i915: Prefer "XEHP_" prefix for registers

2022-06-24 Thread Matt Roper
We've been introducing new registers with a mix of "XEHP_"
(architecture) and "XEHPSDV_" (platform) prefixes.  For consistency,
let's settle on "XEHP_" as the preferred form.

XEHPSDV_RP_STATE_CAP stays with its current name since that's truly a
platform-specific register and not something that applies to the Xe_HP
architecture as a whole.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c  | 4 ++--
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 8 
 drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 4 ++--
 drivers/gpu/drm/i915/gt/intel_region_lmem.c | 8 
 drivers/gpu/drm/i915/i915_reg.h | 6 +++---
 5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index e63de9c06596..166d0a4b9e8c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -836,8 +836,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, 
u16 type,
} else {
resource_size_t lmem_range;
 
-   lmem_range = intel_gt_mcr_read_any(&i915->gt0, 
XEHPSDV_TILE0_ADDR_RANGE) & 0x;
-   lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT;
+   lmem_range = intel_gt_mcr_read_any(&i915->gt0, 
XEHP_TILE0_ADDR_RANGE) & 0x;
+   lmem_size = lmem_range >> XEHP_TILE_LMEM_RANGE_SHIFT;
lmem_size *= SZ_1G;
}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 61815b6e87de..37c1095d8603 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -324,11 +324,11 @@
 
 #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4)
 
-#define XEHPSDV_TILE0_ADDR_RANGE   _MMIO(0x4900)
-#define   XEHPSDV_TILE_LMEM_RANGE_SHIFT8
+#define XEHP_TILE0_ADDR_RANGE  _MMIO(0x4900)
+#define   XEHP_TILE_LMEM_RANGE_SHIFT   8
 
-#define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910)
-#define   XEHPSDV_CCS_BASE_SHIFT   8
+#define XEHP_FLAT_CCS_BASE_ADDR_MMIO(0x4910)
+#define   XEHP_CCS_BASE_SHIFT  8
 
 #define GAMTARBMODE_MMIO(0x4a08)
 #define   ARB_MODE_BWGTLB_DISABLE  (1 << 9)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c 
b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
index ae8a8f725f01..73a8b46e0234 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
@@ -679,7 +679,7 @@ static ssize_t media_RP0_freq_mhz_show(struct device *dev,
u32 val;
int err;
 
-   err = snb_pcode_read_p(gt->uncore, XEHPSDV_PCODE_FREQUENCY_CONFIG,
+   err = snb_pcode_read_p(gt->uncore, XEHP_PCODE_FREQUENCY_CONFIG,
   PCODE_MBOX_FC_SC_READ_FUSED_P0,
   PCODE_MBOX_DOMAIN_MEDIAFF, &val);
 
@@ -700,7 +700,7 @@ static ssize_t media_RPn_freq_mhz_show(struct device *dev,
u32 val;
int err;
 
-   err = snb_pcode_read_p(gt->uncore, XEHPSDV_PCODE_FREQUENCY_CONFIG,
+   err = snb_pcode_read_p(gt->uncore, XEHP_PCODE_FREQUENCY_CONFIG,
   PCODE_MBOX_FC_SC_READ_FUSED_PN,
   PCODE_MBOX_DOMAIN_MEDIAFF, &val);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c 
b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index 2ff448047020..d09b996a9759 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -105,12 +105,12 @@ static struct intel_memory_region *setup_lmem(struct 
intel_gt *gt)
resource_size_t lmem_range;
u64 tile_stolen, flat_ccs_base;
 
-   lmem_range = intel_gt_mcr_read_any(&i915->gt0, 
XEHPSDV_TILE0_ADDR_RANGE) & 0x;
-   lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT;
+   lmem_range = intel_gt_mcr_read_any(&i915->gt0, 
XEHP_TILE0_ADDR_RANGE) & 0x;
+   lmem_size = lmem_range >> XEHP_TILE_LMEM_RANGE_SHIFT;
lmem_size *= SZ_1G;
 
-   flat_ccs_base = intel_gt_mcr_read_any(gt, 
XEHPSDV_FLAT_CCS_BASE_ADDR);
-   flat_ccs_base = (flat_ccs_base >> XEHPSDV_CCS_BASE_SHIFT) * 
SZ_64K;
+   flat_ccs_base = intel_gt_mcr_read_any(gt, 
XEHP_FLAT_CCS_BASE_ADDR);
+   flat_ccs_base = (flat_ccs_base >> XEHP_CCS_BASE_SHIFT) * SZ_64K;
 
/* FIXME: Remove this when we have small-bar enabled */
if (pci_resource_len(pdev, 2) < lmem_size) {
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index cf5e16abf6c7..643d7f020a4a 100644
--- a/

[PATCH 1/2] drm/i915: Correct duplicated/misplaced GT register definitions

2022-06-24 Thread Matt Roper
XEHPSDV_FLAT_CCS_BASE_ADDR, GEN8_L3_LRA_1_GPGPU, and MMCD_MISC_CTRL were
duplicated between i915_reg.h and intel_gt_regs.h.  These are all GT
registers, so we should drop the copy from i915_reg.h.

XEHPSDV_TILE0_ADDR_RANGE was defined in i915_reg.h, but really belongs
in intel_gt_regs.h.  Move it.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c |  1 +
 drivers/gpu/drm/i915/gt/intel_gt_regs.h|  3 +++
 drivers/gpu/drm/i915/i915_reg.h| 17 -
 3 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index fa54823d1219..e63de9c06596 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -14,6 +14,7 @@
 #include "gem/i915_gem_region.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_mcr.h"
+#include "gt/intel_gt_regs.h"
 #include "gt/intel_region_lmem.h"
 #include "i915_drv.h"
 #include "i915_gem_stolen.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 07ef111947b8..61815b6e87de 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -324,6 +324,9 @@
 
 #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4)
 
+#define XEHPSDV_TILE0_ADDR_RANGE   _MMIO(0x4900)
+#define   XEHPSDV_TILE_LMEM_RANGE_SHIFT8
+
 #define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910)
 #define   XEHPSDV_CCS_BASE_SHIFT   8
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 932bd6aa4a0a..cf5e16abf6c7 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -8345,23 +8345,6 @@ enum skl_power_gate {
 #define   SGGI_DIS REG_BIT(15)
 #define   SGR_DIS  REG_BIT(13)
 
-#define XEHPSDV_TILE0_ADDR_RANGE   _MMIO(0x4900)
-#define   XEHPSDV_TILE_LMEM_RANGE_SHIFT  8
-
-#define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910)
-#define   XEHPSDV_CCS_BASE_SHIFT   8
-
-/* gamt regs */
-#define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4)
-#define   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW  0x67F1427F /* max/min for 
LRA1/2 */
-#define   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV  0x5FF101FF /* max/min for 
LRA1/2 */
-#define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL  0x67F1427F /*"" */
-#define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT  0x5FF101FF /*"" */
-
-#define MMCD_MISC_CTRL _MMIO(0x4ddc) /* skl+ */
-#define  MMCD_PCLA (1 << 31)
-#define  MMCD_HOTSPOT_EN   (1 << 27)
-
 #define _ICL_PHY_MISC_A0x64C00
 #define _ICL_PHY_MISC_B0x64C04
 #define _DG2_PHY_MISC_TC1  0x64C14 /* TC1="PHY E" but offset as if "PHY F" 
*/
-- 
2.36.1



Re: [Intel-gfx] [PATCH] drm/i915/guc: ADL-N should use the same GuC FW as ADL-S

2022-06-21 Thread Matt Roper
On Tue, Jun 21, 2022 at 04:30:05PM -0700, Daniele Ceraolo Spurio wrote:
> The only difference between the ADL S and P GuC FWs is the HWConfig
> support. ADL-N does not support HWConfig, so we should use the same
> binary as ADL-S, otherwise the GuC might attempt to fetch a config
> table that does not exist. ADL-N is internally identified as an ADL-P,
> so we need to special-case it in the FW selection code.
> 
> Fixes: 7e28d0b26759 ("drm/i915/adl-n: Enable ADL-N platform")
> Cc: John Harrison 
> Cc: Tejas Upadhyay 
> Cc: Anusha Srivatsa 
> Cc: Jani Nikula 
> Signed-off-by: Daniele Ceraolo Spurio 

Would the config table still get used somehow even though we return
false for ADL-N in has_table()?

Even if it couldn't be used, this change makes the behavior more clear
and explicit.

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> index d2c5c9367cc4..ef2d10184ee2 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> @@ -162,6 +162,15 @@ __uc_fw_auto_select(struct drm_i915_private *i915, 
> struct intel_uc_fw *uc_fw)
>   u8 rev = INTEL_REVID(i915);
>   int i;
>  
> + /*
> +  * The only difference between the ADL GuC FWs is the HWConfig support.
> +  * ADL-N does not support HWConfig, so we should use the same binary as
> +  * ADL-S, otherwise the GuC might attempt to fetch a config table that
> +  * does not exist.
> +  */
> + if (IS_ADLP_N(i915))
> + p = INTEL_ALDERLAKE_S;
> +
>   GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
>   fw_blobs = blobs_all[uc_fw->type].blobs;
>   fw_count = blobs_all[uc_fw->type].count;
> -- 
> 2.25.1
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [PATCH 2/2] drm/i915/gt: Re-do the intel-gtt split

2022-06-17 Thread Matt Roper
snb_pte_encode;
> +
> + ggtt->vm.vma_ops.bind_vma= intel_ggtt_bind_vma;
> + ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
> +
> + return ggtt_probe_common(ggtt, size);
> +}
> +
>  static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
>  {
>   struct drm_i915_private *i915 = gt->i915;
> @@ -576,12 +1104,12 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, 
> struct intel_gt *gt)
>   ggtt->vm.dma = i915->drm.dev;
>   dma_resv_init(&ggtt->vm._resv);
>  
> - if (GRAPHICS_VER(i915) <= 5)
> - ret = intel_gt_gmch_gen5_probe(ggtt);
> + if (GRAPHICS_VER(i915) < 6)
> + ret = intel_ggtt_gmch_probe(ggtt);
>   else if (GRAPHICS_VER(i915) < 8)
> - ret = intel_gt_gmch_gen6_probe(ggtt);
> + ret = gen6_gmch_probe(ggtt);
>   else
> - ret = intel_gt_gmch_gen8_probe(ggtt);
> + ret = gen8_gmch_probe(ggtt);

We could also take the opportunity to reverse the if/else ladder here
and put it in the i915-preferred "newest at top" order.

>   if (ret) {
>   dma_resv_fini(&ggtt->vm._resv);
>   return ret;
> @@ -635,7 +1163,10 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
>  
>  int i915_ggtt_enable_hw(struct drm_i915_private *i915)
>  {
> - return intel_gt_gmch_gen5_enable_hw(i915);
> + if (GRAPHICS_VER(i915) < 6)
> + return intel_ggtt_gmch_enable_hw(i915);
> +
> + return 0;
>  }
>  
>  void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c 
> b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
> new file mode 100644
> index ..1c15825d4bd3
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
> @@ -0,0 +1,132 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2022 Intel Corporation
> + */
> +
> +#include "intel_ggtt_gmch.h"
> +
> +#include 
> +#include 
> +
> +#include 
> +
> +#include "i915_drv.h"
> +#include "i915_utils.h"
> +#include "intel_gtt.h"
> +#include "intel_gt_regs.h"
> +#include "intel_gt.h"
> +
> +static void gen5_ggtt_insert_page(struct i915_address_space *vm,

All the "gen5_" prefixes in this file seem a bit misleading if they wind
up getting used on earlier platforms too; most of the driver uses the
prefix to indicate the first platform/architecture that the function was
used on.  Maybe we should also rename these with a "gmch_" prefix as
well to indicate that they're intended for the platforms where the GMCH
was an independent chip and not integrated into the CPU?


Anyway, all of my review comments are just bikeshedding so feel free to
use or ignore them as you see fit.  Either way, this series should fix
the issue and restore GGTT handling for non-x86.

Reviewed-by: Matt Roper 

> +   dma_addr_t addr,
> +   u64 offset,
> +   enum i915_cache_level cache_level,
> +   u32 unused)
> +{
> + unsigned int flags = (cache_level == I915_CACHE_NONE) ?
> + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
> +
> + intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
> +}
> +
> +static void gen5_ggtt_insert_entries(struct i915_address_space *vm,
> +  struct i915_vma_resource *vma_res,
> +  enum i915_cache_level cache_level,
> +  u32 unused)
> +{
> + unsigned int flags = (cache_level == I915_CACHE_NONE) ?
> + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
> +
> + intel_gmch_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> 
> PAGE_SHIFT,
> +  flags);
> +}
> +
> +static void gen5_ggtt_invalidate(struct i915_ggtt *ggtt)
> +{
> + intel_gmch_gtt_flush();
> +}
> +
> +static void gen5_ggtt_clear_range(struct i915_address_space *vm,
> +   u64 start, u64 length)
> +{
> + intel_gmch_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
> +}
> +
> +static void gen5_ggtt_remove(struct i915_address_space *vm)
> +{
> + intel_gmch_remove();
> +}
> +
> +/*
> + * Certain Gen5 chipsets require idling the GPU before unmapping anything 
> from
> + * the GTT when VT-d is enabled.
> + */
> +static bool needs_idle_maps(struct drm_i915_private *i915)
> +{
> + /*
> +  * Query intel_iommu to see if we need the workaround. Presumably that

Re: [PATCH v2 2/2] drm/i915/gt: Cleanup interface for MCR operations

2022-06-17 Thread Matt Roper
On Fri, Jun 17, 2022 at 06:57:20AM -0700, Harish Chegondi wrote:
> On Tue, Jun 14, 2022 at 05:10:19PM -0700, Matt Roper wrote:
> > Let's replace the assortment of intel_gt_* and intel_uncore_* functions
> > that operate on MCR registers with a cleaner set of interfaces:
> > 
> >   * intel_gt_mcr_read -- unicast read from specific instance
> >   * intel_gt_mcr_read_any[_fw] -- unicast read from any non-terminated
> > instance
> >   * intel_gt_mcr_unicast_write -- unicast write to specific instance
> >   * intel_gt_mcr_multicast_write[_fw] -- multicast write to all instances
> > 
> > We'll also replace the historic "slice" and "subslice" terminology with
> > "group" and "instance" to match the documentation for more recent
> > platforms; these days MCR steering applies to more types of replication
> > than just slice/subslice.
> > 
> > v2:
> >  - Reference the new kerneldoc from i915.rst.  (Jani)
> >  - Tweak the wording of the documentation for a couple functions to
> >clarify the difference between "_fw" and non-"_fw" forms.
> > 
> > Signed-off-by: Matt Roper 
> > Acked-by: Jani Nikula 
> > ---
> >  Documentation/gpu/i915.rst  |  12 +
> >  drivers/gpu/drm/i915/gem/i915_gem_stolen.c  |   2 +-
> >  drivers/gpu/drm/i915/gt/intel_engine_cs.c   |  33 ++-
> >  drivers/gpu/drm/i915/gt/intel_gt_debugfs.c  |   2 +-
> >  drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 239 
> >  drivers/gpu/drm/i915/gt/intel_gt_mcr.h  |  43 ++--
> >  drivers/gpu/drm/i915/gt/intel_region_lmem.c |   4 +-
> >  drivers/gpu/drm/i915/gt/intel_workarounds.c |   8 +-
> >  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c  |   2 +-
> >  9 files changed, 200 insertions(+), 145 deletions(-)
> > 
> > diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
> > index 54060cd6c419..4e59db1cfb00 100644
> > --- a/Documentation/gpu/i915.rst
> > +++ b/Documentation/gpu/i915.rst
> > @@ -246,6 +246,18 @@ Display State Buffer
> >  .. kernel-doc:: drivers/gpu/drm/i915/display/intel_dsb.c
> > :internal:
> >  
> > +GT Programming
> > +==
> > +
> > +Multicast/Replicated (MCR) Registers
> > +
> > +
> > +.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > +   :doc: GT Multicast/Replicated (MCR) Register Support
> > +
> > +.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > +   :internal:
> > +
> >  Memory Management and Command Submission
> >  
> >  
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
> > b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> > index da30503d3ca2..fa54823d1219 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> > @@ -835,7 +835,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private 
> > *i915, u16 type,
> > } else {
> > resource_size_t lmem_range;
> >  
> > -   lmem_range = intel_gt_read_register(&i915->gt0, 
> > XEHPSDV_TILE0_ADDR_RANGE) & 0x;
> > +   lmem_range = intel_gt_mcr_read_any(&i915->gt0, 
> > XEHPSDV_TILE0_ADDR_RANGE) & 0x;
> > lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT;
> > lmem_size *= SZ_1G;
> > }
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > index 244af1bdb7db..136cc44c3deb 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > @@ -1428,14 +1428,6 @@ void intel_engine_cancel_stop_cs(struct 
> > intel_engine_cs *engine)
> > ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
> >  }
> >  
> > -static u32
> > -read_subslice_reg(const struct intel_engine_cs *engine,
> > - int slice, int subslice, i915_reg_t reg)
> > -{
> > -   return intel_uncore_read_with_mcr_steering(engine->uncore, reg,
> > -  slice, subslice);
> > -}
> > -
> >  /* NB: please notice the memset */
> >  void intel_engine_get_instdone(const struct intel_engine_cs *engine,
> >struct intel_instdone *instdone)
> > @@ -1469,28 +1461,33 @@ void intel_engine_get_instdone(const struct 
> > intel_engine_cs *engine,
> >

Re: [PATCH v2 3/9] drm/i915/dg2: Add DG2_NB_MBD subplatform

2022-06-16 Thread Matt Roper
On Thu, Jun 16, 2022 at 05:31:00PM +0530, Anshuman Gupta wrote:
> DG2 NB SKU need to distinguish between MBD and AIC to probe
> the VRAM Self Refresh feature support. Adding those sub platform
> accordingly.
> 
> Cc: Matt Roper 
> Signed-off-by: Anshuman Gupta 
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |  3 +++
>  drivers/gpu/drm/i915/intel_device_info.c | 21 +
>  drivers/gpu/drm/i915/intel_device_info.h | 11 +++
>  include/drm/i915_pciids.h| 23 ---
>  4 files changed, 47 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a5bc6a774c5a..f1f8699eedfd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1007,10 +1007,13 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
>  #define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, INTEL_PONTEVECCHIO)
>  
>  #define IS_DG2_G10(dev_priv) \
> + IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10_NB_MBD) || \
>   IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10)
>  #define IS_DG2_G11(dev_priv) \
> + IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G11_NB_MBD) || \
>   IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G11)
>  #define IS_DG2_G12(dev_priv) \
> + IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G12_NB_MBD) || \
>   IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G12)
>  #define IS_ADLS_RPLS(dev_priv) \
>   IS_SUBPLATFORM(dev_priv, INTEL_ALDERLAKE_S, INTEL_SUBPLATFORM_RPL)
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
> b/drivers/gpu/drm/i915/intel_device_info.c
> index f0bf23726ed8..93da555adc4e 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -187,6 +187,18 @@ static const u16 subplatform_rpl_ids[] = {
>   INTEL_RPLP_IDS(0),
>  };
>  
> +static const u16 subplatform_g10_mb_mbd_ids[] = {
> + INTEL_DG2_G10_NB_MBD_IDS(0),
> +};
> +
> +static const u16 subplatform_g11_mb_mbd_ids[] = {
> + INTEL_DG2_G11_NB_MBD_IDS(0),
> +};
> +
> +static const u16 subplatform_g12_mb_mbd_ids[] = {
> + INTEL_DG2_G12_NB_MBD_IDS(0),
> +};

We only need a single MBD subplatform, not three new subplatforms.
Unless I'm forgetting something, a single device ID can be assigned two
two independent subplatforms at the same time.  So the decision about
whether to set the G10, G11, or G12 bit is one decision.  The decision
about whether to set the MBD bit is a completely separate decision that
doesn't care about the G10/G11/G12 stuff.

> +
>  static const u16 subplatform_g10_ids[] = {
>   INTEL_DG2_G10_IDS(0),
>   INTEL_ATS_M150_IDS(0),
> @@ -246,6 +258,15 @@ void intel_device_info_subplatform_init(struct 
> drm_i915_private *i915)
>   } else if (find_devid(devid, subplatform_rpl_ids,
> ARRAY_SIZE(subplatform_rpl_ids))) {
>   mask = BIT(INTEL_SUBPLATFORM_RPL);
> + } else if (find_devid(devid, subplatform_g10_mb_mbd_ids,
> +   ARRAY_SIZE(subplatform_g10_mb_mbd_ids))) {
> + mask = BIT(INTEL_SUBPLATFORM_G10_NB_MBD);
> + } else if (find_devid(devid, subplatform_g11_mb_mbd_ids,
> +   ARRAY_SIZE(subplatform_g11_mb_mbd_ids))) {
> + mask = BIT(INTEL_SUBPLATFORM_G11_NB_MBD);
> + } else if (find_devid(devid, subplatform_g12_mb_mbd_ids,
> +   ARRAY_SIZE(subplatform_g12_mb_mbd_ids))) {
> + mask = BIT(INTEL_SUBPLATFORM_G12_NB_MBD);

Assuming you consolidate MBD back down to just a single extra
subplatform, the lookup and bit setting should happen in a separate 'if'
statement (not an 'else' block).

if (find_devid(devid, subplatform_mbd_ids,
   ARRAY_SIZE(subplatform_mbd_ids)))
mask |= BIT(INTEL_SUBPLATFORM_MBD);


Matt

>   } else if (find_devid(devid, subplatform_g10_ids,
> ARRAY_SIZE(subplatform_g10_ids))) {
>   mask = BIT(INTEL_SUBPLATFORM_G10);
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
> b/drivers/gpu/drm/i915/intel_device_info.h
> index 08341174ee0a..c929e2d7e59c 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -97,7 +97,7 @@ enum intel_platform {
>   * it is fine for the same bit to be used on multiple parent platforms.
>   */
>  
> -#define INTEL_SUBPLATFORM_BITS (3)
> +#define INTEL_SUBPLATFORM_BITS (6)
>  #define INTEL_SUBPLATFORM_MASK (BIT(INTEL_SUBPLATFORM_BITS) - 1)
>  
>  /* HSW/BDW/SKL/KBL/CFL */
> @@ -111,9 +111,12 @@ enum

[PATCH v2 2/2] drm/i915/gt: Cleanup interface for MCR operations

2022-06-14 Thread Matt Roper
Let's replace the assortment of intel_gt_* and intel_uncore_* functions
that operate on MCR registers with a cleaner set of interfaces:

  * intel_gt_mcr_read -- unicast read from specific instance
  * intel_gt_mcr_read_any[_fw] -- unicast read from any non-terminated
instance
  * intel_gt_mcr_unicast_write -- unicast write to specific instance
  * intel_gt_mcr_multicast_write[_fw] -- multicast write to all instances

We'll also replace the historic "slice" and "subslice" terminology with
"group" and "instance" to match the documentation for more recent
platforms; these days MCR steering applies to more types of replication
than just slice/subslice.

v2:
 - Reference the new kerneldoc from i915.rst.  (Jani)
 - Tweak the wording of the documentation for a couple functions to
   clarify the difference between "_fw" and non-"_fw" forms.

Signed-off-by: Matt Roper 
Acked-by: Jani Nikula 
---
 Documentation/gpu/i915.rst  |  12 +
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c  |   2 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c   |  33 ++-
 drivers/gpu/drm/i915/gt/intel_gt_debugfs.c  |   2 +-
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 239 
 drivers/gpu/drm/i915/gt/intel_gt_mcr.h  |  43 ++--
 drivers/gpu/drm/i915/gt/intel_region_lmem.c |   4 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c |   8 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c  |   2 +-
 9 files changed, 200 insertions(+), 145 deletions(-)

diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 54060cd6c419..4e59db1cfb00 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -246,6 +246,18 @@ Display State Buffer
 .. kernel-doc:: drivers/gpu/drm/i915/display/intel_dsb.c
:internal:
 
+GT Programming
+==
+
+Multicast/Replicated (MCR) Registers
+
+
+.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+   :doc: GT Multicast/Replicated (MCR) Register Support
+
+.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+   :internal:
+
 Memory Management and Command Submission
 
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index da30503d3ca2..fa54823d1219 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -835,7 +835,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, 
u16 type,
} else {
resource_size_t lmem_range;
 
-   lmem_range = intel_gt_read_register(&i915->gt0, 
XEHPSDV_TILE0_ADDR_RANGE) & 0x;
+   lmem_range = intel_gt_mcr_read_any(&i915->gt0, 
XEHPSDV_TILE0_ADDR_RANGE) & 0x;
lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT;
lmem_size *= SZ_1G;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 244af1bdb7db..136cc44c3deb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1428,14 +1428,6 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs 
*engine)
ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
-static u32
-read_subslice_reg(const struct intel_engine_cs *engine,
- int slice, int subslice, i915_reg_t reg)
-{
-   return intel_uncore_read_with_mcr_steering(engine->uncore, reg,
-  slice, subslice);
-}
-
 /* NB: please notice the memset */
 void intel_engine_get_instdone(const struct intel_engine_cs *engine,
   struct intel_instdone *instdone)
@@ -1469,28 +1461,33 @@ void intel_engine_get_instdone(const struct 
intel_engine_cs *engine,
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
for_each_instdone_gslice_dss_xehp(i915, sseu, iter, 
slice, subslice) {
instdone->sampler[slice][subslice] =
-   read_subslice_reg(engine, slice, 
subslice,
- 
GEN7_SAMPLER_INSTDONE);
+   intel_gt_mcr_read(engine->gt,
+ GEN7_SAMPLER_INSTDONE,
+ slice, subslice);
instdone->row[slice][subslice] =
-   read_subslice_reg(engine, slice, 
subslice,
- GEN7_ROW_INSTDONE);
+   intel_gt_mcr_read(engine->gt,
+ GEN7_ROW_INSTDONE,
+ slice, subslice);
  

[PATCH v2 0/2] i915: Extract, polish, and document multicast handling

2022-06-14 Thread Matt Roper
Multicast/replicated (MCR) registers on Intel hardware are a purely
GT-specific concept.  Rather than leaving MCR register handling spread
across several places throughout the driver (intel_uncore.c, intel_gt.c,
etc.) with confusing combinations of handler functions living in
different namespaces, let's consolidate it all into a single place
(intel_gt_mcr.c) and provide a more consistent and clearly-documented
interface for the rest of the driver to access such registers:

 * intel_gt_mcr_read -- unicast read from specific instance
 * intel_gt_mcr_read_any[_fw] -- unicast read from any non-terminated
   instance
 * intel_gt_mcr_unicast_write -- unicast write to specific instance
 * intel_gt_mcr_multicast_write[_fw] -- multicast write to all instances


v2:
 - Reference the new kerneldoc from i915.rst.  (Jani)
 - Tweak the wording of the documentation for a couple functions to
   clarify the difference between "_fw" and non-"_fw" forms.

Matt Roper (2):
  drm/i915/gt: Move multicast register handling to a dedicated file
  drm/i915/gt: Cleanup interface for MCR operations

 Documentation/gpu/i915.rst  |  12 +
 drivers/gpu/drm/i915/Makefile   |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c  |   3 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c   |  36 +-
 drivers/gpu/drm/i915/gt/intel_gt.c  | 297 +---
 drivers/gpu/drm/i915/gt/intel_gt.h  |  15 -
 drivers/gpu/drm/i915/gt/intel_gt_debugfs.c  |   3 +-
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 497 
 drivers/gpu/drm/i915/gt/intel_gt_mcr.h  |  34 ++
 drivers/gpu/drm/i915/gt/intel_region_lmem.c |   5 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c |   9 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c  |   3 +-
 drivers/gpu/drm/i915/i915_drv.h |   2 -
 drivers/gpu/drm/i915/intel_uncore.c | 112 -
 drivers/gpu/drm/i915/intel_uncore.h |   8 -
 15 files changed, 577 insertions(+), 460 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.h

-- 
2.35.3



[PATCH v2 1/2] drm/i915/gt: Move multicast register handling to a dedicated file

2022-06-14 Thread Matt Roper
Handling of multicast/replicated registers is spread across intel_gt.c
and intel_uncore.c today.  As multicast handling and the related
steering logic gets more complicated with the addition of new platforms
and new rules it makes sense to centralize it all in one place.

For now the existing functions have been moved to the new .c/.h as-is.
Function renames and updates to operate in a more consistent manner will
be done in subsequent patches.

Signed-off-by: Matt Roper 
Acked-by: Jani Nikula 
---
 drivers/gpu/drm/i915/Makefile   |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c  |   1 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c   |   3 +-
 drivers/gpu/drm/i915/gt/intel_gt.c  | 297 +
 drivers/gpu/drm/i915/gt/intel_gt.h  |  15 -
 drivers/gpu/drm/i915/gt/intel_gt_debugfs.c  |   1 +
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 448 
 drivers/gpu/drm/i915/gt/intel_gt_mcr.h  |  37 ++
 drivers/gpu/drm/i915/gt/intel_region_lmem.c |   1 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c |   1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c  |   1 +
 drivers/gpu/drm/i915/i915_drv.h |   2 -
 drivers/gpu/drm/i915/intel_uncore.c | 112 -
 drivers/gpu/drm/i915/intel_uncore.h |   8 -
 14 files changed, 495 insertions(+), 433 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index d2b18f03a33c..08f5d0d6e83a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -103,6 +103,7 @@ gt-y += \
gt/intel_gt_debugfs.o \
gt/intel_gt_engines_debugfs.o \
gt/intel_gt_irq.o \
+   gt/intel_gt_mcr.o \
gt/intel_gt_pm.o \
gt/intel_gt_pm_debugfs.o \
gt/intel_gt_pm_irq.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 47b5e0e342ab..da30503d3ca2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -13,6 +13,7 @@
 #include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_region.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_mcr.h"
 #include "gt/intel_region_lmem.h"
 #include "i915_drv.h"
 #include "i915_gem_stolen.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f0acf8518a51..244af1bdb7db 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -21,8 +21,9 @@
 #include "intel_engine_user.h"
 #include "intel_execlists_submission.h"
 #include "intel_gt.h"
-#include "intel_gt_requests.h"
+#include "intel_gt_mcr.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
 #include "intel_lrc.h"
 #include "intel_lrc_reg.h"
 #include "intel_reset.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index f33290358c51..be9877c4b496 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -17,6 +17,7 @@
 #include "intel_gt_clock_utils.h"
 #include "intel_gt_debugfs.h"
 #include "intel_gt_gmch.h"
+#include "intel_gt_mcr.h"
 #include "intel_gt_pm.h"
 #include "intel_gt_regs.h"
 #include "intel_gt_requests.h"
@@ -102,107 +103,13 @@ int intel_gt_assign_ggtt(struct intel_gt *gt)
return gt->ggtt ? 0 : -ENOMEM;
 }
 
-static const char * const intel_steering_types[] = {
-   "L3BANK",
-   "MSLICE",
-   "LNCF",
-   "INSTANCE 0",
-};
-
-static const struct intel_mmio_range icl_l3bank_steering_table[] = {
-   { 0x00B100, 0x00B3FF },
-   {},
-};
-
-static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = {
-   { 0x004000, 0x004AFF },
-   { 0x00C800, 0x00CFFF },
-   { 0x00DD00, 0x00DDFF },
-   { 0x00E900, 0x00 }, /* 0xEA00 - OxEFFF is unused */
-   {},
-};
-
-static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = {
-   { 0x00B000, 0x00B0FF },
-   { 0x00D800, 0x00D8FF },
-   {},
-};
-
-static const struct intel_mmio_range dg2_lncf_steering_table[] = {
-   { 0x00B000, 0x00B0FF },
-   { 0x00D880, 0x00D8FF },
-   {},
-};
-
-/*
- * We have several types of MCR registers on PVC where steering to (0,0)
- * will always provide us with a non-terminated value.  We'll stick them
- * all in the same table for simplicity.
- */
-static const struct intel_mmio_range pvc_instance0_steering_table[] = {
-   { 0x004000, 0x004AFF }, /* HALF-BSLICE */
-   { 0x008800, 0x00887F }, /* CC */
-   { 0x008A80, 0x008AFF }, /* TILEPSMI */
-   { 

[PATCH] drm/i915/pvc: Add recommended MMIO setting

2022-06-13 Thread Matt Roper
As with past platforms, the bspec's performance tuning guide provides
recommended MMIO settings.  Although not technically "workarounds" we
apply these through the workaround framework to ensure that they're
re-applied at the proper times (e.g., on engine resets) and that any
conflicts with real workarounds are flagged.

Bspec: 72161
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 5 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +
 2 files changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 226557018037..07ef111947b8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -981,6 +981,11 @@
 #define XEHP_L3SCQREG7 _MMIO(0xb188)
 #define   BLEND_FILL_CACHING_OPT_DIS   REG_BIT(3)
 
+#define XEHPC_L3SCRUB  _MMIO(0xb18c)
+#define   SCRUB_CL_DWNGRADE_SHARED REG_BIT(12)
+#define   SCRUB_RATE_PER_BANK_MASK REG_GENMASK(2, 0)
+#define   SCRUB_RATE_4B_PER_CLK
REG_FIELD_PREP(SCRUB_RATE_PER_BANK_MASK, 0x6)
+
 #define L3SQCREG1_CCS0 _MMIO(0xb200)
 #define   FLUSHALLNONCOH   REG_BIT(5)
 
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 1e982ac931dc..c4af51144216 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2679,6 +2679,15 @@ general_render_compute_wa_init(struct intel_engine_cs 
*engine, struct i915_wa_li
 {
struct drm_i915_private *i915 = engine->i915;
 
+   if (IS_PONTEVECCHIO(i915)) {
+   /*
+* The following is not actually a "workaround" but rather
+* a recommended tuning setting documented in the bspec's
+* performance guide section.
+*/
+   wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | 
SCRUB_RATE_4B_PER_CLK);
+   }
+
if (IS_XEHPSDV(i915)) {
/* Wa_1409954639 */
wa_masked_en(wal,
-- 
2.35.3



[PATCH 2/2] drm/i915/gt: Cleanup interface for MCR operations

2022-06-10 Thread Matt Roper
Let's replace the assortment of intel_gt_* and intel_uncore_* functions
that operate on MCR registers with a cleaner set of interfaces:

  * intel_gt_mcr_read -- unicast read from specific instance
  * intel_gt_mcr_read_any[_fw] -- unicast read from any non-terminated
instance
  * intel_gt_mcr_unicast_write -- unicast write to specific instance
  * intel_gt_mcr_multicast_write[_fw] -- multicast write to all instances

We'll also replace the historic "slice" and "subslice" terminology with
"group" and "instance" to match the documentation for more recent
platforms; these days MCR steering applies to more types of replication
than just slice/subslice.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c  |   2 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c   |  33 ++-
 drivers/gpu/drm/i915/gt/intel_gt_debugfs.c  |   2 +-
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 225 +++-
 drivers/gpu/drm/i915/gt/intel_gt_mcr.h  |  43 ++--
 drivers/gpu/drm/i915/gt/intel_region_lmem.c |   4 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c |   8 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c  |   2 +-
 8 files changed, 174 insertions(+), 145 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index da30503d3ca2..fa54823d1219 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -835,7 +835,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, 
u16 type,
} else {
resource_size_t lmem_range;
 
-   lmem_range = intel_gt_read_register(&i915->gt0, 
XEHPSDV_TILE0_ADDR_RANGE) & 0x;
+   lmem_range = intel_gt_mcr_read_any(&i915->gt0, 
XEHPSDV_TILE0_ADDR_RANGE) & 0x;
lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT;
lmem_size *= SZ_1G;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 244af1bdb7db..136cc44c3deb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1428,14 +1428,6 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs 
*engine)
ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
-static u32
-read_subslice_reg(const struct intel_engine_cs *engine,
- int slice, int subslice, i915_reg_t reg)
-{
-   return intel_uncore_read_with_mcr_steering(engine->uncore, reg,
-  slice, subslice);
-}
-
 /* NB: please notice the memset */
 void intel_engine_get_instdone(const struct intel_engine_cs *engine,
   struct intel_instdone *instdone)
@@ -1469,28 +1461,33 @@ void intel_engine_get_instdone(const struct 
intel_engine_cs *engine,
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
for_each_instdone_gslice_dss_xehp(i915, sseu, iter, 
slice, subslice) {
instdone->sampler[slice][subslice] =
-   read_subslice_reg(engine, slice, 
subslice,
- 
GEN7_SAMPLER_INSTDONE);
+   intel_gt_mcr_read(engine->gt,
+ GEN7_SAMPLER_INSTDONE,
+ slice, subslice);
instdone->row[slice][subslice] =
-   read_subslice_reg(engine, slice, 
subslice,
- GEN7_ROW_INSTDONE);
+   intel_gt_mcr_read(engine->gt,
+ GEN7_ROW_INSTDONE,
+ slice, subslice);
}
} else {
for_each_instdone_slice_subslice(i915, sseu, slice, 
subslice) {
instdone->sampler[slice][subslice] =
-   read_subslice_reg(engine, slice, 
subslice,
- 
GEN7_SAMPLER_INSTDONE);
+   intel_gt_mcr_read(engine->gt,
+ GEN7_SAMPLER_INSTDONE,
+ slice, subslice);
instdone->row[slice][subslice] =
-   read_subslice_reg(engine, slice, 
subslice,
- GEN7_ROW_INSTDONE);
+   intel_gt_mcr_read(engine->gt,
+ 

[PATCH 1/2] drm/i915/gt: Move multicast register handling to a dedicated file

2022-06-10 Thread Matt Roper
Handling of multicast/replicated registers is spread across intel_gt.c
and intel_uncore.c today.  As multicast handling and the related
steering logic gets more complicated with the addition of new platforms
and new rules it makes sense to centralize it all in one place.

For now the existing functions have been moved to the new .c/.h as-is.
Function renames and updates to operate in a more consistent manner will
be done in subsequent patches.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/Makefile   |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c  |   1 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c   |   3 +-
 drivers/gpu/drm/i915/gt/intel_gt.c  | 297 +
 drivers/gpu/drm/i915/gt/intel_gt.h  |  15 -
 drivers/gpu/drm/i915/gt/intel_gt_debugfs.c  |   1 +
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 448 
 drivers/gpu/drm/i915/gt/intel_gt_mcr.h  |  37 ++
 drivers/gpu/drm/i915/gt/intel_region_lmem.c |   1 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c |   1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c  |   1 +
 drivers/gpu/drm/i915/i915_drv.h |   2 -
 drivers/gpu/drm/i915/intel_uncore.c | 112 -
 drivers/gpu/drm/i915/intel_uncore.h |   8 -
 14 files changed, 495 insertions(+), 433 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index d2b18f03a33c..08f5d0d6e83a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -103,6 +103,7 @@ gt-y += \
gt/intel_gt_debugfs.o \
gt/intel_gt_engines_debugfs.o \
gt/intel_gt_irq.o \
+   gt/intel_gt_mcr.o \
gt/intel_gt_pm.o \
gt/intel_gt_pm_debugfs.o \
gt/intel_gt_pm_irq.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 47b5e0e342ab..da30503d3ca2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -13,6 +13,7 @@
 #include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_region.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_mcr.h"
 #include "gt/intel_region_lmem.h"
 #include "i915_drv.h"
 #include "i915_gem_stolen.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f0acf8518a51..244af1bdb7db 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -21,8 +21,9 @@
 #include "intel_engine_user.h"
 #include "intel_execlists_submission.h"
 #include "intel_gt.h"
-#include "intel_gt_requests.h"
+#include "intel_gt_mcr.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
 #include "intel_lrc.h"
 #include "intel_lrc_reg.h"
 #include "intel_reset.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index f33290358c51..be9877c4b496 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -17,6 +17,7 @@
 #include "intel_gt_clock_utils.h"
 #include "intel_gt_debugfs.h"
 #include "intel_gt_gmch.h"
+#include "intel_gt_mcr.h"
 #include "intel_gt_pm.h"
 #include "intel_gt_regs.h"
 #include "intel_gt_requests.h"
@@ -102,107 +103,13 @@ int intel_gt_assign_ggtt(struct intel_gt *gt)
return gt->ggtt ? 0 : -ENOMEM;
 }
 
-static const char * const intel_steering_types[] = {
-   "L3BANK",
-   "MSLICE",
-   "LNCF",
-   "INSTANCE 0",
-};
-
-static const struct intel_mmio_range icl_l3bank_steering_table[] = {
-   { 0x00B100, 0x00B3FF },
-   {},
-};
-
-static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = {
-   { 0x004000, 0x004AFF },
-   { 0x00C800, 0x00CFFF },
-   { 0x00DD00, 0x00DDFF },
-   { 0x00E900, 0x00 }, /* 0xEA00 - OxEFFF is unused */
-   {},
-};
-
-static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = {
-   { 0x00B000, 0x00B0FF },
-   { 0x00D800, 0x00D8FF },
-   {},
-};
-
-static const struct intel_mmio_range dg2_lncf_steering_table[] = {
-   { 0x00B000, 0x00B0FF },
-   { 0x00D880, 0x00D8FF },
-   {},
-};
-
-/*
- * We have several types of MCR registers on PVC where steering to (0,0)
- * will always provide us with a non-terminated value.  We'll stick them
- * all in the same table for simplicity.
- */
-static const struct intel_mmio_range pvc_instance0_steering_table[] = {
-   { 0x004000, 0x004AFF }, /* HALF-BSLICE */
-   { 0x008800, 0x00887F }, /* CC */
-   { 0x008A80, 0x008AFF }, /* TILEPSMI */
-   { 0x00B000, 

[PATCH 0/2] i915: Extract, polish, and document multicast handling

2022-06-10 Thread Matt Roper
Multicast/replicated (MCR) registers on Intel hardware are a purely
GT-specific concept.  Rather than leaving MCR register handling spread
across several places throughout the driver (intel_uncore.c, intel_gt.c,
etc.) with confusing combinations of handler functions living in
different namespaces, let's consolidate it all into a single place
(intel_gt_mcr.c) and provide a more consistent and clearly-documented
interface for the rest of the driver to access such registers:

 * intel_gt_mcr_read -- unicast read from specific instance
 * intel_gt_mcr_read_any[_fw] -- unicast read from any non-terminated
   instance
 * intel_gt_mcr_unicast_write -- unicast write to specific instance
 * intel_gt_mcr_multicast_write[_fw] -- multicast write to all instances

Matt Roper (2):
  drm/i915/gt: Move multicast register handling to a dedicated file
  drm/i915/gt: Cleanup interface for MCR operations

 drivers/gpu/drm/i915/Makefile   |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c  |   3 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c   |  36 +-
 drivers/gpu/drm/i915/gt/intel_gt.c  | 297 +---
 drivers/gpu/drm/i915/gt/intel_gt.h  |  15 -
 drivers/gpu/drm/i915/gt/intel_gt_debugfs.c  |   3 +-
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 483 
 drivers/gpu/drm/i915/gt/intel_gt_mcr.h  |  34 ++
 drivers/gpu/drm/i915/gt/intel_region_lmem.c |   5 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c |   9 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c  |   3 +-
 drivers/gpu/drm/i915/i915_drv.h |   2 -
 drivers/gpu/drm/i915/intel_uncore.c | 112 -
 drivers/gpu/drm/i915/intel_uncore.h |   8 -
 14 files changed, 551 insertions(+), 460 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.h

-- 
2.35.3



[PATCH] drm/i915: More PVC+DG2 workarounds

2022-06-07 Thread Matt Roper
A new PVC+DG2 workaround has appeared recently:
 - Wa_16015675438

And a couple existing DG2 workarounds have been extended to PVC:
 - Wa_14015795083
 - Wa_18018781329

Note that Wa_16015675438 asks us to program a register that is in the
0x2xxx range typically associated with the RCS engine, even though PVC
does not have an RCS.  By default the GuC will think we've made a
mistake and throw an exception when it sees this register on a CCS
engine's save/restore list, so we need to pass an extra GuC control flag
to tell it that this is expected and not a problem.

Signed-off-by: Anshuman Gupta 
Signed-off-by: Badal Nilawar 
Signed-off-by: Prathap Kumar Valsan 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  1 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 24 +++--
 drivers/gpu/drm/i915/gt/uc/intel_guc.c  |  4 
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h |  1 +
 4 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index c8129a351731..226557018037 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -140,6 +140,7 @@
 #define FF_SLICE_CS_CHICKEN2   _MMIO(0x20e4)
 #define   GEN9_TSG_BARRIER_ACK_DISABLE (1 << 8)
 #define   GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE(1 << 10)
+#define   GEN12_PERF_FIX_BALANCING_CFE_DISABLE REG_BIT(15)
 
 #define GEN9_CS_DEBUG_MODE1_MMIO(0x20ec)
 #define   FF_DOP_CLOCK_GATE_DISABLEREG_BIT(1)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 1e7ca3863f20..e1e70eff9aac 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1491,13 +1491,20 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
 }
 
+static void
+pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+   /* Wa_14015795083 */
+   wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+}
+
 static void
 gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
 {
struct drm_i915_private *i915 = gt->i915;
 
if (IS_PONTEVECCHIO(i915))
-   ; /* none yet */
+   pvc_gt_workarounds_init(gt, wal);
else if (IS_DG2(i915))
dg2_gt_workarounds_init(gt, wal);
else if (IS_XEHPSDV(i915))
@@ -2082,12 +2089,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, 
struct i915_wa_list *wal)
 * performance guide section.
 */
wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
-
-   /* Wa_18018781329:dg2 */
-   wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
-   wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
-   wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
-   wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
}
 
if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
@@ -2700,6 +2701,15 @@ general_render_compute_wa_init(struct intel_engine_cs 
*engine, struct i915_wa_li
 
/* Wa_22014226127:dg2,pvc */
wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
+
+   /* Wa_16015675438:dg2,pvc */
+   wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, 
GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
+
+   /* Wa_18018781329:dg2,pvc */
+   wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
}
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 2c4ad4a65089..35887cb53201 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -327,6 +327,10 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER))
flags |= GUC_WA_CONTEXT_ISOLATION;
 
+   /* Wa_16015675438 */
+   if (!RCS_MASK(gt))
+   flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
+
return flags;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 42cb7a9a6199..b3c9a9327f76 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -105,6 +105,7 @@
 #define   GUC_WA_PRE_PARSERBIT(14)
 #define   GUC_WA_HOLD_CCS_SWITCHOUTBIT(17)
 #define   GUC_WA_POLLCSBIT(18)
+#define   GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21

[PATCH] drm/i915/xehp: Correct steering initialization

2022-06-07 Thread Matt Roper
Another mistake during the conversion to DSS bitmaps:  after retrieving
the DSS ID intel_sseu_find_first_xehp_dss() we forgot to modulo it down
to obtain which ID within the current gslice it is.

Fixes: b87d39019651 ("drm/i915/sseu: Disassociate internal subslice mask 
representation from uapi")
Cc: Balasubramani Vivekanandan 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index b7421f109c13..a5c0508c5b63 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1177,8 +1177,8 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list 
*wal)
}
 
slice = __ffs(slice_mask);
-   subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, 
slice);
-   WARN_ON(subslice > GEN_DSS_PER_GSLICE);
+   subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, 
slice) %
+   GEN_DSS_PER_GSLICE;
 
__add_mcr_wa(gt, wal, slice, subslice);
 
-- 
2.35.3



[PATCH] drm/i915/dg2: Correct DSS check for Wa_1308578152

2022-06-07 Thread Matt Roper
When converting our DSS masks to bitmaps, we fumbled the condition used
to check whether any DSS are present in the first gslice.  Since
intel_sseu_find_first_xehp_dss() returns a 0-based number, we need a >=
condition rather than >.

Fixes: b87d39019651 ("drm/i915/sseu: Disassociate internal subslice mask 
representation from uapi")
Reported-by: Balasubramani Vivekanandan 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 1b191b234160..67104ba8951e 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2079,7 +2079,7 @@ engine_fake_wa_init(struct intel_engine_cs *engine, 
struct i915_wa_list *wal)
 
 static bool needs_wa_1308578152(struct intel_engine_cs *engine)
 {
-   return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >
+   return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >=
GEN_DSS_PER_GSLICE;
 }
 
-- 
2.35.3



Re: [PATCH] drm/i915/dg2: Catch and log more unexpected values in DG1_MSTR_TILE_INTR

2022-06-06 Thread Matt Roper
On Mon, Jun 06, 2022 at 12:55:20PM +0100, Tvrtko Ursulin wrote:
> 
> On 27/05/2022 19:42, Matt Roper wrote:
> > On Thu, May 26, 2022 at 11:18:17AM +0100, Tvrtko Ursulin wrote:
> > > On 25/05/2022 19:05, Matt Roper wrote:
> > > > On Wed, May 25, 2022 at 05:03:13PM +0100, Tvrtko Ursulin wrote:
> > > > > 
> > > > > On 24/05/2022 18:51, Matt Roper wrote:
> > > > > > On Tue, May 24, 2022 at 10:43:39AM +0100, Tvrtko Ursulin wrote:
> > > > > > > From: Tvrtko Ursulin 
> > > > > > > 
> > > > > > > Catch and log any garbage in the register, including no tiles 
> > > > > > > marked, or
> > > > > > > multiple tiles marked.
> > > > > > > 
> > > > > > > Signed-off-by: Tvrtko Ursulin 
> > > > > > > Cc: Matt Roper 
> > > > > > > ---
> > > > > > > We caught garbage in DG1_MSTR_TILE_INTR with DG2 (actual value 
> > > > > > > 0xF9D2C008)
> > > > > > > during glmark and more badness. So I thought lets log all 
> > > > > > > possible failure
> > > > > > > modes from here and also use per device logging.
> > > > > > > ---
> > > > > > > drivers/gpu/drm/i915/i915_irq.c | 33 
> > > > > > > ++---
> > > > > > > drivers/gpu/drm/i915/i915_reg.h |  1 +
> > > > > > > 2 files changed, 23 insertions(+), 11 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/drivers/gpu/drm/i915/i915_irq.c 
> > > > > > > b/drivers/gpu/drm/i915/i915_irq.c
> > > > > > > index 73cebc6aa650..79853d3fc1ed 100644
> > > > > > > --- a/drivers/gpu/drm/i915/i915_irq.c
> > > > > > > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > > > > > > @@ -2778,24 +2778,30 @@ static irqreturn_t dg1_irq_handler(int 
> > > > > > > irq, void *arg)
> > > > > > >   u32 gu_misc_iir;
> > > > > > >   if (!intel_irqs_enabled(i915))
> > > > > > > - return IRQ_NONE;
> > > > > > > + goto none;
> > > > > > >   master_tile_ctl = dg1_master_intr_disable(regs);
> > > > > > > - if (!master_tile_ctl) {
> > > > > > > - dg1_master_intr_enable(regs);
> > > > > > > - return IRQ_NONE;
> > > > > > > + if (!master_tile_ctl)
> > > > > > > + goto enable_none;
> > > > > > > +
> > > > > > > + if (master_tile_ctl & ~(DG1_MSTR_IRQ | DG1_MSTR_TILE_MASK)) {
> > > > > > > + drm_warn(&i915->drm, "Garbage in master_tile_ctl: 
> > > > > > > 0x%08x!\n",
> > > > > > > +  master_tile_ctl);
> > > > > > 
> > > > > > I know we have a bunch of them already, but shouldn't we be avoiding
> > > > > > printk-based stuff like this inside interrupt handlers?  Should we 
> > > > > > be
> > > > > > migrating all these error messages over to trace_printk or something
> > > > > > similar that's safer to use?
> > > > > 
> > > > > Not sure - I kind of think some really unexpected and worrying 
> > > > > situations
> > > > > should be loud and on by default. Risk is then spam if not 
> > > > > ratelimited.
> > > > > Maybe we should instead ratelimit most errors/warnings coming for irq
> > > > > handlers?
> > > > 
> > > > It's not the risk of spam that's the problem, but rather that
> > > > printk-based stuff eventually calls into the console code to flush its
> > > > buffers.  That's way more overhead than you want in an interrupt handler
> > > > so it's bad on its own, but if you're using something slow like a serial
> > > > console, it becomes even more of a problem.
> > > 
> > > Is it a problem for messages which we never expect to see?
> > 
> > Kind of.  While not as catastrophic, it's the same argument for why we
> > don't use BUG() anymore...when the impossible does manage to happen
> > there's unnecessary collateral damage on things outside of graphics.  If
> > 

[PATCH] drm/i915/pvc: Add register steering

2022-06-02 Thread Matt Roper
Ponte Vecchio no longer has MSLICE or LNCF steering, but the bspec does
document several new types of multicast register ranges.  Fortunately,
most of the different MCR types all provide valid values at instance
(0,0) so there's no need to read fuse registers and calculate a valid
instance.  We'll lump all of those range types (BSLICE, HALFBSLICE,
TILEPSMI, CC, and L3BANK) into a single category called "INSTANCE0" to
keep things simple.  We'll also perform explicit steering for each of
these multicast register types, even if the implicit steering setup for
COMPUTE/DSS ranges would have worked too; this is based on guidance from
our hardware architects who suggested that we move away from implicit
steering and start explicitly steer all MCR register accesses on modern
platforms (we'll work on transitioning COMPUTE/DSS to explicit steering
in the future).

Note that there's one additional MCR range type defined in the bspec
(SQIDI) that we don't handle here.  Those ranges use a different
steering control register that we never touch; since instance 0 is also
always a valid setting there, we can just ignore those ranges.

Finally, we'll rename the HAS_MSLICES() macro to HAS_MSLICE_STEERING().
PVC hardware still has units referred to as mslices, but there's no
register steering based on mslice for this platform.

Bspec: 67609
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt.c  | 50 ++---
 drivers/gpu/drm/i915/gt/intel_gt_types.h|  7 +++
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 22 -
 drivers/gpu/drm/i915/i915_drv.h |  3 +-
 drivers/gpu/drm/i915/i915_pci.c |  3 +-
 drivers/gpu/drm/i915/intel_device_info.h|  2 +-
 6 files changed, 76 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index ddfb98f70489..b335eacd7a6c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -106,6 +106,7 @@ static const char * const intel_steering_types[] = {
"L3BANK",
"MSLICE",
"LNCF",
+   "INSTANCE 0",
 };
 
 static const struct intel_mmio_range icl_l3bank_steering_table[] = {
@@ -133,6 +134,27 @@ static const struct intel_mmio_range 
dg2_lncf_steering_table[] = {
{},
 };
 
+/*
+ * We have several types of MCR registers on PVC where steering to (0,0)
+ * will always provide us with a non-terminated value.  We'll stick them
+ * all in the same table for simplicity.
+ */
+static const struct intel_mmio_range pvc_instance0_steering_table[] = {
+   { 0x004000, 0x004AFF }, /* HALF-BSLICE */
+   { 0x008A80, 0x008AFF }, /* TILEPSMI */
+   { 0x008800, 0x00887F }, /* CC */
+   { 0x00B000, 0x00B0FF }, /* HALF-BSLICE */
+   { 0x00B100, 0x00B3FF }, /* L3BANK */
+   { 0x00C800, 0x00CFFF }, /* HALF-BSLICE */
+   { 0x00D800, 0x00D8FF }, /* HALF-BSLICE */
+   { 0x00DD00, 0x00DDFF }, /* BSLICE */
+   { 0x00E900, 0x00E9FF }, /* HALF-BSLICE */
+   { 0x00EC00, 0x00EEFF }, /* HALF-BSLICE */
+   { 0x00F000, 0x00 }, /* HALF-BSLICE */
+   { 0x024180, 0x0241FF }, /* HALF-BSLICE */
+   {},
+};
+
 int intel_gt_init_mmio(struct intel_gt *gt)
 {
struct drm_i915_private *i915 = gt->i915;
@@ -146,7 +168,7 @@ int intel_gt_init_mmio(struct intel_gt *gt)
 * An mslice is unavailable only if both the meml3 for the slice is
 * disabled *and* all of the DSS in the slice (quadrant) are disabled.
 */
-   if (HAS_MSLICES(i915)) {
+   if (HAS_MSLICE_STEERING(i915)) {
gt->info.mslice_mask =

intel_slicemask_from_xehp_dssmask(gt->info.sseu.subslice_mask,
  GEN_DSS_PER_MSLICE);
@@ -158,7 +180,9 @@ int intel_gt_init_mmio(struct intel_gt *gt)
drm_warn(&i915->drm, "mslice mask all zero!\n");
}
 
-   if (IS_DG2(i915)) {
+   if (IS_PONTEVECCHIO(i915)) {
+   gt->steering_table[INSTANCE0] = pvc_instance0_steering_table;
+   } else if (IS_DG2(i915)) {
gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
gt->steering_table[LNCF] = dg2_lncf_steering_table;
} else if (IS_XEHPSDV(i915)) {
@@ -172,7 +196,11 @@ int intel_gt_init_mmio(struct intel_gt *gt)
GEN10_L3BANK_MASK;
if (!gt->info.l3bank_mask) /* should be impossible! */
drm_warn(&i915->drm, "L3 bank mask is all zero!\n");
-   } else if (HAS_MSLICES(i915)) {
+   } else if (GRAPHICS_VER(i915) >= 11) {
+   /*
+* We expect all modern platforms to have at least some
+   

[PATCH] drm/i915/pvc: Add Wa_16015675438:pvc

2022-06-02 Thread Matt Roper
From: Badal Nilawar 

Even though PVC doesn't have an RCS engine, this workaround updates a
register in the 0x2xxx range that traditionally belongs to the RCS.  We
need to set a special flag to tell the GuC that the presence of an "RCS"
register on a CCS save/restore list is okay/expected.

Cc: Stuart Summers 
Cc: Lucas De Marchi 
Signed-off-by: Badal Nilawar 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.c  | 4 
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 1 +
 4 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 6aa1ceaa8d27..1986579b18ba 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -140,6 +140,7 @@
 #define FF_SLICE_CS_CHICKEN2   _MMIO(0x20e4)
 #define   GEN9_TSG_BARRIER_ACK_DISABLE (1 << 8)
 #define   GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE(1 << 10)
+#define   GEN12_PERF_FIX_BALANCING_CFE_DISABLE REG_BIT(15)
 
 #define GEN9_CS_DEBUG_MODE1_MMIO(0x20ec)
 #define   FF_DOP_CLOCK_GATE_DISABLEREG_BIT(1)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 6e875d4f5f65..eb0598593724 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2698,6 +2698,11 @@ general_render_compute_wa_init(struct intel_engine_cs 
*engine, struct i915_wa_li
/* Wa_22014226127:dg2,pvc */
wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
}
+
+   /* Wa_16015675438:pvc */
+   if (IS_PVC_BD_STEP(i915, STEP_B0, STEP_FOREVER))
+   wa_masked_en(wal, FF_SLICE_CS_CHICKEN2,
+GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 2c4ad4a65089..7043cf5a666a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -327,6 +327,10 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER))
flags |= GUC_WA_CONTEXT_ISOLATION;
 
+   /* Wa_16015675438:pvc */
+   if (IS_PVC_BD_STEP(gt->i915, STEP_B0, STEP_FOREVER))
+   flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
+
return flags;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 42cb7a9a6199..b3c9a9327f76 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -105,6 +105,7 @@
 #define   GUC_WA_PRE_PARSERBIT(14)
 #define   GUC_WA_HOLD_CCS_SWITCHOUTBIT(17)
 #define   GUC_WA_POLLCSBIT(18)
+#define   GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21)
 
 #define GUC_CTL_FEATURE2
 #define   GUC_CTL_ENABLE_SLPC  BIT(2)
-- 
2.35.3



[PATCH] drm/i915/pvc: GuC depriv applies to PVC

2022-06-02 Thread Matt Roper
We missed this setting in the initial device info patch's definition of
XE_HPC_FEATURES.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/i915_pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 047a6e326031..a5a1a7647320 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1089,6 +1089,7 @@ static const struct intel_device_info ats_m_info = {
XE_HP_FEATURES, \
.dma_mask_size = 52, \
.has_3d_pipeline = 0, \
+   .has_guc_deprivilege = 1, \
.has_l3_ccs_read = 1, \
.has_one_eu_per_fuse_bit = 1
 
-- 
2.35.3



[PATCH] drm/i915: Add extra registers to GPU error dump

2022-06-01 Thread Matt Roper
From: Stuart Summers 

Our internal teams have identified a few additional engine registers
that are worth inspecting in error state dumps during development &
debug.  Let's capture and print them as part of our error dump.

For simplicity we'll just dump these registers on gen11 and beyond.
Most of these registers have existed since earlier platforms (e.g., gen6
or gen7) but were initially introduced only for a subset of the
platforms' engines; gen11 seems to be where they became available on all
engines.

Signed-off-by: Stuart Summers 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_engine_regs.h |  5 +
 drivers/gpu/drm/i915/i915_gpu_error.c   | 19 +++
 drivers/gpu/drm/i915/i915_gpu_error.h   |  7 +++
 3 files changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h 
b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index 44de10cf7837..889f0df3940b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -8,6 +8,7 @@
 
 #include "i915_reg_defs.h"
 
+#define RING_EXCC(base)_MMIO((base) + 0x28)
 #define RING_TAIL(base)_MMIO((base) + 0x30)
 #define   TAIL_ADDR0x0018
 #define RING_HEAD(base)_MMIO((base) + 0x34)
@@ -133,6 +134,8 @@
(REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, (dst) << 1) | \
 REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, (src) << 1))
 
+#define RING_CSCMDOP(base) _MMIO((base) + 0x20c)
+
 /*
  * CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
  * The lsb of each can be considered a separate enabling bit for encryption.
@@ -149,6 +152,7 @@
 REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1))
 
 #define RING_PREDICATE_RESULT(base)_MMIO((base) + 0x3b8) /* gen12+ 
*/
+
 #define MI_PREDICATE_RESULT_2(base)_MMIO((base) + 0x3bc)
 #define   LOWER_SLICE_ENABLED  (1 << 0)
 #define   LOWER_SLICE_DISABLED (0 << 0)
@@ -172,6 +176,7 @@
 #define  CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT  REG_BIT(2)
 #define  CTX_CTRL_INHIBIT_SYN_CTX_SWITCH   REG_BIT(3)
 #define  GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE REG_BIT(8)
+#define RING_CTX_SR_CTL(base)  _MMIO((base) + 0x244)
 #define RING_SEMA_WAIT_POLL(base)  _MMIO((base) + 0x24c)
 #define GEN8_RING_PDP_UDW(base, n) _MMIO((base) + 0x270 + (n) * 8 
+ 4)
 #define GEN8_RING_PDP_LDW(base, n) _MMIO((base) + 0x270 + (n) * 8)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 0512c66fa4f3..bff8a111424a 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -581,6 +581,15 @@ static void error_print_engine(struct 
drm_i915_error_state_buf *m,
err_printf(m, "  RC PSMI: 0x%08x\n", ee->rc_psmi);
err_printf(m, "  FAULT_REG: 0x%08x\n", ee->fault_reg);
}
+   if (GRAPHICS_VER(m->i915) >= 11) {
+   err_printf(m, "  NOPID: 0x%08x\n", ee->nopid);
+   err_printf(m, "  EXCC: 0x%08x\n", ee->excc);
+   err_printf(m, "  CMD_CCTL: 0x%08x\n", ee->cmd_cctl);
+   err_printf(m, "  CSCMDOP: 0x%08x\n", ee->cscmdop);
+   err_printf(m, "  CTX_SR_CTL: 0x%08x\n", ee->ctx_sr_ctl);
+   err_printf(m, "  DMA_FADDR_HI: 0x%08x\n", ee->dma_faddr_hi);
+   err_printf(m, "  DMA_FADDR_LO: 0x%08x\n", ee->dma_faddr_lo);
+   }
if (HAS_PPGTT(m->i915)) {
err_printf(m, "  GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode);
 
@@ -1224,6 +1233,16 @@ static void engine_record_registers(struct 
intel_engine_coredump *ee)
ee->ipehr = ENGINE_READ(engine, IPEHR);
}
 
+   if (GRAPHICS_VER(i915) >= 11) {
+   ee->cmd_cctl = ENGINE_READ(engine, RING_CMD_CCTL);
+   ee->cscmdop = ENGINE_READ(engine, RING_CSCMDOP);
+   ee->ctx_sr_ctl = ENGINE_READ(engine, RING_CTX_SR_CTL);
+   ee->dma_faddr_hi = ENGINE_READ(engine, RING_DMA_FADD_UDW);
+   ee->dma_faddr_lo = ENGINE_READ(engine, RING_DMA_FADD);
+   ee->nopid = ENGINE_READ(engine, RING_NOPID);
+   ee->excc = ENGINE_READ(engine, RING_EXCC);
+   }
+
intel_engine_get_instdone(engine, &ee->instdone);
 
ee->instpm = ENGINE_READ(engine, RING_INSTPM);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h 
b/drivers/gpu/drm/i915/i915_gpu_error.h
index a611abacd9c2..55a143b92d10 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/driv

[PATCH v5 3/6] drm/i915/sseu: Simplify gen11+ SSEU handling

2022-06-01 Thread Matt Roper
Although gen11 and gen12 architectures supported the concept of multiple
slices, in practice all the platforms that were actually designed only
had a single slice (i.e., note the parameters to 'intel_sseu_set_info'
that we pass for each platform).  We can simplify the code slightly by
dropping the multi-slice logic from gen11+ platforms.

v2:
 - Promote drm_dbg to drm_WARN_ON if the slice fuse register reports
   unexpected fusing.  (Tvrtko)

Cc: Tvrtko Ursulin 
Signed-off-by: Matt Roper 
Acked-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/intel_sseu.c | 76 +---
 1 file changed, 36 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c 
b/drivers/gpu/drm/i915/gt/intel_sseu.c
index b5fd479a7b85..7e5222ad2f98 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -119,52 +119,37 @@ static u16 compute_eu_total(const struct sseu_dev_info 
*sseu)
return total;
 }
 
-static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
-{
-   u32 ss_mask;
-
-   ss_mask = ss_en >> (s * sseu->max_subslices);
-   ss_mask &= GENMASK(sseu->max_subslices - 1, 0);
-
-   return ss_mask;
-}
-
-static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
+static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
u32 g_ss_en, u32 c_ss_en, u16 eu_en)
 {
-   int s, ss;
+   u32 valid_ss_mask = GENMASK(sseu->max_subslices - 1, 0);
+   int ss;
 
/* g_ss_en/c_ss_en represent entire subslice mask across all slices */
GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
   sizeof(g_ss_en) * BITS_PER_BYTE);
 
-   for (s = 0; s < sseu->max_slices; s++) {
-   if ((s_en & BIT(s)) == 0)
-   continue;
+   sseu->slice_mask |= BIT(0);
 
-   sseu->slice_mask |= BIT(s);
-
-   /*
-* XeHP introduces the concept of compute vs geometry DSS. To
-* reduce variation between GENs around subslice usage, store a
-* mask for both the geometry and compute enabled masks since
-* userspace will need to be able to query these masks
-* independently.  Also compute a total enabled subslice count
-* for the purposes of selecting subslices to use in a
-* particular GEM context.
-*/
-   intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
-get_ss_stride_mask(sseu, s, c_ss_en));
-   intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
-get_ss_stride_mask(sseu, s, g_ss_en));
-   intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
-get_ss_stride_mask(sseu, s,
-   g_ss_en | c_ss_en));
+   /*
+* XeHP introduces the concept of compute vs geometry DSS. To reduce
+* variation between GENs around subslice usage, store a mask for both
+* the geometry and compute enabled masks since userspace will need to
+* be able to query these masks independently.  Also compute a total
+* enabled subslice count for the purposes of selecting subslices to
+* use in a particular GEM context.
+*/
+   intel_sseu_set_subslices(sseu, 0, sseu->compute_subslice_mask,
+c_ss_en & valid_ss_mask);
+   intel_sseu_set_subslices(sseu, 0, sseu->geometry_subslice_mask,
+g_ss_en & valid_ss_mask);
+   intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask,
+(g_ss_en | c_ss_en) & valid_ss_mask);
+
+   for (ss = 0; ss < sseu->max_subslices; ss++)
+   if (intel_sseu_has_subslice(sseu, 0, ss))
+   sseu_set_eus(sseu, 0, ss, eu_en);
 
-   for (ss = 0; ss < sseu->max_subslices; ss++)
-   if (intel_sseu_has_subslice(sseu, s, ss))
-   sseu_set_eus(sseu, s, ss, eu_en);
-   }
sseu->eu_per_subslice = hweight16(eu_en);
sseu->eu_total = compute_eu_total(sseu);
 }
@@ -196,7 +181,7 @@ static void xehp_sseu_info_init(struct intel_gt *gt)
if (eu_en_fuse & BIT(eu))
eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
 
-   gen11_compute_sseu_info(sseu, 0x1, g_dss_en, c_dss_en, eu_en);
+   gen11_compute_sseu_info(sseu, g_dss_en, c_dss_en, eu_en);
 }
 
 static void gen12_sseu_info_init(struct intel_gt *gt)
@@ -216,8 +201,13 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
 */
intel_sseu_set_info(sseu, 1, 6, 16);
 
+   /*
+* Althou

[PATCH v5 5/6] drm/i915/sseu: Disassociate internal subslice mask representation from uapi

2022-06-01 Thread Matt Roper
As with EU masks, it's easier to store subslice/DSS masks internally in
a format that's more natural for the driver to work with, and then only
covert into the u8[] uapi form when the query ioctl is invoked.  Since
the hardware design changed significantly with Xe_HP, we'll use a union
to choose between the old "hsw-style" subslice masks or the newer xehp
mask.  HSW-style masks will be stored in an array of u8's, indexed by
slice (there's never more than 6 subslices per slice on older
platforms).  For Xe_HP and beyond where slices no longer exist, we only
need a single bitmask.  However we already know that this mask is
eventually going to grow too large for a simple u64 to hold, so we'll
represent it in a manner that can be operated on by the utilities in
linux/bitmap.h.

v2:
 - Fix typo: BIT(s) -> BIT(ss) in gen9_sseu_device_status()

v3:
 - Eliminate sseu->ss_stride and just calculate the stride while
   specifically handling uapi.  (Tvrtko)
 - Use BITMAP_BITS() macro to refer to size of masks rather than
   passing I915_MAX_SS_FUSE_BITS directly.  (Tvrtko)
 - Report compute/geometry DSS masks separately when dumping Xe_HP SSEU
   info.  (Tvrtko)
 - Restore dropped range checks to intel_sseu_has_subslice().  (Tvrtko)

v4:
 - Make the bitmap size macro check the size of the .xehp field rather
   than the containing union.  (Tvrtko)
 - Don't add GEM_BUG_ON() intel_sseu_has_subslice()'s check for whether
   slice or subslice ID exceed sseu->max_[sub]slices; various loops
   in the driver are expected to exceed these, so we should just
   silently return 'false.'

v5:
 - Move XEHP_BITMAP_BITS() to the header so that we can also replace a
   usage of I915_MAX_SS_FUSE_BITS in one of the inline functions.
   (Bala)
 - Change the local variable in intel_slicemask_from_xehp_dssmask() from
   u16 to 'unsigned long' to make it a bit more future-proof.

Cc: Tvrtko Ursulin 
Cc: Balasubramani Vivekanandan 
Signed-off-by: Matt Roper 
Acked-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c  |   5 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c|   4 +-
 drivers/gpu/drm/i915/gt/intel_gt.c   |  12 +-
 drivers/gpu/drm/i915/gt/intel_sseu.c | 261 +++
 drivers/gpu/drm/i915/gt/intel_sseu.h |  79 --
 drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c |  30 +--
 drivers/gpu/drm/i915/gt/intel_workarounds.c  |  24 +-
 drivers/gpu/drm/i915/i915_getparam.c |   3 +-
 drivers/gpu/drm/i915/i915_query.c|  13 +-
 9 files changed, 243 insertions(+), 188 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index ab4c5ab28e4d..a3bb73f5d53b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1875,6 +1875,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
 {
const struct sseu_dev_info *device = >->info.sseu;
struct drm_i915_private *i915 = gt->i915;
+   unsigned int dev_subslice_mask = intel_sseu_get_hsw_subslices(device, 
0);
 
/* No zeros in any field. */
if (!user->slice_mask || !user->subslice_mask ||
@@ -1901,7 +1902,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
if (user->slice_mask & ~device->slice_mask)
return -EINVAL;
 
-   if (user->subslice_mask & ~device->subslice_mask[0])
+   if (user->subslice_mask & ~dev_subslice_mask)
return -EINVAL;
 
if (user->max_eus_per_subslice > device->max_eus_per_subslice)
@@ -1915,7 +1916,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
/* Part specific restrictions. */
if (GRAPHICS_VER(i915) == 11) {
unsigned int hw_s = hweight8(device->slice_mask);
-   unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
+   unsigned int hw_ss_per_s = hweight8(dev_subslice_mask);
unsigned int req_s = hweight8(context->slice_mask);
unsigned int req_ss = hweight8(context->subslice_mask);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 1adbf34c3632..f0acf8518a51 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -674,8 +674,8 @@ static void engine_mask_apply_compute_fuses(struct intel_gt 
*gt)
if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
return;
 
-   ccs_mask = 
intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu),
-   ss_per_ccs);
+   ccs_mask = 
intel_slicemask_from_xehp_dssmask(info->sseu.compute_subslice_mask,
+ss_per_ccs);
/*
 * If all DSS in a quadrant are fused off, th

[PATCH v5 2/6] drm/i915/xehp: Drop GETPARAM lookups of I915_PARAM_[SUB]SLICE_MASK

2022-06-01 Thread Matt Roper
Slice/subslice/EU information should be obtained via the topology
queries provided by the I915_QUERY interface; let's turn off support for
the old GETPARAM lookups on Xe_HP and beyond where we can't return
meaningful values.

The slice mask lookup is meaningless since Xe_HP doesn't support
traditional slices (and we make no attempt to return the various new
units like gslices, cslices, mslices, etc.) here.

The subslice mask lookup is even more problematic; given the distinct
masks for geometry vs compute purposes, the combined mask returned here
is likely not what userspace would want to act upon anyway.  The value
is also limited to 32-bits by the nature of the GETPARAM ioctl which is
sufficient for the initial Xe_HP platforms, but is unable to convey the
larger masks that will be needed on other upcoming platforms.  Finally,
the value returned here becomes even less meaningful when used on
multi-tile platforms where each tile will have its own masks.

Signed-off-by: Matt Roper 
Acked-by: Tvrtko Ursulin 
Acked-by: Lionel Landwerlin   # mesa
---
 drivers/gpu/drm/i915/i915_getparam.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_getparam.c 
b/drivers/gpu/drm/i915/i915_getparam.c
index c12a0adefda5..ac9767c56619 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -148,11 +148,19 @@ int i915_getparam_ioctl(struct drm_device *dev, void 
*data,
value = intel_engines_has_context_isolation(i915);
break;
case I915_PARAM_SLICE_MASK:
+   /* Not supported from Xe_HP onward; use topology queries */
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+   return -EINVAL;
+
value = sseu->slice_mask;
if (!value)
return -ENODEV;
break;
case I915_PARAM_SUBSLICE_MASK:
+   /* Not supported from Xe_HP onward; use topology queries */
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+   return -EINVAL;
+
/* Only copy bits from the first slice */
memcpy(&value, sseu->subslice_mask,
   min(sseu->ss_stride, (u8)sizeof(value)));
-- 
2.35.3



[PATCH v5 4/6] drm/i915/sseu: Don't try to store EU mask internally in UAPI format

2022-06-01 Thread Matt Roper
Storing the EU mask internally in the same format the I915_QUERY
topology queries use makes the final copy_to_user() a bit simpler, but
makes the rest of the driver's SSEU more complicated and harder to
follow.  Let's switch to an internal representation that's more natural:
Xe_HP platforms will be a simple array of u16 masks, whereas pre-Xe_HP
platforms will be a two-dimensional array, indexed by [slice][subslice].
We'll convert to the uapi format only when the query uapi is called.

v2:
 - Drop has_common_ss_eumask.  We waste some space repeating identical
   EU masks for every single DSS, but the code is simpler without it.
   (Tvrtko)

v3:
 - Mask down EUs passed to sseu_set_eus at the callsite rather than
   inside the function.  (Tvrtko)
 - Eliminate sseu->eu_stride and calculate it when needed.  (Tvrtko)

Cc: Tvrtko Ursulin 
Signed-off-by: Matt Roper 
Acked-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/intel_sseu.c | 88 ++--
 drivers/gpu/drm/i915/gt/intel_sseu.h | 10 +++-
 drivers/gpu/drm/i915/i915_query.c| 13 ++--
 3 files changed, 73 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c 
b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 7e5222ad2f98..285cfd758bdc 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -19,8 +19,6 @@ void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 
max_slices,
 
sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
-   sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
-   GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
 }
 
 unsigned int
@@ -78,47 +76,77 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info 
*sseu, u8 slice)
return hweight32(intel_sseu_get_subslices(sseu, slice));
 }
 
-static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
-  int subslice)
-{
-   int slice_stride = sseu->max_subslices * sseu->eu_stride;
-
-   return slice * slice_stride + subslice * sseu->eu_stride;
-}
-
 static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
int subslice)
 {
-   int i, offset = sseu_eu_idx(sseu, slice, subslice);
-   u16 eu_mask = 0;
-
-   for (i = 0; i < sseu->eu_stride; i++)
-   eu_mask |=
-   ((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);
-
-   return eu_mask;
+   if (sseu->has_xehp_dss) {
+   WARN_ON(slice > 0);
+   return sseu->eu_mask.xehp[subslice];
+   } else {
+   return sseu->eu_mask.hsw[slice][subslice];
+   }
 }
 
 static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
 u16 eu_mask)
 {
-   int i, offset = sseu_eu_idx(sseu, slice, subslice);
-
-   for (i = 0; i < sseu->eu_stride; i++)
-   sseu->eu_mask[offset + i] =
-   (eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
+   GEM_WARN_ON(eu_mask && __fls(eu_mask) >= sseu->max_eus_per_subslice);
+   if (sseu->has_xehp_dss) {
+   GEM_WARN_ON(slice > 0);
+   sseu->eu_mask.xehp[subslice] = eu_mask;
+   } else {
+   sseu->eu_mask.hsw[slice][subslice] = eu_mask;
+   }
 }
 
 static u16 compute_eu_total(const struct sseu_dev_info *sseu)
 {
-   u16 i, total = 0;
+   int s, ss, total = 0;
 
-   for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
-   total += hweight8(sseu->eu_mask[i]);
+   for (s = 0; s < sseu->max_slices; s++)
+   for (ss = 0; ss < sseu->max_subslices; ss++)
+   if (sseu->has_xehp_dss)
+   total += hweight16(sseu->eu_mask.xehp[ss]);
+   else
+   total += hweight16(sseu->eu_mask.hsw[s][ss]);
 
return total;
 }
 
+/**
+ * intel_sseu_copy_eumask_to_user - Copy EU mask into a userspace buffer
+ * @to: Pointer to userspace buffer to copy to
+ * @sseu: SSEU structure containing EU mask to copy
+ *
+ * Copies the EU mask to a userspace buffer in the format expected by
+ * the query ioctl's topology queries.
+ *
+ * Returns the result of the copy_to_user() operation.
+ */
+int intel_sseu_copy_eumask_to_user(void __user *to,
+  const struct sseu_dev_info *sseu)
+{
+   u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE] = {};
+   int eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
+   int len = sseu->max_slices * sseu->max_subslices * eu_stride;
+   int s, ss, i;
+
+   for (s = 0; s < sseu->max_slices; s++) {
+   for (ss = 0; ss < sseu->max_subslices; ss++) {
+   int uapi_of

[PATCH v5 1/6] drm/i915/xehp: Use separate sseu init function

2022-06-01 Thread Matt Roper
Xe_HP has enough fundamental differences from previous platforms that it
makes sense to use a separate SSEU init function to keep things
straightforward and easy to understand.  We'll also add a has_xehp_dss
flag to the SSEU structure that will be used by other upcoming changes.

v2:
 - Add has_xehp_dss flag

Signed-off-by: Matt Roper 
Acked-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/intel_sseu.c | 86 
 drivers/gpu/drm/i915/gt/intel_sseu.h |  5 ++
 2 files changed, 54 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c 
b/drivers/gpu/drm/i915/gt/intel_sseu.c
index fdd25691beda..b5fd479a7b85 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -169,13 +169,43 @@ static void gen11_compute_sseu_info(struct sseu_dev_info 
*sseu, u8 s_en,
sseu->eu_total = compute_eu_total(sseu);
 }
 
-static void gen12_sseu_info_init(struct intel_gt *gt)
+static void xehp_sseu_info_init(struct intel_gt *gt)
 {
struct sseu_dev_info *sseu = >->info.sseu;
struct intel_uncore *uncore = gt->uncore;
u32 g_dss_en, c_dss_en = 0;
u16 eu_en = 0;
u8 eu_en_fuse;
+   int eu;
+
+   /*
+* The concept of slice has been removed in Xe_HP.  To be compatible
+* with prior generations, assume a single slice across the entire
+* device. Then calculate out the DSS for each workload type within
+* that software slice.
+*/
+   intel_sseu_set_info(sseu, 1, 32, 16);
+   sseu->has_xehp_dss = 1;
+
+   g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
+   c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);
+
+   eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & 
XEHP_EU_ENA_MASK;
+
+   for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
+   if (eu_en_fuse & BIT(eu))
+   eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
+
+   gen11_compute_sseu_info(sseu, 0x1, g_dss_en, c_dss_en, eu_en);
+}
+
+static void gen12_sseu_info_init(struct intel_gt *gt)
+{
+   struct sseu_dev_info *sseu = >->info.sseu;
+   struct intel_uncore *uncore = gt->uncore;
+   u32 g_dss_en;
+   u16 eu_en = 0;
+   u8 eu_en_fuse;
u8 s_en;
int eu;
 
@@ -183,43 +213,23 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
 * Instead of splitting these, provide userspace with an array
 * of DSS to more closely represent the hardware resource.
-*
-* In addition, the concept of slice has been removed in Xe_HP.
-* To be compatible with prior generations, assume a single slice
-* across the entire device. Then calculate out the DSS for each
-* workload type within that software slice.
 */
-   if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915))
-   intel_sseu_set_info(sseu, 1, 32, 16);
-   else
-   intel_sseu_set_info(sseu, 1, 6, 16);
+   intel_sseu_set_info(sseu, 1, 6, 16);
 
-   /*
-* As mentioned above, Xe_HP does not have the concept of a slice.
-* Enable one for software backwards compatibility.
-*/
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-   s_en = 0x1;
-   else
-   s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
-  GEN11_GT_S_ENA_MASK;
+   s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
+   GEN11_GT_S_ENA_MASK;
 
g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-   c_dss_en = intel_uncore_read(uncore, 
GEN12_GT_COMPUTE_DSS_ENABLE);
 
/* one bit per pair of EUs */
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-   eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & 
XEHP_EU_ENA_MASK;
-   else
-   eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
-  GEN11_EU_DIS_MASK);
+   eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
+  GEN11_EU_DIS_MASK);
 
for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
if (eu_en_fuse & BIT(eu))
eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
 
-   gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);
+   gen11_compute_sseu_info(sseu, s_en, g_dss_en, 0, eu_en);
 
/* TGL only supports slice-level power gating */
sseu->has_slice_pg = 1;
@@ -574,18 +584,20 @@ void intel_sseu_info_init(struct intel_gt *gt)
 {
struct drm_i915_private *i915 = gt->i915;
 
-   if (IS_HASWELL(i915))
-   hsw_sseu_info_init(gt);
-   else if 

[PATCH v5 0/6] i915: SSEU handling updates

2022-06-01 Thread Matt Roper
 
v5:
 - Rebase to latest drm-tip (resolve trivial conflicts)
 - Move XEHP_BITMAP_BITS() to the header so that we can also replace a usage of
   I915_MAX_SS_FUSE_BITS in one of the inline functions.  (Bala)
 - Change the local variable in intel_slicemask_from_xehp_dssmask() from u16 to
   'unsigned long' to make it a bit more future-proof.
 - Incorporate ack's received from Tvrtko and Lionel.

Cc: Tvrtko Ursulin 
Cc: Balasubramani Vivekanandan 

Matt Roper (6):
  drm/i915/xehp: Use separate sseu init function
  drm/i915/xehp: Drop GETPARAM lookups of I915_PARAM_[SUB]SLICE_MASK
  drm/i915/sseu: Simplify gen11+ SSEU handling
  drm/i915/sseu: Don't try to store EU mask internally in UAPI format
  drm/i915/sseu: Disassociate internal subslice mask representation from
uapi
  drm/i915/pvc: Add SSEU changes

 drivers/gpu/drm/i915/gem/i915_gem_context.c  |   5 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c|   4 +-
 drivers/gpu/drm/i915/gt/intel_gt.c   |  12 +-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h  |   1 +
 drivers/gpu/drm/i915/gt/intel_sseu.c | 450 ---
 drivers/gpu/drm/i915/gt/intel_sseu.h |  92 ++--
 drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c |  30 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c  |  24 +-
 drivers/gpu/drm/i915/i915_drv.h  |   2 +
 drivers/gpu/drm/i915/i915_getparam.c |  11 +-
 drivers/gpu/drm/i915/i915_pci.c  |   3 +-
 drivers/gpu/drm/i915/i915_query.c|  26 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 13 files changed, 397 insertions(+), 264 deletions(-)

-- 
2.35.3



[PATCH v5 6/6] drm/i915/pvc: Add SSEU changes

2022-06-01 Thread Matt Roper
PVC splits the mask of enabled DSS over two registers.  It also changes
the meaning of the EU fuse register such that each bit represents a
single EU rather than a pair of EUs.

Signed-off-by: Matt Roper 
Acked-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h  |  1 +
 drivers/gpu/drm/i915/gt/intel_sseu.c | 31 ++--
 drivers/gpu/drm/i915/gt/intel_sseu.h |  2 +-
 drivers/gpu/drm/i915/i915_drv.h  |  2 ++
 drivers/gpu/drm/i915/i915_pci.c  |  3 ++-
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 6 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 58e9b464d564..6aa1ceaa8d27 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -561,6 +561,7 @@
 #define   GEN11_GT_VEBOX_DISABLE_MASK  (0x0f << 
GEN11_GT_VEBOX_DISABLE_SHIFT)
 
 #define GEN12_GT_COMPUTE_DSS_ENABLE_MMIO(0x9144)
+#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT_MMIO(0x9148)
 
 #define GEN6_UCGCTL1   _MMIO(0x9400)
 #define   GEN6_GAMUNIT_CLOCK_GATE_DISABLE  (1 << 22)
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c 
b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 38069330d828..5cc1896a6cb9 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -210,27 +210,44 @@ static void xehp_sseu_info_init(struct intel_gt *gt)
struct intel_uncore *uncore = gt->uncore;
u16 eu_en = 0;
u8 eu_en_fuse;
+   int num_compute_regs, num_geometry_regs;
int eu;
 
+   if (IS_PONTEVECCHIO(gt->i915)) {
+   num_geometry_regs = 0;
+   num_compute_regs = 2;
+   } else {
+   num_geometry_regs = 1;
+   num_compute_regs = 1;
+   }
+
/*
 * The concept of slice has been removed in Xe_HP.  To be compatible
 * with prior generations, assume a single slice across the entire
 * device. Then calculate out the DSS for each workload type within
 * that software slice.
 */
-   intel_sseu_set_info(sseu, 1, 32, 16);
+   intel_sseu_set_info(sseu, 1,
+   32 * max(num_geometry_regs, num_compute_regs),
+   16);
sseu->has_xehp_dss = 1;
 
-   xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask, 1,
+   xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask,
+  num_geometry_regs,
   GEN12_GT_GEOMETRY_DSS_ENABLE);
-   xehp_load_dss_mask(uncore, &sseu->compute_subslice_mask, 1,
-  GEN12_GT_COMPUTE_DSS_ENABLE);
+   xehp_load_dss_mask(uncore, &sseu->compute_subslice_mask,
+  num_compute_regs,
+  GEN12_GT_COMPUTE_DSS_ENABLE,
+  XEHPC_GT_COMPUTE_DSS_ENABLE_EXT);
 
eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & 
XEHP_EU_ENA_MASK;
 
-   for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
-   if (eu_en_fuse & BIT(eu))
-   eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
+   if (HAS_ONE_EU_PER_FUSE_BIT(gt->i915))
+   eu_en = eu_en_fuse;
+   else
+   for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
+   if (eu_en_fuse & BIT(eu))
+   eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
 
xehp_compute_sseu_info(sseu, eu_en);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h 
b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 61abcab5a76b..b86f23f5b3a1 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -33,7 +33,7 @@ struct drm_printer;
  * Maximum number of 32-bit registers used by hardware to express the
  * enabled/disabled subslices.
  */
-#define I915_MAX_SS_FUSE_REGS  1
+#define I915_MAX_SS_FUSE_REGS  2
 #define I915_MAX_SS_FUSE_BITS  (I915_MAX_SS_FUSE_REGS * 32)
 
 /* Maximum number of EUs that can exist within a subslice or DSS. */
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bc1a7ff19463..c3854b8a014f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1359,6 +1359,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 
 #define HAS_3D_PIPELINE(i915)  (INTEL_INFO(i915)->has_3d_pipeline)
 
+#define HAS_ONE_EU_PER_FUSE_BIT(i915)  
(INTEL_INFO(i915)->has_one_eu_per_fuse_bit)
+
 /* i915_gem.c */
 void i915_gem_init_early(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 61386d1bb07b..047a6e326031 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915

Re: [PATCH v5 5/6] drm/i915/sseu: Disassociate internal subslice mask representation from uapi

2022-06-01 Thread Matt Roper
On Wed, Jun 01, 2022 at 01:48:56PM +0530, Balasubramani Vivekanandan wrote:
> On 23.05.2022 13:45, Matt Roper wrote:
> > As with EU masks, it's easier to store subslice/DSS masks internally in
> > a format that's more natural for the driver to work with, and then only
> > covert into the u8[] uapi form when the query ioctl is invoked.  Since
> > the hardware design changed significantly with Xe_HP, we'll use a union
> > to choose between the old "hsw-style" subslice masks or the newer xehp
> > mask.  HSW-style masks will be stored in an array of u8's, indexed by
> > slice (there's never more than 6 subslices per slice on older
> > platforms).  For Xe_HP and beyond where slices no longer exist, we only
> > need a single bitmask.  However we already know that this mask is
> > eventually going to grow too large for a simple u64 to hold, so we'll
> > represent it in a manner that can be operated on by the utilities in
> > linux/bitmap.h.
> > 
> > v2:
> >  - Fix typo: BIT(s) -> BIT(ss) in gen9_sseu_device_status()
> > 
> > v3:
> >  - Eliminate sseu->ss_stride and just calculate the stride while
> >specifically handling uapi.  (Tvrtko)
> >  - Use BITMAP_BITS() macro to refer to size of masks rather than
> >passing I915_MAX_SS_FUSE_BITS directly.  (Tvrtko)
> >  - Report compute/geometry DSS masks separately when dumping Xe_HP SSEU
> >info.  (Tvrtko)
> >  - Restore dropped range checks to intel_sseu_has_subslice().  (Tvrtko)
> > 
> > v4:
> >  - Make the bitmap size macro check the size of the .xehp field rather
> >than the containing union.  (Tvrtko)
> >  - Don't add GEM_BUG_ON() intel_sseu_has_subslice()'s check for whether
> >slice or subslice ID exceed sseu->max_[sub]slices; various loops
> >in the driver are expected to exceed these, so we should just
> >silently return 'false.'
> > 
> > Cc: Tvrtko Ursulin 
> > Signed-off-by: Matt Roper 
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_context.c  |   5 +-
> >  drivers/gpu/drm/i915/gt/intel_engine_cs.c|   4 +-
> >  drivers/gpu/drm/i915/gt/intel_gt.c   |  12 +-
> >  drivers/gpu/drm/i915/gt/intel_sseu.c | 261 +++
> >  drivers/gpu/drm/i915/gt/intel_sseu.h |  76 --
> >  drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c |  30 +--
> >  drivers/gpu/drm/i915/gt/intel_workarounds.c  |  24 +-
> >  drivers/gpu/drm/i915/i915_getparam.c |   3 +-
> >  drivers/gpu/drm/i915/i915_query.c|  13 +-
> >  9 files changed, 241 insertions(+), 187 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
> > b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > index ab4c5ab28e4d..a3bb73f5d53b 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > @@ -1875,6 +1875,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
> >  {
> > const struct sseu_dev_info *device = >->info.sseu;
> > struct drm_i915_private *i915 = gt->i915;
> > +   unsigned int dev_subslice_mask = intel_sseu_get_hsw_subslices(device, 
> > 0);
> >  
> > /* No zeros in any field. */
> > if (!user->slice_mask || !user->subslice_mask ||
> > @@ -1901,7 +1902,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
> > if (user->slice_mask & ~device->slice_mask)
> > return -EINVAL;
> >  
> > -   if (user->subslice_mask & ~device->subslice_mask[0])
> > +   if (user->subslice_mask & ~dev_subslice_mask)
> > return -EINVAL;
> >  
> > if (user->max_eus_per_subslice > device->max_eus_per_subslice)
> > @@ -1915,7 +1916,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
> > /* Part specific restrictions. */
> > if (GRAPHICS_VER(i915) == 11) {
> > unsigned int hw_s = hweight8(device->slice_mask);
> > -   unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
> > +   unsigned int hw_ss_per_s = hweight8(dev_subslice_mask);
> > unsigned int req_s = hweight8(context->slice_mask);
> > unsigned int req_ss = hweight8(context->subslice_mask);
> >  
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > index 1adbf34c3632..f0acf8518a51 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > @@ -674,8 +674,8

Re: [PATCH] drm/i915/dg2: Catch and log more unexpected values in DG1_MSTR_TILE_INTR

2022-05-27 Thread Matt Roper
On Thu, May 26, 2022 at 11:18:17AM +0100, Tvrtko Ursulin wrote:
> 
> On 25/05/2022 19:05, Matt Roper wrote:
> > On Wed, May 25, 2022 at 05:03:13PM +0100, Tvrtko Ursulin wrote:
> > > 
> > > On 24/05/2022 18:51, Matt Roper wrote:
> > > > On Tue, May 24, 2022 at 10:43:39AM +0100, Tvrtko Ursulin wrote:
> > > > > From: Tvrtko Ursulin 
> > > > > 
> > > > > Catch and log any garbage in the register, including no tiles marked, 
> > > > > or
> > > > > multiple tiles marked.
> > > > > 
> > > > > Signed-off-by: Tvrtko Ursulin 
> > > > > Cc: Matt Roper 
> > > > > ---
> > > > > We caught garbage in DG1_MSTR_TILE_INTR with DG2 (actual value 
> > > > > 0xF9D2C008)
> > > > > during glmark and more badness. So I thought lets log all possible 
> > > > > failure
> > > > > modes from here and also use per device logging.
> > > > > ---
> > > > >drivers/gpu/drm/i915/i915_irq.c | 33 
> > > > > ++---
> > > > >drivers/gpu/drm/i915/i915_reg.h |  1 +
> > > > >2 files changed, 23 insertions(+), 11 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/i915/i915_irq.c 
> > > > > b/drivers/gpu/drm/i915/i915_irq.c
> > > > > index 73cebc6aa650..79853d3fc1ed 100644
> > > > > --- a/drivers/gpu/drm/i915/i915_irq.c
> > > > > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > > > > @@ -2778,24 +2778,30 @@ static irqreturn_t dg1_irq_handler(int irq, 
> > > > > void *arg)
> > > > >   u32 gu_misc_iir;
> > > > >   if (!intel_irqs_enabled(i915))
> > > > > - return IRQ_NONE;
> > > > > + goto none;
> > > > >   master_tile_ctl = dg1_master_intr_disable(regs);
> > > > > - if (!master_tile_ctl) {
> > > > > - dg1_master_intr_enable(regs);
> > > > > - return IRQ_NONE;
> > > > > + if (!master_tile_ctl)
> > > > > + goto enable_none;
> > > > > +
> > > > > + if (master_tile_ctl & ~(DG1_MSTR_IRQ | DG1_MSTR_TILE_MASK)) {
> > > > > + drm_warn(&i915->drm, "Garbage in master_tile_ctl: 
> > > > > 0x%08x!\n",
> > > > > +  master_tile_ctl);
> > > > 
> > > > I know we have a bunch of them already, but shouldn't we be avoiding
> > > > printk-based stuff like this inside interrupt handlers?  Should we be
> > > > migrating all these error messages over to trace_printk or something
> > > > similar that's safer to use?
> > > 
> > > Not sure - I kind of think some really unexpected and worrying situations
> > > should be loud and on by default. Risk is then spam if not ratelimited.
> > > Maybe we should instead ratelimit most errors/warnings coming for irq
> > > handlers?
> > 
> > It's not the risk of spam that's the problem, but rather that
> > printk-based stuff eventually calls into the console code to flush its
> > buffers.  That's way more overhead than you want in an interrupt handler
> > so it's bad on its own, but if you're using something slow like a serial
> > console, it becomes even more of a problem.
> 
> Is it a problem for messages which we never expect to see?

Kind of.  While not as catastrophic, it's the same argument for why we
don't use BUG() anymore...when the impossible does manage to happen
there's unnecessary collateral damage on things outside of graphics.  If
we're adding huge delays inside an interrupt handler (while other
interrupts are disabled) that impacts the system-wide usability, not
just our own driver.

I'd also argue that these messages actually are semi-expected.  Random
bits being set shouldn't happen, but in the world of dgpu's, we do
occasionally see cases where the PCI link itself goes down for reasons
outside our control and then all registers read back as 0x,
which will probably trigger error messages here (as well as a bunch of
other places).

> 
> > While the unexpected bits in the master tile register are strange and
> > may point to a bigger problem somewhere else, they're also harmless on
> > their own since we should just ignore those bits and only process the
> > valid tiles.
> 
> Yes, I was expecting that a patch belonging to mult

Re: [Intel-gfx] [CI] Revert "drm/i915: Add relocation exceptions for two other platforms"

2022-05-27 Thread Matt Roper
On Fri, May 27, 2022 at 01:10:46AM -0700, Lucas De Marchi wrote:
> This reverts commit bcefacd359f07dee18795dd80f834acb47fd21eb.
> 
> IGT conversion is ready, so these execptions can already be removed.
> Once CI is clean, this commit will actually be removed from
> topic/core-for-CI.
> 
> Signed-off-by: Lucas De Marchi 

Acked-by: Matt Roper 

> ---
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 21 +--
>  1 file changed, 5 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
> b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index b279588c0672..c326bd2b444f 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -492,27 +492,16 @@ eb_unreserve_vma(struct eb_vma *ev)
>   ev->flags &= ~__EXEC_OBJECT_RESERVED;
>  }
>  
> -static bool platform_has_relocs_enabled(const struct i915_execbuffer *eb)
> -{
> - /*
> -  * Relocations are disallowed starting from gen12 with Tigerlake
> -  * as an exception. To unblock CI, we are temporarily allowing it
> -  * for Rocketlake and Alderlake.
> -  */
> - if (GRAPHICS_VER(eb->i915) < 12 || IS_TIGERLAKE(eb->i915) ||
> - IS_ROCKETLAKE(eb->i915) || IS_ALDERLAKE_S(eb->i915) ||
> - IS_ALDERLAKE_P(eb->i915))
> - return true;
> -
> - return false;
> -}
> -
>  static int
>  eb_validate_vma(struct i915_execbuffer *eb,
>   struct drm_i915_gem_exec_object2 *entry,
>   struct i915_vma *vma)
>  {
> - if (entry->relocation_count && !platform_has_relocs_enabled(eb))
> + /* Relocations are disallowed for all platforms after TGL-LP.  This
> +  * also covers all platforms with local memory.
> +  */
> + if (entry->relocation_count &&
> +     GRAPHICS_VER(eb->i915) >= 12 && !IS_TIGERLAKE(eb->i915))
>   return -EINVAL;
>  
>   if (unlikely(entry->flags & eb->invalid_flags))
> -- 
> 2.36.1
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


[PATCH 2/2] drm/i915/pvc: Add initial PVC workarounds

2022-05-27 Thread Matt Roper
From: Stuart Summers 

Bspec: 64027
Signed-off-by: Stuart Summers 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_engine_regs.h |  5 +-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  3 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 61 ++---
 drivers/gpu/drm/i915/intel_pm.c | 16 +-
 4 files changed, 73 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h 
b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index 75a0c55c5aa5..44de10cf7837 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -196,6 +196,7 @@
 #define RING_CTX_TIMESTAMP(base)   _MMIO((base) + 0x3a8) /* gen8+ 
*/
 #define RING_PREDICATE_RESULT(base)_MMIO((base) + 0x3b8)
 #define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 
4)
+#define   RING_FORCE_TO_NONPRIV_DENY   REG_BIT(30)
 #define   RING_FORCE_TO_NONPRIV_ADDRESS_MASK   REG_GENMASK(25, 2)
 #define   RING_FORCE_TO_NONPRIV_ACCESS_RW  (0 << 28)/* CFL+ & Gen11+ */
 #define   RING_FORCE_TO_NONPRIV_ACCESS_RD  (1 << 28)
@@ -208,7 +209,9 @@
 #define   RING_FORCE_TO_NONPRIV_RANGE_64   (3 << 0)
 #define   RING_FORCE_TO_NONPRIV_RANGE_MASK (3 << 0)
 #define   RING_FORCE_TO_NONPRIV_MASK_VALID \
-   (RING_FORCE_TO_NONPRIV_RANGE_MASK | RING_FORCE_TO_NONPRIV_ACCESS_MASK)
+   (RING_FORCE_TO_NONPRIV_RANGE_MASK | \
+RING_FORCE_TO_NONPRIV_ACCESS_MASK | \
+RING_FORCE_TO_NONPRIV_DENY)
 #define   RING_MAX_NONPRIV_SLOTS  12
 
 #define RING_EXECLIST_SQ_CONTENTS(base)_MMIO((base) + 0x510)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index b4642dcc192f..58e9b464d564 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1070,8 +1070,9 @@
 
 #define GEN10_CACHE_MODE_SS_MMIO(0xe420)
 #define   ENABLE_EU_COUNT_FOR_TDL_FLUSHREG_BIT(10)
-#define   ENABLE_PREFETCH_INTO_IC  REG_BIT(3)
+#define   DISABLE_ECC  REG_BIT(5)
 #define   FLOAT_BLEND_OPTIMIZATION_ENABLE  REG_BIT(4)
+#define   ENABLE_PREFETCH_INTO_IC  REG_BIT(3)
 
 #define EU_PERF_CNTL0  _MMIO(0xe458)
 #define EU_PERF_CNTL4  _MMIO(0xe45c)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 73b59ea6fd3b..a604bc7c0701 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -776,7 +776,9 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
if (engine->class != RENDER_CLASS)
goto done;
 
-   if (IS_DG2(i915))
+   if (IS_PONTEVECCHIO(i915))
+   ; /* noop; none at this time */
+   else if (IS_DG2(i915))
dg2_ctx_workarounds_init(engine, wal);
else if (IS_XEHPSDV(i915))
; /* noop; none at this time */
@@ -1494,7 +1496,9 @@ gt_init_workarounds(struct intel_gt *gt, struct 
i915_wa_list *wal)
 {
struct drm_i915_private *i915 = gt->i915;
 
-   if (IS_DG2(i915))
+   if (IS_PONTEVECCHIO(i915))
+   ; /* none yet */
+   else if (IS_DG2(i915))
dg2_gt_workarounds_init(gt, wal);
else if (IS_XEHPSDV(i915))
xehpsdv_gt_workarounds_init(gt, wal);
@@ -1924,6 +1928,32 @@ static void dg2_whitelist_build(struct intel_engine_cs 
*engine)
}
 }
 
+static void blacklist_trtt(struct intel_engine_cs *engine)
+{
+   struct i915_wa_list *w = &engine->whitelist;
+
+   /*
+* Prevent read/write access to [0x4400, 0x4600) which covers
+* the TRTT range across all engines. Note that normally userspace
+* cannot access the other engines' trtt control, but for simplicity
+* we cover the entire range on each engine.
+*/
+   whitelist_reg_ext(w, _MMIO(0x4400),
+ RING_FORCE_TO_NONPRIV_DENY |
+ RING_FORCE_TO_NONPRIV_RANGE_64);
+   whitelist_reg_ext(w, _MMIO(0x4500),
+ RING_FORCE_TO_NONPRIV_DENY |
+ RING_FORCE_TO_NONPRIV_RANGE_64);
+}
+
+static void pvc_whitelist_build(struct intel_engine_cs *engine)
+{
+   allow_read_ctx_timestamp(engine);
+
+   /* Wa_16014440446:pvc */
+   blacklist_trtt(engine);
+}
+
 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
 {
struct drm_i915_private *i915 = engine->i915;
@@ -1931,7 +1961,9 @@ void intel_engine_init_whitelist(struct intel_engine_cs 
*engine)
 
wa_init_start(w, "whitelist", engine->name);
 
-   if (IS_DG2(i915))
+   if (IS_PONTEVECCHIO(i915))
+   pvc_whitelist_build(engine);
+   else if (IS_DG2(i915))
   

[PATCH 1/2] drm/i915/pvc: Extract stepping information from PCI revid

2022-05-27 Thread Matt Roper
For PVC, the base die and compute tile have separate stepping values
that we need to track; we'll use the existing graphics_step field to
represent the compute tile stepping and add a new 'basedie_step' field.

Unlike past platforms, steppings for these components are represented by
specific bitfields within the PCI revision ID, and we shouldn't make
assumptions about the non-CT, non-BD bits staying 0.  Let's update our
stepping code accordingly.

Bspec: 44484
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/i915_drv.h   | 13 ++
 drivers/gpu/drm/i915/intel_step.c | 70 ++-
 drivers/gpu/drm/i915/intel_step.h |  4 +-
 3 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 74b3caccd839..ec1b3484fdaf 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -943,6 +943,7 @@ static inline struct intel_gt *to_gt(struct 
drm_i915_private *i915)
 #define INTEL_DISPLAY_STEP(__i915) (RUNTIME_INFO(__i915)->step.display_step)
 #define INTEL_GRAPHICS_STEP(__i915) (RUNTIME_INFO(__i915)->step.graphics_step)
 #define INTEL_MEDIA_STEP(__i915) (RUNTIME_INFO(__i915)->step.media_step)
+#define INTEL_BASEDIE_STEP(__i915) (RUNTIME_INFO(__i915)->step.basedie_step)
 
 #define IS_DISPLAY_STEP(__i915, since, until) \
(drm_WARN_ON(&(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \
@@ -956,6 +957,10 @@ static inline struct intel_gt *to_gt(struct 
drm_i915_private *i915)
(drm_WARN_ON(&(__i915)->drm, INTEL_MEDIA_STEP(__i915) == STEP_NONE), \
 INTEL_MEDIA_STEP(__i915) >= (since) && INTEL_MEDIA_STEP(__i915) < 
(until))
 
+#define IS_BASEDIE_STEP(__i915, since, until) \
+   (drm_WARN_ON(&(__i915)->drm, INTEL_BASEDIE_STEP(__i915) == STEP_NONE), \
+INTEL_BASEDIE_STEP(__i915) >= (since) && INTEL_BASEDIE_STEP(__i915) < 
(until))
+
 static __always_inline unsigned int
 __platform_mask_index(const struct intel_runtime_info *info,
  enum intel_platform p)
@@ -1208,6 +1213,14 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
(IS_DG2(__i915) && \
 IS_DISPLAY_STEP(__i915, since, until))
 
+#define IS_PVC_BD_STEP(__i915, since, until) \
+   (IS_PONTEVECCHIO(__i915) && \
+IS_BASEDIE_STEP(__i915, since, until))
+
+#define IS_PVC_CT_STEP(__i915, since, until) \
+   (IS_PONTEVECCHIO(__i915) && \
+IS_GRAPHICS_STEP(__i915, since, until))
+
 #define IS_LP(dev_priv)(INTEL_INFO(dev_priv)->is_lp)
 #define IS_GEN9_LP(dev_priv)   (GRAPHICS_VER(dev_priv) == 9 && IS_LP(dev_priv))
 #define IS_GEN9_BC(dev_priv)   (GRAPHICS_VER(dev_priv) == 9 && 
!IS_LP(dev_priv))
diff --git a/drivers/gpu/drm/i915/intel_step.c 
b/drivers/gpu/drm/i915/intel_step.c
index 74e8e4680028..42b3133d8387 100644
--- a/drivers/gpu/drm/i915/intel_step.c
+++ b/drivers/gpu/drm/i915/intel_step.c
@@ -135,6 +135,8 @@ static const struct intel_step_info adlp_n_revids[] = {
[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_D0 },
 };
 
+static void pvc_step_init(struct drm_i915_private *i915, int pci_revid);
+
 void intel_step_init(struct drm_i915_private *i915)
 {
const struct intel_step_info *revids = NULL;
@@ -142,7 +144,10 @@ void intel_step_init(struct drm_i915_private *i915)
int revid = INTEL_REVID(i915);
struct intel_step_info step = {};
 
-   if (IS_DG2_G10(i915)) {
+   if (IS_PONTEVECCHIO(i915)) {
+   pvc_step_init(i915, revid);
+   return;
+   } else if (IS_DG2_G10(i915)) {
revids = dg2_g10_revid_step_tbl;
size = ARRAY_SIZE(dg2_g10_revid_step_tbl);
} else if (IS_DG2_G11(i915)) {
@@ -235,6 +240,69 @@ void intel_step_init(struct drm_i915_private *i915)
RUNTIME_INFO(i915)->step = step;
 }
 
+#define PVC_BD_REVID   GENMASK(5, 3)
+#define PVC_CT_REVID   GENMASK(2, 0)
+
+static const int pvc_bd_subids[] = {
+   [0x0] = STEP_A0,
+   [0x3] = STEP_B0,
+   [0x4] = STEP_B1,
+   [0x5] = STEP_B3,
+};
+
+static const int pvc_ct_subids[] = {
+   [0x3] = STEP_A0,
+   [0x5] = STEP_B0,
+   [0x6] = STEP_B1,
+   [0x7] = STEP_C0,
+};
+
+static int
+pvc_step_lookup(struct drm_i915_private *i915, const char *type,
+   const int *table, int size, int subid)
+{
+   if (subid < size && table[subid] != STEP_NONE)
+   return table[subid];
+
+   drm_warn(&i915->drm, "Unknown %s id 0x%02x\n", type, subid);
+
+   /*
+* As on other platforms, try to use the next higher ID if we land on a
+* gap in the table.
+*/
+   while (subid < size && table[subid] == STEP_NONE)
+   subid++;
+
+   if (subid < size) {
+   drm_dbg(&i915->dr

[PATCH 0/2] i915: PVC steppings and initial workarounds

2022-05-27 Thread Matt Roper
Matt Roper (1):
  drm/i915/pvc: Extract stepping information from PCI revid

Stuart Summers (1):
  drm/i915/pvc: Add initial PVC workarounds

 drivers/gpu/drm/i915/gt/intel_engine_regs.h |  5 +-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  3 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 61 +++---
 drivers/gpu/drm/i915/i915_drv.h | 13 
 drivers/gpu/drm/i915/intel_pm.c | 16 -
 drivers/gpu/drm/i915/intel_step.c   | 70 -
 drivers/gpu/drm/i915/intel_step.h   |  4 +-
 7 files changed, 158 insertions(+), 14 deletions(-)

-- 
2.35.3



Re: [PATCH] drm/i915/dg2: Catch and log more unexpected values in DG1_MSTR_TILE_INTR

2022-05-25 Thread Matt Roper
On Wed, May 25, 2022 at 05:03:13PM +0100, Tvrtko Ursulin wrote:
> 
> On 24/05/2022 18:51, Matt Roper wrote:
> > On Tue, May 24, 2022 at 10:43:39AM +0100, Tvrtko Ursulin wrote:
> > > From: Tvrtko Ursulin 
> > > 
> > > Catch and log any garbage in the register, including no tiles marked, or
> > > multiple tiles marked.
> > > 
> > > Signed-off-by: Tvrtko Ursulin 
> > > Cc: Matt Roper 
> > > ---
> > > We caught garbage in DG1_MSTR_TILE_INTR with DG2 (actual value 0xF9D2C008)
> > > during glmark and more badness. So I thought lets log all possible failure
> > > modes from here and also use per device logging.
> > > ---
> > >   drivers/gpu/drm/i915/i915_irq.c | 33 ++---
> > >   drivers/gpu/drm/i915/i915_reg.h |  1 +
> > >   2 files changed, 23 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_irq.c 
> > > b/drivers/gpu/drm/i915/i915_irq.c
> > > index 73cebc6aa650..79853d3fc1ed 100644
> > > --- a/drivers/gpu/drm/i915/i915_irq.c
> > > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > > @@ -2778,24 +2778,30 @@ static irqreturn_t dg1_irq_handler(int irq, void 
> > > *arg)
> > >   u32 gu_misc_iir;
> > >   if (!intel_irqs_enabled(i915))
> > > - return IRQ_NONE;
> > > + goto none;
> > >   master_tile_ctl = dg1_master_intr_disable(regs);
> > > - if (!master_tile_ctl) {
> > > - dg1_master_intr_enable(regs);
> > > - return IRQ_NONE;
> > > + if (!master_tile_ctl)
> > > + goto enable_none;
> > > +
> > > + if (master_tile_ctl & ~(DG1_MSTR_IRQ | DG1_MSTR_TILE_MASK)) {
> > > + drm_warn(&i915->drm, "Garbage in master_tile_ctl: 0x%08x!\n",
> > > +  master_tile_ctl);
> > 
> > I know we have a bunch of them already, but shouldn't we be avoiding
> > printk-based stuff like this inside interrupt handlers?  Should we be
> > migrating all these error messages over to trace_printk or something
> > similar that's safer to use?
> 
> Not sure - I kind of think some really unexpected and worrying situations
> should be loud and on by default. Risk is then spam if not ratelimited.
> Maybe we should instead ratelimit most errors/warnings coming for irq
> handlers?

It's not the risk of spam that's the problem, but rather that
printk-based stuff eventually calls into the console code to flush its
buffers.  That's way more overhead than you want in an interrupt handler
so it's bad on its own, but if you're using something slow like a serial
console, it becomes even more of a problem.

While the unexpected bits in the master tile register are strange and
may point to a bigger problem somewhere else, they're also harmless on
their own since we should just ignore those bits and only process the
valid tiles.

> 
> In this particular case at least DRM_ERROR with no device info is the odd
> one out in the entire file so I'd suggest changing at least that, if the
> rest of my changes is of questionable benefit.

Changing DRM_ERROR -> drm_err would probably be fine in the short term
since it doesn't really make us any worse off.  Changing to drm_warn
might not be great since we're generating a lot more lines of output and
probably multiplying the already bad overhead that shouldn't be
happening in an interrupt handler.  But if we could update the interrupt
handler to just save away the details and do the actual drm_warn later,
outside the interrupt handler code, that would be okay.  We should
probably work toward something like that for all of our interrupt
handler warning/error messages.


Matt

> 
> Regards,
> 
> Tvrtko
> 
> > 
> > 
> > Matt
> > 
> > > + goto enable_none;
> > >   }
> > >   /* FIXME: we only support tile 0 for now. */
> > > - if (master_tile_ctl & DG1_MSTR_TILE(0)) {
> > > - master_ctl = raw_reg_read(regs, GEN11_GFX_MSTR_IRQ);
> > > - raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, master_ctl);
> > > - } else {
> > > - DRM_ERROR("Tile not supported: 0x%08x\n", master_tile_ctl);
> > > - dg1_master_intr_enable(regs);
> > > - return IRQ_NONE;
> > > + if (REG_FIELD_GET(DG1_MSTR_TILE_MASK, master_tile_ctl) !=
> > > + DG1_MSTR_TILE(0)) {
> > > + drm_warn(&i915->drm, "Unexpected irq from tile %u!\n",
> > > +  ilog2(REG_FIELD_GET(DG1_MSTR_TILE_MASK,
&

[PATCH] drm/i915/hwconfig: Future-proof platform checks

2022-05-24 Thread Matt Roper
PVC also has a hwconfig table.  Actually the current expectation is that
all future platforms will have hwconfig, so let's just change the
condition to an IP version check so that we don't need to keep updating
this for each new platform that shows up.

Cc: John Harrison 
Cc: Radhakrishna Sripada 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
index 5aaa3948de74..4781fccc2687 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
@@ -96,7 +96,7 @@ static bool has_table(struct drm_i915_private *i915)
 {
if (IS_ALDERLAKE_P(i915) && !IS_ADLP_N(i915))
return true;
-   if (IS_DG2(i915))
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
return true;
 
return false;
-- 
2.35.3



Re: [PATCH] drm/i915/dg2: Catch and log more unexpected values in DG1_MSTR_TILE_INTR

2022-05-24 Thread Matt Roper
On Tue, May 24, 2022 at 10:43:39AM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> Catch and log any garbage in the register, including no tiles marked, or
> multiple tiles marked.
> 
> Signed-off-by: Tvrtko Ursulin 
> Cc: Matt Roper 
> ---
> We caught garbage in DG1_MSTR_TILE_INTR with DG2 (actual value 0xF9D2C008)
> during glmark and more badness. So I thought lets log all possible failure
> modes from here and also use per device logging.
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 33 ++---
>  drivers/gpu/drm/i915/i915_reg.h |  1 +
>  2 files changed, 23 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 73cebc6aa650..79853d3fc1ed 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2778,24 +2778,30 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
>   u32 gu_misc_iir;
>  
>   if (!intel_irqs_enabled(i915))
> - return IRQ_NONE;
> + goto none;
>  
>   master_tile_ctl = dg1_master_intr_disable(regs);
> - if (!master_tile_ctl) {
> - dg1_master_intr_enable(regs);
> - return IRQ_NONE;
> + if (!master_tile_ctl)
> + goto enable_none;
> +
> + if (master_tile_ctl & ~(DG1_MSTR_IRQ | DG1_MSTR_TILE_MASK)) {
> + drm_warn(&i915->drm, "Garbage in master_tile_ctl: 0x%08x!\n",
> +  master_tile_ctl);

I know we have a bunch of them already, but shouldn't we be avoiding
printk-based stuff like this inside interrupt handlers?  Should we be
migrating all these error messages over to trace_printk or something
similar that's safer to use?


Matt

> + goto enable_none;
>   }
>  
>   /* FIXME: we only support tile 0 for now. */
> - if (master_tile_ctl & DG1_MSTR_TILE(0)) {
> - master_ctl = raw_reg_read(regs, GEN11_GFX_MSTR_IRQ);
> - raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, master_ctl);
> - } else {
> - DRM_ERROR("Tile not supported: 0x%08x\n", master_tile_ctl);
> - dg1_master_intr_enable(regs);
> - return IRQ_NONE;
> + if (REG_FIELD_GET(DG1_MSTR_TILE_MASK, master_tile_ctl) !=
> + DG1_MSTR_TILE(0)) {
> + drm_warn(&i915->drm, "Unexpected irq from tile %u!\n",
> +  ilog2(REG_FIELD_GET(DG1_MSTR_TILE_MASK,
> +  master_tile_ctl)));
> + goto enable_none;
>   }
>  
> + master_ctl = raw_reg_read(regs, GEN11_GFX_MSTR_IRQ);
> + raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, master_ctl);
> +
>   gen11_gt_irq_handler(gt, master_ctl);
>  
>   if (master_ctl & GEN11_DISPLAY_IRQ)
> @@ -2810,6 +2816,11 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
>   pmu_irq_stats(i915, IRQ_HANDLED);
>  
>   return IRQ_HANDLED;
> +
> +enable_none:
> + dg1_master_intr_enable(regs);
> +none:
> + return IRQ_NONE;
>  }
>  
>  /* Called from drm generic code, passed 'crtc' which
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index d8579ab9384c..eefa301c6430 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -5774,6 +5774,7 @@
>  
>  #define DG1_MSTR_TILE_INTR   _MMIO(0x190008)
>  #define   DG1_MSTR_IRQ   REG_BIT(31)
> +#define   DG1_MSTR_TILE_MASK REG_GENMASK(3, 0)
>  #define   DG1_MSTR_TILE(t)   REG_BIT(t)
>  
>  #define GEN11_DISPLAY_INT_CTL_MMIO(0x44200)
> -- 
> 2.32.0
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [PATCH] drm/i915/hwconfig: Report no hwconfig support on ADL-N

2022-05-23 Thread Matt Roper
On Mon, May 23, 2022 at 01:21:16PM +0530, Balasubramani Vivekanandan wrote:
> ADL-N being a subplatform of ADL-P, it lacks support for hwconfig
> table. Explicit check added to skip ADL-N.
> 
> Signed-off-by: Balasubramani Vivekanandan 
> 

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
> index 79c66b6b51a3..5aaa3948de74 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
> @@ -94,7 +94,7 @@ static int guc_hwconfig_fill_buffer(struct intel_guc *guc, 
> struct intel_hwconfig
>  
>  static bool has_table(struct drm_i915_private *i915)
>  {
> - if (IS_ALDERLAKE_P(i915))
> + if (IS_ALDERLAKE_P(i915) && !IS_ADLP_N(i915))
>   return true;
>   if (IS_DG2(i915))
>   return true;
> -- 
> 2.25.1
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


[PATCH v5 5/6] drm/i915/sseu: Disassociate internal subslice mask representation from uapi

2022-05-23 Thread Matt Roper
As with EU masks, it's easier to store subslice/DSS masks internally in
a format that's more natural for the driver to work with, and then only
covert into the u8[] uapi form when the query ioctl is invoked.  Since
the hardware design changed significantly with Xe_HP, we'll use a union
to choose between the old "hsw-style" subslice masks or the newer xehp
mask.  HSW-style masks will be stored in an array of u8's, indexed by
slice (there's never more than 6 subslices per slice on older
platforms).  For Xe_HP and beyond where slices no longer exist, we only
need a single bitmask.  However we already know that this mask is
eventually going to grow too large for a simple u64 to hold, so we'll
represent it in a manner that can be operated on by the utilities in
linux/bitmap.h.

v2:
 - Fix typo: BIT(s) -> BIT(ss) in gen9_sseu_device_status()

v3:
 - Eliminate sseu->ss_stride and just calculate the stride while
   specifically handling uapi.  (Tvrtko)
 - Use BITMAP_BITS() macro to refer to size of masks rather than
   passing I915_MAX_SS_FUSE_BITS directly.  (Tvrtko)
 - Report compute/geometry DSS masks separately when dumping Xe_HP SSEU
   info.  (Tvrtko)
 - Restore dropped range checks to intel_sseu_has_subslice().  (Tvrtko)

v4:
 - Make the bitmap size macro check the size of the .xehp field rather
   than the containing union.  (Tvrtko)
 - Don't add GEM_BUG_ON() intel_sseu_has_subslice()'s check for whether
   slice or subslice ID exceed sseu->max_[sub]slices; various loops
   in the driver are expected to exceed these, so we should just
   silently return 'false.'

Cc: Tvrtko Ursulin 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c  |   5 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c|   4 +-
 drivers/gpu/drm/i915/gt/intel_gt.c   |  12 +-
 drivers/gpu/drm/i915/gt/intel_sseu.c | 261 +++
 drivers/gpu/drm/i915/gt/intel_sseu.h |  76 --
 drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c |  30 +--
 drivers/gpu/drm/i915/gt/intel_workarounds.c  |  24 +-
 drivers/gpu/drm/i915/i915_getparam.c |   3 +-
 drivers/gpu/drm/i915/i915_query.c|  13 +-
 9 files changed, 241 insertions(+), 187 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index ab4c5ab28e4d..a3bb73f5d53b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1875,6 +1875,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
 {
const struct sseu_dev_info *device = >->info.sseu;
struct drm_i915_private *i915 = gt->i915;
+   unsigned int dev_subslice_mask = intel_sseu_get_hsw_subslices(device, 
0);
 
/* No zeros in any field. */
if (!user->slice_mask || !user->subslice_mask ||
@@ -1901,7 +1902,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
if (user->slice_mask & ~device->slice_mask)
return -EINVAL;
 
-   if (user->subslice_mask & ~device->subslice_mask[0])
+   if (user->subslice_mask & ~dev_subslice_mask)
return -EINVAL;
 
if (user->max_eus_per_subslice > device->max_eus_per_subslice)
@@ -1915,7 +1916,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
/* Part specific restrictions. */
if (GRAPHICS_VER(i915) == 11) {
unsigned int hw_s = hweight8(device->slice_mask);
-   unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
+   unsigned int hw_ss_per_s = hweight8(dev_subslice_mask);
unsigned int req_s = hweight8(context->slice_mask);
unsigned int req_ss = hweight8(context->subslice_mask);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 1adbf34c3632..f0acf8518a51 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -674,8 +674,8 @@ static void engine_mask_apply_compute_fuses(struct intel_gt 
*gt)
if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
return;
 
-   ccs_mask = 
intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu),
-   ss_per_ccs);
+   ccs_mask = 
intel_slicemask_from_xehp_dssmask(info->sseu.compute_subslice_mask,
+ss_per_ccs);
/*
 * If all DSS in a quadrant are fused off, the corresponding CCS
 * engine is not available for use.
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 034182f85501..2921f510642f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -133,13 +133,6 @@ static const struct intel_mmio_range 
dg2_lncf_steering_table

[PATCH v4 1/6] drm/i915/xehp: Use separate sseu init function

2022-05-20 Thread Matt Roper
Xe_HP has enough fundamental differences from previous platforms that it
makes sense to use a separate SSEU init function to keep things
straightforward and easy to understand.  We'll also add a has_xehp_dss
flag to the SSEU structure that will be used by other upcoming changes.

v2:
 - Add has_xehp_dss flag

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_sseu.c | 86 
 drivers/gpu/drm/i915/gt/intel_sseu.h |  5 ++
 2 files changed, 54 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c 
b/drivers/gpu/drm/i915/gt/intel_sseu.c
index fdd25691beda..b5fd479a7b85 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -169,13 +169,43 @@ static void gen11_compute_sseu_info(struct sseu_dev_info 
*sseu, u8 s_en,
sseu->eu_total = compute_eu_total(sseu);
 }
 
-static void gen12_sseu_info_init(struct intel_gt *gt)
+static void xehp_sseu_info_init(struct intel_gt *gt)
 {
struct sseu_dev_info *sseu = >->info.sseu;
struct intel_uncore *uncore = gt->uncore;
u32 g_dss_en, c_dss_en = 0;
u16 eu_en = 0;
u8 eu_en_fuse;
+   int eu;
+
+   /*
+* The concept of slice has been removed in Xe_HP.  To be compatible
+* with prior generations, assume a single slice across the entire
+* device. Then calculate out the DSS for each workload type within
+* that software slice.
+*/
+   intel_sseu_set_info(sseu, 1, 32, 16);
+   sseu->has_xehp_dss = 1;
+
+   g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
+   c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);
+
+   eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & 
XEHP_EU_ENA_MASK;
+
+   for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
+   if (eu_en_fuse & BIT(eu))
+   eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
+
+   gen11_compute_sseu_info(sseu, 0x1, g_dss_en, c_dss_en, eu_en);
+}
+
+static void gen12_sseu_info_init(struct intel_gt *gt)
+{
+   struct sseu_dev_info *sseu = >->info.sseu;
+   struct intel_uncore *uncore = gt->uncore;
+   u32 g_dss_en;
+   u16 eu_en = 0;
+   u8 eu_en_fuse;
u8 s_en;
int eu;
 
@@ -183,43 +213,23 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
 * Instead of splitting these, provide userspace with an array
 * of DSS to more closely represent the hardware resource.
-*
-* In addition, the concept of slice has been removed in Xe_HP.
-* To be compatible with prior generations, assume a single slice
-* across the entire device. Then calculate out the DSS for each
-* workload type within that software slice.
 */
-   if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915))
-   intel_sseu_set_info(sseu, 1, 32, 16);
-   else
-   intel_sseu_set_info(sseu, 1, 6, 16);
+   intel_sseu_set_info(sseu, 1, 6, 16);
 
-   /*
-* As mentioned above, Xe_HP does not have the concept of a slice.
-* Enable one for software backwards compatibility.
-*/
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-   s_en = 0x1;
-   else
-   s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
-  GEN11_GT_S_ENA_MASK;
+   s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
+   GEN11_GT_S_ENA_MASK;
 
g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-   c_dss_en = intel_uncore_read(uncore, 
GEN12_GT_COMPUTE_DSS_ENABLE);
 
/* one bit per pair of EUs */
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-   eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & 
XEHP_EU_ENA_MASK;
-   else
-   eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
-  GEN11_EU_DIS_MASK);
+   eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
+  GEN11_EU_DIS_MASK);
 
for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
if (eu_en_fuse & BIT(eu))
eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
 
-   gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);
+   gen11_compute_sseu_info(sseu, s_en, g_dss_en, 0, eu_en);
 
/* TGL only supports slice-level power gating */
sseu->has_slice_pg = 1;
@@ -574,18 +584,20 @@ void intel_sseu_info_init(struct intel_gt *gt)
 {
struct drm_i915_private *i915 = gt->i915;
 
-   if (IS_HASWELL(i915))
-   hsw_sseu_info_init(gt);
-   else if 

[PATCH v4 4/6] drm/i915/sseu: Don't try to store EU mask internally in UAPI format

2022-05-20 Thread Matt Roper
Storing the EU mask internally in the same format the I915_QUERY
topology queries use makes the final copy_to_user() a bit simpler, but
makes the rest of the driver's SSEU more complicated and harder to
follow.  Let's switch to an internal representation that's more natural:
Xe_HP platforms will be a simple array of u16 masks, whereas pre-Xe_HP
platforms will be a two-dimensional array, indexed by [slice][subslice].
We'll convert to the uapi format only when the query uapi is called.

v2:
 - Drop has_common_ss_eumask.  We waste some space repeating identical
   EU masks for every single DSS, but the code is simpler without it.
   (Tvrtko)

v3:
 - Mask down EUs passed to sseu_set_eus at the callsite rather than
   inside the function.  (Tvrtko)
 - Eliminate sseu->eu_stride and calculate it when needed.  (Tvrtko)

Cc: Tvrtko Ursulin 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_sseu.c | 88 ++--
 drivers/gpu/drm/i915/gt/intel_sseu.h | 10 +++-
 drivers/gpu/drm/i915/i915_query.c| 13 ++--
 3 files changed, 73 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c 
b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 7e5222ad2f98..285cfd758bdc 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -19,8 +19,6 @@ void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 
max_slices,
 
sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
-   sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
-   GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
 }
 
 unsigned int
@@ -78,47 +76,77 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info 
*sseu, u8 slice)
return hweight32(intel_sseu_get_subslices(sseu, slice));
 }
 
-static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
-  int subslice)
-{
-   int slice_stride = sseu->max_subslices * sseu->eu_stride;
-
-   return slice * slice_stride + subslice * sseu->eu_stride;
-}
-
 static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
int subslice)
 {
-   int i, offset = sseu_eu_idx(sseu, slice, subslice);
-   u16 eu_mask = 0;
-
-   for (i = 0; i < sseu->eu_stride; i++)
-   eu_mask |=
-   ((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);
-
-   return eu_mask;
+   if (sseu->has_xehp_dss) {
+   WARN_ON(slice > 0);
+   return sseu->eu_mask.xehp[subslice];
+   } else {
+   return sseu->eu_mask.hsw[slice][subslice];
+   }
 }
 
 static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
 u16 eu_mask)
 {
-   int i, offset = sseu_eu_idx(sseu, slice, subslice);
-
-   for (i = 0; i < sseu->eu_stride; i++)
-   sseu->eu_mask[offset + i] =
-   (eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
+   GEM_WARN_ON(eu_mask && __fls(eu_mask) >= sseu->max_eus_per_subslice);
+   if (sseu->has_xehp_dss) {
+   GEM_WARN_ON(slice > 0);
+   sseu->eu_mask.xehp[subslice] = eu_mask;
+   } else {
+   sseu->eu_mask.hsw[slice][subslice] = eu_mask;
+   }
 }
 
 static u16 compute_eu_total(const struct sseu_dev_info *sseu)
 {
-   u16 i, total = 0;
+   int s, ss, total = 0;
 
-   for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
-   total += hweight8(sseu->eu_mask[i]);
+   for (s = 0; s < sseu->max_slices; s++)
+   for (ss = 0; ss < sseu->max_subslices; ss++)
+   if (sseu->has_xehp_dss)
+   total += hweight16(sseu->eu_mask.xehp[ss]);
+   else
+   total += hweight16(sseu->eu_mask.hsw[s][ss]);
 
return total;
 }
 
+/**
+ * intel_sseu_copy_eumask_to_user - Copy EU mask into a userspace buffer
+ * @to: Pointer to userspace buffer to copy to
+ * @sseu: SSEU structure containing EU mask to copy
+ *
+ * Copies the EU mask to a userspace buffer in the format expected by
+ * the query ioctl's topology queries.
+ *
+ * Returns the result of the copy_to_user() operation.
+ */
+int intel_sseu_copy_eumask_to_user(void __user *to,
+  const struct sseu_dev_info *sseu)
+{
+   u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE] = {};
+   int eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
+   int len = sseu->max_slices * sseu->max_subslices * eu_stride;
+   int s, ss, i;
+
+   for (s = 0; s < sseu->max_slices; s++) {
+   for (ss = 0; ss < sseu->max_subslices; ss++) {
+   int uapi_offset =
+   

[PATCH v4 0/6] i915: SSEU handling updates

2022-05-20 Thread Matt Roper
This series reworks i915's internal handling of slice/subslice/EU (SSEU)
data to represent platforms like Xe_HP in a more natural manner and to
prepare for future platforms where the masks will need to grow in size.
One key idea of this series is that although we have a fixed ABI to
convey SSEU data to userspace (i.e., multiple u8[] arrays with data
stored at different strides), we don't need to use this cumbersome
format for the driver's own internal storage.  As long as we can convert
into the uapi form properly when responding to the I915_QUERY ioctl,
it's preferable to use an internal storage format that's easier for the
driver to work with.

Another key point here is that we're reaching the point where subslice
(DSS) masks will soon not fit within simple u32/u64 integer values.
Xe_HP SDV and DG2 platforms today have subslice (DSS) masks that are 32
bits, which maxes out the current storage of a u32.  With PVC the masks
are represented by a pair of 32-bit registers, requiring a bump up to at
least 64-bits of storage internally.  We could switch to u64 for that in
the short term, but since we already know that upcoming architectures
intend to provide DSS fuse bits via three or more registers it's best to
switch to a representation that's more future-proof but still easy to
work with in the driver code.  To accomodate this, we start storing our
subslice mask for Xe_HP and beyond in a new typedef that can be
processed by the linux/bitmap.h operations.

Finally, since no userspace for Xe_HP or beyond is using the legacy
I915_GETPARAM ioctl lookups for I915_PARAM_SLICE_MASK and
I915_PARAM_SUBSLICE_MASK (since they've migrated to the more flexible
I915_QUERY ioctl that can return more than a simple u32 value), we take
the opportunity to officially drop support for those GETPARAM lookups on
modern platforms.  Maintaining support for these GETPARAM lookups don't
make sense for a number of reasons:

 * Traditional slices no longer exist, and newer ideas like gslices,
   cslices, mslices, etc. aren't something userspace needs to query
   since it can be inferred from other information.
 * The GETPARAM ioctl doesn't have a way to distinguish between geometry
   subslice masks and compute subslice masks, which are distinct on
   Xe_HP and beyond.
 * The I915_GETPARAM ioctl is limited to returning a 32-bit value, so
   when subslice masks begin to exceed 32-bits (on PVC), it simply can't
   return the entire mask.
 * The GETPARAM ioctl doesn't have a way to give sensible information
   for multi-tile devices.

v2:
 - Switch to union of hsw/xehp formats to keep the representation in a
   natural format for different types of hardware.
 - Avoid accessing internals of intel_sseu_ss_mask_t directly outside of
   intel_sseu.[ch].
 - Include PVC SSEU which needs the larger SS mask storage enabled by
   this series.

v3:
 - Correct a BIT(s) typo that should have been BIT(ss), causing
   incorrect handling on gen9 platforms.

v4:
 - Eliminate sseu->{ss,eu}_stride fields and just calculate the proper
   value in the UAPI code that needs them.
 - Handle unwanted ~u8 sign extension at the callsite instead of inside
   sseu_set_eus.
 - Use BITMAP_BITS() macro rather than passing I915_MAX_SS_FUSE_BITS
   around directly to bitmap operations.
 - Improved debugfs / dmesg reporting for Xe_HP dumps
 - Various assertion check improvements.

Cc: Tvrtko Ursulin 


Matt Roper (6):
  drm/i915/xehp: Use separate sseu init function
  drm/i915/xehp: Drop GETPARAM lookups of I915_PARAM_[SUB]SLICE_MASK
  drm/i915/sseu: Simplify gen11+ SSEU handling
  drm/i915/sseu: Don't try to store EU mask internally in UAPI format
  drm/i915/sseu: Disassociate internal subslice mask representation from
uapi
  drm/i915/pvc: Add SSEU changes

 drivers/gpu/drm/i915/gem/i915_gem_context.c  |   5 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c|   4 +-
 drivers/gpu/drm/i915/gt/intel_gt.c   |  12 +-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h  |   1 +
 drivers/gpu/drm/i915/gt/intel_sseu.c | 450 ---
 drivers/gpu/drm/i915/gt/intel_sseu.h |  94 ++--
 drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c |  30 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c  |  24 +-
 drivers/gpu/drm/i915/i915_drv.h  |   2 +
 drivers/gpu/drm/i915/i915_getparam.c |  11 +-
 drivers/gpu/drm/i915/i915_pci.c  |   3 +-
 drivers/gpu/drm/i915/i915_query.c|  26 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 13 files changed, 398 insertions(+), 265 deletions(-)

-- 
2.35.3



[PATCH v4 6/6] drm/i915/pvc: Add SSEU changes

2022-05-20 Thread Matt Roper
PVC splits the mask of enabled DSS over two registers.  It also changes
the meaning of the EU fuse register such that each bit represents a
single EU rather than a pair of EUs.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h  |  1 +
 drivers/gpu/drm/i915/gt/intel_sseu.c | 31 ++--
 drivers/gpu/drm/i915/gt/intel_sseu.h |  2 +-
 drivers/gpu/drm/i915/i915_drv.h  |  2 ++
 drivers/gpu/drm/i915/i915_pci.c  |  3 ++-
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 6 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 7246eb870c7e..3f2b46c884f9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -561,6 +561,7 @@
 #define   GEN11_GT_VEBOX_DISABLE_MASK  (0x0f << 
GEN11_GT_VEBOX_DISABLE_SHIFT)
 
 #define GEN12_GT_COMPUTE_DSS_ENABLE_MMIO(0x9144)
+#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT_MMIO(0x9148)
 
 #define GEN6_UCGCTL1   _MMIO(0x9400)
 #define   GEN6_GAMUNIT_CLOCK_GATE_DISABLE  (1 << 22)
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c 
b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 6fbc2ac507b7..82e6f324b2ef 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -212,27 +212,44 @@ static void xehp_sseu_info_init(struct intel_gt *gt)
struct intel_uncore *uncore = gt->uncore;
u16 eu_en = 0;
u8 eu_en_fuse;
+   int num_compute_regs, num_geometry_regs;
int eu;
 
+   if (IS_PONTEVECCHIO(gt->i915)) {
+   num_geometry_regs = 0;
+   num_compute_regs = 2;
+   } else {
+   num_geometry_regs = 1;
+   num_compute_regs = 1;
+   }
+
/*
 * The concept of slice has been removed in Xe_HP.  To be compatible
 * with prior generations, assume a single slice across the entire
 * device. Then calculate out the DSS for each workload type within
 * that software slice.
 */
-   intel_sseu_set_info(sseu, 1, 32, 16);
+   intel_sseu_set_info(sseu, 1,
+   32 * max(num_geometry_regs, num_compute_regs),
+   16);
sseu->has_xehp_dss = 1;
 
-   xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask, 1,
+   xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask,
+  num_geometry_regs,
   GEN12_GT_GEOMETRY_DSS_ENABLE);
-   xehp_load_dss_mask(uncore, &sseu->compute_subslice_mask, 1,
-  GEN12_GT_COMPUTE_DSS_ENABLE);
+   xehp_load_dss_mask(uncore, &sseu->compute_subslice_mask,
+  num_compute_regs,
+  GEN12_GT_COMPUTE_DSS_ENABLE,
+  XEHPC_GT_COMPUTE_DSS_ENABLE_EXT);
 
eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & 
XEHP_EU_ENA_MASK;
 
-   for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
-   if (eu_en_fuse & BIT(eu))
-   eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
+   if (HAS_ONE_EU_PER_FUSE_BIT(gt->i915))
+   eu_en = eu_en_fuse;
+   else
+   for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
+   if (eu_en_fuse & BIT(eu))
+   eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
 
xehp_compute_sseu_info(sseu, eu_en);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h 
b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 0d3def55e770..79732c1af6c4 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -33,7 +33,7 @@ struct drm_printer;
  * Maximum number of 32-bit registers used by hardware to express the
  * enabled/disabled subslices.
  */
-#define I915_MAX_SS_FUSE_REGS  1
+#define I915_MAX_SS_FUSE_REGS  2
 #define I915_MAX_SS_FUSE_BITS  (I915_MAX_SS_FUSE_REGS * 32)
 
 /* Maximum number of EUs that can exist within a subslice or DSS. */
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c5fc402d9c50..1f8422e9511b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1405,6 +1405,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 
 #define HAS_MBUS_JOINING(i915) (IS_ALDERLAKE_P(i915))
 
+#define HAS_ONE_EU_PER_FUSE_BIT(i915)  
(INTEL_INFO(i915)->has_one_eu_per_fuse_bit)
+
 /* i915_gem.c */
 void i915_gem_init_early(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 5ad9884874c2..9cee3c69edde 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1079,7 +1079,8 @@ static const struct int

[PATCH v4 3/6] drm/i915/sseu: Simplify gen11+ SSEU handling

2022-05-20 Thread Matt Roper
Although gen11 and gen12 architectures supported the concept of multiple
slices, in practice all the platforms that were actually designed only
had a single slice (i.e., note the parameters to 'intel_sseu_set_info'
that we pass for each platform).  We can simplify the code slightly by
dropping the multi-slice logic from gen11+ platforms.

v2:
 - Promote drm_dbg to drm_WARN_ON if the slice fuse register reports
   unexpected fusing.  (Tvrtko)

Cc: Tvrtko Ursulin 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_sseu.c | 76 +---
 1 file changed, 36 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c 
b/drivers/gpu/drm/i915/gt/intel_sseu.c
index b5fd479a7b85..7e5222ad2f98 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -119,52 +119,37 @@ static u16 compute_eu_total(const struct sseu_dev_info 
*sseu)
return total;
 }
 
-static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
-{
-   u32 ss_mask;
-
-   ss_mask = ss_en >> (s * sseu->max_subslices);
-   ss_mask &= GENMASK(sseu->max_subslices - 1, 0);
-
-   return ss_mask;
-}
-
-static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
+static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
u32 g_ss_en, u32 c_ss_en, u16 eu_en)
 {
-   int s, ss;
+   u32 valid_ss_mask = GENMASK(sseu->max_subslices - 1, 0);
+   int ss;
 
/* g_ss_en/c_ss_en represent entire subslice mask across all slices */
GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
   sizeof(g_ss_en) * BITS_PER_BYTE);
 
-   for (s = 0; s < sseu->max_slices; s++) {
-   if ((s_en & BIT(s)) == 0)
-   continue;
+   sseu->slice_mask |= BIT(0);
 
-   sseu->slice_mask |= BIT(s);
-
-   /*
-* XeHP introduces the concept of compute vs geometry DSS. To
-* reduce variation between GENs around subslice usage, store a
-* mask for both the geometry and compute enabled masks since
-* userspace will need to be able to query these masks
-* independently.  Also compute a total enabled subslice count
-* for the purposes of selecting subslices to use in a
-* particular GEM context.
-*/
-   intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
-get_ss_stride_mask(sseu, s, c_ss_en));
-   intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
-get_ss_stride_mask(sseu, s, g_ss_en));
-   intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
-get_ss_stride_mask(sseu, s,
-   g_ss_en | c_ss_en));
+   /*
+* XeHP introduces the concept of compute vs geometry DSS. To reduce
+* variation between GENs around subslice usage, store a mask for both
+* the geometry and compute enabled masks since userspace will need to
+* be able to query these masks independently.  Also compute a total
+* enabled subslice count for the purposes of selecting subslices to
+* use in a particular GEM context.
+*/
+   intel_sseu_set_subslices(sseu, 0, sseu->compute_subslice_mask,
+c_ss_en & valid_ss_mask);
+   intel_sseu_set_subslices(sseu, 0, sseu->geometry_subslice_mask,
+g_ss_en & valid_ss_mask);
+   intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask,
+(g_ss_en | c_ss_en) & valid_ss_mask);
+
+   for (ss = 0; ss < sseu->max_subslices; ss++)
+   if (intel_sseu_has_subslice(sseu, 0, ss))
+   sseu_set_eus(sseu, 0, ss, eu_en);
 
-   for (ss = 0; ss < sseu->max_subslices; ss++)
-   if (intel_sseu_has_subslice(sseu, s, ss))
-   sseu_set_eus(sseu, s, ss, eu_en);
-   }
sseu->eu_per_subslice = hweight16(eu_en);
sseu->eu_total = compute_eu_total(sseu);
 }
@@ -196,7 +181,7 @@ static void xehp_sseu_info_init(struct intel_gt *gt)
if (eu_en_fuse & BIT(eu))
eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
 
-   gen11_compute_sseu_info(sseu, 0x1, g_dss_en, c_dss_en, eu_en);
+   gen11_compute_sseu_info(sseu, g_dss_en, c_dss_en, eu_en);
 }
 
 static void gen12_sseu_info_init(struct intel_gt *gt)
@@ -216,8 +201,13 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
 */
intel_sseu_set_info(sseu, 1, 6, 16);
 
+   /*
+* Although gen12 architecture supp

<    1   2   3   4   5   6   7   8   9   10   >