[PATCH v3 03/14] drm/i915: Use managed allocations for extra uncore objects
We're slowly transitioning the init-time kzalloc's of the driver over to DRM-managed allocations; let's make sure the uncore objects allocated for non-root GTs are thus allocated. Signed-off-by: Matt Roper Reviewed-by: Radhakrishna Sripada --- drivers/gpu/drm/i915/gt/intel_gt.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index a82b5e2e0d83..cf7aab7adb30 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -783,7 +783,7 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) if (!gt_is_root(gt)) { struct intel_uncore *uncore; - uncore = kzalloc(sizeof(*uncore), GFP_KERNEL); + uncore = drmm_kzalloc(>->i915->drm, sizeof(*uncore), GFP_KERNEL); if (!uncore) return -ENOMEM; @@ -808,10 +808,8 @@ intel_gt_tile_cleanup(struct intel_gt *gt) { intel_uncore_cleanup_mmio(gt->uncore); - if (!gt_is_root(gt)) { - kfree(gt->uncore); + if (!gt_is_root(gt)) kfree(gt); - } } int intel_gt_probe_all(struct drm_i915_private *i915) -- 2.37.2
[PATCH v3 01/14] drm/i915: Move locking and unclaimed check into mmio_debug_{suspend, resume}
Moving the locking for MMIO debug (and the final check for unclaimed accesses when resuming debug after a userspace-initiated forcewake) will make it simpler to completely skip MMIO debug handling on uncores that don't support it in future patches. Signed-off-by: Matt Roper Reviewed-by: Radhakrishna Sripada --- drivers/gpu/drm/i915/intel_uncore.c | 41 +++-- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 9b81b2543ce2..e717ea55484a 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -50,23 +50,33 @@ intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug) mmio_debug->unclaimed_mmio_check = 1; } -static void mmio_debug_suspend(struct intel_uncore_mmio_debug *mmio_debug) +static void mmio_debug_suspend(struct intel_uncore *uncore) { - lockdep_assert_held(&mmio_debug->lock); + spin_lock(&uncore->debug->lock); /* Save and disable mmio debugging for the user bypass */ - if (!mmio_debug->suspend_count++) { - mmio_debug->saved_mmio_check = mmio_debug->unclaimed_mmio_check; - mmio_debug->unclaimed_mmio_check = 0; + if (!uncore->debug->suspend_count++) { + uncore->debug->saved_mmio_check = uncore->debug->unclaimed_mmio_check; + uncore->debug->unclaimed_mmio_check = 0; } + + spin_unlock(&uncore->debug->lock); } -static void mmio_debug_resume(struct intel_uncore_mmio_debug *mmio_debug) +static bool check_for_unclaimed_mmio(struct intel_uncore *uncore); + +static void mmio_debug_resume(struct intel_uncore *uncore) { - lockdep_assert_held(&mmio_debug->lock); + spin_lock(&uncore->debug->lock); + + if (!--uncore->debug->suspend_count) + uncore->debug->unclaimed_mmio_check = uncore->debug->saved_mmio_check; - if (!--mmio_debug->suspend_count) - mmio_debug->unclaimed_mmio_check = mmio_debug->saved_mmio_check; + if (check_for_unclaimed_mmio(uncore)) + drm_info(&uncore->i915->drm, +"Invalid mmio detected during user access\n"); + + spin_unlock(&uncore->debug->lock); } static const char * const forcewake_domain_names[] = { @@ -677,9 +687,7 @@ void intel_uncore_forcewake_user_get(struct intel_uncore *uncore) spin_lock_irq(&uncore->lock); if (!uncore->user_forcewake_count++) { intel_uncore_forcewake_get__locked(uncore, FORCEWAKE_ALL); - spin_lock(&uncore->debug->lock); - mmio_debug_suspend(uncore->debug); - spin_unlock(&uncore->debug->lock); + mmio_debug_suspend(uncore); } spin_unlock_irq(&uncore->lock); } @@ -695,14 +703,7 @@ void intel_uncore_forcewake_user_put(struct intel_uncore *uncore) { spin_lock_irq(&uncore->lock); if (!--uncore->user_forcewake_count) { - spin_lock(&uncore->debug->lock); - mmio_debug_resume(uncore->debug); - - if (check_for_unclaimed_mmio(uncore)) - drm_info(&uncore->i915->drm, -"Invalid mmio detected during user access\n"); - spin_unlock(&uncore->debug->lock); - + mmio_debug_resume(uncore); intel_uncore_forcewake_put__locked(uncore, FORCEWAKE_ALL); } spin_unlock_irq(&uncore->lock); -- 2.37.2
[PATCH v3 07/14] drm/i915: Use a DRM-managed action to release the PCI bridge device
As we start supporting multiple uncore structures in future patches, the MMIO cleanup (which make also get called mid-init if there's a failure) will become more complicated. Moving to DRM-managed actions will help keep things simple. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/i915_driver.c | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 18acba1bc3b0..1f46dd1ffaf7 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -105,6 +105,12 @@ static const char irst_name[] = "INT3392"; static const struct drm_driver i915_drm_driver; +static void i915_release_bridge_dev(struct drm_device *dev, + void *bridge) +{ + pci_dev_put(bridge); +} + static int i915_get_bridge_dev(struct drm_i915_private *dev_priv) { int domain = pci_domain_nr(to_pci_dev(dev_priv->drm.dev)->bus); @@ -115,7 +121,9 @@ static int i915_get_bridge_dev(struct drm_i915_private *dev_priv) drm_err(&dev_priv->drm, "bridge device not found\n"); return -EIO; } - return 0; + + return drmm_add_action_or_reset(&dev_priv->drm, i915_release_bridge_dev, + dev_priv->bridge_dev); } /* Allocate space for the MCH regs if needed, return nonzero on error */ @@ -452,7 +460,6 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) err_uncore: intel_teardown_mchbar(dev_priv); intel_uncore_fini_mmio(&dev_priv->uncore); - pci_dev_put(dev_priv->bridge_dev); return ret; } @@ -465,7 +472,6 @@ static void i915_driver_mmio_release(struct drm_i915_private *dev_priv) { intel_teardown_mchbar(dev_priv); intel_uncore_fini_mmio(&dev_priv->uncore); - pci_dev_put(dev_priv->bridge_dev); } /** -- 2.37.2
[PATCH v3 00/14] i915: Add "standalone media" support for MTL
Starting with MTL, media functionality has moved into a new, second GT at the hardware level. This new GT, referred to as "standalone media" in the spec, has its own GuC, power management/forcewake, etc. The general non-engine GT registers for standalone media start at 0x38, but otherwise use the same MMIO offsets as the primary GT. Standalone media has a lot of similarity to the remote tiles present on platforms like xehpsdv and pvc, and our i915 implementation can share much of the general "multi GT" infrastructure between the two types of platforms. However there are a few notable differences we must deal with: - The 0x38 offset only applies to the non-engine GT registers (which the specs refer to as "GSI" registers). The engine registers remain at their usual locations (e.g., 0x1C for VCS0). - Unlike platforms with remote tiles, all interrupt handling for standalone media still happens via the primary GT. v2: - Added new patches to ensure each GT, not just the primary, is handled properly during various init/suspend/resume/teardown flows. (Daniele) - Simplified GSI offset handling and split it into its own patch. - Correct gt->irq_lock assignment for media GT. (Daniele) - Fix jump target for intel_root_gt_init_early() errors. (Daniele) v3: - Move intel_gt_definition struct to intel_gt_types.h. (Jani) - Drop gtdef->setup() and just switch() on type. (Jani) - Honor GSI offset during AUX table invalidation. (Aravind) - Drop intel_gt_tile_cleanup() through more intelligent use of DRM-managed actions. This also fixes the fault-injection failures reported by CI. Cc: Radhakrishna Sripada Cc: Daniele Ceraolo Spurio Cc: Aravind Iddamsetty Cc: Jani Nikula Matt Roper (12): drm/i915: Move locking and unclaimed check into mmio_debug_{suspend,resume} drm/i915: Only hook up uncore->debug for primary uncore drm/i915: Use managed allocations for extra uncore objects drm/i915: Drop intel_gt_tile_cleanup() drm/i915: Prepare more multi-GT initialization drm/i915: Rename and expose common GT early init routine drm/i915: Use a DRM-managed action to release the PCI bridge device drm/i915: Initialize MMIO access for each GT drm/i915: Handle each GT on init/release and suspend/resume drm/i915/uncore: Add GSI offset to uncore drm/i915/mtl: Add gsi_offset when emitting aux table invalidation drm/i915/xelpmp: Expose media as another GT drm/i915/mtl: Use primary GT's irq lock for media GT drm/i915/mtl: Hook up interrupts for standalone media drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 15 ++- drivers/gpu/drm/i915/gt/gen8_engine_cs.h | 3 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 10 +- drivers/gpu/drm/i915/gt/intel_gt.c| 108 +- drivers/gpu/drm/i915/gt/intel_gt.h| 4 +- drivers/gpu/drm/i915/gt/intel_gt_irq.c| 35 -- drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c | 8 +- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 10 ++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 19 ++- drivers/gpu/drm/i915/gt/intel_lrc.c | 9 +- drivers/gpu/drm/i915/gt/intel_rps.c | 26 ++--- drivers/gpu/drm/i915/gt/intel_sa_media.c | 47 drivers/gpu/drm/i915/gt/intel_sa_media.h | 15 +++ drivers/gpu/drm/i915/gt/uc/intel_guc.c| 24 ++-- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 4 +- drivers/gpu/drm/i915/i915_driver.c| 105 - drivers/gpu/drm/i915/i915_drv.h | 5 + drivers/gpu/drm/i915/i915_irq.c | 4 +- drivers/gpu/drm/i915/i915_pci.c | 14 +++ drivers/gpu/drm/i915/intel_device_info.h | 3 + drivers/gpu/drm/i915/intel_uncore.c | 92 +-- drivers/gpu/drm/i915/intel_uncore.h | 28 - drivers/gpu/drm/i915/pxp/intel_pxp.c | 4 +- drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c | 4 +- drivers/gpu/drm/i915/pxp/intel_pxp_irq.c | 14 +-- drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 1 + 29 files changed, 449 insertions(+), 171 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.c create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.h -- 2.37.2
Re: [Intel-gfx] [PATCH v2 09/12] drm/i915/uncore: Add GSI offset to uncore
On Tue, Sep 06, 2022 at 04:14:21PM +0530, Iddamsetty, Aravind wrote: > > > On 03-09-2022 05:02, Matt Roper wrote: > > GT non-engine registers (referred to as "GSI" registers by the spec) > > have the same relative offsets on standalone media as they do on the > > primary GT, just with an additional "GSI offset" added to their MMIO > > address. If we store this GSI offset in the standalone media's > > intel_uncore structure, it can be automatically applied to all GSI reg > > reads/writes that happen on that GT, allowing us to re-use our existing > > GT code with minimal changes. > > > > Forcewake and shadowed register tables for the media GT (which will be > > added in a future patch) are listed as final addresses that already > > include the GSI offset, so we also need to add the GSI offset before > > doing lookups of registers in one of those tables. > > > > Cc: Daniele Ceraolo Spurio > > Signed-off-by: Matt Roper > > --- > > drivers/gpu/drm/i915/gt/intel_gt.c | 17 ++--- > > drivers/gpu/drm/i915/intel_device_info.h | 4 +++- > > drivers/gpu/drm/i915/intel_uncore.c | 10 -- > > drivers/gpu/drm/i915/intel_uncore.h | 22 -- > > 4 files changed, 45 insertions(+), 8 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c > > b/drivers/gpu/drm/i915/gt/intel_gt.c > > index fbb5e32979a4..a6ed11b933eb 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_gt.c > > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c > > @@ -776,10 +776,20 @@ void intel_gt_driver_late_release_all(struct > > drm_i915_private *i915) > > } > > } > > > > -static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) > > +/* > > + * Note: the gsi_offset parameter here isn't used, but we want to keep the > > + * function signature equivalent to gtdef->setup() so that it can be > > plugged > > + * in when we enabled remote tiles in the future. > > + */ > > +static int intel_gt_tile_setup(struct intel_gt *gt, > > + phys_addr_t phys_addr, > > + u32 gsi_offset) > > { > > int ret; > > > > + /* GSI offset is only applicable for media GTs */ > > + drm_WARN_ON(>->i915->drm, gsi_offset); > > + > > if (!gt_is_root(gt)) { > > struct intel_uncore *uncore; > > > > @@ -832,7 +842,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915) > > gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask; > > > > drm_dbg(&i915->drm, "Setting up %s\n", gt->name); > > - ret = intel_gt_tile_setup(gt, phys_addr); > > + ret = intel_gt_tile_setup(gt, phys_addr, 0); > > if (ret) > > return ret; > > > > @@ -865,7 +875,8 @@ int intel_gt_probe_all(struct drm_i915_private *i915) > > goto err; > > } > > > > - ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base); > > + ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base, > > + gtdef->gsi_offset); > > if (ret) > > goto err; > > > > diff --git a/drivers/gpu/drm/i915/intel_device_info.h > > b/drivers/gpu/drm/i915/intel_device_info.h > > index b408ce384cd7..85e0ef0e91b1 100644 > > --- a/drivers/gpu/drm/i915/intel_device_info.h > > +++ b/drivers/gpu/drm/i915/intel_device_info.h > > @@ -254,8 +254,10 @@ struct intel_gt_definition { > > enum intel_gt_type type; > > char *name; > > int (*setup)(struct intel_gt *gt, > > -phys_addr_t phys_addr); > > +phys_addr_t phys_addr, > > +u32 gsi_offset); > > u32 mapping_base; > > + u32 gsi_offset; > > intel_engine_mask_t engine_mask; > > }; > > > > diff --git a/drivers/gpu/drm/i915/intel_uncore.c > > b/drivers/gpu/drm/i915/intel_uncore.c > > index 33bdcbc77ab2..ecb02421502d 100644 > > --- a/drivers/gpu/drm/i915/intel_uncore.c > > +++ b/drivers/gpu/drm/i915/intel_uncore.c > > @@ -927,6 +927,9 @@ find_fw_domain(struct intel_uncore *uncore, u32 offset) > > { > > const struct intel_forcewake_range *entry; > > > > + if (IS_GSI_REG(offset)) > > + offset += uncore->gsi_offset; > > + > > entry = BSEARCH(offset, > > uncore->fw_domains_table, > >
Re: [PATCH v2 01/12] drm/i915: Move locking and unclaimed check into mmio_debug_{suspend, resume}
On Tue, Sep 06, 2022 at 06:39:14AM -0700, Ruhl, Michael J wrote: > >-Original Message- > >From: dri-devel On Behalf Of > >Matt Roper > >Sent: Friday, September 2, 2022 7:33 PM > >To: intel-...@lists.freedesktop.org > >Cc: dri-devel@lists.freedesktop.org; Sripada, Radhakrishna > > > >Subject: [PATCH v2 01/12] drm/i915: Move locking and unclaimed check into > >mmio_debug_{suspend, resume} > > > >Moving the locking for MMIO debug (and the final check for unclaimed > >accesses when resuming debug after a userspace-initiated forcewake) will > >make it simpler to completely skip MMIO debug handling on uncores that > >don't support it in future patches. > > > >Signed-off-by: Matt Roper > >Reviewed-by: Radhakrishna Sripada > >--- > > drivers/gpu/drm/i915/intel_uncore.c | 41 +++-- > > 1 file changed, 21 insertions(+), 20 deletions(-) > > > >diff --git a/drivers/gpu/drm/i915/intel_uncore.c > >b/drivers/gpu/drm/i915/intel_uncore.c > >index 9b81b2543ce2..e717ea55484a 100644 > >--- a/drivers/gpu/drm/i915/intel_uncore.c > >+++ b/drivers/gpu/drm/i915/intel_uncore.c > >@@ -50,23 +50,33 @@ intel_uncore_mmio_debug_init_early(struct > >intel_uncore_mmio_debug *mmio_debug) > > mmio_debug->unclaimed_mmio_check = 1; > > } > > > >-static void mmio_debug_suspend(struct intel_uncore_mmio_debug > >*mmio_debug) > >+static void mmio_debug_suspend(struct intel_uncore *uncore) > > /bike-shedding... > > It seems like there has been a tend to name functions with the > > _unlocked > > postfix when the lock is being taken within the function. > > Would this be a reasonable name update for these changes? I think foo_unlocked() naming is usually used when there's also a separate foo() that can be called if/when locks are already held (or sometimes it's foo() and foo_locked() if the situation is the other way around). In this case we still only have one version of the function, and it's only called from a single place in the code (intel_uncore_forcewake_user_get) so I don't think the special naming is necessary. It might actually add confusion here since there's a different lock (the general uncore lock) that is still held by the caller. It's just the mmio_debug-specific lock that's been moved into the mmio-debug specific function here. Matt > > M > > > { > >-lockdep_assert_held(&mmio_debug->lock); > >+spin_lock(&uncore->debug->lock); > > > > /* Save and disable mmio debugging for the user bypass */ > >-if (!mmio_debug->suspend_count++) { > >-mmio_debug->saved_mmio_check = mmio_debug- > >>unclaimed_mmio_check; > >-mmio_debug->unclaimed_mmio_check = 0; > >+if (!uncore->debug->suspend_count++) { > >+uncore->debug->saved_mmio_check = uncore->debug- > >>unclaimed_mmio_check; > >+uncore->debug->unclaimed_mmio_check = 0; > > } > >+ > >+spin_unlock(&uncore->debug->lock); > > } > > > >-static void mmio_debug_resume(struct intel_uncore_mmio_debug > >*mmio_debug) > >+static bool check_for_unclaimed_mmio(struct intel_uncore *uncore); > >+ > >+static void mmio_debug_resume(struct intel_uncore *uncore) > > { > >-lockdep_assert_held(&mmio_debug->lock); > >+spin_lock(&uncore->debug->lock); > >+ > >+if (!--uncore->debug->suspend_count) > >+uncore->debug->unclaimed_mmio_check = uncore->debug- > >>saved_mmio_check; > > > >-if (!--mmio_debug->suspend_count) > >-mmio_debug->unclaimed_mmio_check = mmio_debug- > >>saved_mmio_check; > >+if (check_for_unclaimed_mmio(uncore)) > >+drm_info(&uncore->i915->drm, > >+ "Invalid mmio detected during user access\n"); > >+ > >+spin_unlock(&uncore->debug->lock); > > } > > > > static const char * const forcewake_domain_names[] = { > >@@ -677,9 +687,7 @@ void intel_uncore_forcewake_user_get(struct > >intel_uncore *uncore) > > spin_lock_irq(&uncore->lock); > > if (!uncore->user_forcewake_count++) { > > intel_uncore_forcewake_get__locked(uncore, > >FORCEWAKE_ALL); > >-spin_lock(&uncore->debug->lock); > >-mmio_debug_suspend(uncore->debug); > >-spin_unlock(&uncore->debug->lock); > >+mmio_debug_suspend
[PATCH v2 08/12] drm/i915: Handle each GT on init/release and suspend/resume
In preparation for enabling a second GT, there are a number of GT/uncore operations that happen during initialization or suspend flows that need to be performed on each GT, not just the primary, Cc: Daniele Ceraolo Spurio Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/i915_driver.c | 59 +- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index bb9ba1aed1bb..e5c3cf5045d4 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -310,8 +310,13 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) static void sanitize_gpu(struct drm_i915_private *i915) { - if (!INTEL_INFO(i915)->gpu_reset_clobbers_display) - __intel_gt_reset(to_gt(i915), ALL_ENGINES); + if (!INTEL_INFO(i915)->gpu_reset_clobbers_display) { + struct intel_gt *gt; + unsigned int i; + + for_each_gt(gt, i915, i) + __intel_gt_reset(gt, ALL_ENGINES); + } } /** @@ -730,6 +735,8 @@ static void i915_driver_hw_remove(struct drm_i915_private *dev_priv) static void i915_driver_register(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; + struct intel_gt *gt; + unsigned int i; i915_gem_driver_register(dev_priv); i915_pmu_register(dev_priv); @@ -749,7 +756,8 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) /* Depends on sysfs having been initialized */ i915_perf_register(dev_priv); - intel_gt_driver_register(to_gt(dev_priv)); + for_each_gt(gt, dev_priv, i) + intel_gt_driver_register(gt); intel_display_driver_register(dev_priv); @@ -768,6 +776,9 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) */ static void i915_driver_unregister(struct drm_i915_private *dev_priv) { + struct intel_gt *gt; + unsigned int i; + i915_switcheroo_unregister(dev_priv); intel_unregister_dsm_handler(); @@ -777,7 +788,8 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) intel_display_driver_unregister(dev_priv); - intel_gt_driver_unregister(to_gt(dev_priv)); + for_each_gt(gt, dev_priv, i) + intel_gt_driver_unregister(gt); i915_perf_unregister(dev_priv); i915_pmu_unregister(dev_priv); @@ -799,6 +811,8 @@ static void i915_welcome_messages(struct drm_i915_private *dev_priv) { if (drm_debug_enabled(DRM_UT_DRIVER)) { struct drm_printer p = drm_debug_printer("i915 device info:"); + struct intel_gt *gt; + unsigned int i; drm_printf(&p, "pciid=0x%04x rev=0x%02x platform=%s (subplatform=0x%x) gen=%i\n", INTEL_DEVID(dev_priv), @@ -811,7 +825,8 @@ static void i915_welcome_messages(struct drm_i915_private *dev_priv) intel_device_info_print(INTEL_INFO(dev_priv), RUNTIME_INFO(dev_priv), &p); i915_print_iommu_status(dev_priv, &p); - intel_gt_info_print(&to_gt(dev_priv)->info, &p); + for_each_gt(gt, dev_priv, i) + intel_gt_info_print(>->info, &p); } if (IS_ENABLED(CONFIG_DRM_I915_DEBUG)) @@ -1230,13 +1245,15 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) struct drm_i915_private *dev_priv = to_i915(dev); struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; - int ret; + struct intel_gt *gt; + int ret, i; disable_rpm_wakeref_asserts(rpm); i915_gem_suspend_late(dev_priv); - intel_uncore_suspend(&dev_priv->uncore); + for_each_gt(gt, dev_priv, i) + intel_uncore_suspend(gt->uncore); intel_power_domains_suspend(dev_priv, get_suspend_mode(dev_priv, hibernation)); @@ -1368,7 +1385,8 @@ static int i915_drm_resume_early(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); - int ret; + struct intel_gt *gt; + int ret, i; /* * We have a resume ordering issue with the snd-hda driver also @@ -1422,9 +1440,10 @@ static int i915_drm_resume_early(struct drm_device *dev) drm_err(&dev_priv->drm, "Resume prepare failed: %d, continuing anyway\n", ret); - intel_uncore_resume_early(&dev_priv->uncore); - - intel_gt_check_and_clear_faults(to_gt(dev_priv)); + for_each_gt(gt, dev_pri
[PATCH v2 07/12] drm/i915: Initialize MMIO access for each GT
In a multi-GT system we need to initialize MMIO access for each GT, not just the primary GT. Cc: Daniele Ceraolo Spurio Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/i915_driver.c | 27 ++- drivers/gpu/drm/i915/intel_uncore.c | 5 - drivers/gpu/drm/i915/intel_uncore.h | 3 ++- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 1f46dd1ffaf7..bb9ba1aed1bb 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -431,7 +431,8 @@ static void i915_driver_late_release(struct drm_i915_private *dev_priv) */ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) { - int ret; + struct intel_gt *gt; + int ret, i; if (i915_inject_probe_failure(dev_priv)) return -ENODEV; @@ -440,17 +441,27 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) if (ret < 0) return ret; - ret = intel_uncore_init_mmio(&dev_priv->uncore); - if (ret) - return ret; + for_each_gt(gt, dev_priv, i) { + ret = intel_uncore_init_mmio(gt->uncore); + if (ret) + return ret; + + ret = drmm_add_action_or_reset(&dev_priv->drm, + intel_uncore_fini_mmio, + gt->uncore); + if (ret) + return ret; + } /* Try to make sure MCHBAR is enabled before poking at it */ intel_setup_mchbar(dev_priv); intel_device_info_runtime_init(dev_priv); - ret = intel_gt_init_mmio(to_gt(dev_priv)); - if (ret) - goto err_uncore; + for_each_gt(gt, dev_priv, i) { + ret = intel_gt_init_mmio(gt); + if (ret) + goto err_uncore; + } /* As early as possible, scrub existing GPU state before clobbering */ sanitize_gpu(dev_priv); @@ -459,7 +470,6 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) err_uncore: intel_teardown_mchbar(dev_priv); - intel_uncore_fini_mmio(&dev_priv->uncore); return ret; } @@ -471,7 +481,6 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) static void i915_driver_mmio_release(struct drm_i915_private *dev_priv) { intel_teardown_mchbar(dev_priv); - intel_uncore_fini_mmio(&dev_priv->uncore); } /** diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 6841f76533f9..33bdcbc77ab2 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2454,8 +2454,11 @@ void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore, } } -void intel_uncore_fini_mmio(struct intel_uncore *uncore) +/* Called via drm-managed action */ +void intel_uncore_fini_mmio(struct drm_device *dev, void *data) { + struct intel_uncore *uncore = data; + if (intel_uncore_has_forcewake(uncore)) { iosf_mbi_punit_acquire(); iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 6100d0f4498a..4acb78a03233 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -33,6 +33,7 @@ #include "i915_reg_defs.h" +struct drm_device; struct drm_i915_private; struct intel_runtime_pm; struct intel_uncore; @@ -220,7 +221,7 @@ void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore, bool intel_uncore_unclaimed_mmio(struct intel_uncore *uncore); bool intel_uncore_arm_unclaimed_mmio_detection(struct intel_uncore *uncore); void intel_uncore_cleanup_mmio(struct intel_uncore *uncore); -void intel_uncore_fini_mmio(struct intel_uncore *uncore); +void intel_uncore_fini_mmio(struct drm_device *dev, void *data); void intel_uncore_suspend(struct intel_uncore *uncore); void intel_uncore_resume_early(struct intel_uncore *uncore); void intel_uncore_runtime_resume(struct intel_uncore *uncore); -- 2.37.2
[PATCH v2 11/12] drm/i915/mtl: Use primary GT's irq lock for media GT
When we hook up interrupts (in the next patch), interrupts for the media GT are still processed as part of the primary GT's interrupt flow. As such, we should share the same IRQ lock with the primary GT. Let's convert gt->irq_lock into a pointer and just point the media GT's instance at the same lock the primary GT is using. v2: - Point media's gt->irq_lock at the primary GT lock properly. (Daniele) - Fix jump target for intel_root_gt_init_early errors. (Daniele) Cc: Daniele Ceraolo Spurio Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 8 +++--- drivers/gpu/drm/i915/gt/intel_gt.c| 15 +-- drivers/gpu/drm/i915/gt/intel_gt.h| 2 +- drivers/gpu/drm/i915/gt/intel_gt_irq.c| 16 ++-- drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c | 8 +++--- drivers/gpu/drm/i915/gt/intel_gt_types.h | 2 +- drivers/gpu/drm/i915/gt/intel_rps.c | 26 +-- drivers/gpu/drm/i915/gt/intel_sa_media.c | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc.c| 24 - .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 +-- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 4 +-- drivers/gpu/drm/i915/i915_driver.c| 5 +++- drivers/gpu/drm/i915/i915_irq.c | 4 +-- drivers/gpu/drm/i915/pxp/intel_pxp.c | 4 +-- drivers/gpu/drm/i915/pxp/intel_pxp_irq.c | 14 +- drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 4 +-- 16 files changed, 78 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 41acc285e8bf..6e0122b3dca2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1688,9 +1688,9 @@ bool intel_engine_irq_enable(struct intel_engine_cs *engine) return false; /* Caller disables interrupts */ - spin_lock(&engine->gt->irq_lock); + spin_lock(engine->gt->irq_lock); engine->irq_enable(engine); - spin_unlock(&engine->gt->irq_lock); + spin_unlock(engine->gt->irq_lock); return true; } @@ -1701,9 +1701,9 @@ void intel_engine_irq_disable(struct intel_engine_cs *engine) return; /* Caller disables interrupts */ - spin_lock(&engine->gt->irq_lock); + spin_lock(engine->gt->irq_lock); engine->irq_disable(engine); - spin_unlock(&engine->gt->irq_lock); + spin_unlock(engine->gt->irq_lock); } void intel_engines_reset_default_submission(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index a6ed11b933eb..b5c187eb9c2e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -37,7 +37,7 @@ void intel_gt_common_init_early(struct intel_gt *gt) { - spin_lock_init(>->irq_lock); + spin_lock_init(gt->irq_lock); INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); @@ -58,14 +58,19 @@ void intel_gt_common_init_early(struct intel_gt *gt) } /* Preliminary initialization of Tile 0 */ -void intel_root_gt_init_early(struct drm_i915_private *i915) +int intel_root_gt_init_early(struct drm_i915_private *i915) { struct intel_gt *gt = to_gt(i915); gt->i915 = i915; gt->uncore = &i915->uncore; + gt->irq_lock = drmm_kzalloc(&i915->drm, sizeof(*gt->irq_lock), GFP_KERNEL); + if (!gt->irq_lock) + return -ENOMEM; intel_gt_common_init_early(gt); + + return 0; } static int intel_gt_probe_lmem(struct intel_gt *gt) @@ -792,12 +797,18 @@ static int intel_gt_tile_setup(struct intel_gt *gt, if (!gt_is_root(gt)) { struct intel_uncore *uncore; + spinlock_t *irq_lock; uncore = drmm_kzalloc(>->i915->drm, sizeof(*uncore), GFP_KERNEL); if (!uncore) return -ENOMEM; + irq_lock = drmm_kzalloc(>->i915->drm, sizeof(*irq_lock), GFP_KERNEL); + if (!irq_lock) + return -ENOMEM; + gt->uncore = uncore; + gt->irq_lock = irq_lock; intel_gt_common_init_early(gt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index c9a359f35d0f..2ee582e287c8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -45,7 +45,7 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc *gsc) } void intel_gt_common_init_early(struct intel_gt *gt); -void intel_root_gt_init_early(struct drm_i915_private *i915); +int intel_root_gt_init_early(struct drm_i915_private *i915); int intel_gt_assign_ggtt(struct intel_gt *gt); in
[PATCH v2 10/12] drm/i915/xelpmp: Expose media as another GT
Xe_LPM+ platforms have "standalone media." I.e., the media unit is designed as an additional GT with its own engine list, GuC, forcewake, etc. Let's allow platforms to include media GTs in their device info. v2: - Simplify GSI register handling and split it out to a separate patch for ease of review. (Daniele) Cc: Aravind Iddamsetty Cc: Daniele Ceraolo Spurio Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/Makefile| 1 + drivers/gpu/drm/i915/gt/intel_gt_regs.h | 8 + drivers/gpu/drm/i915/gt/intel_sa_media.c | 39 drivers/gpu/drm/i915/gt/intel_sa_media.h | 15 + drivers/gpu/drm/i915/i915_pci.c | 15 + drivers/gpu/drm/i915/intel_device_info.h | 1 + drivers/gpu/drm/i915/intel_uncore.c | 4 +++ 7 files changed, 83 insertions(+) create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.c create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 522ef9b4aff3..e83e4cd46968 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -123,6 +123,7 @@ gt-y += \ gt/intel_ring.o \ gt/intel_ring_submission.o \ gt/intel_rps.o \ + gt/intel_sa_media.o \ gt/intel_sseu.o \ gt/intel_sseu_debugfs.o \ gt/intel_timeline.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index d414785003cc..fb2c56777480 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1578,4 +1578,12 @@ #define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000) +/* + * Standalone Media's non-engine GT registers are located at their regular GT + * offsets plus 0x38. This extra offset is stored inside the intel_uncore + * structure so that the existing code can be used for both GTs without + * modification. + */ +#define MTL_MEDIA_GSI_BASE 0x38 + #endif /* __INTEL_GT_REGS__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c b/drivers/gpu/drm/i915/gt/intel_sa_media.c new file mode 100644 index ..8c5c519457cc --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include + +#include "i915_drv.h" +#include "gt/intel_gt.h" +#include "gt/intel_sa_media.h" + +int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr, + u32 gsi_offset) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore; + + uncore = drmm_kzalloc(&i915->drm, sizeof(*uncore), GFP_KERNEL); + if (!uncore) + return -ENOMEM; + + uncore->gsi_offset = gsi_offset; + + intel_gt_common_init_early(gt); + intel_uncore_init_early(uncore, gt); + + /* +* Standalone media shares the general MMIO space with the primary +* GT. We'll re-use the primary GT's mapping. +*/ + uncore->regs = i915->uncore.regs; + if (drm_WARN_ON(&i915->drm, uncore->regs == NULL)) + return -EIO; + + gt->uncore = uncore; + gt->phys_addr = phys_addr; + + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.h b/drivers/gpu/drm/i915/gt/intel_sa_media.h new file mode 100644 index ..3afb310de932 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_sa_media.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ +#ifndef __INTEL_SA_MEDIA__ +#define __INTEL_SA_MEDIA__ + +#include + +struct intel_gt; + +int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr, + u32 gsi_offset); + +#endif /* __INTEL_SA_MEDIA_H__ */ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 26b25d9434d6..18d3722331e4 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -26,6 +26,9 @@ #include #include +#include "gt/intel_gt_regs.h" +#include "gt/intel_sa_media.h" + #include "i915_driver.h" #include "i915_drv.h" #include "i915_pci.h" @@ -1115,6 +1118,17 @@ static const struct intel_device_info pvc_info = { .display.has_cdclk_crawl = 1, \ .__runtime.fbc_mask = BIT(INTEL_FBC_A) | BIT(INTEL_FBC_B) +static const struct intel_gt_definition xelpmp_extra_gt[] = { + { + .type = GT_MEDIA, + .name = "Standalone Media GT", + .setup = intel_sa_mediagt_setup, + .gsi_offset = MTL_MEDIA_GSI_BASE, + .engine_mask = BIT(VECS0) | BIT(VCS0) | BIT(VCS2), + }, + {} +}; + __maybe_unused static const struct intel_device_info mtl_info = {
[PATCH v2 04/12] drm/i915: Prepare more multi-GT initialization
We're going to introduce an additional intel_gt for MTL's media unit soon. Let's provide a bit more multi-GT initialization framework in preparation for that. The initialization will pull the list of GTs for a platform from the device info structure. Although necessary for the immediate MTL media enabling, this same framework will also be used farther down the road when we enable remote tiles on xehpsdv and pvc. v2: - Re-add missing test for !HAS_EXTRA_GT_LIST in intel_gt_probe_all(). Cc: Aravind Iddamsetty Signed-off-by: Matt Roper Reviewed-by: Radhakrishna Sripada --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt.c| 51 +-- drivers/gpu/drm/i915/gt/intel_gt.h| 1 - drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 ++ drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/intel_device_info.h | 16 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 1 + 7 files changed, 70 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 275ad72940c1..41acc285e8bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -736,7 +736,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) u16 vdbox_mask; u16 vebox_mask; - info->engine_mask = RUNTIME_INFO(i915)->platform_engine_mask; + GEM_BUG_ON(!info->engine_mask); if (GRAPHICS_VER(i915) < 11) return info->engine_mask; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index cf7aab7adb30..7b880dbed6ce 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -807,17 +807,16 @@ static void intel_gt_tile_cleanup(struct intel_gt *gt) { intel_uncore_cleanup_mmio(gt->uncore); - - if (!gt_is_root(gt)) - kfree(gt); } int intel_gt_probe_all(struct drm_i915_private *i915) { struct pci_dev *pdev = to_pci_dev(i915->drm.dev); struct intel_gt *gt = &i915->gt0; + const struct intel_gt_definition *gtdef; phys_addr_t phys_addr; unsigned int mmio_bar; + unsigned int i; int ret; mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR; @@ -828,14 +827,58 @@ int intel_gt_probe_all(struct drm_i915_private *i915) * and it has been already initialized early during probe * in i915_driver_probe() */ + gt->i915 = i915; + gt->name = "Primary GT"; + gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask; + + drm_dbg(&i915->drm, "Setting up %s\n", gt->name); ret = intel_gt_tile_setup(gt, phys_addr); if (ret) return ret; i915->gt[0] = gt; - /* TODO: add more tiles */ + if (!HAS_EXTRA_GT_LIST(i915)) + return 0; + + for (i = 1, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]; +gtdef->setup != NULL; +i++, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]) { + gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL); + if (!gt) { + ret = -ENOMEM; + goto err; + } + + gt->i915 = i915; + gt->name = gtdef->name; + gt->type = gtdef->type; + gt->info.engine_mask = gtdef->engine_mask; + gt->info.id = i; + + drm_dbg(&i915->drm, "Setting up %s\n", gt->name); + if (GEM_WARN_ON(range_overflows_t(resource_size_t, + gtdef->mapping_base, + SZ_16M, + pci_resource_len(pdev, mmio_bar { + ret = -ENODEV; + goto err; + } + + ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base); + if (ret) + goto err; + + i915->gt[i] = gt; + } + return 0; + +err: + i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, ret); + intel_gt_release_all(i915); + + return ret; } int intel_gt_tiles_init(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 40b06adf509a..4d8779529cc2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -54,7 +54,6 @@ void intel_gt_driver_register(struct intel_gt *gt); void intel_gt_driver_unregister(struct intel_gt *gt); void intel_gt_driver_remove(struct intel_gt *gt); void i
[PATCH v2 12/12] drm/i915/mtl: Hook up interrupts for standalone media
Top-level handling of standalone media interrupts will be processed as part of the primary GT's interrupt handler (since primary and media GTs share an MMIO space, unlike remote tile setups). When we get down to the point of handling engine interrupts, we need to take care to lookup VCS and VECS engines in the media GT rather than the primary. There are also a couple of additional "other" instance bits that correspond to the media GT's GuC and media GT's power management interrupts; we need to direct those to the media GT instance as well. Bspec: 45605 Cc: Anusha Srivatsa Signed-off-by: Matt Roper Reviewed-by: Daniele Ceraolo Spurio --- drivers/gpu/drm/i915/gt/intel_gt_irq.c | 19 +++ drivers/gpu/drm/i915/gt/intel_gt_regs.h | 2 ++ drivers/gpu/drm/i915/gt/intel_sa_media.c | 7 +++ drivers/gpu/drm/i915/i915_drv.h | 3 +++ 4 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 0dfd0c42d00d..f26882fdc24c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -59,11 +59,17 @@ static void gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, const u16 iir) { + struct intel_gt *media_gt = gt->i915->media_gt; + if (instance == OTHER_GUC_INSTANCE) return guc_irq_handler(>->uc.guc, iir); + if (instance == OTHER_MEDIA_GUC_INSTANCE && media_gt) + return guc_irq_handler(&media_gt->uc.guc, iir); if (instance == OTHER_GTPM_INSTANCE) return gen11_rps_irq_handler(>->rps, iir); + if (instance == OTHER_MEDIA_GTPM_INSTANCE && media_gt) + return gen11_rps_irq_handler(&media_gt->rps, iir); if (instance == OTHER_KCR_INSTANCE) return intel_pxp_irq_handler(>->pxp, iir); @@ -81,6 +87,18 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class, { struct intel_engine_cs *engine; + /* +* Platforms with standalone media have their media engines in another +* GT. +*/ + if (MEDIA_VER(gt->i915) >= 13 && + (class == VIDEO_DECODE_CLASS || class == VIDEO_ENHANCEMENT_CLASS)) { + if (!gt->i915->media_gt) + goto err; + + gt = gt->i915->media_gt; + } + if (instance <= MAX_ENGINE_INSTANCE) engine = gt->engine_class[class][instance]; else @@ -89,6 +107,7 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class, if (likely(engine)) return intel_engine_cs_irq(engine, iir); +err: WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n", class, instance); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index fb2c56777480..2275ee47da95 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1554,6 +1554,8 @@ #define OTHER_GTPM_INSTANCE 1 #define OTHER_KCR_INSTANCE 4 #define OTHER_GSC_INSTANCE 6 +#define OTHER_MEDIA_GUC_INSTANCE 16 +#define OTHER_MEDIA_GTPM_INSTANCE17 #define GEN11_IIR_REG_SELECTOR(x) _MMIO(0x190070 + ((x) * 4)) diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c b/drivers/gpu/drm/i915/gt/intel_sa_media.c index 5516e9c363a4..e8f3d18c12b8 100644 --- a/drivers/gpu/drm/i915/gt/intel_sa_media.c +++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c @@ -36,5 +36,12 @@ int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr, gt->uncore = uncore; gt->phys_addr = phys_addr; + /* +* For current platforms we can assume there's only a single +* media GT and cache it for quick lookup. +*/ + drm_WARN_ON(&i915->drm, i915->media_gt); + i915->media_gt = gt; + return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d4b45c7e931d..5a21242a6706 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -365,6 +365,9 @@ struct drm_i915_private { struct kobject *sysfs_gt; + /* Quick lookup of media GT (current platforms only have one) */ + struct intel_gt *media_gt; + struct { struct i915_gem_contexts { spinlock_t lock; /* locks list */ -- 2.37.2
[PATCH v2 09/12] drm/i915/uncore: Add GSI offset to uncore
GT non-engine registers (referred to as "GSI" registers by the spec) have the same relative offsets on standalone media as they do on the primary GT, just with an additional "GSI offset" added to their MMIO address. If we store this GSI offset in the standalone media's intel_uncore structure, it can be automatically applied to all GSI reg reads/writes that happen on that GT, allowing us to re-use our existing GT code with minimal changes. Forcewake and shadowed register tables for the media GT (which will be added in a future patch) are listed as final addresses that already include the GSI offset, so we also need to add the GSI offset before doing lookups of registers in one of those tables. Cc: Daniele Ceraolo Spurio Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt.c | 17 ++--- drivers/gpu/drm/i915/intel_device_info.h | 4 +++- drivers/gpu/drm/i915/intel_uncore.c | 10 -- drivers/gpu/drm/i915/intel_uncore.h | 22 -- 4 files changed, 45 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index fbb5e32979a4..a6ed11b933eb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -776,10 +776,20 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915) } } -static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) +/* + * Note: the gsi_offset parameter here isn't used, but we want to keep the + * function signature equivalent to gtdef->setup() so that it can be plugged + * in when we enabled remote tiles in the future. + */ +static int intel_gt_tile_setup(struct intel_gt *gt, + phys_addr_t phys_addr, + u32 gsi_offset) { int ret; + /* GSI offset is only applicable for media GTs */ + drm_WARN_ON(>->i915->drm, gsi_offset); + if (!gt_is_root(gt)) { struct intel_uncore *uncore; @@ -832,7 +842,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915) gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask; drm_dbg(&i915->drm, "Setting up %s\n", gt->name); - ret = intel_gt_tile_setup(gt, phys_addr); + ret = intel_gt_tile_setup(gt, phys_addr, 0); if (ret) return ret; @@ -865,7 +875,8 @@ int intel_gt_probe_all(struct drm_i915_private *i915) goto err; } - ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base); + ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base, + gtdef->gsi_offset); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index b408ce384cd7..85e0ef0e91b1 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -254,8 +254,10 @@ struct intel_gt_definition { enum intel_gt_type type; char *name; int (*setup)(struct intel_gt *gt, -phys_addr_t phys_addr); +phys_addr_t phys_addr, +u32 gsi_offset); u32 mapping_base; + u32 gsi_offset; intel_engine_mask_t engine_mask; }; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 33bdcbc77ab2..ecb02421502d 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -927,6 +927,9 @@ find_fw_domain(struct intel_uncore *uncore, u32 offset) { const struct intel_forcewake_range *entry; + if (IS_GSI_REG(offset)) + offset += uncore->gsi_offset; + entry = BSEARCH(offset, uncore->fw_domains_table, uncore->fw_domains_table_entries, @@ -1142,6 +1145,9 @@ static bool is_shadowed(struct intel_uncore *uncore, u32 offset) if (drm_WARN_ON(&uncore->i915->drm, !uncore->shadowed_reg_table)) return false; + if (IS_GSI_REG(offset)) + offset += uncore->gsi_offset; + return BSEARCH(offset, uncore->shadowed_reg_table, uncore->shadowed_reg_table_entries, @@ -1994,8 +2000,8 @@ static int __fw_domain_init(struct intel_uncore *uncore, d->uncore = uncore; d->wake_count = 0; - d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set); - d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack); + d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set) + uncore->gsi_offset; + d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack) + uncore->gsi_offset; d->id = domain_id; d
[PATCH v2 05/12] drm/i915: Rename and expose common GT early init routine
The common early GT init is needed for initialization of all GT types (root/primary, remote tile, standalone media). Since standalone media (coming in the next patch) will be implemented in a separate file, rename and expose the function for use. Signed-off-by: Matt Roper Reviewed-by: Radhakrishna Sripada --- drivers/gpu/drm/i915/gt/intel_gt.c | 6 +++--- drivers/gpu/drm/i915/gt/intel_gt.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 7b880dbed6ce..fbb5e32979a4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -35,7 +35,7 @@ #include "intel_uncore.h" #include "shmem_utils.h" -static void __intel_gt_init_early(struct intel_gt *gt) +void intel_gt_common_init_early(struct intel_gt *gt) { spin_lock_init(>->irq_lock); @@ -65,7 +65,7 @@ void intel_root_gt_init_early(struct drm_i915_private *i915) gt->i915 = i915; gt->uncore = &i915->uncore; - __intel_gt_init_early(gt); + intel_gt_common_init_early(gt); } static int intel_gt_probe_lmem(struct intel_gt *gt) @@ -789,7 +789,7 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) gt->uncore = uncore; - __intel_gt_init_early(gt); + intel_gt_common_init_early(gt); } intel_uncore_init_early(gt->uncore, gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 4d8779529cc2..c9a359f35d0f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -44,6 +44,7 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc *gsc) return container_of(gsc, struct intel_gt, gsc); } +void intel_gt_common_init_early(struct intel_gt *gt); void intel_root_gt_init_early(struct drm_i915_private *i915); int intel_gt_assign_ggtt(struct intel_gt *gt); int intel_gt_init_mmio(struct intel_gt *gt); -- 2.37.2
[PATCH v2 01/12] drm/i915: Move locking and unclaimed check into mmio_debug_{suspend, resume}
Moving the locking for MMIO debug (and the final check for unclaimed accesses when resuming debug after a userspace-initiated forcewake) will make it simpler to completely skip MMIO debug handling on uncores that don't support it in future patches. Signed-off-by: Matt Roper Reviewed-by: Radhakrishna Sripada --- drivers/gpu/drm/i915/intel_uncore.c | 41 +++-- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 9b81b2543ce2..e717ea55484a 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -50,23 +50,33 @@ intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug) mmio_debug->unclaimed_mmio_check = 1; } -static void mmio_debug_suspend(struct intel_uncore_mmio_debug *mmio_debug) +static void mmio_debug_suspend(struct intel_uncore *uncore) { - lockdep_assert_held(&mmio_debug->lock); + spin_lock(&uncore->debug->lock); /* Save and disable mmio debugging for the user bypass */ - if (!mmio_debug->suspend_count++) { - mmio_debug->saved_mmio_check = mmio_debug->unclaimed_mmio_check; - mmio_debug->unclaimed_mmio_check = 0; + if (!uncore->debug->suspend_count++) { + uncore->debug->saved_mmio_check = uncore->debug->unclaimed_mmio_check; + uncore->debug->unclaimed_mmio_check = 0; } + + spin_unlock(&uncore->debug->lock); } -static void mmio_debug_resume(struct intel_uncore_mmio_debug *mmio_debug) +static bool check_for_unclaimed_mmio(struct intel_uncore *uncore); + +static void mmio_debug_resume(struct intel_uncore *uncore) { - lockdep_assert_held(&mmio_debug->lock); + spin_lock(&uncore->debug->lock); + + if (!--uncore->debug->suspend_count) + uncore->debug->unclaimed_mmio_check = uncore->debug->saved_mmio_check; - if (!--mmio_debug->suspend_count) - mmio_debug->unclaimed_mmio_check = mmio_debug->saved_mmio_check; + if (check_for_unclaimed_mmio(uncore)) + drm_info(&uncore->i915->drm, +"Invalid mmio detected during user access\n"); + + spin_unlock(&uncore->debug->lock); } static const char * const forcewake_domain_names[] = { @@ -677,9 +687,7 @@ void intel_uncore_forcewake_user_get(struct intel_uncore *uncore) spin_lock_irq(&uncore->lock); if (!uncore->user_forcewake_count++) { intel_uncore_forcewake_get__locked(uncore, FORCEWAKE_ALL); - spin_lock(&uncore->debug->lock); - mmio_debug_suspend(uncore->debug); - spin_unlock(&uncore->debug->lock); + mmio_debug_suspend(uncore); } spin_unlock_irq(&uncore->lock); } @@ -695,14 +703,7 @@ void intel_uncore_forcewake_user_put(struct intel_uncore *uncore) { spin_lock_irq(&uncore->lock); if (!--uncore->user_forcewake_count) { - spin_lock(&uncore->debug->lock); - mmio_debug_resume(uncore->debug); - - if (check_for_unclaimed_mmio(uncore)) - drm_info(&uncore->i915->drm, -"Invalid mmio detected during user access\n"); - spin_unlock(&uncore->debug->lock); - + mmio_debug_resume(uncore); intel_uncore_forcewake_put__locked(uncore, FORCEWAKE_ALL); } spin_unlock_irq(&uncore->lock); -- 2.37.2
[PATCH v2 02/12] drm/i915: Only hook up uncore->debug for primary uncore
The original intent of intel_uncore_mmio_debug as described in commit 0a9b26306d6a ("drm/i915: split out uncore_mmio_debug") was to be a singleton structure that could be shared between multiple GTs' uncore objects in a multi-tile system. Somehow we went off track and started allocating separate instances of this structure for each GT, which defeats that original goal. But in reality, there isn't even a need to share the mmio_debug between multiple GTs; on all modern platforms (i.e., everything after gen7) unclaimed register accesses are something that can only be detected for display registers. There's no point in grabbing the debug spinlock and checking for unclaimed accesses on an uncore used by an xehpsdv or pvc remote tile GT, or the uncore used by a mtl standalone media GT since all of the display accesses go through the primary intel_uncore. The simplest solution is to simply leave uncore->debug NULL on all intel_uncore instances except for the primary one. This will allow us to avoid the pointless debug spinlock acquisition we've been doing on MMIO accesses coming in through these intel_uncores. Signed-off-by: Matt Roper Reviewed-by: Radhakrishna Sripada --- drivers/gpu/drm/i915/gt/intel_gt.c | 9 - drivers/gpu/drm/i915/i915_driver.c | 2 +- drivers/gpu/drm/i915/intel_uncore.c | 23 ++- drivers/gpu/drm/i915/intel_uncore.h | 3 +-- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index e4bac2431e41..a82b5e2e0d83 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -781,21 +781,13 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) int ret; if (!gt_is_root(gt)) { - struct intel_uncore_mmio_debug *mmio_debug; struct intel_uncore *uncore; uncore = kzalloc(sizeof(*uncore), GFP_KERNEL); if (!uncore) return -ENOMEM; - mmio_debug = kzalloc(sizeof(*mmio_debug), GFP_KERNEL); - if (!mmio_debug) { - kfree(uncore); - return -ENOMEM; - } - gt->uncore = uncore; - gt->uncore->debug = mmio_debug; __intel_gt_init_early(gt); } @@ -817,7 +809,6 @@ intel_gt_tile_cleanup(struct intel_gt *gt) intel_uncore_cleanup_mmio(gt->uncore); if (!gt_is_root(gt)) { - kfree(gt->uncore->debug); kfree(gt->uncore); kfree(gt); } diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 56a2bcddb2af..18acba1bc3b0 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -326,7 +326,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) intel_device_info_subplatform_init(dev_priv); intel_step_init(dev_priv); - intel_uncore_mmio_debug_init_early(&dev_priv->mmio_debug); + intel_uncore_mmio_debug_init_early(dev_priv); spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->gpu_error.lock); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index e717ea55484a..6841f76533f9 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -44,14 +44,19 @@ fw_domains_get(struct intel_uncore *uncore, enum forcewake_domains fw_domains) } void -intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug) +intel_uncore_mmio_debug_init_early(struct drm_i915_private *i915) { - spin_lock_init(&mmio_debug->lock); - mmio_debug->unclaimed_mmio_check = 1; + spin_lock_init(&i915->mmio_debug.lock); + i915->mmio_debug.unclaimed_mmio_check = 1; + + i915->uncore.debug = &i915->mmio_debug; } static void mmio_debug_suspend(struct intel_uncore *uncore) { + if (!uncore->debug) + return; + spin_lock(&uncore->debug->lock); /* Save and disable mmio debugging for the user bypass */ @@ -67,6 +72,9 @@ static bool check_for_unclaimed_mmio(struct intel_uncore *uncore); static void mmio_debug_resume(struct intel_uncore *uncore) { + if (!uncore->debug) + return; + spin_lock(&uncore->debug->lock); if (!--uncore->debug->suspend_count) @@ -1705,7 +1713,7 @@ unclaimed_reg_debug(struct intel_uncore *uncore, const bool read, const bool before) { - if (likely(!uncore->i915->params.mmio_debug)) + if (likely(!uncore->i915->params.mmio_debug) || !uncore->debug) return; /* interrupts are disabled and
[PATCH v2 03/12] drm/i915: Use managed allocations for extra uncore objects
We're slowly transitioning the init-time kzalloc's of the driver over to DRM-managed allocations; let's make sure the uncore objects allocated for non-root GTs are thus allocated. Signed-off-by: Matt Roper Reviewed-by: Radhakrishna Sripada --- drivers/gpu/drm/i915/gt/intel_gt.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index a82b5e2e0d83..cf7aab7adb30 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -783,7 +783,7 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) if (!gt_is_root(gt)) { struct intel_uncore *uncore; - uncore = kzalloc(sizeof(*uncore), GFP_KERNEL); + uncore = drmm_kzalloc(>->i915->drm, sizeof(*uncore), GFP_KERNEL); if (!uncore) return -ENOMEM; @@ -808,10 +808,8 @@ intel_gt_tile_cleanup(struct intel_gt *gt) { intel_uncore_cleanup_mmio(gt->uncore); - if (!gt_is_root(gt)) { - kfree(gt->uncore); + if (!gt_is_root(gt)) kfree(gt); - } } int intel_gt_probe_all(struct drm_i915_private *i915) -- 2.37.2
[PATCH v2 00/12] i915: Add "standalone media" support for MTL
Starting with MTL, media functionality has moved into a new, second GT at the hardware level. This new GT, referred to as "standalone media" in the spec, has its own GuC, power management/forcewake, etc. The general non-engine GT registers for standalone media start at 0x38, but otherwise use the same MMIO offsets as the primary GT. Standalone media has a lot of similarity to the remote tiles present on platforms like xehpsdv and pvc, and our i915 implementation can share much of the general "multi GT" infrastructure between the two types of platforms. However there are a few notable differences we must deal with: - The 0x38 offset only applies to the non-engine GT registers (which the specs refer to as "GSI" registers). The engine registers remain at their usual locations (e.g., 0x1C for VCS0). - Unlike platforms with remote tiles, all interrupt handling for standalone media still happens via the primary GT. v2: - Added new patches to ensure each GT, not just the primary, is handled properly during various init/suspend/resume/teardown flows. - Simplified GSI offset handling and split it into its own patch. - Correct gt->irq_lock assignment for media GT. - Fix jump target for intel_root_gt_init_early() errors. Cc: Radhakrishna Sripada Cc: Daniele Ceraolo Spurio Cc: Aravind Iddamsetty Matt Roper (12): drm/i915: Move locking and unclaimed check into mmio_debug_{suspend,resume} drm/i915: Only hook up uncore->debug for primary uncore drm/i915: Use managed allocations for extra uncore objects drm/i915: Prepare more multi-GT initialization drm/i915: Rename and expose common GT early init routine drm/i915: Use a DRM-managed action to release the PCI bridge device drm/i915: Initialize MMIO access for each GT drm/i915: Handle each GT on init/release and suspend/resume drm/i915/uncore: Add GSI offset to uncore drm/i915/xelpmp: Expose media as another GT drm/i915/mtl: Use primary GT's irq lock for media GT drm/i915/mtl: Hook up interrupts for standalone media drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/intel_engine_cs.c | 10 +- drivers/gpu/drm/i915/gt/intel_gt.c| 96 drivers/gpu/drm/i915/gt/intel_gt.h| 4 +- drivers/gpu/drm/i915/gt/intel_gt_irq.c| 35 -- drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c | 8 +- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 10 ++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 5 +- drivers/gpu/drm/i915/gt/intel_rps.c | 26 ++--- drivers/gpu/drm/i915/gt/intel_sa_media.c | 47 drivers/gpu/drm/i915/gt/intel_sa_media.h | 15 +++ drivers/gpu/drm/i915/gt/uc/intel_guc.c| 24 ++-- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 4 +- drivers/gpu/drm/i915/i915_driver.c| 105 -- drivers/gpu/drm/i915/i915_drv.h | 5 + drivers/gpu/drm/i915/i915_irq.c | 4 +- drivers/gpu/drm/i915/i915_pci.c | 15 +++ drivers/gpu/drm/i915/intel_device_info.h | 19 drivers/gpu/drm/i915/intel_uncore.c | 83 +- drivers/gpu/drm/i915/intel_uncore.h | 28 - drivers/gpu/drm/i915/pxp/intel_pxp.c | 4 +- drivers/gpu/drm/i915/pxp/intel_pxp_irq.c | 14 +-- drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 1 + 25 files changed, 424 insertions(+), 147 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.c create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.h -- 2.37.2
[PATCH v2 06/12] drm/i915: Use a DRM-managed action to release the PCI bridge device
As we start supporting multiple uncore structures in future patches, the MMIO cleanup (which make also get called mid-init if there's a failure) will become more complicated. Moving to DRM-managed actions will help keep things simple. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/i915_driver.c | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 18acba1bc3b0..1f46dd1ffaf7 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -105,6 +105,12 @@ static const char irst_name[] = "INT3392"; static const struct drm_driver i915_drm_driver; +static void i915_release_bridge_dev(struct drm_device *dev, + void *bridge) +{ + pci_dev_put(bridge); +} + static int i915_get_bridge_dev(struct drm_i915_private *dev_priv) { int domain = pci_domain_nr(to_pci_dev(dev_priv->drm.dev)->bus); @@ -115,7 +121,9 @@ static int i915_get_bridge_dev(struct drm_i915_private *dev_priv) drm_err(&dev_priv->drm, "bridge device not found\n"); return -EIO; } - return 0; + + return drmm_add_action_or_reset(&dev_priv->drm, i915_release_bridge_dev, + dev_priv->bridge_dev); } /* Allocate space for the MCH regs if needed, return nonzero on error */ @@ -452,7 +460,6 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) err_uncore: intel_teardown_mchbar(dev_priv); intel_uncore_fini_mmio(&dev_priv->uncore); - pci_dev_put(dev_priv->bridge_dev); return ret; } @@ -465,7 +472,6 @@ static void i915_driver_mmio_release(struct drm_i915_private *dev_priv) { intel_teardown_mchbar(dev_priv); intel_uncore_fini_mmio(&dev_priv->uncore); - pci_dev_put(dev_priv->bridge_dev); } /** -- 2.37.2
[PATCH 3/7] drm/i915/gt: Drop a few unused register definitions
Let's drop a few register definitions that are unused anywhere in the driver today. Since the referenced offsets are part of what is now considered a multicast register region, the current definitions would not be correct for use on any future platform. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 17 - 1 file changed, 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 6d4d15964b58..bf5f21d334f3 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -447,13 +447,6 @@ #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) #define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9) -/* GEN9 chicken */ -#define SLICE_ECO_CHICKEN0 _MMIO(0x7308) -#define PIXEL_MASK_CAMMING_DISABLE (1 << 14) - -#define GEN9_SLICE_COMMON_ECO_CHICKEN0 _MMIO(0x7308) -#define DISABLE_PIXEL_MASK_CAMMING (1 << 14) - #define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c) #define XEHP_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c) #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) @@ -958,11 +951,6 @@ #define GEN7_L3LOG(slice, i) _MMIO(0xb070 + (slice) * 0x200 + (i) * 4) #define GEN7_L3LOG_SIZE 0x80 -#define GEN10_SCRATCH_LNCF2_MMIO(0xb0a0) -#define PMFLUSHDONE_LNICRSDROP (1 << 20) -#define PMFLUSH_GAPL3UNBLOCK (1 << 21) -#define PMFLUSHDONE_LNEBLK (1 << 22) - #define XEHP_L3NODEARBCFG _MMIO(0xb0b4) #define XEHP_LNESPAREREG_BIT(19) @@ -977,9 +965,6 @@ #define L3_HIGH_PRIO_CREDITS(x) (((x) >> 1) << 14) #define L3_PRIO_CREDITS_MASK ((0x1f << 19) | (0x1f << 14)) -#define GEN10_L3_CHICKEN_MODE_REGISTER _MMIO(0xb114) -#define GEN11_I2M_WRITE_DISABLE (1 << 28) - #define GEN8_L3SQCREG4 _MMIO(0xb118) #define GEN11_LQSC_CLEAN_EVICT_DISABLE (1 << 6) #define GEN8_LQSC_RO_PERF_DIS(1 << 27) @@ -1174,8 +1159,6 @@ #define SARB_CHICKEN1 _MMIO(0xe90c) #define COMP_CKN_IN REG_GENMASK(30, 29) -#define GEN7_HALF_SLICE_CHICKEN1_GT2 _MMIO(0xf100) - #define GEN7_ROW_CHICKEN2_GT2 _MMIO(0xf4f4) #define DOP_CLOCK_GATING_DISABLE (1 << 0) #define PUSH_CONSTANT_DEREF_DISABLE (1 << 8) -- 2.37.2
[PATCH 5/7] drm/i915: Define MCR registers explicitly
Rather than using the same _MMIO() macro to define MCR registers as singleton registers, let's use a new MCR_REG() macro to make it clear that these registers are special and should be handled accordingly. For now MCR_REG() will still generate an i915_reg_t with the given offset, but we'll change that in future patches. Bspec: 66673, 66696, 66534, 67609 Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 118 1 file changed, 60 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 94a1f964eaba..736fc01fa74f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -8,6 +8,8 @@ #include "i915_reg_defs.h" +#define MCR_REG(offset)_MMIO(offset) + /* RPM unit config (Gen8+) */ #define RPM_CONFIG0_MMIO(0xd00) #define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT3 @@ -326,12 +328,12 @@ #define GEN7_TLB_RD_ADDR _MMIO(0x4700) #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) -#define XEHP_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) +#define XEHP_PAT_INDEX(index) MCR_REG(0x4800 + (index) * 4) -#define XEHP_TILE0_ADDR_RANGE _MMIO(0x4900) +#define XEHP_TILE0_ADDR_RANGE MCR_REG(0x4900) #define XEHP_TILE_LMEM_RANGE_SHIFT 8 -#define XEHP_FLAT_CCS_BASE_ADDR_MMIO(0x4910) +#define XEHP_FLAT_CCS_BASE_ADDRMCR_REG(0x4910) #define XEHP_CCS_BASE_SHIFT 8 #define GAMTARBMODE_MMIO(0x4a08) @@ -381,18 +383,18 @@ #define CHICKEN_RASTER_2 _MMIO(0x6208) #define TBIMR_FAST_CLIP REG_BIT(5) -#define VFLSKPD_MMIO(0x62a8) +#define VFLSKPDMCR_REG(0x62a8) #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) #define GEN12_FF_MODE2 _MMIO(0x6604) -#define XEHP_FF_MODE2 _MMIO(0x6604) +#define XEHP_FF_MODE2 MCR_REG(0x6604) #define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24) #define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224) #define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) #define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4) -#define XEHPG_INSTDONE_GEOM_SVG_MMIO(0x666c) +#define XEHPG_INSTDONE_GEOM_SVGMCR_REG(0x666c) #define CACHE_MODE_0_GEN7 _MMIO(0x7000) /* IVB+ */ #define RC_OP_FLUSH_ENABLE (1 << 0) @@ -441,14 +443,14 @@ #define GEN8_HDC_CHICKEN1 _MMIO(0x7304) #define GEN11_COMMON_SLICE_CHICKEN3_MMIO(0x7304) -#define XEHP_COMMON_SLICE_CHICKEN3 _MMIO(0x7304) +#define XEHP_COMMON_SLICE_CHICKEN3 MCR_REG(0x7304) #define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12) #define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLEREG_BIT(12) #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) #define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9) #define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c) -#define XEHP_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c) +#define XEHP_SLICE_COMMON_ECO_CHICKEN1 MCR_REG(0x731c) #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) #define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11) @@ -479,7 +481,7 @@ #define GEN8_RC6_CTX_INFO _MMIO(0x8504) -#define XEHP_SQCM _MMIO(0x8724) +#define XEHP_SQCM MCR_REG(0x8724) #define EN_32B_ACCESSREG_BIT(30) #define HSW_IDICR _MMIO(0x9008) @@ -640,7 +642,7 @@ #define GEN7_MISCCPCTL _MMIO(0x9424) #define GEN7_DOP_CLOCK_GATE_ENABLE (1 << 0) -#define GEN8_MISCCPCTL _MMIO(0x9424) +#define GEN8_MISCCPCTL MCR_REG(0x9424) #define GEN8_DOP_CLOCK_GATE_ENABLE REG_BIT(0) #define GEN12_DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1) #define GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE (1 << 2) @@ -696,7 +698,7 @@ #define LTCDD_CLKGATE_DISREG_BIT(10) #define GEN11_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) -#define XEHP_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) +#define XEHP_SLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x94d4) #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7)
[PATCH 4/7] drm/i915/gt: Correct prefix on a few registers
We have a few registers that have existed for several hardware generations, but are only used by the driver on Xe_HP and beyond. In cases where the Xe_HP version of the register is now replicated and uses multicast behavior, but earlier generations were singleton, let's change the register prefix to "XEHP_" to help clarify that we're using the newer multicast form of the register. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 8 drivers/gpu/drm/i915/gt/intel_workarounds.c | 10 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index bf5f21d334f3..94a1f964eaba 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -479,7 +479,7 @@ #define GEN8_RC6_CTX_INFO _MMIO(0x8504) -#define GEN12_SQCM _MMIO(0x8724) +#define XEHP_SQCM _MMIO(0x8724) #define EN_32B_ACCESSREG_BIT(30) #define HSW_IDICR _MMIO(0x9008) @@ -980,7 +980,7 @@ #define GEN11_SCRATCH2 _MMIO(0xb140) #define GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE(1 << 19) -#define GEN11_L3SQCREG5_MMIO(0xb158) +#define XEHP_L3SQCREG5 _MMIO(0xb158) #define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0) #define MLTICTXCTL _MMIO(0xb170) @@ -1036,7 +1036,7 @@ #define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4) #define GEN12_COMPCTX_TLB_INV_CR _MMIO(0xcf04) -#define GEN12_MERT_MOD_CTRL_MMIO(0xcf28) +#define XEHP_MERT_MOD_CTRL _MMIO(0xcf28) #define RENDER_MOD_CTRL_MMIO(0xcf2c) #define COMP_MOD_CTRL _MMIO(0xcf30) #define VDBX_MOD_CTRL _MMIO(0xcf34) @@ -1138,7 +1138,7 @@ #define EU_PERF_CNTL1 _MMIO(0xe558) #define EU_PERF_CNTL5 _MMIO(0xe55c) -#define GEN12_HDC_CHICKEN0 _MMIO(0xe5f0) +#define XEHP_HDC_CHICKEN0 _MMIO(0xe5f0) #define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) #define ICL_HDC_MODE _MMIO(0xe5f4) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index eff5f4f85374..3d48189021ad 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -569,7 +569,7 @@ static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); - wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, + wa_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); wa_add(wal, XEHP_FF_MODE2, @@ -1504,7 +1504,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) * recommended tuning settings documented in the bspec's * performance guide section. */ - wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS); + wa_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); /* Wa_14015795083 */ wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); @@ -2163,7 +2163,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_22010960976:dg2 * Wa_14013347512:dg2 */ - wa_masked_dis(wal, GEN12_HDC_CHICKEN0, + wa_masked_dis(wal, XEHP_HDC_CHICKEN0, LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); } @@ -2216,7 +2216,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) || IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { /* Wa_14012362059:dg2 */ - wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); } if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) || @@ -2757,7 +2757,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li } /* Wa_14012362059:xehpsdv */ - wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); /* Wa_14014368820:xehpsdv */ wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | -- 2.37.2
[PATCH 6/7] drm/i915/gt: Always use MCR functions on multicast registers
Rather than relying on the implicit behavior of intel_uncore_*() functions, let's always use the intel_gt_mcr_*() functions to operate on multicast/replicated registers. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 4 +- drivers/gpu/drm/i915/gt/intel_gtt.c | 49 --- drivers/gpu/drm/i915/gt/intel_gtt.h | 2 +- drivers/gpu/drm/i915/gt/intel_mocs.c | 13 +++--- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 12 -- drivers/gpu/drm/i915/intel_pm.c | 20 + 6 files changed, 55 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 30cf5c3369d9..e2620f41d8b2 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -986,7 +986,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.pte_encode = gen8_ggtt_pte_encode; - setup_private_pat(ggtt->vm.gt->uncore); + setup_private_pat(ggtt->vm.gt); return ggtt_probe_common(ggtt, size); } @@ -1302,7 +1302,7 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) wbinvd_on_all_cpus(); if (GRAPHICS_VER(ggtt->vm.i915) >= 8) - setup_private_pat(ggtt->vm.gt->uncore); + setup_private_pat(ggtt->vm.gt); intel_ggtt_restore_fences(ggtt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 601d89b4feb1..6f61c8da0b61 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -15,6 +15,7 @@ #include "i915_trace.h" #include "i915_utils.h" #include "intel_gt.h" +#include "intel_gt_mcr.h" #include "intel_gt_regs.h" #include "intel_gtt.h" @@ -477,27 +478,27 @@ void gtt_write_workarounds(struct intel_gt *gt) } } -static void tgl_setup_private_ppat(struct intel_uncore *uncore) +static void tgl_setup_private_ppat(struct intel_gt *gt) { - if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 50)) { - intel_uncore_write(uncore, XEHP_PAT_INDEX(0), GEN8_PPAT_WB); - intel_uncore_write(uncore, XEHP_PAT_INDEX(1), GEN8_PPAT_WC); - intel_uncore_write(uncore, XEHP_PAT_INDEX(2), GEN8_PPAT_WT); - intel_uncore_write(uncore, XEHP_PAT_INDEX(3), GEN8_PPAT_UC); - intel_uncore_write(uncore, XEHP_PAT_INDEX(4), GEN8_PPAT_WB); - intel_uncore_write(uncore, XEHP_PAT_INDEX(5), GEN8_PPAT_WB); - intel_uncore_write(uncore, XEHP_PAT_INDEX(6), GEN8_PPAT_WB); - intel_uncore_write(uncore, XEHP_PAT_INDEX(7), GEN8_PPAT_WB); + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) { + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB); } else { /* TGL doesn't support LLC or AGE settings */ - intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB); - intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC); - intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT); - intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC); - intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB); - intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB); - intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB); - intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB); + intel_uncore_write(gt->uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB); + intel_uncore_write(gt->uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC); + intel_uncore_write(gt->uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT); + intel_uncore_write(gt->uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC); + intel_uncore_write(gt->uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB); + intel_uncore_write(gt->uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB); + intel_uncore_write(gt->uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB); + intel_uncore_write(gt->uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB); } } @@ -593,20 +594,20 @@ static void chv_setup_private_ppat(struct intel_uncore *uncore)
[PATCH 7/7] drm/i915/gt: Add MCR-specific workaround initializers
Let's be more explicit about which of our workarounds are updating MCR registers. This will also allow us to record whether a workaround register has MCR behavior or not so that we'll only need to do a steering lookup for the registers that truly need it. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 414 +++--- .../gpu/drm/i915/gt/intel_workarounds_types.h | 4 +- 2 files changed, 252 insertions(+), 166 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 3d48189021ad..c13abac19b06 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -166,12 +166,33 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, _wa_add(wal, &wa); } +static void wa_mcr_add(struct i915_wa_list *wal, i915_reg_t reg, + u32 clear, u32 set, u32 read_mask, bool masked_reg) +{ + struct i915_wa wa = { + .reg = reg, + .clr = clear, + .set = set, + .read = read_mask, + .masked_reg = masked_reg, + .is_mcr = 1, + }; + + _wa_add(wal, &wa); +} + static void wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) { wa_add(wal, reg, clear, set, clear, false); } +static void +wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) +{ + wa_mcr_add(wal, reg, clear, set, clear, false); +} + static void wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set) { @@ -184,12 +205,24 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set) wa_write_clr_set(wal, reg, set, set); } +static void +wa_mcr_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set) +{ + wa_mcr_write_clr_set(wal, reg, set, set); +} + static void wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) { wa_write_clr_set(wal, reg, clr, 0); } +static void +wa_mcr_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) +{ + wa_mcr_write_clr_set(wal, reg, clr, 0); +} + /* * WA operations on "masked register". A masked register has the upper 16 bits * documented as "masked" in b-spec. Its purpose is to allow writing to just a @@ -207,12 +240,24 @@ wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true); } +static void +wa_mcr_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true); +} + static void wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true); } +static void +wa_mcr_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true); +} + static void wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) @@ -220,6 +265,13 @@ wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg, wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true); } +static void +wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg, + u32 mask, u32 val) +{ + wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true); +} + static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -241,7 +293,7 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE); /* WaDisablePartialInstShootdown:bdw,chv */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); /* Use Force Non-Coherent whenever executing a 3D context. This is a @@ -288,18 +340,18 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, gen8_ctx_workarounds_init(engine, wal); /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); /* WaDisableDopClockGating:bdw * * Also see the related UCGTCL1 write in bdw_init_clock_gating() * to disable EUTC clock gating. */ - wa_masked_en(wal, GEN8_ROW_CHICKEN2, -DOP_CLOCK_GATING_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, +DOP_CLOCK_GATING_DISABLE); - wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3, -GEN8_SAMPLER_POWER_BYPASS_DIS); + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3, +
[PATCH 2/7] drm/i915/xehp: Create separate reg definitions for new MCR registers
Starting in Xe_HP, several registers our driver works with have been converted from singleton registers into replicated registers with multicast behavior. Although the registers are still located at the same MMIO offsets as on previous platforms, let's duplicate the register definitions in preparation for upcoming patches that will handle multicast registers in a special manner. The registers that are now replicated on Xe_HP are: * PAT_INDEX (mslice replication) * FF_MODE2 (gslice replication) * COMMON_SLICE_CHICKEN3 (gslice replication) * SLICE_COMMON_ECO_CHICKEN1 (gslice replication) * SLICE_UNIT_LEVEL_CLKGATE (gslice replication) * LNCFCMOCS (lncf replication) The *_TLB_INV_CR registers are also replicated (mslice replication), but I'm skipping those for now because I think that code might need more work in general for multicast behavior (e.g., do we need to wait for the invalidation to report as completed on every mslice?). Bspec: 66534 Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 18 - drivers/gpu/drm/i915/gt/intel_gtt.c | 29 ++--- drivers/gpu/drm/i915/gt/intel_mocs.c| 5 +++- drivers/gpu/drm/i915/gt/intel_workarounds.c | 24 - drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 7 +++-- 5 files changed, 52 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index ad8e2b52ff64..6d4d15964b58 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -326,6 +326,7 @@ #define GEN7_TLB_RD_ADDR _MMIO(0x4700) #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) +#define XEHP_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) #define XEHP_TILE0_ADDR_RANGE _MMIO(0x4900) #define XEHP_TILE_LMEM_RANGE_SHIFT 8 @@ -384,7 +385,8 @@ #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) -#define FF_MODE2 _MMIO(0x6604) +#define GEN12_FF_MODE2 _MMIO(0x6604) +#define XEHP_FF_MODE2 _MMIO(0x6604) #define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24) #define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224) #define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) @@ -439,6 +441,7 @@ #define GEN8_HDC_CHICKEN1 _MMIO(0x7304) #define GEN11_COMMON_SLICE_CHICKEN3_MMIO(0x7304) +#define XEHP_COMMON_SLICE_CHICKEN3 _MMIO(0x7304) #define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12) #define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLEREG_BIT(12) #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) @@ -452,10 +455,9 @@ #define DISABLE_PIXEL_MASK_CAMMING (1 << 14) #define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c) -#define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11) - -#define SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c) +#define XEHP_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c) #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) +#define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11) #define GEN9_SLICE_PGCTL_ACK(slice)_MMIO(0x804c + (slice) * 0x4) #define GEN10_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + ((slice) / 3) * 0x34 + \ @@ -700,7 +702,8 @@ #define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16) #define LTCDD_CLKGATE_DISREG_BIT(10) -#define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) +#define GEN11_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) +#define XEHP_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) #define MSCUNIT_CLKGATE_DIS (1 << 10) @@ -715,7 +718,7 @@ #define VSUNIT_CLKGATE_DIS_TGL REG_BIT(19) #define PSDUNIT_CLKGATE_DIS REG_BIT(5) -#define SUBSLICE_UNIT_LEVEL_CLKGATE_MMIO(0x9524) +#define GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) #define DSS_ROUTER_CLKGATE_DIS REG_BIT(28) #define GWUNIT_CLKGATE_DIS REG_BIT(16) @@ -938,7 +941,8 @@ /* MOCS (Memory Object Control State) registers */ #define GEN9_LNCFCMOCS(i) _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */ -#define GEN9_LNCFCMOCS_REG_COUNT 32 +#define XEHP_LNCFCMOCS(i) _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */ +#define LNCFCMOCS_REG_COUNT32 #define GEN7_L3CNTLREG3_MMIO(0xb024) diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index b67831833c9a..
[PATCH 1/7] drm/i915/gen8: Create separate reg definitions for new MCR registers
Gen8 was the first time our hardware had multicast registers (or at least the first time the multicast nature was exposed and MMIO accesses could be steered). There are some registers that transitioned from singleton behavior to multicast during the gen7 -> gen8 transition; let's duplicate the register definitions for those registers in preparation for upcoming patches that will handle MCR registers in a special manner. The registers adjusted are: * MISCCPCTL * SAMPLER_INSTDONE * ROW_INSTDONE * ROW_CHICKEN2 * HALF_SLICE_CHICKEN1 * HALF_SLICE_CHICKEN3 Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 11 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 22 +-- .../gpu/drm/i915/gt/uc/intel_guc_capture.c| 4 ++-- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 2 +- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- drivers/gpu/drm/i915/gvt/mmio_context.c | 2 +- drivers/gpu/drm/i915/intel_gvt_mmio_table.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 10 - 9 files changed, 34 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 275ad72940c1..29980bff9c10 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1544,11 +1544,11 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, for_each_ss_steering(iter, engine->gt, slice, subslice) { instdone->sampler[slice][subslice] = intel_gt_mcr_read(engine->gt, - GEN7_SAMPLER_INSTDONE, + GEN8_SAMPLER_INSTDONE, slice, subslice); instdone->row[slice][subslice] = intel_gt_mcr_read(engine->gt, - GEN7_ROW_INSTDONE, + GEN8_ROW_INSTDONE, slice, subslice); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index d414785003cc..ad8e2b52ff64 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -644,6 +644,9 @@ #define GEN7_MISCCPCTL _MMIO(0x9424) #define GEN7_DOP_CLOCK_GATE_ENABLE (1 << 0) + +#define GEN8_MISCCPCTL _MMIO(0x9424) +#define GEN8_DOP_CLOCK_GATE_ENABLE REG_BIT(0) #define GEN12_DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1) #define GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE (1 << 2) #define GEN8_DOP_CLOCK_GATE_GUC_ENABLE (1 << 4) @@ -1063,18 +1066,22 @@ #define GEN12_GAM_DONE _MMIO(0xcf68) #define GEN7_HALF_SLICE_CHICKEN1 _MMIO(0xe100) /* IVB GT1 + VLV */ +#define GEN8_HALF_SLICE_CHICKEN1 _MMIO(0xe100) #define GEN7_MAX_PS_THREAD_DEP (8 << 12) #define GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE (1 << 10) #define GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE (1 << 4) #define GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE (1 << 3) #define GEN7_SAMPLER_INSTDONE _MMIO(0xe160) +#define GEN8_SAMPLER_INSTDONE _MMIO(0xe160) #define GEN7_ROW_INSTDONE _MMIO(0xe164) +#define GEN8_ROW_INSTDONE _MMIO(0xe164) #define HALF_SLICE_CHICKEN2_MMIO(0xe180) #define GEN8_ST_PO_DISABLE (1 << 13) -#define HALF_SLICE_CHICKEN3_MMIO(0xe184) +#define HSW_HALF_SLICE_CHICKEN3_MMIO(0xe184) +#define GEN8_HALF_SLICE_CHICKEN3 _MMIO(0xe184) #define HSW_SAMPLE_C_PERFORMANCE (1 << 9) #define GEN8_CENTROID_PIXEL_OPT_DIS (1 << 8) #define GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC (1 << 5) @@ -1127,6 +1134,8 @@ #define DISABLE_EARLY_EOTREG_BIT(1) #define GEN7_ROW_CHICKEN2 _MMIO(0xe4f4) + +#define GEN8_ROW_CHICKEN2 _MMIO(0xe4f4) #define GEN12_DISABLE_READ_SUPPRESSION REG_BIT(15) #define GEN12_DISABLE_EARLY_READ REG_BIT(14) #define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 6d2003d598e6..e382cb4c6971 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -295,10 +295,10 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, * Also see the
[PATCH 0/7] i915: Prep work for explicit MCR handling
Steering of multicast/replicated registers becomes a bit more complicated on Meteor Lake. Whereas previously the control register we used to manage the steering was only used by our driver[*], software's control of steering has now been consolidated with the controls for various other hardware/firmware agents into a single register. We can no longer utilize pre-programmed implicit steering since other firmware agents may change the steering target and not restore it afterward; we'll need to explicitly steer all types of MCR registers (including the GSLICE/COMPUTE/DSS ranges that have been handled implicitly in the past). Furthermore, since multiple agents will now be sharing a single steering control register, races are possible. To address this, the hardware adds a new MCR semaphore register which is supposed to be used to temporarily lock the steering while performing MCR operations. It's going to become important for us to handle accesses of multicast registers very explicitly going forward. This series provides some prep work for that by updating our register definitions to clearly define registers as either MCR or non-MCR and ensure that we're using the intel_gt_mcr_*() functions rather than intel_uncore_*() when operating on MCR registers. In a future series we plan to change the MCR_REG() register definitions to actually declare MCR registers as a new C type (i.e., not an i915_reg_t) so that the compiler will be able to help us find any mistakes where non-MCR functions are used on MCR registers and vice-versa. Introduction of the MTL steering tables and introduction of the steering semaphore support will also arrive in future patch series. [*] This is a bit of an oversimplification; there are some hardware and software debug tools that use the same MCR_SELECTOR register that i915 does and which could potentially re-steer MCR accesses behind our back. E.g., simply using IGT's "intel_reg" tool to write the MCR_SELECTOR register at the wrong time could interfere with driver operation. But given that these debug facilities require root privileges to run and are only used by people intentionally debugging the driver or hardware, we can ignore such races for real-world usage. Matt Roper (7): drm/i915/gen8: Create separate reg definitions for new MCR registers drm/i915/xehp: Create separate reg definitions for new MCR registers drm/i915/gt: Drop a few unused register definitions drm/i915/gt: Correct prefix on a few registers drm/i915: Define MCR registers explicitly drm/i915/gt: Always use MCR functions on multicast registers drm/i915/gt: Add MCR-specific workaround initializers drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 4 +- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 138 +++--- drivers/gpu/drm/i915/gt/intel_gtt.c | 44 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 2 +- drivers/gpu/drm/i915/gt/intel_mocs.c | 12 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 426 +++--- .../gpu/drm/i915/gt/intel_workarounds_types.h | 4 +- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c| 7 +- .../gpu/drm/i915/gt/uc/intel_guc_capture.c| 4 +- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 12 +- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- drivers/gpu/drm/i915/gvt/mmio_context.c | 2 +- drivers/gpu/drm/i915/intel_gvt_mmio_table.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 20 +- 15 files changed, 397 insertions(+), 286 deletions(-) -- 2.37.2
[PATCH v2 4/8] drm/i915: Prepare more multi-GT initialization
We're going to introduce an additional intel_gt for MTL's media unit soon. Let's provide a bit more multi-GT initialization framework in preparation for that. The initialization will pull the list of GTs for a platform from the device info structure. Although necessary for the immediate MTL media enabling, this same framework will also be used farther down the road when we enable remote tiles on xehpsdv and pvc. v2: - Re-add missing test for !HAS_EXTRA_GT_LIST in intel_gt_probe_all(). Cc: Aravind Iddamsetty Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt.c| 51 +-- drivers/gpu/drm/i915/gt/intel_gt.h| 1 - drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 ++ drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/intel_device_info.h | 16 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 1 + 7 files changed, 70 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 275ad72940c1..41acc285e8bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -736,7 +736,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) u16 vdbox_mask; u16 vebox_mask; - info->engine_mask = RUNTIME_INFO(i915)->platform_engine_mask; + GEM_BUG_ON(!info->engine_mask); if (GRAPHICS_VER(i915) < 11) return info->engine_mask; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index cf7aab7adb30..7b880dbed6ce 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -807,17 +807,16 @@ static void intel_gt_tile_cleanup(struct intel_gt *gt) { intel_uncore_cleanup_mmio(gt->uncore); - - if (!gt_is_root(gt)) - kfree(gt); } int intel_gt_probe_all(struct drm_i915_private *i915) { struct pci_dev *pdev = to_pci_dev(i915->drm.dev); struct intel_gt *gt = &i915->gt0; + const struct intel_gt_definition *gtdef; phys_addr_t phys_addr; unsigned int mmio_bar; + unsigned int i; int ret; mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR; @@ -828,14 +827,58 @@ int intel_gt_probe_all(struct drm_i915_private *i915) * and it has been already initialized early during probe * in i915_driver_probe() */ + gt->i915 = i915; + gt->name = "Primary GT"; + gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask; + + drm_dbg(&i915->drm, "Setting up %s\n", gt->name); ret = intel_gt_tile_setup(gt, phys_addr); if (ret) return ret; i915->gt[0] = gt; - /* TODO: add more tiles */ + if (!HAS_EXTRA_GT_LIST(i915)) + return 0; + + for (i = 1, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]; +gtdef->setup != NULL; +i++, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]) { + gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL); + if (!gt) { + ret = -ENOMEM; + goto err; + } + + gt->i915 = i915; + gt->name = gtdef->name; + gt->type = gtdef->type; + gt->info.engine_mask = gtdef->engine_mask; + gt->info.id = i; + + drm_dbg(&i915->drm, "Setting up %s\n", gt->name); + if (GEM_WARN_ON(range_overflows_t(resource_size_t, + gtdef->mapping_base, + SZ_16M, + pci_resource_len(pdev, mmio_bar { + ret = -ENODEV; + goto err; + } + + ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base); + if (ret) + goto err; + + i915->gt[i] = gt; + } + return 0; + +err: + i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, ret); + intel_gt_release_all(i915); + + return ret; } int intel_gt_tiles_init(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 40b06adf509a..4d8779529cc2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -54,7 +54,6 @@ void intel_gt_driver_register(struct intel_gt *gt); void intel_gt_driver_unregister(struct intel_gt *gt); void intel_gt_driver_remove(struct intel_gt *gt); void intel_gt_driver_release(struct i
[PATCH 1/8] drm/i915: Move locking and unclaimed check into mmio_debug_{suspend, resume}
Moving the locking for MMIO debug (and the final check for unclaimed accesses when resuming debug after a userspace-initiated forcewake) will make it simpler to completely skip MMIO debug handling on uncores that don't support it in future patches. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/intel_uncore.c | 41 +++-- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 9b81b2543ce2..e717ea55484a 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -50,23 +50,33 @@ intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug) mmio_debug->unclaimed_mmio_check = 1; } -static void mmio_debug_suspend(struct intel_uncore_mmio_debug *mmio_debug) +static void mmio_debug_suspend(struct intel_uncore *uncore) { - lockdep_assert_held(&mmio_debug->lock); + spin_lock(&uncore->debug->lock); /* Save and disable mmio debugging for the user bypass */ - if (!mmio_debug->suspend_count++) { - mmio_debug->saved_mmio_check = mmio_debug->unclaimed_mmio_check; - mmio_debug->unclaimed_mmio_check = 0; + if (!uncore->debug->suspend_count++) { + uncore->debug->saved_mmio_check = uncore->debug->unclaimed_mmio_check; + uncore->debug->unclaimed_mmio_check = 0; } + + spin_unlock(&uncore->debug->lock); } -static void mmio_debug_resume(struct intel_uncore_mmio_debug *mmio_debug) +static bool check_for_unclaimed_mmio(struct intel_uncore *uncore); + +static void mmio_debug_resume(struct intel_uncore *uncore) { - lockdep_assert_held(&mmio_debug->lock); + spin_lock(&uncore->debug->lock); + + if (!--uncore->debug->suspend_count) + uncore->debug->unclaimed_mmio_check = uncore->debug->saved_mmio_check; - if (!--mmio_debug->suspend_count) - mmio_debug->unclaimed_mmio_check = mmio_debug->saved_mmio_check; + if (check_for_unclaimed_mmio(uncore)) + drm_info(&uncore->i915->drm, +"Invalid mmio detected during user access\n"); + + spin_unlock(&uncore->debug->lock); } static const char * const forcewake_domain_names[] = { @@ -677,9 +687,7 @@ void intel_uncore_forcewake_user_get(struct intel_uncore *uncore) spin_lock_irq(&uncore->lock); if (!uncore->user_forcewake_count++) { intel_uncore_forcewake_get__locked(uncore, FORCEWAKE_ALL); - spin_lock(&uncore->debug->lock); - mmio_debug_suspend(uncore->debug); - spin_unlock(&uncore->debug->lock); + mmio_debug_suspend(uncore); } spin_unlock_irq(&uncore->lock); } @@ -695,14 +703,7 @@ void intel_uncore_forcewake_user_put(struct intel_uncore *uncore) { spin_lock_irq(&uncore->lock); if (!--uncore->user_forcewake_count) { - spin_lock(&uncore->debug->lock); - mmio_debug_resume(uncore->debug); - - if (check_for_unclaimed_mmio(uncore)) - drm_info(&uncore->i915->drm, -"Invalid mmio detected during user access\n"); - spin_unlock(&uncore->debug->lock); - + mmio_debug_resume(uncore); intel_uncore_forcewake_put__locked(uncore, FORCEWAKE_ALL); } spin_unlock_irq(&uncore->lock); -- 2.37.2
[PATCH 6/8] drm/i915/xelpmp: Expose media as another GT
Xe_LPM+ platforms have "standalone media." I.e., the media unit is designed as an additional GT with its own engine list, GuC, forcewake, etc. Let's allow platforms to include media GTs in their device info. Cc: Aravind Iddamsetty Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/Makefile| 1 + drivers/gpu/drm/i915/gt/intel_gt.c | 12 ++-- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 8 + drivers/gpu/drm/i915/gt/intel_sa_media.c | 39 drivers/gpu/drm/i915/gt/intel_sa_media.h | 15 + drivers/gpu/drm/i915/i915_pci.c | 15 + drivers/gpu/drm/i915/intel_device_info.h | 5 ++- drivers/gpu/drm/i915/intel_uncore.c | 16 -- drivers/gpu/drm/i915/intel_uncore.h | 20 ++-- 9 files changed, 123 insertions(+), 8 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.c create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 522ef9b4aff3..e83e4cd46968 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -123,6 +123,7 @@ gt-y += \ gt/intel_ring.o \ gt/intel_ring_submission.o \ gt/intel_rps.o \ + gt/intel_sa_media.o \ gt/intel_sseu.o \ gt/intel_sseu_debugfs.o \ gt/intel_timeline.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index d21ec11346a5..2a29502289cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -776,10 +776,15 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915) } } -static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) +static int intel_gt_tile_setup(struct intel_gt *gt, + phys_addr_t phys_addr, + u32 gsi_offset) { int ret; + /* GSI offset is only applicable for media GTs */ + drm_WARN_ON(>->i915->drm, gsi_offset); + if (!gt_is_root(gt)) { struct intel_uncore *uncore; @@ -832,7 +837,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915) gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask; drm_dbg(&i915->drm, "Setting up %s\n", gt->name); - ret = intel_gt_tile_setup(gt, phys_addr); + ret = intel_gt_tile_setup(gt, phys_addr, 0); if (ret) return ret; @@ -862,7 +867,8 @@ int intel_gt_probe_all(struct drm_i915_private *i915) goto err; } - ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base); + ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base, + gtdef->gsi_offset); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 94f9ddcfb3a5..05a40ef33258 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1576,4 +1576,12 @@ #define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000) +/* + * Standalone Media's non-engine GT registers are located at their regular GT + * offsets plus 0x38. This extra offset is stored inside the intel_uncore + * structure so that the existing code can be used for both GTs without + * modification. + */ +#define MTL_MEDIA_GSI_BASE 0x38 + #endif /* __INTEL_GT_REGS__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c b/drivers/gpu/drm/i915/gt/intel_sa_media.c new file mode 100644 index ..8c5c519457cc --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include + +#include "i915_drv.h" +#include "gt/intel_gt.h" +#include "gt/intel_sa_media.h" + +int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr, + u32 gsi_offset) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore; + + uncore = drmm_kzalloc(&i915->drm, sizeof(*uncore), GFP_KERNEL); + if (!uncore) + return -ENOMEM; + + uncore->gsi_offset = gsi_offset; + + intel_gt_common_init_early(gt); + intel_uncore_init_early(uncore, gt); + + /* +* Standalone media shares the general MMIO space with the primary +* GT. We'll re-use the primary GT's mapping. +*/ + uncore->regs = i915->uncore.regs; + if (drm_WARN_ON(&i915->drm, uncore->regs == NULL)) + return -EIO; + + gt->uncore = uncore; + gt->phys_addr = phys_addr; + + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/intel_sa_m
[PATCH 3/8] drm/i915: Use managed allocations for extra uncore objects
We're slowly transitioning the init-time kzalloc's of the driver over to DRM-managed allocations; let's make sure the uncore objects allocated for non-root GTs are thus allocated. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index a82b5e2e0d83..cf7aab7adb30 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -783,7 +783,7 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) if (!gt_is_root(gt)) { struct intel_uncore *uncore; - uncore = kzalloc(sizeof(*uncore), GFP_KERNEL); + uncore = drmm_kzalloc(>->i915->drm, sizeof(*uncore), GFP_KERNEL); if (!uncore) return -ENOMEM; @@ -808,10 +808,8 @@ intel_gt_tile_cleanup(struct intel_gt *gt) { intel_uncore_cleanup_mmio(gt->uncore); - if (!gt_is_root(gt)) { - kfree(gt->uncore); + if (!gt_is_root(gt)) kfree(gt); - } } int intel_gt_probe_all(struct drm_i915_private *i915) -- 2.37.2
[PATCH 7/8] drm/i915/mtl: Use primary GT's irq lock for media GT
When we hook up interrupts (in the next patch), interrupts for the media GT are still processed as part of the primary GT's interrupt flow. As such, we should share the same IRQ lock with the primary GT. Let's convert gt->irq_lock into a pointer and just point the media GT's instance at the same lock the primary GT is using. Cc: Daniele Ceraolo Spurio Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 8 +++--- drivers/gpu/drm/i915/gt/intel_gt.c| 15 +-- drivers/gpu/drm/i915/gt/intel_gt.h| 2 +- drivers/gpu/drm/i915/gt/intel_gt_irq.c| 16 ++-- drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c | 8 +++--- drivers/gpu/drm/i915/gt/intel_gt_types.h | 2 +- drivers/gpu/drm/i915/gt/intel_rps.c | 26 +-- drivers/gpu/drm/i915/gt/intel_sa_media.c | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc.c| 24 - .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 +-- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 4 +-- drivers/gpu/drm/i915/i915_driver.c| 4 ++- drivers/gpu/drm/i915/i915_irq.c | 4 +-- drivers/gpu/drm/i915/pxp/intel_pxp.c | 4 +-- drivers/gpu/drm/i915/pxp/intel_pxp_irq.c | 14 +- drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 4 +-- 16 files changed, 77 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 41acc285e8bf..6e0122b3dca2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1688,9 +1688,9 @@ bool intel_engine_irq_enable(struct intel_engine_cs *engine) return false; /* Caller disables interrupts */ - spin_lock(&engine->gt->irq_lock); + spin_lock(engine->gt->irq_lock); engine->irq_enable(engine); - spin_unlock(&engine->gt->irq_lock); + spin_unlock(engine->gt->irq_lock); return true; } @@ -1701,9 +1701,9 @@ void intel_engine_irq_disable(struct intel_engine_cs *engine) return; /* Caller disables interrupts */ - spin_lock(&engine->gt->irq_lock); + spin_lock(engine->gt->irq_lock); engine->irq_disable(engine); - spin_unlock(&engine->gt->irq_lock); + spin_unlock(engine->gt->irq_lock); } void intel_engines_reset_default_submission(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 2a29502289cb..b974a6d23281 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -37,7 +37,7 @@ void intel_gt_common_init_early(struct intel_gt *gt) { - spin_lock_init(>->irq_lock); + spin_lock_init(gt->irq_lock); INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); @@ -58,14 +58,19 @@ void intel_gt_common_init_early(struct intel_gt *gt) } /* Preliminary initialization of Tile 0 */ -void intel_root_gt_init_early(struct drm_i915_private *i915) +int intel_root_gt_init_early(struct drm_i915_private *i915) { struct intel_gt *gt = to_gt(i915); gt->i915 = i915; gt->uncore = &i915->uncore; + gt->irq_lock = drmm_kzalloc(&i915->drm, sizeof(*gt->irq_lock), GFP_KERNEL); + if (!gt->irq_lock) + return -ENOMEM; intel_gt_common_init_early(gt); + + return 0; } static int intel_gt_probe_lmem(struct intel_gt *gt) @@ -787,12 +792,18 @@ static int intel_gt_tile_setup(struct intel_gt *gt, if (!gt_is_root(gt)) { struct intel_uncore *uncore; + spinlock_t *irq_lock; uncore = drmm_kzalloc(>->i915->drm, sizeof(*uncore), GFP_KERNEL); if (!uncore) return -ENOMEM; + irq_lock = drmm_kzalloc(>->i915->drm, sizeof(*irq_lock), GFP_KERNEL); + if (!irq_lock) + return -ENOMEM; + gt->uncore = uncore; + gt->irq_lock = irq_lock; intel_gt_common_init_early(gt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index c9a359f35d0f..2ee582e287c8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -45,7 +45,7 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc *gsc) } void intel_gt_common_init_early(struct intel_gt *gt); -void intel_root_gt_init_early(struct drm_i915_private *i915); +int intel_root_gt_init_early(struct drm_i915_private *i915); int intel_gt_assign_ggtt(struct intel_gt *gt); int intel_gt_init_mmio(struct intel_gt *gt); int __must_check intel_gt_init_hw(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_
[PATCH 8/8] drm/i915/mtl: Hook up interrupts for standalone media
Top-level handling of standalone media interrupts will be processed as part of the primary GT's interrupt handler (since primary and media GTs share an MMIO space, unlike remote tile setups). When we get down to the point of handling engine interrupts, we need to take care to lookup VCS and VECS engines in the media GT rather than the primary. There are also a couple of additional "other" instance bits that correspond to the media GT's GuC and media GT's power management interrupts; we need to direct those to the media GT instance as well. Bspec: 45605 Cc: Anusha Srivatsa Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt_irq.c | 19 +++ drivers/gpu/drm/i915/gt/intel_gt_regs.h | 2 ++ drivers/gpu/drm/i915/gt/intel_sa_media.c | 7 +++ drivers/gpu/drm/i915/i915_drv.h | 3 +++ 4 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 0dfd0c42d00d..f26882fdc24c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -59,11 +59,17 @@ static void gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, const u16 iir) { + struct intel_gt *media_gt = gt->i915->media_gt; + if (instance == OTHER_GUC_INSTANCE) return guc_irq_handler(>->uc.guc, iir); + if (instance == OTHER_MEDIA_GUC_INSTANCE && media_gt) + return guc_irq_handler(&media_gt->uc.guc, iir); if (instance == OTHER_GTPM_INSTANCE) return gen11_rps_irq_handler(>->rps, iir); + if (instance == OTHER_MEDIA_GTPM_INSTANCE && media_gt) + return gen11_rps_irq_handler(&media_gt->rps, iir); if (instance == OTHER_KCR_INSTANCE) return intel_pxp_irq_handler(>->pxp, iir); @@ -81,6 +87,18 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class, { struct intel_engine_cs *engine; + /* +* Platforms with standalone media have their media engines in another +* GT. +*/ + if (MEDIA_VER(gt->i915) >= 13 && + (class == VIDEO_DECODE_CLASS || class == VIDEO_ENHANCEMENT_CLASS)) { + if (!gt->i915->media_gt) + goto err; + + gt = gt->i915->media_gt; + } + if (instance <= MAX_ENGINE_INSTANCE) engine = gt->engine_class[class][instance]; else @@ -89,6 +107,7 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class, if (likely(engine)) return intel_engine_cs_irq(engine, iir); +err: WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n", class, instance); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 05a40ef33258..21c7a225157f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1552,6 +1552,8 @@ #define OTHER_GTPM_INSTANCE 1 #define OTHER_KCR_INSTANCE 4 #define OTHER_GSC_INSTANCE 6 +#define OTHER_MEDIA_GUC_INSTANCE 16 +#define OTHER_MEDIA_GTPM_INSTANCE17 #define GEN11_IIR_REG_SELECTOR(x) _MMIO(0x190070 + ((x) * 4)) diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c b/drivers/gpu/drm/i915/gt/intel_sa_media.c index cf3053710bbf..41c270f103cf 100644 --- a/drivers/gpu/drm/i915/gt/intel_sa_media.c +++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c @@ -36,5 +36,12 @@ int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr, gt->uncore = uncore; gt->phys_addr = phys_addr; + /* +* For current platforms we can assume there's only a single +* media GT and cache it for quick lookup. +*/ + drm_WARN_ON(&i915->drm, i915->media_gt); + i915->media_gt = gt; + return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d45dca70bfa6..917958d42805 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -497,6 +497,9 @@ struct drm_i915_private { struct kobject *sysfs_gt; + /* Quick lookup of media GT (current platforms only have one) */ + struct intel_gt *media_gt; + struct { struct i915_gem_contexts { spinlock_t lock; /* locks list */ -- 2.37.2
[PATCH 4/8] drm/i915: Prepare more multi-GT initialization
We're going to introduce an additional intel_gt for MTL's media unit soon. Let's provide a bit more multi-GT initialization framework in preparation for that. The initialization will pull the list of GTs for a platform from the device info structure. Although necessary for the immediate MTL media enabling, this same framework will also be used farther down the road when we enable remote tiles on xehpsdv and pvc. Cc: Aravind Iddamsetty Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt.c| 48 +-- drivers/gpu/drm/i915/gt/intel_gt.h| 1 - drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 ++ drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/intel_device_info.h | 16 +++ .../gpu/drm/i915/selftests/mock_gem_device.c | 1 + 7 files changed, 67 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 275ad72940c1..41acc285e8bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -736,7 +736,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) u16 vdbox_mask; u16 vebox_mask; - info->engine_mask = RUNTIME_INFO(i915)->platform_engine_mask; + GEM_BUG_ON(!info->engine_mask); if (GRAPHICS_VER(i915) < 11) return info->engine_mask; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index cf7aab7adb30..7c0525e96155 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -807,17 +807,16 @@ static void intel_gt_tile_cleanup(struct intel_gt *gt) { intel_uncore_cleanup_mmio(gt->uncore); - - if (!gt_is_root(gt)) - kfree(gt); } int intel_gt_probe_all(struct drm_i915_private *i915) { struct pci_dev *pdev = to_pci_dev(i915->drm.dev); struct intel_gt *gt = &i915->gt0; + const struct intel_gt_definition *gtdef; phys_addr_t phys_addr; unsigned int mmio_bar; + unsigned int i; int ret; mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR; @@ -828,14 +827,55 @@ int intel_gt_probe_all(struct drm_i915_private *i915) * and it has been already initialized early during probe * in i915_driver_probe() */ + gt->i915 = i915; + gt->name = "Primary GT"; + gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask; + + drm_dbg(&i915->drm, "Setting up %s\n", gt->name); ret = intel_gt_tile_setup(gt, phys_addr); if (ret) return ret; i915->gt[0] = gt; - /* TODO: add more tiles */ + for (i = 1, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]; +gtdef->setup != NULL; +i++, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]) { + gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL); + if (!gt) { + ret = -ENOMEM; + goto err; + } + + gt->i915 = i915; + gt->name = gtdef->name; + gt->type = gtdef->type; + gt->info.engine_mask = gtdef->engine_mask; + gt->info.id = i; + + drm_dbg(&i915->drm, "Setting up %s\n", gt->name); + if (GEM_WARN_ON(range_overflows_t(resource_size_t, + gtdef->mapping_base, + SZ_16M, + pci_resource_len(pdev, mmio_bar { + ret = -ENODEV; + goto err; + } + + ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base); + if (ret) + goto err; + + i915->gt[i] = gt; + } + return 0; + +err: + i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, ret); + intel_gt_release_all(i915); + + return ret; } int intel_gt_tiles_init(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 40b06adf509a..4d8779529cc2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -54,7 +54,6 @@ void intel_gt_driver_register(struct intel_gt *gt); void intel_gt_driver_unregister(struct intel_gt *gt); void intel_gt_driver_remove(struct intel_gt *gt); void intel_gt_driver_release(struct intel_gt *gt); - void intel_gt_driver_late_release_all(struct drm_i915_private *i915); int intel_gt_wait_for_idle(struct intel_gt *gt, l
[PATCH 5/8] drm/i915: Rename and expose common GT early init routine
The common early GT init is needed for initialization of all GT types (root/primary, remote tile, standalone media). Since standalone media (coming in the next patch) will be implemented in a separate file, rename and expose the function for use. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt.c | 6 +++--- drivers/gpu/drm/i915/gt/intel_gt.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 7c0525e96155..d21ec11346a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -35,7 +35,7 @@ #include "intel_uncore.h" #include "shmem_utils.h" -static void __intel_gt_init_early(struct intel_gt *gt) +void intel_gt_common_init_early(struct intel_gt *gt) { spin_lock_init(>->irq_lock); @@ -65,7 +65,7 @@ void intel_root_gt_init_early(struct drm_i915_private *i915) gt->i915 = i915; gt->uncore = &i915->uncore; - __intel_gt_init_early(gt); + intel_gt_common_init_early(gt); } static int intel_gt_probe_lmem(struct intel_gt *gt) @@ -789,7 +789,7 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) gt->uncore = uncore; - __intel_gt_init_early(gt); + intel_gt_common_init_early(gt); } intel_uncore_init_early(gt->uncore, gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 4d8779529cc2..c9a359f35d0f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -44,6 +44,7 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc *gsc) return container_of(gsc, struct intel_gt, gsc); } +void intel_gt_common_init_early(struct intel_gt *gt); void intel_root_gt_init_early(struct drm_i915_private *i915); int intel_gt_assign_ggtt(struct intel_gt *gt); int intel_gt_init_mmio(struct intel_gt *gt); -- 2.37.2
[PATCH 2/8] drm/i915: Only hook up uncore->debug for primary uncore
The original intent of intel_uncore_mmio_debug as described in commit 0a9b26306d6a ("drm/i915: split out uncore_mmio_debug") was to be a singleton structure that could be shared between multiple GTs' uncore objects in a multi-tile system. Somehow we went off track and started allocating separate instances of this structure for each GT, which defeats that original goal. But in reality, there isn't even a need to share the mmio_debug between multiple GTs; on all modern platforms (i.e., everything after gen7) unclaimed register accesses are something that can only be detected for display registers. There's no point in grabbing the debug spinlock and checking for unclaimed accesses on an uncore used by an xehpsdv or pvc remote tile GT, or the uncore used by a mtl standalone media GT since all of the display accesses go through the primary intel_uncore. The simplest solution is to simply leave uncore->debug NULL on all intel_uncore instances except for the primary one. This will allow us to avoid the pointless debug spinlock acquisition we've been doing on MMIO accesses coming in through these intel_uncores. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt.c | 9 - drivers/gpu/drm/i915/i915_driver.c | 2 +- drivers/gpu/drm/i915/intel_uncore.c | 23 ++- drivers/gpu/drm/i915/intel_uncore.h | 3 +-- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index e4bac2431e41..a82b5e2e0d83 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -781,21 +781,13 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) int ret; if (!gt_is_root(gt)) { - struct intel_uncore_mmio_debug *mmio_debug; struct intel_uncore *uncore; uncore = kzalloc(sizeof(*uncore), GFP_KERNEL); if (!uncore) return -ENOMEM; - mmio_debug = kzalloc(sizeof(*mmio_debug), GFP_KERNEL); - if (!mmio_debug) { - kfree(uncore); - return -ENOMEM; - } - gt->uncore = uncore; - gt->uncore->debug = mmio_debug; __intel_gt_init_early(gt); } @@ -817,7 +809,6 @@ intel_gt_tile_cleanup(struct intel_gt *gt) intel_uncore_cleanup_mmio(gt->uncore); if (!gt_is_root(gt)) { - kfree(gt->uncore->debug); kfree(gt->uncore); kfree(gt); } diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 053a7dab5506..de9020771836 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -326,7 +326,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) intel_device_info_subplatform_init(dev_priv); intel_step_init(dev_priv); - intel_uncore_mmio_debug_init_early(&dev_priv->mmio_debug); + intel_uncore_mmio_debug_init_early(dev_priv); spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->gpu_error.lock); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index e717ea55484a..6841f76533f9 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -44,14 +44,19 @@ fw_domains_get(struct intel_uncore *uncore, enum forcewake_domains fw_domains) } void -intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug) +intel_uncore_mmio_debug_init_early(struct drm_i915_private *i915) { - spin_lock_init(&mmio_debug->lock); - mmio_debug->unclaimed_mmio_check = 1; + spin_lock_init(&i915->mmio_debug.lock); + i915->mmio_debug.unclaimed_mmio_check = 1; + + i915->uncore.debug = &i915->mmio_debug; } static void mmio_debug_suspend(struct intel_uncore *uncore) { + if (!uncore->debug) + return; + spin_lock(&uncore->debug->lock); /* Save and disable mmio debugging for the user bypass */ @@ -67,6 +72,9 @@ static bool check_for_unclaimed_mmio(struct intel_uncore *uncore); static void mmio_debug_resume(struct intel_uncore *uncore) { + if (!uncore->debug) + return; + spin_lock(&uncore->debug->lock); if (!--uncore->debug->suspend_count) @@ -1705,7 +1713,7 @@ unclaimed_reg_debug(struct intel_uncore *uncore, const bool read, const bool before) { - if (likely(!uncore->i915->params.mmio_debug)) + if (likely(!uncore->i915->params.mmio_debug) || !uncore->debug) return; /* interrupts are disabled and re-enabled around uncore->lock usage
[PATCH 0/8] i915: Add "standalone media" support for MTL
Starting with MTL, media functionality has moved into a new, second GT at the hardware level. This new GT, referred to as "standalone media" in the spec, has its own GuC, power management/forcewake, etc. The general non-engine GT registers for standalone media start at 0x38, but otherwise use the same MMIO offsets as the primary GT. Standalone media has a lot of similarity to the remote tiles present on platforms like xehpsdv and pvc, and our i915 implementation can share much of the general "multi GT" infrastructure between the two types of platforms. However there are a few notable differences we must deal with: - The 0x38 offset only applies to the non-engine GT registers (which the specs refer to as "GSI" registers). The engine registers remain at their usual locations (e.g., 0x1C for VCS0). - Unlike platforms with remote tiles, all interrupt handling for standalone media still happens via the primary GT. Matt Roper (8): drm/i915: Move locking and unclaimed check into mmio_debug_{suspend,resume} drm/i915: Only hook up uncore->debug for primary uncore drm/i915: Use managed allocations for extra uncore objects drm/i915: Prepare more multi-GT initialization drm/i915: Rename and expose common GT early init routine drm/i915/xelpmp: Expose media as another GT drm/i915/mtl: Use primary GT's irq lock for media GT drm/i915/mtl: Hook up interrupts for standalone media drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/intel_engine_cs.c | 10 +-- drivers/gpu/drm/i915/gt/intel_gt.c| 88 ++- drivers/gpu/drm/i915/gt/intel_gt.h| 4 +- drivers/gpu/drm/i915/gt/intel_gt_irq.c| 35 ++-- drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c | 8 +- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 10 +++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 5 +- drivers/gpu/drm/i915/gt/intel_rps.c | 26 +++--- drivers/gpu/drm/i915/gt/intel_sa_media.c | 47 ++ drivers/gpu/drm/i915/gt/intel_sa_media.h | 15 drivers/gpu/drm/i915/gt/uc/intel_guc.c| 24 ++--- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 4 +- drivers/gpu/drm/i915/i915_driver.c| 6 +- drivers/gpu/drm/i915/i915_drv.h | 5 ++ drivers/gpu/drm/i915/i915_irq.c | 4 +- drivers/gpu/drm/i915/i915_pci.c | 15 drivers/gpu/drm/i915/intel_device_info.h | 19 drivers/gpu/drm/i915/intel_uncore.c | 80 +++-- drivers/gpu/drm/i915/intel_uncore.h | 23 - drivers/gpu/drm/i915/pxp/intel_pxp.c | 4 +- drivers/gpu/drm/i915/pxp/intel_pxp_irq.c | 14 +-- drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 1 + 25 files changed, 340 insertions(+), 116 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.c create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.h -- 2.37.2
Re: [PATCH v2 15/21] drm/i915/mtl: Obtain SAGV values from MMIO instead of GT pcode mailbox
On Thu, Aug 18, 2022 at 04:41:56PM -0700, Radhakrishna Sripada wrote: > From Meteorlake, Latency Level, SAGV bloack time are read from > LATENCY_SAGV register instead of the GT driver pcode mailbox. DDR type > and QGV information are also to be read from Mem SS registers. > > v2: > - Simplify MTL_MEM_SS_INFO_QGV_POINT macro(MattR) > - Nit: Rearrange the bit def's from higher to lower(MattR) > - Restore platform definition for ADL-P(MattR) > - Move back intel_qgv_point def to intel_bw.c(Jani) > Bspec: 64636, 64608 > > Cc: Matt Roper > Cc: Jani Nikula > Original Author: Caz Yokoyama > Signed-off-by: José Roberto de Souza > Signed-off-by: Radhakrishna Sripada > --- > drivers/gpu/drm/i915/display/intel_bw.c | 42 ++--- > drivers/gpu/drm/i915/i915_reg.h | 16 ++ > drivers/gpu/drm/i915/intel_dram.c | 41 +++- > drivers/gpu/drm/i915/intel_pm.c | 8 - > 4 files changed, 100 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/i915/display/intel_bw.c > b/drivers/gpu/drm/i915/display/intel_bw.c > index 79269d2c476b..46b63afd536a 100644 > --- a/drivers/gpu/drm/i915/display/intel_bw.c > +++ b/drivers/gpu/drm/i915/display/intel_bw.c > @@ -137,6 +137,42 @@ int icl_pcode_restrict_qgv_points(struct > drm_i915_private *dev_priv, > return 0; > } > > +static int mtl_read_qgv_point_info(struct drm_i915_private *dev_priv, > +struct intel_qgv_point *sp, int point) > +{ > + u32 val, val2; > + u16 dclk; > + > + val = intel_uncore_read(&dev_priv->uncore, > + MTL_MEM_SS_INFO_QGV_POINT_LOW(point)); > + val2 = intel_uncore_read(&dev_priv->uncore, > + MTL_MEM_SS_INFO_QGV_POINT_HIGH(point)); > + dclk = REG_FIELD_GET(MTL_DCLK_MASK, val); > + sp->dclk = DIV_ROUND_UP((16667 * dclk), 1000); > + sp->t_rp = REG_FIELD_GET(MTL_TRP_MASK, val); > + sp->t_rcd = REG_FIELD_GET(MTL_TRCD_MASK, val); > + > + sp->t_rdpre = REG_FIELD_GET(MTL_TRDPRE_MASK, val2); > + sp->t_ras = REG_FIELD_GET(MTL_TRAS_MASK, val2); > + > + sp->t_rc = sp->t_rp + sp->t_ras; > + > + return 0; > +} > + > +static int > +intel_read_qgv_point_info(struct drm_i915_private *dev_priv, > + struct intel_qgv_point *sp, > + int point) > +{ > + if (DISPLAY_VER(dev_priv) >= 14) > + return mtl_read_qgv_point_info(dev_priv, sp, point); > + else if (IS_DG1(dev_priv)) > + return dg1_mchbar_read_qgv_point_info(dev_priv, sp, point); > + else > + return icl_pcode_read_qgv_point_info(dev_priv, sp, point); > +} > + > static int icl_get_qgv_points(struct drm_i915_private *dev_priv, > struct intel_qgv_info *qi, > bool is_y_tile) > @@ -193,11 +229,7 @@ static int icl_get_qgv_points(struct drm_i915_private > *dev_priv, > for (i = 0; i < qi->num_points; i++) { > struct intel_qgv_point *sp = &qi->points[i]; > > - if (IS_DG1(dev_priv)) > - ret = dg1_mchbar_read_qgv_point_info(dev_priv, sp, i); > - else > - ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i); > - > + ret = intel_read_qgv_point_info(dev_priv, sp, i); > if (ret) > return ret; > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index b2d5e1230c25..5245af8d0ea8 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -8397,4 +8397,20 @@ enum skl_power_gate { > #define MTL_LATENCY_LEVEL0_2_4_MASK REG_GENMASK(12, 0) > #define MTL_LATENCY_LEVEL1_3_5_MASK REG_GENMASK(28, 16) > > +#define MTL_LATENCY_SAGV _MMIO(0x4578c) > +#define MTL_LATENCY_QCLK_SAGV REG_GENMASK(12, 0) Minor nitpick: we usually have two additional spaces (for a total of three) between the 'define' and the field name. > + > +#define MTL_MEM_SS_INFO_GLOBAL _MMIO(0x45700) > +#define MTL_DDR_TYPE_MASK REG_GENMASK(3, 0) > +#define MTL_N_OF_POPULATED_CH_MASK REG_GENMASK(7, 4) > +#define MTL_N_OF_ENABLED_QGV_POINTS_MASKREG_GENMASK(11, 8) Another nitpick: we usually order fields from low to high (which is also usually how the spec orders them). > + > +#define MTL_MEM_SS_INFO_QGV_POINT_LOW(point) _MMIO(0x45710 + (point) * 2) > +#define MTL_MEM_SS_INFO_QGV_POINT_HIGH(point) _MMIO(0x45714 + > (point) * 2) > +#def
[PATCH] drm/i915/ats-m: Add thread execution tuning setting
On client DG2 platforms, optimal performance is achieved with the hardware's default "age based" thread execution setting. However on ATS-M, switching this to "round robin after dependencies" provides better performance. We'll add a new "tuning" feature flag to the ATS-M device info to enable/disable this setting. Bspec: 68331 Cc: Lucas De Marchi Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 2 ++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 + drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/intel_device_info.h| 1 + 4 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 94f9ddcfb3a5..d414785003cc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1110,6 +1110,8 @@ #define GEN12_DISABLE_TDL_PUSH REG_BIT(9) #define GEN11_DIS_PICK_2ND_EUREG_BIT(7) #define GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX REG_BIT(4) +#define THREAD_EX_ARB_MODE REG_GENMASK(3, 2) +#define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2) #define HSW_ROW_CHICKEN3 _MMIO(0xe49c) #define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 3cdb8294e13f..ff8c3735abc9 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2700,6 +2700,15 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, 0 /* write-only, so skip validation */, true); } + + /* +* This tuning setting proves beneficial only on ATS-M designs; the +* default "age based" setting is optimal on regular DG2 and other +* platforms. +*/ + if (INTEL_INFO(i915)->tuning_thread_rr_after_dep) + wa_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE, + THREAD_EX_ARB_MODE_RR_AFTER_DEP); } /* diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 857e8bb6865c..26b25d9434d6 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1080,6 +1080,7 @@ static const struct intel_device_info ats_m_info = { DG2_FEATURES, .display = { 0 }, .require_force_probe = 1, + .tuning_thread_rr_after_dep = 1, }; #define XE_HPC_FEATURES \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 0ccde94b225f..6904ad03ca19 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -171,6 +171,7 @@ enum intel_ppgtt_type { func(has_runtime_pm); \ func(has_snoop); \ func(has_coherent_ggtt); \ + func(tuning_thread_rr_after_dep); \ func(unfenced_needs_alignment); \ func(hws_needs_physical); -- 2.37.2
[PATCH] Revert "drm/i915/dg2: Add preemption changes for Wa_14015141709"
This reverts commit ca6920811aa5428270dd78af0a7a36b10119065a. The intent of Wa_14015141709 was to inform us that userspace can no longer control object-level preemption as it has on past platforms (i.e., by twiddling register bit CS_CHICKEN1[0]). The description of the workaround in the spec wasn't terribly well-written, and when we requested clarification from the hardware teams we were told that on the kernel side we should also probably stop setting FF_SLICE_CS_CHICKEN1[14], which is the register bit that directs the hardware to honor the settings in per-context register CS_CHICKEN1. It turns out that this guidance about FF_SLICE_CS_CHICKEN1[14] was a mistake; even though CS_CHICKEN1[0] is non-operational and useless to userspace, there are other bits in the register that do still work and might need to be adjusted by userspace in the future (e.g., to implement other workarounds that show up). If we don't set FF_SLICE_CS_CHICKEN1[14] in i915, then those future workarounds would not take effect. This miscommunication came to light because another workaround (Wa_16013994831) has now shown up that requires userspace to adjust the value of CS_CHICKEN[10] in certain circumstances. To ensure userspace's updates to this chicken bit are handled properly by the hardware, we need to make sure that FF_SLICE_CS_CHICKEN1[14] is once again set by the kernel. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 3cdb8294e13f..69a0c6a74474 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2389,7 +2389,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) FF_DOP_CLOCK_GATE_DISABLE); } - if (HAS_PERCTX_PREEMPT_CTRL(i915)) { + if (IS_GRAPHICS_VER(i915, 9, 12)) { /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */ wa_masked_en(wal, GEN7_FF_SLICE_CS_CHICKEN1, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2b00ef3626db..d6a1ab6f65de 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1352,9 +1352,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_GUC_DEPRIVILEGE(dev_priv) \ (INTEL_INFO(dev_priv)->has_guc_deprivilege) -#define HAS_PERCTX_PREEMPT_CTRL(i915) \ - ((GRAPHICS_VER(i915) >= 9) && GRAPHICS_VER_FULL(i915) < IP_VER(12, 55)) - #define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \ IS_ALDERLAKE_S(dev_priv)) -- 2.37.2
[PATCH v2 1/2] drm/i915/gt: Add dedicated function for non-ctx register tuning settings
The bspec performance tuning section gives recommended settings that the driver should program for various MMIO registers. Although these settings aren't "workarounds" we use the workaround infrastructure to do this programming to make sure it is handled at the appropriate places and doesn't conflict with any real workarounds. Since more of these are starting to show up on recent platforms, it's a good time to create a dedicated function to hold them so that there's less ambiguity about how/where to implement new ones. Cc: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 42 ++--- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 59cf28baa472..a68d279b01f0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2102,13 +2102,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) /* Wa_1509235366:dg2 */ wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE); - - /* -* The following are not actually "workarounds" but rather -* recommended tuning settings documented in the bspec's -* performance guide section. -*/ - wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); } if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { @@ -2676,6 +2669,32 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } } +/* + * The bspec performance guide has recommended MMIO tuning settings. These + * aren't truly "workarounds" but we want to program them with the same + * workaround infrastructure to ensure that they're automatically added to + * the GuC save/restore lists, re-applied at the right times, and checked for + * any conflicting programming requested by real workarounds. + * + * Programming settings should be added here only if their registers are not + * part of an engine's register state context. If a register is part of a + * context, then any tuning settings should be programmed in an appropriate + * function invoked by __intel_engine_init_ctx_wa(). + */ +static void +add_render_compute_tuning_settings(struct drm_i915_private *i915, + struct i915_wa_list *wal) +{ + if (IS_PONTEVECCHIO(i915)) { + wa_write(wal, XEHPC_L3SCRUB, +SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); + } + + if (IS_DG2(i915)) { + wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); + } +} + /* * The workarounds in this function apply to shared registers in * the general render reset domain that aren't tied to a @@ -2690,14 +2709,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li { struct drm_i915_private *i915 = engine->i915; - if (IS_PONTEVECCHIO(i915)) { - /* -* The following is not actually a "workaround" but rather -* a recommended tuning setting documented in the bspec's -* performance guide section. -*/ - wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); + add_render_compute_tuning_settings(i915, wal); + if (IS_PONTEVECCHIO(i915)) { /* Wa_16016694945 */ wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC); } -- 2.37.1
[PATCH] drm/i915/dg2: Incorporate Wa_16014892111 into DRAW_WATERMARK tuning
Although register tuning settings are generally implemented via the workaround infrastructure, it turns out that the DRAW_WATERMARK register is not properly saved/restored by hardware around power events (i.e., RC6 entry) so updates to the value cannot be applied in the usual manner. New workaround Wa_16014892111 informs us that any tuning updates to this register must instead be applied via an INDIRECT_CTX batch buffer. This will ensure that the necessary value is re-applied when a context begins running, even if an RC6 entry had wiped the register back to hardware defaults since the last context ran. Fixes: 6dc85721df74 ("drm/i915/dg2: Add additional tuning settings") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6642 Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_lrc.c | 21 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 -- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index eec73c66406c..070cec4ff8a4 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1242,6 +1242,23 @@ dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs) return cs; } +/* + * The bspec's tuning guide asks us to program a vertical watermark value of + * 0x3FF. However this register is not saved/restored properly by the + * hardware, so we're required to apply the desired value via INDIRECT_CTX + * batch buffer to ensure the value takes effect properly. All other bits + * in this register should remain at 0 (the hardware default). + */ +static u32 * +dg2_emit_draw_watermark_setting(u32 *cs) +{ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(DRAW_WATERMARK); + *cs++ = REG_FIELD_PREP(VERT_WM_VAL, 0x3FF); + + return cs; +} + static u32 * gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) { @@ -1263,6 +1280,10 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) if (!HAS_FLAT_CCS(ce->engine->i915)) cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV); + /* Wa_16014892111 */ + if (IS_DG2(ce->engine->i915)) + cs = dg2_emit_draw_watermark_setting(cs); + return cs; } diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 31e129329fb0..3cdb8294e13f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2685,8 +2685,6 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, if (IS_DG2(i915)) { wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); - wa_write_clr_set(wal, DRAW_WATERMARK, VERT_WM_VAL, -REG_FIELD_PREP(VERT_WM_VAL, 0x3FF)); /* * This is also listed as Wa_22012654132 for certain DG2 -- 2.37.2
Re: [Intel-gfx] [PATCH v2 21/21] drm/i915/mtl: Do not update GV point, mask value
On Thu, Aug 18, 2022 at 04:42:02PM -0700, Radhakrishna Sripada wrote: > No need to update mask value/restrict because > "Pcode only wants to use GV bandwidth value, not the mask value." > for Display version greater than 14. While the code changes might be correct, I can't decipher what the commit message here is trying to tell us. I'm not sure what the source or context of the quote is, but the description in the commit message should be more focused on why it's correct for our driver to skip these operations. I assume it has something to do with the new pm_demand interfaces we'll be using to program this information into the hardware in a future series? If so, maybe this patch (with a modified commit message) is better suited for inclusion in that future series where the context makes more sense. > > Bspec: 646365 This page number seems to be incorrect too. Matt > Cc: Matt Roper > Original Author: Caz Yokoyama > Signed-off-by: Radhakrishna Sripada > --- > drivers/gpu/drm/i915/intel_pm.c | 18 ++ > 1 file changed, 18 insertions(+) > > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index d09e9e5f4481..47869fe964ba 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -3924,6 +3924,14 @@ void intel_sagv_pre_plane_update(struct > intel_atomic_state *state) > { > struct drm_i915_private *i915 = to_i915(state->base.dev); > > + /* > + * No need to update mask value/restrict because > + * "Pcode only wants to use GV bandwidth value, not the mask value." > + * for DISPLAY_VER() >= 14. > + */ > + if (DISPLAY_VER(i915) >= 14) > + return; > + > /* >* Just return if we can't control SAGV or don't have it. >* This is different from situation when we have SAGV but just can't > @@ -3944,6 +3952,16 @@ void intel_sagv_post_plane_update(struct > intel_atomic_state *state) > { > struct drm_i915_private *i915 = to_i915(state->base.dev); > > + /* > + * No need to update mask value/restrict because > + * "Pcode only wants to use GV bandwidth value, not the mask value." > + * for DISPLAY_VER() >= 14. > + * > + * GV bandwidth will be set by intel_pmdemand_post_plane_update() > + */ > + if (DISPLAY_VER(i915) >= 14) > + return; > + > /* >* Just return if we can't control SAGV or don't have it. >* This is different from situation when we have SAGV but just can't > -- > 2.25.1 > -- Matt Roper Graphics Software Engineer VTT-OSGC Platform Enablement Intel Corporation
Re: [PATCH v2 14/21] drm/i915/mtl: memory latency data from LATENCY_LPX_LPY for WM
On Thu, Aug 18, 2022 at 04:41:55PM -0700, Radhakrishna Sripada wrote: > Since Xe LPD+, Memory latency data are in LATENCY_LPX_LPY registers > instead of GT driver mailbox. > > v2: Use the extracted wm latency adjustment function(Matt) > > Bspec: 64608 > > Cc: Matt Roper > Original Author: Caz Yokoyama > Signed-off-by: Radhakrishna Sripada > --- > drivers/gpu/drm/i915/i915_reg.h | 7 +++ > drivers/gpu/drm/i915/intel_pm.c | 21 ++--- > 2 files changed, 25 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index 04a269fa8717..b2d5e1230c25 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -8390,4 +8390,11 @@ enum skl_power_gate { > #define GEN12_STATE_ACK_DEBUG_MMIO(0x20BC) > > #define MTL_MEDIA_GSI_BASE 0x38 > + > +#define MTL_LATENCY_LP0_LP1 _MMIO(0x45780) > +#define MTL_LATENCY_LP2_LP3 _MMIO(0x45784) > +#define MTL_LATENCY_LP4_LP5 _MMIO(0x45788) > +#define MTL_LATENCY_LEVEL0_2_4_MASK REG_GENMASK(12, 0) > +#define MTL_LATENCY_LEVEL1_3_5_MASK REG_GENMASK(28, 16) You might consider "_{EVEN,ODD}_LEVEL_MASK" naming here, just in case future IP versions add additional levels beyond LP5. Otherwise, Reviewed-by: Matt Roper > + > #endif /* _I915_REG_H_ */ > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index 898e56d2eaf7..fac565d23d57 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -2908,13 +2908,28 @@ static void intel_read_wm_latency(struct > drm_i915_private *dev_priv, > u16 wm[]) > { > struct intel_uncore *uncore = &dev_priv->uncore; > + int max_level = ilk_wm_max_level(dev_priv); > > - if (DISPLAY_VER(dev_priv) >= 9) { > + if (DISPLAY_VER(dev_priv) >= 14) { > + u32 val; > + > + val = intel_uncore_read(uncore, MTL_LATENCY_LP0_LP1); > + wm[0] = REG_FIELD_GET(MTL_LATENCY_LEVEL0_2_4_MASK, val); > + wm[1] = REG_FIELD_GET(MTL_LATENCY_LEVEL1_3_5_MASK, val); > + val = intel_uncore_read(uncore, MTL_LATENCY_LP2_LP3); > + wm[2] = REG_FIELD_GET(MTL_LATENCY_LEVEL0_2_4_MASK, val); > + wm[3] = REG_FIELD_GET(MTL_LATENCY_LEVEL1_3_5_MASK, val); > + val = intel_uncore_read(uncore, MTL_LATENCY_LP4_LP5); > + wm[4] = REG_FIELD_GET(MTL_LATENCY_LEVEL0_2_4_MASK, val); > + wm[5] = REG_FIELD_GET(MTL_LATENCY_LEVEL1_3_5_MASK, val); > + > + adjust_wm_latency(wm, max_level, 6, > + dev_priv->dram_info.wm_lv_0_adjust_needed); > + } else if (DISPLAY_VER(dev_priv) >= 9) { > int read_latency = DISPLAY_VER(dev_priv) >= 12 ? 3 : 2; > + int mult = IS_DG2(dev_priv) ? 2 : 1; > u32 val; > int ret; > - int max_level = ilk_wm_max_level(dev_priv); > - int mult = IS_DG2(dev_priv) ? 2 : 1; > > /* read the first set of memory latencies[0:3] */ > val = 0; /* data0 to be programmed to 0 for first set */ > -- > 2.25.1 > -- Matt Roper Graphics Software Engineer VTT-OSGC Platform Enablement Intel Corporation
Re: [PATCH v2 13/21] drm/i915: Extract wm latency adjustment to its own function
On Thu, Aug 18, 2022 at 04:41:54PM -0700, Radhakrishna Sripada wrote: > Watermark latency is adjusted in cases when latency is 0us for level > greater than 1, the subsequent levels are disabled. Extract this logic > into its own function. > > Suggested-by: Matt Roper > Signed-off-by: Radhakrishna Sripada > --- > drivers/gpu/drm/i915/intel_pm.c | 88 ++--- > 1 file changed, 48 insertions(+), 40 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index ef7553b494ea..898e56d2eaf7 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -2861,15 +2861,59 @@ static void ilk_compute_wm_level(const struct > drm_i915_private *dev_priv, > result->enable = true; > } > > +static void > +adjust_wm_latency(u16 wm[], int max_level, int read_latency, > + bool wm_lv_0_adjust_needed) > +{ > + int i, level; > + > + /* > + * If a level n (n > 1) has a 0us latency, all levels m (m >= n) > + * need to be disabled. We make sure to sanitize the values out > + * of the punit to satisfy this requirement. > + */ > + for (level = 1; level <= max_level; level++) { > + if (wm[level] == 0) { > + for (i = level + 1; i <= max_level; i++) > + wm[i] = 0; > + > + max_level = level - 1; > + break; > + } > + } > + > + /* > + * WaWmMemoryReadLatency > + * > + * punit doesn't take into account the read latency so we need > + * to add proper adjustement to each valid level we retrieve > + * from the punit when level 0 response data is 0us. > + */ > + if (wm[0] == 0) { > + for (level = 0; level <= max_level; level++) > + wm[level] += read_latency; > + } > + > + /* > + * WA Level-0 adjustment for 16GB DIMMs: SKL+ > + * If we could not get dimm info enable this WA to prevent from > + * any underrun. If not able to get Dimm info assume 16GB dimm > + * to avoid any underrun. > + */ > + if (wm_lv_0_adjust_needed) > + wm[0] += 1; > +} > + > static void intel_read_wm_latency(struct drm_i915_private *dev_priv, > u16 wm[]) > { > struct intel_uncore *uncore = &dev_priv->uncore; > > if (DISPLAY_VER(dev_priv) >= 9) { > + int read_latency = DISPLAY_VER(dev_priv) >= 12 ? 3 : 2; > u32 val; > - int ret, i; > - int level, max_level = ilk_wm_max_level(dev_priv); > + int ret; > + int max_level = ilk_wm_max_level(dev_priv); > int mult = IS_DG2(dev_priv) ? 2 : 1; > > /* read the first set of memory latencies[0:3] */ > @@ -2909,44 +2953,8 @@ static void intel_read_wm_latency(struct > drm_i915_private *dev_priv, > wm[7] = ((val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & > GEN9_MEM_LATENCY_LEVEL_MASK) * mult; > > - /* > - * If a level n (n > 1) has a 0us latency, all levels m (m >= n) > - * need to be disabled. We make sure to sanitize the values out > - * of the punit to satisfy this requirement. > - */ > - for (level = 1; level <= max_level; level++) { > - if (wm[level] == 0) { > - for (i = level + 1; i <= max_level; i++) > - wm[i] = 0; > - > - max_level = level - 1; > - > - break; > - } > - } > - > - /* > - * WaWmMemoryReadLatency > - * > - * punit doesn't take into account the read latency so we need > - * to add proper adjustement to each valid level we retrieve > - * from the punit when level 0 response data is 0us. > - */ > - if (wm[0] == 0) { > - u8 adjust = DISPLAY_VER(dev_priv) >= 12 ? 3 : 2; > - > - for (level = 0; level <= max_level; level++) > - wm[level] += adjust; > - } > - > - /* > - * WA Level-0 adjustment for 16GB DIMMs: SKL+ > - * If we could not get dimm info enable this WA to prevent from > - * any underrun. If not able to get Dimm info assume 16GB dimm > - * to
Re: [PATCH v2 07/21] drm/i915/mtl: Add gmbus and gpio support
On Thu, Aug 18, 2022 at 04:41:48PM -0700, Radhakrishna Sripada wrote: > Add tables to map the GMBUS pin pairs to GPIO registers and port to DDC. > From spec we have registers GPIO_CTL[1-5] mapped to native display phys and > GPIO_CTL[9-14] are mapped to TC ports. > > BSpec: 49306 > > Original Author: Brian J Lovin > Signed-off-by: Radhakrishna Sripada > --- > drivers/gpu/drm/i915/display/intel_gmbus.c | 17 + > drivers/gpu/drm/i915/display/intel_gmbus.h | 1 + > 2 files changed, 18 insertions(+) > > diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c > b/drivers/gpu/drm/i915/display/intel_gmbus.c > index a6ba7fb72339..542b8b2654be 100644 > --- a/drivers/gpu/drm/i915/display/intel_gmbus.c > +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c > @@ -116,6 +116,20 @@ static const struct gmbus_pin gmbus_pins_dg2[] = { > [GMBUS_PIN_9_TC1_ICP] = { "tc1", GPIOJ }, > }; > > +static const struct gmbus_pin gmbus_pins_mtp[] = { > + [GMBUS_PIN_1_BXT] = { "dpa", GPIOB }, > + [GMBUS_PIN_2_BXT] = { "dpb", GPIOC }, > + [GMBUS_PIN_3_BXT] = { "dpc", GPIOD }, > + [GMBUS_PIN_4_CNP] = { "dpd", GPIOE }, > + [GMBUS_PIN_5_MTP] = { "dpe", GPIOF }, > + [GMBUS_PIN_9_TC1_ICP] = { "tc1", GPIOJ }, > + [GMBUS_PIN_10_TC2_ICP] = { "tc2", GPIOK }, > + [GMBUS_PIN_11_TC3_ICP] = { "tc3", GPIOL }, > + [GMBUS_PIN_12_TC4_ICP] = { "tc4", GPIOM }, > + [GMBUS_PIN_13_TC5_TGP] = { "tc5", GPION }, > + [GMBUS_PIN_14_TC6_TGP] = { "tc6", GPIOO }, There's no GPIO_CTL registers for pin 13 or pin 14 on on MTP so these last two entries shouldn't be here (and the commit message should be fixed too). Matt > +}; > + > static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *i915, >unsigned int pin) > { > @@ -128,6 +142,9 @@ static const struct gmbus_pin *get_gmbus_pin(struct > drm_i915_private *i915, > } else if (INTEL_PCH_TYPE(i915) >= PCH_DG1) { > pins = gmbus_pins_dg1; > size = ARRAY_SIZE(gmbus_pins_dg1); > + } else if (INTEL_PCH_TYPE(i915) >= PCH_MTP) { > + pins = gmbus_pins_mtp; > + size = ARRAY_SIZE(gmbus_pins_mtp); > } else if (INTEL_PCH_TYPE(i915) >= PCH_ICP) { > pins = gmbus_pins_icp; > size = ARRAY_SIZE(gmbus_pins_icp); > diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.h > b/drivers/gpu/drm/i915/display/intel_gmbus.h > index 8edc2e99cf53..20f704bd4e70 100644 > --- a/drivers/gpu/drm/i915/display/intel_gmbus.h > +++ b/drivers/gpu/drm/i915/display/intel_gmbus.h > @@ -24,6 +24,7 @@ struct i2c_adapter; > #define GMBUS_PIN_2_BXT 2 > #define GMBUS_PIN_3_BXT 3 > #define GMBUS_PIN_4_CNP 4 > +#define GMBUS_PIN_5_MTP 5 > #define GMBUS_PIN_9_TC1_ICP 9 > #define GMBUS_PIN_10_TC2_ICP 10 > #define GMBUS_PIN_11_TC3_ICP 11 > -- > 2.25.1 > -- Matt Roper Graphics Software Engineer VTT-OSGC Platform Enablement Intel Corporation
Re: [Intel-gfx] [PATCH v2 01/21] drm/i915: Read graphics/media/display arch version from hw
On Thu, Aug 18, 2022 at 04:41:42PM -0700, Radhakrishna Sripada wrote: > From: Matt Roper > > Going forward, the hardware teams no longer consider new platforms to > have a "generation" in the way we've defined it for past platforms. > Instead, each IP block (graphics, media, display) will have their own > architecture major.minor versions and stepping ID's which should be read > directly from a register in the MMIO space. New hardware programming > styles, features, and workarounds should be conditional solely on the > architecture version, and should no longer be derived from the PCI > device ID, revision ID, or platform-specific feature flags. > > v1.1: Fix build error As Jani noted on the previous version, this patch needs to be split into three patches (and/or be based on top of the other series that Jani has in flight). Also the giant macro is no longer necessary on current drm-tip now that we the version values stored consistently in structures; we can just use a regular function and pass pointers to the structures. Bala also had feedback on the previous version that hasn't been incorporated here yet either. Matt > > Bspec: 63361, 64111 > > Signed-off-by: Matt Roper > Signed-off-by: Rodrigo Vivi > Signed-off-by: Radhakrishna Sripada > --- > drivers/gpu/drm/i915/gt/intel_gt_regs.h | 2 + > drivers/gpu/drm/i915/i915_driver.c| 80 ++- > drivers/gpu/drm/i915/i915_drv.h | 16 ++-- > drivers/gpu/drm/i915/i915_pci.c | 1 + > drivers/gpu/drm/i915/i915_reg.h | 6 ++ > drivers/gpu/drm/i915/intel_device_info.c | 32 > drivers/gpu/drm/i915/intel_device_info.h | 14 > .../gpu/drm/i915/selftests/mock_gem_device.c | 1 + > 8 files changed, 128 insertions(+), 24 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h > b/drivers/gpu/drm/i915/gt/intel_gt_regs.h > index 94f9ddcfb3a5..a053493dae24 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h > +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h > @@ -39,6 +39,8 @@ > #define FORCEWAKE_ACK_RENDER_GEN9_MMIO(0xd84) > #define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0xd88) > > +#define GMD_ID_GRAPHICS _MMIO(0xd8c) > + > #define MCFG_MCR_SELECTOR_MMIO(0xfd0) > #define SF_MCR_SELECTOR _MMIO(0xfd8) > #define GEN8_MCR_SELECTOR_MMIO(0xfdc) > diff --git a/drivers/gpu/drm/i915/i915_driver.c > b/drivers/gpu/drm/i915/i915_driver.c > index deb8a8b76965..33566f6e9546 100644 > --- a/drivers/gpu/drm/i915/i915_driver.c > +++ b/drivers/gpu/drm/i915/i915_driver.c > @@ -70,6 +70,7 @@ > #include "gem/i915_gem_pm.h" > #include "gt/intel_gt.h" > #include "gt/intel_gt_pm.h" > +#include "gt/intel_gt_regs.h" > #include "gt/intel_rc6.h" > > #include "pxp/intel_pxp_pm.h" > @@ -306,15 +307,83 @@ static void sanitize_gpu(struct drm_i915_private *i915) > __intel_gt_reset(to_gt(i915), ALL_ENGINES); > } > > +#define IP_VER_READ(offset, ri_prefix) \ > + addr = pci_iomap_range(pdev, 0, offset, sizeof(u32)); \ > + if (drm_WARN_ON(&i915->drm, !addr)) { \ > + /* Fall back to whatever was in the device info */ \ > + RUNTIME_INFO(i915)->ri_prefix.ver = > INTEL_INFO(i915)->ri_prefix.ver; \ > + RUNTIME_INFO(i915)->ri_prefix.rel = > INTEL_INFO(i915)->ri_prefix.rel; \ > + goto ri_prefix##done; \ > + } \ > + \ > + ver = ioread32(addr); \ > + pci_iounmap(pdev, addr); \ > + \ > + RUNTIME_INFO(i915)->ri_prefix.ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, > ver); \ > + RUNTIME_INFO(i915)->ri_prefix.rel = REG_FIELD_GET(GMD_ID_RELEASE_MASK, > ver); \ > + RUNTIME_INFO(i915)->ri_prefix.step = REG_FIELD_GET(GMD_ID_STEP, ver); \ > + \ > + /* Sanity check against expected versions from device info */ \ > + if (RUNTIME_INFO(i915)->ri_prefix.ver != > INTEL_INFO(i915)->ri_prefix.ver || \ > + RUNTIME_INFO(i915)->ri_prefix.rel > > INTEL_INFO(i915)->ri_prefix.rel) \ > + drm_dbg(&i915->drm, \ > + "Hardware reports " #ri_prefix " IP version %u.%u but > minimum expected is %u.%u\n", \ > + RUNTIME_INFO(i915)->ri_prefix.ver, \ > + RUNTIME_INFO(i915)->ri_prefix.rel, \ > + INTEL_INFO(i915)->ri_prefix.ver, \ > + INTEL_INFO(i915)->ri_prefix.rel); \ > +ri_prefix##done: > + > +/** > + * intel_ip
[PATCH v2 2/2] drm/i915/dg2: Add additional tuning settings
Some additional MMIO tuning settings have appeared in the bspec's performance tuning guide section. One of the tuning settings here is also documented as formal workaround Wa_22012654132 for some steppings of DG2. However the tuning setting applies to all DG2 variants and steppings, making it a superset of the workaround. v2: - Move DRAW_WATERMARK to engine workaround section. It only moves into the engine context on future platforms. (Lucas) - CHICKEN_RASTER_2 needs to be handled as a masked register. (Lucas) Bspec: 68331 Cc: Lucas De Marchi Cc: Lionel Landwerlin Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 8 ++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 27 ++--- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index b3b49f6d6d1c..f64fafe28f72 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -259,6 +259,9 @@ #define GEN9_PREEMPT_GPGPU_COMMAND_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(1, 0) #define GEN9_PREEMPT_GPGPU_LEVEL_MASK GEN9_PREEMPT_GPGPU_LEVEL(1, 1) +#define DRAW_WATERMARK _MMIO(0x26c0) +#define VERT_WM_VAL REG_GENMASK(9, 0) + #define GEN12_GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */ #define RENDER_HWS_PGA_GEN7_MMIO(0x4080) @@ -374,6 +377,9 @@ #define CHICKEN_RASTER_1 _MMIO(0x6204) #define DIS_SF_ROUND_NEAREST_EVENREG_BIT(8) +#define CHICKEN_RASTER_2 _MMIO(0x6208) +#define TBIMR_FAST_CLIP REG_BIT(5) + #define VFLSKPD_MMIO(0x62a8) #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) @@ -1124,6 +1130,8 @@ #define RT_CTRL_MMIO(0xe530) #define DIS_NULL_QUERY REG_BIT(10) +#define STACKID_CTRL REG_GENMASK(6, 5) +#define STACKID_CTRL_512 REG_FIELD_PREP(STACKID_CTRL, 0x2) #define EU_PERF_CNTL1 _MMIO(0xe558) #define EU_PERF_CNTL5 _MMIO(0xe55c) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index a68d279b01f0..31e129329fb0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -568,6 +568,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); wa_add(wal, @@ -2195,15 +2196,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || - IS_DG2_G11(i915)) { - /* Wa_22012654132:dg2 */ - wa_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), - 0 /* write-only, so skip validation */, - true); - } - /* Wa_14013202645:dg2 */ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) @@ -2692,6 +2684,23 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, if (IS_DG2(i915)) { wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); + wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); + wa_write_clr_set(wal, DRAW_WATERMARK, VERT_WM_VAL, +REG_FIELD_PREP(VERT_WM_VAL, 0x3FF)); + + /* +* This is also listed as Wa_22012654132 for certain DG2 +* steppings, but the tuning setting programming is a superset +* since it applies to all DG2 variants and steppings. +* +* Note that register 0xE420 is write-only and cannot be read +* back for verification on DG2 (due to Wa_14012342262), so +* we need to explicitly skip the readback. +*/ + wa_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); } } -- 2.37.1
[PATCH v2 2/2] drm/i915/dg2: Add additional tuning settings
Some additional MMIO tuning settings have appeared in the bspec's performance tuning guide section. One of the tuning settings here is also documented as formal workaround Wa_22012654132 for some steppings of DG2. However the tuning setting applies to all DG2 variants and steppings, making it a superset of the workaround. v2: - Move DRAW_WATERMARK to engine workaround section. It only moves into the engine context on future platforms. (Lucas) - CHICKEN_RASTER_2 needs to be handled as a masked register. (Lucas) Bspec: 68331 Cc: Lucas De Marchi Cc: Lionel Landwerlin Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 8 ++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 27 ++--- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index b3b49f6d6d1c..f64fafe28f72 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -259,6 +259,9 @@ #define GEN9_PREEMPT_GPGPU_COMMAND_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(1, 0) #define GEN9_PREEMPT_GPGPU_LEVEL_MASK GEN9_PREEMPT_GPGPU_LEVEL(1, 1) +#define DRAW_WATERMARK _MMIO(0x26c0) +#define VERT_WM_VAL REG_GENMASK(9, 0) + #define GEN12_GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */ #define RENDER_HWS_PGA_GEN7_MMIO(0x4080) @@ -374,6 +377,9 @@ #define CHICKEN_RASTER_1 _MMIO(0x6204) #define DIS_SF_ROUND_NEAREST_EVENREG_BIT(8) +#define CHICKEN_RASTER_2 _MMIO(0x6208) +#define TBIMR_FAST_CLIP REG_BIT(5) + #define VFLSKPD_MMIO(0x62a8) #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) @@ -1124,6 +1130,8 @@ #define RT_CTRL_MMIO(0xe530) #define DIS_NULL_QUERY REG_BIT(10) +#define STACKID_CTRL REG_GENMASK(6, 5) +#define STACKID_CTRL_512 REG_FIELD_PREP(STACKID_CTRL, 0x2) #define EU_PERF_CNTL1 _MMIO(0xe558) #define EU_PERF_CNTL5 _MMIO(0xe55c) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index a68d279b01f0..31e129329fb0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -568,6 +568,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); wa_add(wal, @@ -2195,15 +2196,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || - IS_DG2_G11(i915)) { - /* Wa_22012654132:dg2 */ - wa_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), - 0 /* write-only, so skip validation */, - true); - } - /* Wa_14013202645:dg2 */ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) @@ -2692,6 +2684,23 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, if (IS_DG2(i915)) { wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); + wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); + wa_write_clr_set(wal, DRAW_WATERMARK, VERT_WM_VAL, +REG_FIELD_PREP(VERT_WM_VAL, 0x3FF)); + + /* +* This is also listed as Wa_22012654132 for certain DG2 +* steppings, but the tuning setting programming is a superset +* since it applies to all DG2 variants and steppings. +* +* Note that register 0xE420 is write-only and cannot be read +* back for verification on DG2 (due to Wa_14012342262), so +* we need to explicitly skip the readback. +*/ + wa_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); } } -- 2.37.1
[PATCH] drm/i915: Correct ss -> steering calculation for pre-Xe_HP platforms
Accidental use of a "SLICE" macro where a "SUBSLICE" macro was intended causes the group ID for steering to be calculated incorrectly on pre-Xe_HP platforms. Fixes: 9a92732f040a ("drm/i915/gt: Add general DSS steering iterator to intel_gt_mcr") Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index f8c64ab9d3ca..e79405a45312 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -515,7 +515,7 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, *group = dss / GEN_DSS_PER_GSLICE; *instance = dss % GEN_DSS_PER_GSLICE; } else { - *group = dss / GEN_MAX_HSW_SLICES; + *group = dss / GEN_MAX_SS_PER_HSW_SLICE; *instance = dss % GEN_MAX_SS_PER_HSW_SLICE; return; } -- 2.36.1
Re: [PATCH] drm/i915/display: clean up comments
On Fri, Jul 01, 2022 at 04:32:36PM -0400, Tom Rix wrote: > spelling changes > resoluition -> resolution > dont-> don't > commmit -> commit > Invalidade -> Invalidate > > Signed-off-by: Tom Rix Reviewed-by: Matt Roper and applied to drm-intel-next. Thanks for the patch. Matt > --- > drivers/gpu/drm/i915/display/intel_psr.c | 8 > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/display/intel_psr.c > b/drivers/gpu/drm/i915/display/intel_psr.c > index 7d61c55184e5..e6a870641cd2 100644 > --- a/drivers/gpu/drm/i915/display/intel_psr.c > +++ b/drivers/gpu/drm/i915/display/intel_psr.c > @@ -555,7 +555,7 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) > /* >* TODO: 7 lines of IO_BUFFER_WAKE and FAST_WAKE are default >* values from BSpec. In order to setting an optimal power > - * consumption, lower than 4k resoluition mode needs to decrese > + * consumption, lower than 4k resolution mode needs to decrease >* IO_BUFFER_WAKE and FAST_WAKE. And higher than 4K resolution >* mode needs to increase IO_BUFFER_WAKE and FAST_WAKE. >*/ > @@ -959,7 +959,7 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, > int psr_setup_time; > > /* > - * Current PSR panels dont work reliably with VRR enabled > + * Current PSR panels don't work reliably with VRR enabled >* So if VRR is enabled, do not enable PSR. >*/ > if (crtc_state->vrr.enable) > @@ -1664,7 +1664,7 @@ static void intel_psr2_sel_fetch_pipe_alignment(const > struct intel_crtc_state *c > * > * Plane scaling and rotation is not supported by selective fetch and both > * properties can change without a modeset, so need to be check at every > - * atomic commmit. > + * atomic commit. > */ > static bool psr2_sel_fetch_plane_state_supported(const struct > intel_plane_state *plane_state) > { > @@ -2203,7 +2203,7 @@ static void _psr_invalidate_handle(struct intel_dp > *intel_dp) > } > > /** > - * intel_psr_invalidate - Invalidade PSR > + * intel_psr_invalidate - Invalidate PSR > * @dev_priv: i915 device > * @frontbuffer_bits: frontbuffer plane tracking bits > * @origin: which operation caused the invalidate > -- > 2.27.0 > -- Matt Roper Graphics Software Engineer VTT-OSGC Platform Enablement Intel Corporation
Re: [Intel-gfx] [PATCH v8 0/3] drm/doc/rfc: i915 VM_BIND feature design + uapi
On Thu, Jun 30, 2022 at 05:31:07PM -0700, Niranjana Vishwanathapura wrote: > This is the i915 driver VM_BIND feature design RFC patch series along > with the required uapi definition and description of intended use cases. > > v2: Reduce the scope to simple Mesa use case. > Remove all compute related uapi, vm_bind/unbind queue support and > only support a timeline out fence instead of an in/out timeline > fence array. > v3: Expand documentation on dma-resv usage, TLB flushing, execbuf3 and > VM_UNBIND. Add FENCE_VALID and TLB_FLUSH flags. > v4: Remove I915_GEM_VM_BIND_TLB_FLUSH flag and add additional > uapi documentation for vm_bind/unbind. > v5: Update TLB flushing documentation. > Add version support to stage implementation. > v6: Define and use drm_i915_gem_timeline_fence structure for > execbuf3 and vm_bind/unbind timeline fences. > v7: Rename I915_PARAM_HAS_VM_BIND to I915_PARAM_VM_BIND_VERSION. > Update documentation on async vm_bind/unbind and versioning. > Remove redundant vm_bind/unbind FENCE_VALID flag, execbuf3 > batch_count field and I915_EXEC3_SECURE flag. > v8: Remove I915_GEM_VM_BIND_READONLY and minor documentation > updates. > > Signed-off-by: Niranjana Vishwanathapura > Reviewed-by: Daniel Vetter > Acked-by: Paulo Zanoni Series applied to drm-intel-gt-next. Thanks for the patches and reviews. Matt > > Niranjana Vishwanathapura (3): > drm/doc/rfc: VM_BIND feature design document > drm/i915: Update i915 uapi documentation > drm/doc/rfc: VM_BIND uapi definition > > Documentation/gpu/rfc/i915_vm_bind.h | 291 + > Documentation/gpu/rfc/i915_vm_bind.rst | 245 + > Documentation/gpu/rfc/index.rst| 4 + > include/uapi/drm/i915_drm.h| 205 + > 4 files changed, 700 insertions(+), 45 deletions(-) > create mode 100644 Documentation/gpu/rfc/i915_vm_bind.h > create mode 100644 Documentation/gpu/rfc/i915_vm_bind.rst > > -- > 2.21.0.rc0.32.g243a4c7e27 > -- Matt Roper Graphics Software Engineer VTT-OSGC Platform Enablement Intel Corporation
Re: [PATCH 2/2] drm/i915: DG2 and ATS-M device ID updates
On Fri, Jul 01, 2022 at 09:59:50AM -0700, Lucas De Marchi wrote: > On Fri, Jul 01, 2022 at 08:22:31AM -0700, Matt Roper wrote: > > Small BAR support has now landed, which allows us to add the PCI IDs > > that correspond to add-in card designs of DG2 and ATS-M. There's also > > one additional MB-down PCI ID that recently appeared (0x5698) so we add > > it too. > > > > Cc: Lucas De Marchi > > Signed-off-by: Matt Roper > > > Reviewed-by: Lucas De Marchi Thanks. Applied to drm-intel-gt-next (since that's the branch that has the small BAR patches that were a pre-req to adding these IDs) and dropped the equivalent patch from topic/core-from-CI. Matt > > Lucas De Marchi -- Matt Roper Graphics Software Engineer VTT-OSGC Platform Enablement Intel Corporation
[PATCH] drm/i915/gt: Add general DSS steering iterator to intel_gt_mcr
Although all DSS belong to a single pool on Xe_HP platforms (i.e., they're not organized into slices from a topology point of view), we do still need to pass 'group' and 'instance' targets when steering register accesses to a specific instance of a per-DSS multicast register. The rules for how to determine group and instance IDs (which previously used legacy terms "slice" and "subslice") varies by platform. Some platforms determine steering by gslice membership, some platforms by cslice membership, and future platforms may have other rules. Since looping over each DSS and performing steered unicast register accesses is a relatively common pattern, let's add a dedicated iteration macro to handle this (and replace the platform-specific "instdone" loop we were using previously. This will avoid the calling code needing to figure out the details about how to obtain steering IDs for a specific DSS. Most of the places where we use this new loop are in the GPU errorstate code at the moment, but we do have some additional features coming in the future that will also need to loop over each DSS and steer some register accesses accordingly. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 34 ++- drivers/gpu/drm/i915/gt/intel_engine_types.h | 22 drivers/gpu/drm/i915/gt/intel_gt_mcr.c| 25 ++ drivers/gpu/drm/i915/gt/intel_gt_mcr.h| 24 + .../gpu/drm/i915/gt/uc/intel_guc_capture.c| 13 --- drivers/gpu/drm/i915/i915_gpu_error.c | 32 ++--- 6 files changed, 75 insertions(+), 75 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 283870c65991..37fa813af766 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1517,7 +1517,6 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, struct intel_instdone *instdone) { struct drm_i915_private *i915 = engine->i915; - const struct sseu_dev_info *sseu = &engine->gt->info.sseu; struct intel_uncore *uncore = engine->uncore; u32 mmio_base = engine->mmio_base; int slice; @@ -1542,32 +1541,19 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2); } - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { - instdone->sampler[slice][subslice] = - intel_gt_mcr_read(engine->gt, - GEN7_SAMPLER_INSTDONE, - slice, subslice); - instdone->row[slice][subslice] = - intel_gt_mcr_read(engine->gt, - GEN7_ROW_INSTDONE, - slice, subslice); - } - } else { - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { - instdone->sampler[slice][subslice] = - intel_gt_mcr_read(engine->gt, - GEN7_SAMPLER_INSTDONE, - slice, subslice); - instdone->row[slice][subslice] = - intel_gt_mcr_read(engine->gt, - GEN7_ROW_INSTDONE, - slice, subslice); - } + for_each_ss_steering(iter, engine->gt, slice, subslice) { + instdone->sampler[slice][subslice] = + intel_gt_mcr_read(engine->gt, + GEN7_SAMPLER_INSTDONE, + slice, subslice); + instdone->row[slice][subslice] = + intel_gt_mcr_read(engine->gt, + GEN7_ROW_INSTDONE, + slice, subslice); } if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) + for_each_ss_steering(iter, engine->gt, slice, subslice)
[PATCH 1/2] Revert "topic/core-for-CI: Add remaining DG2 and ATS-M device IDs"
This reverts commit f7d7dddaab81eeae4508197b5f38f0b974d97b8c. In reality we'll just rebase this patch out of the topic/core-for-CI branch after landing the "real" copy in drm-intel. But to prevent pre-merge CI from getting confused, we send it as an explicit revert on the mailing list. Cc: Lucas De Marchi Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/i915_pci.c | 2 +- drivers/gpu/drm/i915/intel_device_info.c | 2 -- include/drm/i915_pciids.h| 25 +++- 3 files changed, 4 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 0cdd6513fbb7..5edc8fbf1dff 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1075,6 +1075,7 @@ static const struct intel_device_info dg2_info = { .require_force_probe = 1, }; +__maybe_unused static const struct intel_device_info ats_m_info = { DG2_FEATURES, .display = { 0 }, @@ -1188,7 +1189,6 @@ static const struct pci_device_id pciidlist[] = { INTEL_RPLS_IDS(&adl_s_info), INTEL_RPLP_IDS(&adl_p_info), INTEL_DG2_IDS(&dg2_info), - INTEL_ATS_M_IDS(&ats_m_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index f0bf23726ed8..7eb893666595 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -189,12 +189,10 @@ static const u16 subplatform_rpl_ids[] = { static const u16 subplatform_g10_ids[] = { INTEL_DG2_G10_IDS(0), - INTEL_ATS_M150_IDS(0), }; static const u16 subplatform_g11_ids[] = { INTEL_DG2_G11_IDS(0), - INTEL_ATS_M75_IDS(0), }; static const u16 subplatform_g12_ids[] = { diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 4585fed4e41e..283dadfbb4db 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -696,41 +696,22 @@ #define INTEL_DG2_G10_IDS(info) \ INTEL_VGA_DEVICE(0x5690, info), \ INTEL_VGA_DEVICE(0x5691, info), \ - INTEL_VGA_DEVICE(0x5692, info), \ - INTEL_VGA_DEVICE(0x56A0, info), \ - INTEL_VGA_DEVICE(0x56A1, info), \ - INTEL_VGA_DEVICE(0x56A2, info) + INTEL_VGA_DEVICE(0x5692, info) #define INTEL_DG2_G11_IDS(info) \ INTEL_VGA_DEVICE(0x5693, info), \ INTEL_VGA_DEVICE(0x5694, info), \ INTEL_VGA_DEVICE(0x5695, info), \ - INTEL_VGA_DEVICE(0x56A5, info), \ - INTEL_VGA_DEVICE(0x56A6, info), \ - INTEL_VGA_DEVICE(0x56B0, info), \ - INTEL_VGA_DEVICE(0x56B1, info) + INTEL_VGA_DEVICE(0x56B0, info) #define INTEL_DG2_G12_IDS(info) \ INTEL_VGA_DEVICE(0x5696, info), \ INTEL_VGA_DEVICE(0x5697, info), \ - INTEL_VGA_DEVICE(0x56A3, info), \ - INTEL_VGA_DEVICE(0x56A4, info), \ - INTEL_VGA_DEVICE(0x56B2, info), \ - INTEL_VGA_DEVICE(0x56B3, info) + INTEL_VGA_DEVICE(0x56B2, info) #define INTEL_DG2_IDS(info) \ INTEL_DG2_G10_IDS(info), \ INTEL_DG2_G11_IDS(info), \ INTEL_DG2_G12_IDS(info) -#define INTEL_ATS_M150_IDS(info) \ - INTEL_VGA_DEVICE(0x56C0, info) - -#define INTEL_ATS_M75_IDS(info) \ - INTEL_VGA_DEVICE(0x56C1, info) - -#define INTEL_ATS_M_IDS(info) \ - INTEL_ATS_M150_IDS(info), \ - INTEL_ATS_M75_IDS(info) - #endif /* _I915_PCIIDS_H */ -- 2.36.1
[PATCH 2/2] drm/i915: DG2 and ATS-M device ID updates
Small BAR support has now landed, which allows us to add the PCI IDs that correspond to add-in card designs of DG2 and ATS-M. There's also one additional MB-down PCI ID that recently appeared (0x5698) so we add it too. Cc: Lucas De Marchi Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/i915_pci.c | 2 +- drivers/gpu/drm/i915/intel_device_info.c | 2 ++ include/drm/i915_pciids.h| 26 +--- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 5edc8fbf1dff..0cdd6513fbb7 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1075,7 +1075,6 @@ static const struct intel_device_info dg2_info = { .require_force_probe = 1, }; -__maybe_unused static const struct intel_device_info ats_m_info = { DG2_FEATURES, .display = { 0 }, @@ -1189,6 +1188,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_RPLS_IDS(&adl_s_info), INTEL_RPLP_IDS(&adl_p_info), INTEL_DG2_IDS(&dg2_info), + INTEL_ATS_M_IDS(&ats_m_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 7eb893666595..f0bf23726ed8 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -189,10 +189,12 @@ static const u16 subplatform_rpl_ids[] = { static const u16 subplatform_g10_ids[] = { INTEL_DG2_G10_IDS(0), + INTEL_ATS_M150_IDS(0), }; static const u16 subplatform_g11_ids[] = { INTEL_DG2_G11_IDS(0), + INTEL_ATS_M75_IDS(0), }; static const u16 subplatform_g12_ids[] = { diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 283dadfbb4db..1bd0420a213d 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -696,22 +696,42 @@ #define INTEL_DG2_G10_IDS(info) \ INTEL_VGA_DEVICE(0x5690, info), \ INTEL_VGA_DEVICE(0x5691, info), \ - INTEL_VGA_DEVICE(0x5692, info) + INTEL_VGA_DEVICE(0x5692, info), \ + INTEL_VGA_DEVICE(0x56A0, info), \ + INTEL_VGA_DEVICE(0x56A1, info), \ + INTEL_VGA_DEVICE(0x56A2, info) #define INTEL_DG2_G11_IDS(info) \ INTEL_VGA_DEVICE(0x5693, info), \ INTEL_VGA_DEVICE(0x5694, info), \ INTEL_VGA_DEVICE(0x5695, info), \ - INTEL_VGA_DEVICE(0x56B0, info) + INTEL_VGA_DEVICE(0x5698, info), \ + INTEL_VGA_DEVICE(0x56A5, info), \ + INTEL_VGA_DEVICE(0x56A6, info), \ + INTEL_VGA_DEVICE(0x56B0, info), \ + INTEL_VGA_DEVICE(0x56B1, info) #define INTEL_DG2_G12_IDS(info) \ INTEL_VGA_DEVICE(0x5696, info), \ INTEL_VGA_DEVICE(0x5697, info), \ - INTEL_VGA_DEVICE(0x56B2, info) + INTEL_VGA_DEVICE(0x56A3, info), \ + INTEL_VGA_DEVICE(0x56A4, info), \ + INTEL_VGA_DEVICE(0x56B2, info), \ + INTEL_VGA_DEVICE(0x56B3, info) #define INTEL_DG2_IDS(info) \ INTEL_DG2_G10_IDS(info), \ INTEL_DG2_G11_IDS(info), \ INTEL_DG2_G12_IDS(info) +#define INTEL_ATS_M150_IDS(info) \ + INTEL_VGA_DEVICE(0x56C0, info) + +#define INTEL_ATS_M75_IDS(info) \ + INTEL_VGA_DEVICE(0x56C1, info) + +#define INTEL_ATS_M_IDS(info) \ + INTEL_ATS_M150_IDS(info), \ + INTEL_ATS_M75_IDS(info) + #endif /* _I915_PCIIDS_H */ -- 2.36.1
[PATCH 2/2] drm/i915: Prefer "XEHP_" prefix for registers
We've been introducing new registers with a mix of "XEHP_" (architecture) and "XEHPSDV_" (platform) prefixes. For consistency, let's settle on "XEHP_" as the preferred form. XEHPSDV_RP_STATE_CAP stays with its current name since that's truly a platform-specific register and not something that applies to the Xe_HP architecture as a whole. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 8 drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 8 drivers/gpu/drm/i915/i915_reg.h | 6 +++--- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index e63de9c06596..166d0a4b9e8c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -836,8 +836,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, } else { resource_size_t lmem_range; - lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0x; - lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT; + lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHP_TILE0_ADDR_RANGE) & 0x; + lmem_size = lmem_range >> XEHP_TILE_LMEM_RANGE_SHIFT; lmem_size *= SZ_1G; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 61815b6e87de..37c1095d8603 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -324,11 +324,11 @@ #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) -#define XEHPSDV_TILE0_ADDR_RANGE _MMIO(0x4900) -#define XEHPSDV_TILE_LMEM_RANGE_SHIFT8 +#define XEHP_TILE0_ADDR_RANGE _MMIO(0x4900) +#define XEHP_TILE_LMEM_RANGE_SHIFT 8 -#define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910) -#define XEHPSDV_CCS_BASE_SHIFT 8 +#define XEHP_FLAT_CCS_BASE_ADDR_MMIO(0x4910) +#define XEHP_CCS_BASE_SHIFT 8 #define GAMTARBMODE_MMIO(0x4a08) #define ARB_MODE_BWGTLB_DISABLE (1 << 9) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index ae8a8f725f01..73a8b46e0234 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -679,7 +679,7 @@ static ssize_t media_RP0_freq_mhz_show(struct device *dev, u32 val; int err; - err = snb_pcode_read_p(gt->uncore, XEHPSDV_PCODE_FREQUENCY_CONFIG, + err = snb_pcode_read_p(gt->uncore, XEHP_PCODE_FREQUENCY_CONFIG, PCODE_MBOX_FC_SC_READ_FUSED_P0, PCODE_MBOX_DOMAIN_MEDIAFF, &val); @@ -700,7 +700,7 @@ static ssize_t media_RPn_freq_mhz_show(struct device *dev, u32 val; int err; - err = snb_pcode_read_p(gt->uncore, XEHPSDV_PCODE_FREQUENCY_CONFIG, + err = snb_pcode_read_p(gt->uncore, XEHP_PCODE_FREQUENCY_CONFIG, PCODE_MBOX_FC_SC_READ_FUSED_PN, PCODE_MBOX_DOMAIN_MEDIAFF, &val); diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 2ff448047020..d09b996a9759 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -105,12 +105,12 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) resource_size_t lmem_range; u64 tile_stolen, flat_ccs_base; - lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0x; - lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT; + lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHP_TILE0_ADDR_RANGE) & 0x; + lmem_size = lmem_range >> XEHP_TILE_LMEM_RANGE_SHIFT; lmem_size *= SZ_1G; - flat_ccs_base = intel_gt_mcr_read_any(gt, XEHPSDV_FLAT_CCS_BASE_ADDR); - flat_ccs_base = (flat_ccs_base >> XEHPSDV_CCS_BASE_SHIFT) * SZ_64K; + flat_ccs_base = intel_gt_mcr_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); + flat_ccs_base = (flat_ccs_base >> XEHP_CCS_BASE_SHIFT) * SZ_64K; /* FIXME: Remove this when we have small-bar enabled */ if (pci_resource_len(pdev, 2) < lmem_size) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index cf5e16abf6c7..643d7f020a4a 100644 --- a/
[PATCH 1/2] drm/i915: Correct duplicated/misplaced GT register definitions
XEHPSDV_FLAT_CCS_BASE_ADDR, GEN8_L3_LRA_1_GPGPU, and MMCD_MISC_CTRL were duplicated between i915_reg.h and intel_gt_regs.h. These are all GT registers, so we should drop the copy from i915_reg.h. XEHPSDV_TILE0_ADDR_RANGE was defined in i915_reg.h, but really belongs in intel_gt_regs.h. Move it. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 1 + drivers/gpu/drm/i915/gt/intel_gt_regs.h| 3 +++ drivers/gpu/drm/i915/i915_reg.h| 17 - 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index fa54823d1219..e63de9c06596 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -14,6 +14,7 @@ #include "gem/i915_gem_region.h" #include "gt/intel_gt.h" #include "gt/intel_gt_mcr.h" +#include "gt/intel_gt_regs.h" #include "gt/intel_region_lmem.h" #include "i915_drv.h" #include "i915_gem_stolen.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 07ef111947b8..61815b6e87de 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -324,6 +324,9 @@ #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) +#define XEHPSDV_TILE0_ADDR_RANGE _MMIO(0x4900) +#define XEHPSDV_TILE_LMEM_RANGE_SHIFT8 + #define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910) #define XEHPSDV_CCS_BASE_SHIFT 8 diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 932bd6aa4a0a..cf5e16abf6c7 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8345,23 +8345,6 @@ enum skl_power_gate { #define SGGI_DIS REG_BIT(15) #define SGR_DIS REG_BIT(13) -#define XEHPSDV_TILE0_ADDR_RANGE _MMIO(0x4900) -#define XEHPSDV_TILE_LMEM_RANGE_SHIFT 8 - -#define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910) -#define XEHPSDV_CCS_BASE_SHIFT 8 - -/* gamt regs */ -#define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4) -#define GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW 0x67F1427F /* max/min for LRA1/2 */ -#define GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV 0x5FF101FF /* max/min for LRA1/2 */ -#define GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL 0x67F1427F /*"" */ -#define GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT 0x5FF101FF /*"" */ - -#define MMCD_MISC_CTRL _MMIO(0x4ddc) /* skl+ */ -#define MMCD_PCLA (1 << 31) -#define MMCD_HOTSPOT_EN (1 << 27) - #define _ICL_PHY_MISC_A0x64C00 #define _ICL_PHY_MISC_B0x64C04 #define _DG2_PHY_MISC_TC1 0x64C14 /* TC1="PHY E" but offset as if "PHY F" */ -- 2.36.1
Re: [Intel-gfx] [PATCH] drm/i915/guc: ADL-N should use the same GuC FW as ADL-S
On Tue, Jun 21, 2022 at 04:30:05PM -0700, Daniele Ceraolo Spurio wrote: > The only difference between the ADL S and P GuC FWs is the HWConfig > support. ADL-N does not support HWConfig, so we should use the same > binary as ADL-S, otherwise the GuC might attempt to fetch a config > table that does not exist. ADL-N is internally identified as an ADL-P, > so we need to special-case it in the FW selection code. > > Fixes: 7e28d0b26759 ("drm/i915/adl-n: Enable ADL-N platform") > Cc: John Harrison > Cc: Tejas Upadhyay > Cc: Anusha Srivatsa > Cc: Jani Nikula > Signed-off-by: Daniele Ceraolo Spurio Would the config table still get used somehow even though we return false for ADL-N in has_table()? Even if it couldn't be used, this change makes the behavior more clear and explicit. Reviewed-by: Matt Roper > --- > drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 9 + > 1 file changed, 9 insertions(+) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c > b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c > index d2c5c9367cc4..ef2d10184ee2 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c > @@ -162,6 +162,15 @@ __uc_fw_auto_select(struct drm_i915_private *i915, > struct intel_uc_fw *uc_fw) > u8 rev = INTEL_REVID(i915); > int i; > > + /* > + * The only difference between the ADL GuC FWs is the HWConfig support. > + * ADL-N does not support HWConfig, so we should use the same binary as > + * ADL-S, otherwise the GuC might attempt to fetch a config table that > + * does not exist. > + */ > + if (IS_ADLP_N(i915)) > + p = INTEL_ALDERLAKE_S; > + > GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); > fw_blobs = blobs_all[uc_fw->type].blobs; > fw_count = blobs_all[uc_fw->type].count; > -- > 2.25.1 > -- Matt Roper Graphics Software Engineer VTT-OSGC Platform Enablement Intel Corporation
Re: [PATCH 2/2] drm/i915/gt: Re-do the intel-gtt split
snb_pte_encode; > + > + ggtt->vm.vma_ops.bind_vma= intel_ggtt_bind_vma; > + ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; > + > + return ggtt_probe_common(ggtt, size); > +} > + > static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) > { > struct drm_i915_private *i915 = gt->i915; > @@ -576,12 +1104,12 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, > struct intel_gt *gt) > ggtt->vm.dma = i915->drm.dev; > dma_resv_init(&ggtt->vm._resv); > > - if (GRAPHICS_VER(i915) <= 5) > - ret = intel_gt_gmch_gen5_probe(ggtt); > + if (GRAPHICS_VER(i915) < 6) > + ret = intel_ggtt_gmch_probe(ggtt); > else if (GRAPHICS_VER(i915) < 8) > - ret = intel_gt_gmch_gen6_probe(ggtt); > + ret = gen6_gmch_probe(ggtt); > else > - ret = intel_gt_gmch_gen8_probe(ggtt); > + ret = gen8_gmch_probe(ggtt); We could also take the opportunity to reverse the if/else ladder here and put it in the i915-preferred "newest at top" order. > if (ret) { > dma_resv_fini(&ggtt->vm._resv); > return ret; > @@ -635,7 +1163,10 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) > > int i915_ggtt_enable_hw(struct drm_i915_private *i915) > { > - return intel_gt_gmch_gen5_enable_hw(i915); > + if (GRAPHICS_VER(i915) < 6) > + return intel_ggtt_gmch_enable_hw(i915); > + > + return 0; > } > > void i915_ggtt_enable_guc(struct i915_ggtt *ggtt) > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c > b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c > new file mode 100644 > index ..1c15825d4bd3 > --- /dev/null > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c > @@ -0,0 +1,132 @@ > +// SPDX-License-Identifier: MIT > +/* > + * Copyright © 2022 Intel Corporation > + */ > + > +#include "intel_ggtt_gmch.h" > + > +#include > +#include > + > +#include > + > +#include "i915_drv.h" > +#include "i915_utils.h" > +#include "intel_gtt.h" > +#include "intel_gt_regs.h" > +#include "intel_gt.h" > + > +static void gen5_ggtt_insert_page(struct i915_address_space *vm, All the "gen5_" prefixes in this file seem a bit misleading if they wind up getting used on earlier platforms too; most of the driver uses the prefix to indicate the first platform/architecture that the function was used on. Maybe we should also rename these with a "gmch_" prefix as well to indicate that they're intended for the platforms where the GMCH was an independent chip and not integrated into the CPU? Anyway, all of my review comments are just bikeshedding so feel free to use or ignore them as you see fit. Either way, this series should fix the issue and restore GGTT handling for non-x86. Reviewed-by: Matt Roper > + dma_addr_t addr, > + u64 offset, > + enum i915_cache_level cache_level, > + u32 unused) > +{ > + unsigned int flags = (cache_level == I915_CACHE_NONE) ? > + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; > + > + intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); > +} > + > +static void gen5_ggtt_insert_entries(struct i915_address_space *vm, > + struct i915_vma_resource *vma_res, > + enum i915_cache_level cache_level, > + u32 unused) > +{ > + unsigned int flags = (cache_level == I915_CACHE_NONE) ? > + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; > + > + intel_gmch_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> > PAGE_SHIFT, > + flags); > +} > + > +static void gen5_ggtt_invalidate(struct i915_ggtt *ggtt) > +{ > + intel_gmch_gtt_flush(); > +} > + > +static void gen5_ggtt_clear_range(struct i915_address_space *vm, > + u64 start, u64 length) > +{ > + intel_gmch_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); > +} > + > +static void gen5_ggtt_remove(struct i915_address_space *vm) > +{ > + intel_gmch_remove(); > +} > + > +/* > + * Certain Gen5 chipsets require idling the GPU before unmapping anything > from > + * the GTT when VT-d is enabled. > + */ > +static bool needs_idle_maps(struct drm_i915_private *i915) > +{ > + /* > + * Query intel_iommu to see if we need the workaround. Presumably that
Re: [PATCH v2 2/2] drm/i915/gt: Cleanup interface for MCR operations
On Fri, Jun 17, 2022 at 06:57:20AM -0700, Harish Chegondi wrote: > On Tue, Jun 14, 2022 at 05:10:19PM -0700, Matt Roper wrote: > > Let's replace the assortment of intel_gt_* and intel_uncore_* functions > > that operate on MCR registers with a cleaner set of interfaces: > > > > * intel_gt_mcr_read -- unicast read from specific instance > > * intel_gt_mcr_read_any[_fw] -- unicast read from any non-terminated > > instance > > * intel_gt_mcr_unicast_write -- unicast write to specific instance > > * intel_gt_mcr_multicast_write[_fw] -- multicast write to all instances > > > > We'll also replace the historic "slice" and "subslice" terminology with > > "group" and "instance" to match the documentation for more recent > > platforms; these days MCR steering applies to more types of replication > > than just slice/subslice. > > > > v2: > > - Reference the new kerneldoc from i915.rst. (Jani) > > - Tweak the wording of the documentation for a couple functions to > >clarify the difference between "_fw" and non-"_fw" forms. > > > > Signed-off-by: Matt Roper > > Acked-by: Jani Nikula > > --- > > Documentation/gpu/i915.rst | 12 + > > drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 2 +- > > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 33 ++- > > drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 2 +- > > drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 239 > > drivers/gpu/drm/i915/gt/intel_gt_mcr.h | 43 ++-- > > drivers/gpu/drm/i915/gt/intel_region_lmem.c | 4 +- > > drivers/gpu/drm/i915/gt/intel_workarounds.c | 8 +- > > drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +- > > 9 files changed, 200 insertions(+), 145 deletions(-) > > > > diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst > > index 54060cd6c419..4e59db1cfb00 100644 > > --- a/Documentation/gpu/i915.rst > > +++ b/Documentation/gpu/i915.rst > > @@ -246,6 +246,18 @@ Display State Buffer > > .. kernel-doc:: drivers/gpu/drm/i915/display/intel_dsb.c > > :internal: > > > > +GT Programming > > +== > > + > > +Multicast/Replicated (MCR) Registers > > + > > + > > +.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_gt_mcr.c > > + :doc: GT Multicast/Replicated (MCR) Register Support > > + > > +.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_gt_mcr.c > > + :internal: > > + > > Memory Management and Command Submission > > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > > b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > > index da30503d3ca2..fa54823d1219 100644 > > --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > > @@ -835,7 +835,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private > > *i915, u16 type, > > } else { > > resource_size_t lmem_range; > > > > - lmem_range = intel_gt_read_register(&i915->gt0, > > XEHPSDV_TILE0_ADDR_RANGE) & 0x; > > + lmem_range = intel_gt_mcr_read_any(&i915->gt0, > > XEHPSDV_TILE0_ADDR_RANGE) & 0x; > > lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT; > > lmem_size *= SZ_1G; > > } > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > index 244af1bdb7db..136cc44c3deb 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > @@ -1428,14 +1428,6 @@ void intel_engine_cancel_stop_cs(struct > > intel_engine_cs *engine) > > ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); > > } > > > > -static u32 > > -read_subslice_reg(const struct intel_engine_cs *engine, > > - int slice, int subslice, i915_reg_t reg) > > -{ > > - return intel_uncore_read_with_mcr_steering(engine->uncore, reg, > > - slice, subslice); > > -} > > - > > /* NB: please notice the memset */ > > void intel_engine_get_instdone(const struct intel_engine_cs *engine, > >struct intel_instdone *instdone) > > @@ -1469,28 +1461,33 @@ void intel_engine_get_instdone(const struct > > intel_engine_cs *engine, > >
Re: [PATCH v2 3/9] drm/i915/dg2: Add DG2_NB_MBD subplatform
On Thu, Jun 16, 2022 at 05:31:00PM +0530, Anshuman Gupta wrote: > DG2 NB SKU need to distinguish between MBD and AIC to probe > the VRAM Self Refresh feature support. Adding those sub platform > accordingly. > > Cc: Matt Roper > Signed-off-by: Anshuman Gupta > --- > drivers/gpu/drm/i915/i915_drv.h | 3 +++ > drivers/gpu/drm/i915/intel_device_info.c | 21 + > drivers/gpu/drm/i915/intel_device_info.h | 11 +++ > include/drm/i915_pciids.h| 23 --- > 4 files changed, 47 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index a5bc6a774c5a..f1f8699eedfd 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1007,10 +1007,13 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, > #define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, INTEL_PONTEVECCHIO) > > #define IS_DG2_G10(dev_priv) \ > + IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10_NB_MBD) || \ > IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10) > #define IS_DG2_G11(dev_priv) \ > + IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G11_NB_MBD) || \ > IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G11) > #define IS_DG2_G12(dev_priv) \ > + IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G12_NB_MBD) || \ > IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G12) > #define IS_ADLS_RPLS(dev_priv) \ > IS_SUBPLATFORM(dev_priv, INTEL_ALDERLAKE_S, INTEL_SUBPLATFORM_RPL) > diff --git a/drivers/gpu/drm/i915/intel_device_info.c > b/drivers/gpu/drm/i915/intel_device_info.c > index f0bf23726ed8..93da555adc4e 100644 > --- a/drivers/gpu/drm/i915/intel_device_info.c > +++ b/drivers/gpu/drm/i915/intel_device_info.c > @@ -187,6 +187,18 @@ static const u16 subplatform_rpl_ids[] = { > INTEL_RPLP_IDS(0), > }; > > +static const u16 subplatform_g10_mb_mbd_ids[] = { > + INTEL_DG2_G10_NB_MBD_IDS(0), > +}; > + > +static const u16 subplatform_g11_mb_mbd_ids[] = { > + INTEL_DG2_G11_NB_MBD_IDS(0), > +}; > + > +static const u16 subplatform_g12_mb_mbd_ids[] = { > + INTEL_DG2_G12_NB_MBD_IDS(0), > +}; We only need a single MBD subplatform, not three new subplatforms. Unless I'm forgetting something, a single device ID can be assigned two two independent subplatforms at the same time. So the decision about whether to set the G10, G11, or G12 bit is one decision. The decision about whether to set the MBD bit is a completely separate decision that doesn't care about the G10/G11/G12 stuff. > + > static const u16 subplatform_g10_ids[] = { > INTEL_DG2_G10_IDS(0), > INTEL_ATS_M150_IDS(0), > @@ -246,6 +258,15 @@ void intel_device_info_subplatform_init(struct > drm_i915_private *i915) > } else if (find_devid(devid, subplatform_rpl_ids, > ARRAY_SIZE(subplatform_rpl_ids))) { > mask = BIT(INTEL_SUBPLATFORM_RPL); > + } else if (find_devid(devid, subplatform_g10_mb_mbd_ids, > + ARRAY_SIZE(subplatform_g10_mb_mbd_ids))) { > + mask = BIT(INTEL_SUBPLATFORM_G10_NB_MBD); > + } else if (find_devid(devid, subplatform_g11_mb_mbd_ids, > + ARRAY_SIZE(subplatform_g11_mb_mbd_ids))) { > + mask = BIT(INTEL_SUBPLATFORM_G11_NB_MBD); > + } else if (find_devid(devid, subplatform_g12_mb_mbd_ids, > + ARRAY_SIZE(subplatform_g12_mb_mbd_ids))) { > + mask = BIT(INTEL_SUBPLATFORM_G12_NB_MBD); Assuming you consolidate MBD back down to just a single extra subplatform, the lookup and bit setting should happen in a separate 'if' statement (not an 'else' block). if (find_devid(devid, subplatform_mbd_ids, ARRAY_SIZE(subplatform_mbd_ids))) mask |= BIT(INTEL_SUBPLATFORM_MBD); Matt > } else if (find_devid(devid, subplatform_g10_ids, > ARRAY_SIZE(subplatform_g10_ids))) { > mask = BIT(INTEL_SUBPLATFORM_G10); > diff --git a/drivers/gpu/drm/i915/intel_device_info.h > b/drivers/gpu/drm/i915/intel_device_info.h > index 08341174ee0a..c929e2d7e59c 100644 > --- a/drivers/gpu/drm/i915/intel_device_info.h > +++ b/drivers/gpu/drm/i915/intel_device_info.h > @@ -97,7 +97,7 @@ enum intel_platform { > * it is fine for the same bit to be used on multiple parent platforms. > */ > > -#define INTEL_SUBPLATFORM_BITS (3) > +#define INTEL_SUBPLATFORM_BITS (6) > #define INTEL_SUBPLATFORM_MASK (BIT(INTEL_SUBPLATFORM_BITS) - 1) > > /* HSW/BDW/SKL/KBL/CFL */ > @@ -111,9 +111,12 @@ enum
[PATCH v2 2/2] drm/i915/gt: Cleanup interface for MCR operations
Let's replace the assortment of intel_gt_* and intel_uncore_* functions that operate on MCR registers with a cleaner set of interfaces: * intel_gt_mcr_read -- unicast read from specific instance * intel_gt_mcr_read_any[_fw] -- unicast read from any non-terminated instance * intel_gt_mcr_unicast_write -- unicast write to specific instance * intel_gt_mcr_multicast_write[_fw] -- multicast write to all instances We'll also replace the historic "slice" and "subslice" terminology with "group" and "instance" to match the documentation for more recent platforms; these days MCR steering applies to more types of replication than just slice/subslice. v2: - Reference the new kerneldoc from i915.rst. (Jani) - Tweak the wording of the documentation for a couple functions to clarify the difference between "_fw" and non-"_fw" forms. Signed-off-by: Matt Roper Acked-by: Jani Nikula --- Documentation/gpu/i915.rst | 12 + drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 33 ++- drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 239 drivers/gpu/drm/i915/gt/intel_gt_mcr.h | 43 ++-- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 4 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 8 +- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +- 9 files changed, 200 insertions(+), 145 deletions(-) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 54060cd6c419..4e59db1cfb00 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -246,6 +246,18 @@ Display State Buffer .. kernel-doc:: drivers/gpu/drm/i915/display/intel_dsb.c :internal: +GT Programming +== + +Multicast/Replicated (MCR) Registers + + +.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_gt_mcr.c + :doc: GT Multicast/Replicated (MCR) Register Support + +.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_gt_mcr.c + :internal: + Memory Management and Command Submission diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index da30503d3ca2..fa54823d1219 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -835,7 +835,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, } else { resource_size_t lmem_range; - lmem_range = intel_gt_read_register(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0x; + lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0x; lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT; lmem_size *= SZ_1G; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 244af1bdb7db..136cc44c3deb 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1428,14 +1428,6 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine) ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); } -static u32 -read_subslice_reg(const struct intel_engine_cs *engine, - int slice, int subslice, i915_reg_t reg) -{ - return intel_uncore_read_with_mcr_steering(engine->uncore, reg, - slice, subslice); -} - /* NB: please notice the memset */ void intel_engine_get_instdone(const struct intel_engine_cs *engine, struct intel_instdone *instdone) @@ -1469,28 +1461,33 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { instdone->sampler[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_SAMPLER_INSTDONE); + intel_gt_mcr_read(engine->gt, + GEN7_SAMPLER_INSTDONE, + slice, subslice); instdone->row[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_ROW_INSTDONE); + intel_gt_mcr_read(engine->gt, + GEN7_ROW_INSTDONE, + slice, subslice);
[PATCH v2 0/2] i915: Extract, polish, and document multicast handling
Multicast/replicated (MCR) registers on Intel hardware are a purely GT-specific concept. Rather than leaving MCR register handling spread across several places throughout the driver (intel_uncore.c, intel_gt.c, etc.) with confusing combinations of handler functions living in different namespaces, let's consolidate it all into a single place (intel_gt_mcr.c) and provide a more consistent and clearly-documented interface for the rest of the driver to access such registers: * intel_gt_mcr_read -- unicast read from specific instance * intel_gt_mcr_read_any[_fw] -- unicast read from any non-terminated instance * intel_gt_mcr_unicast_write -- unicast write to specific instance * intel_gt_mcr_multicast_write[_fw] -- multicast write to all instances v2: - Reference the new kerneldoc from i915.rst. (Jani) - Tweak the wording of the documentation for a couple functions to clarify the difference between "_fw" and non-"_fw" forms. Matt Roper (2): drm/i915/gt: Move multicast register handling to a dedicated file drm/i915/gt: Cleanup interface for MCR operations Documentation/gpu/i915.rst | 12 + drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 3 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 36 +- drivers/gpu/drm/i915/gt/intel_gt.c | 297 +--- drivers/gpu/drm/i915/gt/intel_gt.h | 15 - drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 3 +- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 497 drivers/gpu/drm/i915/gt/intel_gt_mcr.h | 34 ++ drivers/gpu/drm/i915/gt/intel_region_lmem.c | 5 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 3 +- drivers/gpu/drm/i915/i915_drv.h | 2 - drivers/gpu/drm/i915/intel_uncore.c | 112 - drivers/gpu/drm/i915/intel_uncore.h | 8 - 15 files changed, 577 insertions(+), 460 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.c create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.h -- 2.35.3
[PATCH v2 1/2] drm/i915/gt: Move multicast register handling to a dedicated file
Handling of multicast/replicated registers is spread across intel_gt.c and intel_uncore.c today. As multicast handling and the related steering logic gets more complicated with the addition of new platforms and new rules it makes sense to centralize it all in one place. For now the existing functions have been moved to the new .c/.h as-is. Function renames and updates to operate in a more consistent manner will be done in subsequent patches. Signed-off-by: Matt Roper Acked-by: Jani Nikula --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 1 + drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 +- drivers/gpu/drm/i915/gt/intel_gt.c | 297 + drivers/gpu/drm/i915/gt/intel_gt.h | 15 - drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 1 + drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 448 drivers/gpu/drm/i915/gt/intel_gt_mcr.h | 37 ++ drivers/gpu/drm/i915/gt/intel_region_lmem.c | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 2 - drivers/gpu/drm/i915/intel_uncore.c | 112 - drivers/gpu/drm/i915/intel_uncore.h | 8 - 14 files changed, 495 insertions(+), 433 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.c create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index d2b18f03a33c..08f5d0d6e83a 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -103,6 +103,7 @@ gt-y += \ gt/intel_gt_debugfs.o \ gt/intel_gt_engines_debugfs.o \ gt/intel_gt_irq.o \ + gt/intel_gt_mcr.o \ gt/intel_gt_pm.o \ gt/intel_gt_pm_debugfs.o \ gt/intel_gt_pm_irq.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 47b5e0e342ab..da30503d3ca2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -13,6 +13,7 @@ #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_mcr.h" #include "gt/intel_region_lmem.h" #include "i915_drv.h" #include "i915_gem_stolen.h" diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f0acf8518a51..244af1bdb7db 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -21,8 +21,9 @@ #include "intel_engine_user.h" #include "intel_execlists_submission.h" #include "intel_gt.h" -#include "intel_gt_requests.h" +#include "intel_gt_mcr.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" #include "intel_lrc.h" #include "intel_lrc_reg.h" #include "intel_reset.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f33290358c51..be9877c4b496 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -17,6 +17,7 @@ #include "intel_gt_clock_utils.h" #include "intel_gt_debugfs.h" #include "intel_gt_gmch.h" +#include "intel_gt_mcr.h" #include "intel_gt_pm.h" #include "intel_gt_regs.h" #include "intel_gt_requests.h" @@ -102,107 +103,13 @@ int intel_gt_assign_ggtt(struct intel_gt *gt) return gt->ggtt ? 0 : -ENOMEM; } -static const char * const intel_steering_types[] = { - "L3BANK", - "MSLICE", - "LNCF", - "INSTANCE 0", -}; - -static const struct intel_mmio_range icl_l3bank_steering_table[] = { - { 0x00B100, 0x00B3FF }, - {}, -}; - -static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = { - { 0x004000, 0x004AFF }, - { 0x00C800, 0x00CFFF }, - { 0x00DD00, 0x00DDFF }, - { 0x00E900, 0x00 }, /* 0xEA00 - OxEFFF is unused */ - {}, -}; - -static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = { - { 0x00B000, 0x00B0FF }, - { 0x00D800, 0x00D8FF }, - {}, -}; - -static const struct intel_mmio_range dg2_lncf_steering_table[] = { - { 0x00B000, 0x00B0FF }, - { 0x00D880, 0x00D8FF }, - {}, -}; - -/* - * We have several types of MCR registers on PVC where steering to (0,0) - * will always provide us with a non-terminated value. We'll stick them - * all in the same table for simplicity. - */ -static const struct intel_mmio_range pvc_instance0_steering_table[] = { - { 0x004000, 0x004AFF }, /* HALF-BSLICE */ - { 0x008800, 0x00887F }, /* CC */ - { 0x008A80, 0x008AFF }, /* TILEPSMI */ - {
[PATCH] drm/i915/pvc: Add recommended MMIO setting
As with past platforms, the bspec's performance tuning guide provides recommended MMIO settings. Although not technically "workarounds" we apply these through the workaround framework to ensure that they're re-applied at the proper times (e.g., on engine resets) and that any conflicts with real workarounds are flagged. Bspec: 72161 Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 5 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 + 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 226557018037..07ef111947b8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -981,6 +981,11 @@ #define XEHP_L3SCQREG7 _MMIO(0xb188) #define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3) +#define XEHPC_L3SCRUB _MMIO(0xb18c) +#define SCRUB_CL_DWNGRADE_SHARED REG_BIT(12) +#define SCRUB_RATE_PER_BANK_MASK REG_GENMASK(2, 0) +#define SCRUB_RATE_4B_PER_CLK REG_FIELD_PREP(SCRUB_RATE_PER_BANK_MASK, 0x6) + #define L3SQCREG1_CCS0 _MMIO(0xb200) #define FLUSHALLNONCOH REG_BIT(5) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 1e982ac931dc..c4af51144216 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2679,6 +2679,15 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li { struct drm_i915_private *i915 = engine->i915; + if (IS_PONTEVECCHIO(i915)) { + /* +* The following is not actually a "workaround" but rather +* a recommended tuning setting documented in the bspec's +* performance guide section. +*/ + wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); + } + if (IS_XEHPSDV(i915)) { /* Wa_1409954639 */ wa_masked_en(wal, -- 2.35.3
[PATCH 2/2] drm/i915/gt: Cleanup interface for MCR operations
Let's replace the assortment of intel_gt_* and intel_uncore_* functions that operate on MCR registers with a cleaner set of interfaces: * intel_gt_mcr_read -- unicast read from specific instance * intel_gt_mcr_read_any[_fw] -- unicast read from any non-terminated instance * intel_gt_mcr_unicast_write -- unicast write to specific instance * intel_gt_mcr_multicast_write[_fw] -- multicast write to all instances We'll also replace the historic "slice" and "subslice" terminology with "group" and "instance" to match the documentation for more recent platforms; these days MCR steering applies to more types of replication than just slice/subslice. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 33 ++- drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 225 +++- drivers/gpu/drm/i915/gt/intel_gt_mcr.h | 43 ++-- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 4 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 8 +- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +- 8 files changed, 174 insertions(+), 145 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index da30503d3ca2..fa54823d1219 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -835,7 +835,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, } else { resource_size_t lmem_range; - lmem_range = intel_gt_read_register(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0x; + lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0x; lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT; lmem_size *= SZ_1G; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 244af1bdb7db..136cc44c3deb 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1428,14 +1428,6 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine) ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); } -static u32 -read_subslice_reg(const struct intel_engine_cs *engine, - int slice, int subslice, i915_reg_t reg) -{ - return intel_uncore_read_with_mcr_steering(engine->uncore, reg, - slice, subslice); -} - /* NB: please notice the memset */ void intel_engine_get_instdone(const struct intel_engine_cs *engine, struct intel_instdone *instdone) @@ -1469,28 +1461,33 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { instdone->sampler[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_SAMPLER_INSTDONE); + intel_gt_mcr_read(engine->gt, + GEN7_SAMPLER_INSTDONE, + slice, subslice); instdone->row[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_ROW_INSTDONE); + intel_gt_mcr_read(engine->gt, + GEN7_ROW_INSTDONE, + slice, subslice); } } else { for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { instdone->sampler[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_SAMPLER_INSTDONE); + intel_gt_mcr_read(engine->gt, + GEN7_SAMPLER_INSTDONE, + slice, subslice); instdone->row[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_ROW_INSTDONE); + intel_gt_mcr_read(engine->gt, +
[PATCH 1/2] drm/i915/gt: Move multicast register handling to a dedicated file
Handling of multicast/replicated registers is spread across intel_gt.c and intel_uncore.c today. As multicast handling and the related steering logic gets more complicated with the addition of new platforms and new rules it makes sense to centralize it all in one place. For now the existing functions have been moved to the new .c/.h as-is. Function renames and updates to operate in a more consistent manner will be done in subsequent patches. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 1 + drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 +- drivers/gpu/drm/i915/gt/intel_gt.c | 297 + drivers/gpu/drm/i915/gt/intel_gt.h | 15 - drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 1 + drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 448 drivers/gpu/drm/i915/gt/intel_gt_mcr.h | 37 ++ drivers/gpu/drm/i915/gt/intel_region_lmem.c | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 2 - drivers/gpu/drm/i915/intel_uncore.c | 112 - drivers/gpu/drm/i915/intel_uncore.h | 8 - 14 files changed, 495 insertions(+), 433 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.c create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index d2b18f03a33c..08f5d0d6e83a 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -103,6 +103,7 @@ gt-y += \ gt/intel_gt_debugfs.o \ gt/intel_gt_engines_debugfs.o \ gt/intel_gt_irq.o \ + gt/intel_gt_mcr.o \ gt/intel_gt_pm.o \ gt/intel_gt_pm_debugfs.o \ gt/intel_gt_pm_irq.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 47b5e0e342ab..da30503d3ca2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -13,6 +13,7 @@ #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_mcr.h" #include "gt/intel_region_lmem.h" #include "i915_drv.h" #include "i915_gem_stolen.h" diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f0acf8518a51..244af1bdb7db 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -21,8 +21,9 @@ #include "intel_engine_user.h" #include "intel_execlists_submission.h" #include "intel_gt.h" -#include "intel_gt_requests.h" +#include "intel_gt_mcr.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" #include "intel_lrc.h" #include "intel_lrc_reg.h" #include "intel_reset.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f33290358c51..be9877c4b496 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -17,6 +17,7 @@ #include "intel_gt_clock_utils.h" #include "intel_gt_debugfs.h" #include "intel_gt_gmch.h" +#include "intel_gt_mcr.h" #include "intel_gt_pm.h" #include "intel_gt_regs.h" #include "intel_gt_requests.h" @@ -102,107 +103,13 @@ int intel_gt_assign_ggtt(struct intel_gt *gt) return gt->ggtt ? 0 : -ENOMEM; } -static const char * const intel_steering_types[] = { - "L3BANK", - "MSLICE", - "LNCF", - "INSTANCE 0", -}; - -static const struct intel_mmio_range icl_l3bank_steering_table[] = { - { 0x00B100, 0x00B3FF }, - {}, -}; - -static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = { - { 0x004000, 0x004AFF }, - { 0x00C800, 0x00CFFF }, - { 0x00DD00, 0x00DDFF }, - { 0x00E900, 0x00 }, /* 0xEA00 - OxEFFF is unused */ - {}, -}; - -static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = { - { 0x00B000, 0x00B0FF }, - { 0x00D800, 0x00D8FF }, - {}, -}; - -static const struct intel_mmio_range dg2_lncf_steering_table[] = { - { 0x00B000, 0x00B0FF }, - { 0x00D880, 0x00D8FF }, - {}, -}; - -/* - * We have several types of MCR registers on PVC where steering to (0,0) - * will always provide us with a non-terminated value. We'll stick them - * all in the same table for simplicity. - */ -static const struct intel_mmio_range pvc_instance0_steering_table[] = { - { 0x004000, 0x004AFF }, /* HALF-BSLICE */ - { 0x008800, 0x00887F }, /* CC */ - { 0x008A80, 0x008AFF }, /* TILEPSMI */ - { 0x00B000,
[PATCH 0/2] i915: Extract, polish, and document multicast handling
Multicast/replicated (MCR) registers on Intel hardware are a purely GT-specific concept. Rather than leaving MCR register handling spread across several places throughout the driver (intel_uncore.c, intel_gt.c, etc.) with confusing combinations of handler functions living in different namespaces, let's consolidate it all into a single place (intel_gt_mcr.c) and provide a more consistent and clearly-documented interface for the rest of the driver to access such registers: * intel_gt_mcr_read -- unicast read from specific instance * intel_gt_mcr_read_any[_fw] -- unicast read from any non-terminated instance * intel_gt_mcr_unicast_write -- unicast write to specific instance * intel_gt_mcr_multicast_write[_fw] -- multicast write to all instances Matt Roper (2): drm/i915/gt: Move multicast register handling to a dedicated file drm/i915/gt: Cleanup interface for MCR operations drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 3 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 36 +- drivers/gpu/drm/i915/gt/intel_gt.c | 297 +--- drivers/gpu/drm/i915/gt/intel_gt.h | 15 - drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 3 +- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 483 drivers/gpu/drm/i915/gt/intel_gt_mcr.h | 34 ++ drivers/gpu/drm/i915/gt/intel_region_lmem.c | 5 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 3 +- drivers/gpu/drm/i915/i915_drv.h | 2 - drivers/gpu/drm/i915/intel_uncore.c | 112 - drivers/gpu/drm/i915/intel_uncore.h | 8 - 14 files changed, 551 insertions(+), 460 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.c create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.h -- 2.35.3
[PATCH] drm/i915: More PVC+DG2 workarounds
A new PVC+DG2 workaround has appeared recently: - Wa_16015675438 And a couple existing DG2 workarounds have been extended to PVC: - Wa_14015795083 - Wa_18018781329 Note that Wa_16015675438 asks us to program a register that is in the 0x2xxx range typically associated with the RCS engine, even though PVC does not have an RCS. By default the GuC will think we've made a mistake and throw an exception when it sees this register on a CCS engine's save/restore list, so we need to pass an extra GuC control flag to tell it that this is expected and not a problem. Signed-off-by: Anshuman Gupta Signed-off-by: Badal Nilawar Signed-off-by: Prathap Kumar Valsan Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 24 +++-- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 4 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 1 + 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index c8129a351731..226557018037 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -140,6 +140,7 @@ #define FF_SLICE_CS_CHICKEN2 _MMIO(0x20e4) #define GEN9_TSG_BARRIER_ACK_DISABLE (1 << 8) #define GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE(1 << 10) +#define GEN12_PERF_FIX_BALANCING_CFE_DISABLE REG_BIT(15) #define GEN9_CS_DEBUG_MODE1_MMIO(0x20ec) #define FF_DOP_CLOCK_GATE_DISABLEREG_BIT(1) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 1e7ca3863f20..e1e70eff9aac 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1491,13 +1491,20 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); } +static void +pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) +{ + /* Wa_14015795083 */ + wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); +} + static void gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) { struct drm_i915_private *i915 = gt->i915; if (IS_PONTEVECCHIO(i915)) - ; /* none yet */ + pvc_gt_workarounds_init(gt, wal); else if (IS_DG2(i915)) dg2_gt_workarounds_init(gt, wal); else if (IS_XEHPSDV(i915)) @@ -2082,12 +2089,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * performance guide section. */ wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); - - /* Wa_18018781329:dg2 */ - wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); } if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { @@ -2700,6 +2701,15 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li /* Wa_22014226127:dg2,pvc */ wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); + + /* Wa_16015675438:dg2,pvc */ + wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); + + /* Wa_18018781329:dg2,pvc */ + wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); } } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 2c4ad4a65089..35887cb53201 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -327,6 +327,10 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER)) flags |= GUC_WA_CONTEXT_ISOLATION; + /* Wa_16015675438 */ + if (!RCS_MASK(gt)) + flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; + return flags; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 42cb7a9a6199..b3c9a9327f76 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -105,6 +105,7 @@ #define GUC_WA_PRE_PARSERBIT(14) #define GUC_WA_HOLD_CCS_SWITCHOUTBIT(17) #define GUC_WA_POLLCSBIT(18) +#define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21
[PATCH] drm/i915/xehp: Correct steering initialization
Another mistake during the conversion to DSS bitmaps: after retrieving the DSS ID intel_sseu_find_first_xehp_dss() we forgot to modulo it down to obtain which ID within the current gslice it is. Fixes: b87d39019651 ("drm/i915/sseu: Disassociate internal subslice mask representation from uapi") Cc: Balasubramani Vivekanandan Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b7421f109c13..a5c0508c5b63 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1177,8 +1177,8 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) } slice = __ffs(slice_mask); - subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice); - WARN_ON(subslice > GEN_DSS_PER_GSLICE); + subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) % + GEN_DSS_PER_GSLICE; __add_mcr_wa(gt, wal, slice, subslice); -- 2.35.3
[PATCH] drm/i915/dg2: Correct DSS check for Wa_1308578152
When converting our DSS masks to bitmaps, we fumbled the condition used to check whether any DSS are present in the first gslice. Since intel_sseu_find_first_xehp_dss() returns a 0-based number, we need a >= condition rather than >. Fixes: b87d39019651 ("drm/i915/sseu: Disassociate internal subslice mask representation from uapi") Reported-by: Balasubramani Vivekanandan Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 1b191b234160..67104ba8951e 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2079,7 +2079,7 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) static bool needs_wa_1308578152(struct intel_engine_cs *engine) { - return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) > + return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >= GEN_DSS_PER_GSLICE; } -- 2.35.3
Re: [PATCH] drm/i915/dg2: Catch and log more unexpected values in DG1_MSTR_TILE_INTR
On Mon, Jun 06, 2022 at 12:55:20PM +0100, Tvrtko Ursulin wrote: > > On 27/05/2022 19:42, Matt Roper wrote: > > On Thu, May 26, 2022 at 11:18:17AM +0100, Tvrtko Ursulin wrote: > > > On 25/05/2022 19:05, Matt Roper wrote: > > > > On Wed, May 25, 2022 at 05:03:13PM +0100, Tvrtko Ursulin wrote: > > > > > > > > > > On 24/05/2022 18:51, Matt Roper wrote: > > > > > > On Tue, May 24, 2022 at 10:43:39AM +0100, Tvrtko Ursulin wrote: > > > > > > > From: Tvrtko Ursulin > > > > > > > > > > > > > > Catch and log any garbage in the register, including no tiles > > > > > > > marked, or > > > > > > > multiple tiles marked. > > > > > > > > > > > > > > Signed-off-by: Tvrtko Ursulin > > > > > > > Cc: Matt Roper > > > > > > > --- > > > > > > > We caught garbage in DG1_MSTR_TILE_INTR with DG2 (actual value > > > > > > > 0xF9D2C008) > > > > > > > during glmark and more badness. So I thought lets log all > > > > > > > possible failure > > > > > > > modes from here and also use per device logging. > > > > > > > --- > > > > > > > drivers/gpu/drm/i915/i915_irq.c | 33 > > > > > > > ++--- > > > > > > > drivers/gpu/drm/i915/i915_reg.h | 1 + > > > > > > > 2 files changed, 23 insertions(+), 11 deletions(-) > > > > > > > > > > > > > > diff --git a/drivers/gpu/drm/i915/i915_irq.c > > > > > > > b/drivers/gpu/drm/i915/i915_irq.c > > > > > > > index 73cebc6aa650..79853d3fc1ed 100644 > > > > > > > --- a/drivers/gpu/drm/i915/i915_irq.c > > > > > > > +++ b/drivers/gpu/drm/i915/i915_irq.c > > > > > > > @@ -2778,24 +2778,30 @@ static irqreturn_t dg1_irq_handler(int > > > > > > > irq, void *arg) > > > > > > > u32 gu_misc_iir; > > > > > > > if (!intel_irqs_enabled(i915)) > > > > > > > - return IRQ_NONE; > > > > > > > + goto none; > > > > > > > master_tile_ctl = dg1_master_intr_disable(regs); > > > > > > > - if (!master_tile_ctl) { > > > > > > > - dg1_master_intr_enable(regs); > > > > > > > - return IRQ_NONE; > > > > > > > + if (!master_tile_ctl) > > > > > > > + goto enable_none; > > > > > > > + > > > > > > > + if (master_tile_ctl & ~(DG1_MSTR_IRQ | DG1_MSTR_TILE_MASK)) { > > > > > > > + drm_warn(&i915->drm, "Garbage in master_tile_ctl: > > > > > > > 0x%08x!\n", > > > > > > > + master_tile_ctl); > > > > > > > > > > > > I know we have a bunch of them already, but shouldn't we be avoiding > > > > > > printk-based stuff like this inside interrupt handlers? Should we > > > > > > be > > > > > > migrating all these error messages over to trace_printk or something > > > > > > similar that's safer to use? > > > > > > > > > > Not sure - I kind of think some really unexpected and worrying > > > > > situations > > > > > should be loud and on by default. Risk is then spam if not > > > > > ratelimited. > > > > > Maybe we should instead ratelimit most errors/warnings coming for irq > > > > > handlers? > > > > > > > > It's not the risk of spam that's the problem, but rather that > > > > printk-based stuff eventually calls into the console code to flush its > > > > buffers. That's way more overhead than you want in an interrupt handler > > > > so it's bad on its own, but if you're using something slow like a serial > > > > console, it becomes even more of a problem. > > > > > > Is it a problem for messages which we never expect to see? > > > > Kind of. While not as catastrophic, it's the same argument for why we > > don't use BUG() anymore...when the impossible does manage to happen > > there's unnecessary collateral damage on things outside of graphics. If > >
[PATCH] drm/i915/pvc: Add register steering
Ponte Vecchio no longer has MSLICE or LNCF steering, but the bspec does document several new types of multicast register ranges. Fortunately, most of the different MCR types all provide valid values at instance (0,0) so there's no need to read fuse registers and calculate a valid instance. We'll lump all of those range types (BSLICE, HALFBSLICE, TILEPSMI, CC, and L3BANK) into a single category called "INSTANCE0" to keep things simple. We'll also perform explicit steering for each of these multicast register types, even if the implicit steering setup for COMPUTE/DSS ranges would have worked too; this is based on guidance from our hardware architects who suggested that we move away from implicit steering and start explicitly steer all MCR register accesses on modern platforms (we'll work on transitioning COMPUTE/DSS to explicit steering in the future). Note that there's one additional MCR range type defined in the bspec (SQIDI) that we don't handle here. Those ranges use a different steering control register that we never touch; since instance 0 is also always a valid setting there, we can just ignore those ranges. Finally, we'll rename the HAS_MSLICES() macro to HAS_MSLICE_STEERING(). PVC hardware still has units referred to as mslices, but there's no register steering based on mslice for this platform. Bspec: 67609 Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt.c | 50 ++--- drivers/gpu/drm/i915/gt/intel_gt_types.h| 7 +++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 22 - drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_pci.c | 3 +- drivers/gpu/drm/i915/intel_device_info.h| 2 +- 6 files changed, 76 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index ddfb98f70489..b335eacd7a6c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -106,6 +106,7 @@ static const char * const intel_steering_types[] = { "L3BANK", "MSLICE", "LNCF", + "INSTANCE 0", }; static const struct intel_mmio_range icl_l3bank_steering_table[] = { @@ -133,6 +134,27 @@ static const struct intel_mmio_range dg2_lncf_steering_table[] = { {}, }; +/* + * We have several types of MCR registers on PVC where steering to (0,0) + * will always provide us with a non-terminated value. We'll stick them + * all in the same table for simplicity. + */ +static const struct intel_mmio_range pvc_instance0_steering_table[] = { + { 0x004000, 0x004AFF }, /* HALF-BSLICE */ + { 0x008A80, 0x008AFF }, /* TILEPSMI */ + { 0x008800, 0x00887F }, /* CC */ + { 0x00B000, 0x00B0FF }, /* HALF-BSLICE */ + { 0x00B100, 0x00B3FF }, /* L3BANK */ + { 0x00C800, 0x00CFFF }, /* HALF-BSLICE */ + { 0x00D800, 0x00D8FF }, /* HALF-BSLICE */ + { 0x00DD00, 0x00DDFF }, /* BSLICE */ + { 0x00E900, 0x00E9FF }, /* HALF-BSLICE */ + { 0x00EC00, 0x00EEFF }, /* HALF-BSLICE */ + { 0x00F000, 0x00 }, /* HALF-BSLICE */ + { 0x024180, 0x0241FF }, /* HALF-BSLICE */ + {}, +}; + int intel_gt_init_mmio(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; @@ -146,7 +168,7 @@ int intel_gt_init_mmio(struct intel_gt *gt) * An mslice is unavailable only if both the meml3 for the slice is * disabled *and* all of the DSS in the slice (quadrant) are disabled. */ - if (HAS_MSLICES(i915)) { + if (HAS_MSLICE_STEERING(i915)) { gt->info.mslice_mask = intel_slicemask_from_xehp_dssmask(gt->info.sseu.subslice_mask, GEN_DSS_PER_MSLICE); @@ -158,7 +180,9 @@ int intel_gt_init_mmio(struct intel_gt *gt) drm_warn(&i915->drm, "mslice mask all zero!\n"); } - if (IS_DG2(i915)) { + if (IS_PONTEVECCHIO(i915)) { + gt->steering_table[INSTANCE0] = pvc_instance0_steering_table; + } else if (IS_DG2(i915)) { gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; gt->steering_table[LNCF] = dg2_lncf_steering_table; } else if (IS_XEHPSDV(i915)) { @@ -172,7 +196,11 @@ int intel_gt_init_mmio(struct intel_gt *gt) GEN10_L3BANK_MASK; if (!gt->info.l3bank_mask) /* should be impossible! */ drm_warn(&i915->drm, "L3 bank mask is all zero!\n"); - } else if (HAS_MSLICES(i915)) { + } else if (GRAPHICS_VER(i915) >= 11) { + /* +* We expect all modern platforms to have at least some +
[PATCH] drm/i915/pvc: Add Wa_16015675438:pvc
From: Badal Nilawar Even though PVC doesn't have an RCS engine, this workaround updates a register in the 0x2xxx range that traditionally belongs to the RCS. We need to set a special flag to tell the GuC that the presence of an "RCS" register on a CCS save/restore list is okay/expected. Cc: Stuart Summers Cc: Lucas De Marchi Signed-off-by: Badal Nilawar Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 + drivers/gpu/drm/i915/gt/uc/intel_guc.c | 4 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 1 + 4 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 6aa1ceaa8d27..1986579b18ba 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -140,6 +140,7 @@ #define FF_SLICE_CS_CHICKEN2 _MMIO(0x20e4) #define GEN9_TSG_BARRIER_ACK_DISABLE (1 << 8) #define GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE(1 << 10) +#define GEN12_PERF_FIX_BALANCING_CFE_DISABLE REG_BIT(15) #define GEN9_CS_DEBUG_MODE1_MMIO(0x20ec) #define FF_DOP_CLOCK_GATE_DISABLEREG_BIT(1) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 6e875d4f5f65..eb0598593724 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2698,6 +2698,11 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li /* Wa_22014226127:dg2,pvc */ wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); } + + /* Wa_16015675438:pvc */ + if (IS_PVC_BD_STEP(i915, STEP_B0, STEP_FOREVER)) + wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, +GEN12_PERF_FIX_BALANCING_CFE_DISABLE); } static void diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 2c4ad4a65089..7043cf5a666a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -327,6 +327,10 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER)) flags |= GUC_WA_CONTEXT_ISOLATION; + /* Wa_16015675438:pvc */ + if (IS_PVC_BD_STEP(gt->i915, STEP_B0, STEP_FOREVER)) + flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; + return flags; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 42cb7a9a6199..b3c9a9327f76 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -105,6 +105,7 @@ #define GUC_WA_PRE_PARSERBIT(14) #define GUC_WA_HOLD_CCS_SWITCHOUTBIT(17) #define GUC_WA_POLLCSBIT(18) +#define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21) #define GUC_CTL_FEATURE2 #define GUC_CTL_ENABLE_SLPC BIT(2) -- 2.35.3
[PATCH] drm/i915/pvc: GuC depriv applies to PVC
We missed this setting in the initial device info patch's definition of XE_HPC_FEATURES. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/i915_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 047a6e326031..a5a1a7647320 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1089,6 +1089,7 @@ static const struct intel_device_info ats_m_info = { XE_HP_FEATURES, \ .dma_mask_size = 52, \ .has_3d_pipeline = 0, \ + .has_guc_deprivilege = 1, \ .has_l3_ccs_read = 1, \ .has_one_eu_per_fuse_bit = 1 -- 2.35.3
[PATCH] drm/i915: Add extra registers to GPU error dump
From: Stuart Summers Our internal teams have identified a few additional engine registers that are worth inspecting in error state dumps during development & debug. Let's capture and print them as part of our error dump. For simplicity we'll just dump these registers on gen11 and beyond. Most of these registers have existed since earlier platforms (e.g., gen6 or gen7) but were initially introduced only for a subset of the platforms' engines; gen11 seems to be where they became available on all engines. Signed-off-by: Stuart Summers Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_engine_regs.h | 5 + drivers/gpu/drm/i915/i915_gpu_error.c | 19 +++ drivers/gpu/drm/i915/i915_gpu_error.h | 7 +++ 3 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index 44de10cf7837..889f0df3940b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -8,6 +8,7 @@ #include "i915_reg_defs.h" +#define RING_EXCC(base)_MMIO((base) + 0x28) #define RING_TAIL(base)_MMIO((base) + 0x30) #define TAIL_ADDR0x0018 #define RING_HEAD(base)_MMIO((base) + 0x34) @@ -133,6 +134,8 @@ (REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, (dst) << 1) | \ REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, (src) << 1)) +#define RING_CSCMDOP(base) _MMIO((base) + 0x20c) + /* * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. * The lsb of each can be considered a separate enabling bit for encryption. @@ -149,6 +152,7 @@ REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) #define RING_PREDICATE_RESULT(base)_MMIO((base) + 0x3b8) /* gen12+ */ + #define MI_PREDICATE_RESULT_2(base)_MMIO((base) + 0x3bc) #define LOWER_SLICE_ENABLED (1 << 0) #define LOWER_SLICE_DISABLED (0 << 0) @@ -172,6 +176,7 @@ #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT REG_BIT(2) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) #define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE REG_BIT(8) +#define RING_CTX_SR_CTL(base) _MMIO((base) + 0x244) #define RING_SEMA_WAIT_POLL(base) _MMIO((base) + 0x24c) #define GEN8_RING_PDP_UDW(base, n) _MMIO((base) + 0x270 + (n) * 8 + 4) #define GEN8_RING_PDP_LDW(base, n) _MMIO((base) + 0x270 + (n) * 8) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 0512c66fa4f3..bff8a111424a 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -581,6 +581,15 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, err_printf(m, " RC PSMI: 0x%08x\n", ee->rc_psmi); err_printf(m, " FAULT_REG: 0x%08x\n", ee->fault_reg); } + if (GRAPHICS_VER(m->i915) >= 11) { + err_printf(m, " NOPID: 0x%08x\n", ee->nopid); + err_printf(m, " EXCC: 0x%08x\n", ee->excc); + err_printf(m, " CMD_CCTL: 0x%08x\n", ee->cmd_cctl); + err_printf(m, " CSCMDOP: 0x%08x\n", ee->cscmdop); + err_printf(m, " CTX_SR_CTL: 0x%08x\n", ee->ctx_sr_ctl); + err_printf(m, " DMA_FADDR_HI: 0x%08x\n", ee->dma_faddr_hi); + err_printf(m, " DMA_FADDR_LO: 0x%08x\n", ee->dma_faddr_lo); + } if (HAS_PPGTT(m->i915)) { err_printf(m, " GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode); @@ -1224,6 +1233,16 @@ static void engine_record_registers(struct intel_engine_coredump *ee) ee->ipehr = ENGINE_READ(engine, IPEHR); } + if (GRAPHICS_VER(i915) >= 11) { + ee->cmd_cctl = ENGINE_READ(engine, RING_CMD_CCTL); + ee->cscmdop = ENGINE_READ(engine, RING_CSCMDOP); + ee->ctx_sr_ctl = ENGINE_READ(engine, RING_CTX_SR_CTL); + ee->dma_faddr_hi = ENGINE_READ(engine, RING_DMA_FADD_UDW); + ee->dma_faddr_lo = ENGINE_READ(engine, RING_DMA_FADD); + ee->nopid = ENGINE_READ(engine, RING_NOPID); + ee->excc = ENGINE_READ(engine, RING_EXCC); + } + intel_engine_get_instdone(engine, &ee->instdone); ee->instpm = ENGINE_READ(engine, RING_INSTPM); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index a611abacd9c2..55a143b92d10 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/driv
[PATCH v5 3/6] drm/i915/sseu: Simplify gen11+ SSEU handling
Although gen11 and gen12 architectures supported the concept of multiple slices, in practice all the platforms that were actually designed only had a single slice (i.e., note the parameters to 'intel_sseu_set_info' that we pass for each platform). We can simplify the code slightly by dropping the multi-slice logic from gen11+ platforms. v2: - Promote drm_dbg to drm_WARN_ON if the slice fuse register reports unexpected fusing. (Tvrtko) Cc: Tvrtko Ursulin Signed-off-by: Matt Roper Acked-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gt/intel_sseu.c | 76 +--- 1 file changed, 36 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index b5fd479a7b85..7e5222ad2f98 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -119,52 +119,37 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) return total; } -static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en) -{ - u32 ss_mask; - - ss_mask = ss_en >> (s * sseu->max_subslices); - ss_mask &= GENMASK(sseu->max_subslices - 1, 0); - - return ss_mask; -} - -static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en, +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u32 g_ss_en, u32 c_ss_en, u16 eu_en) { - int s, ss; + u32 valid_ss_mask = GENMASK(sseu->max_subslices - 1, 0); + int ss; /* g_ss_en/c_ss_en represent entire subslice mask across all slices */ GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > sizeof(g_ss_en) * BITS_PER_BYTE); - for (s = 0; s < sseu->max_slices; s++) { - if ((s_en & BIT(s)) == 0) - continue; + sseu->slice_mask |= BIT(0); - sseu->slice_mask |= BIT(s); - - /* -* XeHP introduces the concept of compute vs geometry DSS. To -* reduce variation between GENs around subslice usage, store a -* mask for both the geometry and compute enabled masks since -* userspace will need to be able to query these masks -* independently. Also compute a total enabled subslice count -* for the purposes of selecting subslices to use in a -* particular GEM context. -*/ - intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask, -get_ss_stride_mask(sseu, s, c_ss_en)); - intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask, -get_ss_stride_mask(sseu, s, g_ss_en)); - intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, -get_ss_stride_mask(sseu, s, - g_ss_en | c_ss_en)); + /* +* XeHP introduces the concept of compute vs geometry DSS. To reduce +* variation between GENs around subslice usage, store a mask for both +* the geometry and compute enabled masks since userspace will need to +* be able to query these masks independently. Also compute a total +* enabled subslice count for the purposes of selecting subslices to +* use in a particular GEM context. +*/ + intel_sseu_set_subslices(sseu, 0, sseu->compute_subslice_mask, +c_ss_en & valid_ss_mask); + intel_sseu_set_subslices(sseu, 0, sseu->geometry_subslice_mask, +g_ss_en & valid_ss_mask); + intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, +(g_ss_en | c_ss_en) & valid_ss_mask); + + for (ss = 0; ss < sseu->max_subslices; ss++) + if (intel_sseu_has_subslice(sseu, 0, ss)) + sseu_set_eus(sseu, 0, ss, eu_en); - for (ss = 0; ss < sseu->max_subslices; ss++) - if (intel_sseu_has_subslice(sseu, s, ss)) - sseu_set_eus(sseu, s, ss, eu_en); - } sseu->eu_per_subslice = hweight16(eu_en); sseu->eu_total = compute_eu_total(sseu); } @@ -196,7 +181,7 @@ static void xehp_sseu_info_init(struct intel_gt *gt) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); - gen11_compute_sseu_info(sseu, 0x1, g_dss_en, c_dss_en, eu_en); + gen11_compute_sseu_info(sseu, g_dss_en, c_dss_en, eu_en); } static void gen12_sseu_info_init(struct intel_gt *gt) @@ -216,8 +201,13 @@ static void gen12_sseu_info_init(struct intel_gt *gt) */ intel_sseu_set_info(sseu, 1, 6, 16); + /* +* Althou
[PATCH v5 5/6] drm/i915/sseu: Disassociate internal subslice mask representation from uapi
As with EU masks, it's easier to store subslice/DSS masks internally in a format that's more natural for the driver to work with, and then only covert into the u8[] uapi form when the query ioctl is invoked. Since the hardware design changed significantly with Xe_HP, we'll use a union to choose between the old "hsw-style" subslice masks or the newer xehp mask. HSW-style masks will be stored in an array of u8's, indexed by slice (there's never more than 6 subslices per slice on older platforms). For Xe_HP and beyond where slices no longer exist, we only need a single bitmask. However we already know that this mask is eventually going to grow too large for a simple u64 to hold, so we'll represent it in a manner that can be operated on by the utilities in linux/bitmap.h. v2: - Fix typo: BIT(s) -> BIT(ss) in gen9_sseu_device_status() v3: - Eliminate sseu->ss_stride and just calculate the stride while specifically handling uapi. (Tvrtko) - Use BITMAP_BITS() macro to refer to size of masks rather than passing I915_MAX_SS_FUSE_BITS directly. (Tvrtko) - Report compute/geometry DSS masks separately when dumping Xe_HP SSEU info. (Tvrtko) - Restore dropped range checks to intel_sseu_has_subslice(). (Tvrtko) v4: - Make the bitmap size macro check the size of the .xehp field rather than the containing union. (Tvrtko) - Don't add GEM_BUG_ON() intel_sseu_has_subslice()'s check for whether slice or subslice ID exceed sseu->max_[sub]slices; various loops in the driver are expected to exceed these, so we should just silently return 'false.' v5: - Move XEHP_BITMAP_BITS() to the header so that we can also replace a usage of I915_MAX_SS_FUSE_BITS in one of the inline functions. (Bala) - Change the local variable in intel_slicemask_from_xehp_dssmask() from u16 to 'unsigned long' to make it a bit more future-proof. Cc: Tvrtko Ursulin Cc: Balasubramani Vivekanandan Signed-off-by: Matt Roper Acked-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 5 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c| 4 +- drivers/gpu/drm/i915/gt/intel_gt.c | 12 +- drivers/gpu/drm/i915/gt/intel_sseu.c | 261 +++ drivers/gpu/drm/i915/gt/intel_sseu.h | 79 -- drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c | 30 +-- drivers/gpu/drm/i915/gt/intel_workarounds.c | 24 +- drivers/gpu/drm/i915/i915_getparam.c | 3 +- drivers/gpu/drm/i915/i915_query.c| 13 +- 9 files changed, 243 insertions(+), 188 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index ab4c5ab28e4d..a3bb73f5d53b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1875,6 +1875,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt, { const struct sseu_dev_info *device = >->info.sseu; struct drm_i915_private *i915 = gt->i915; + unsigned int dev_subslice_mask = intel_sseu_get_hsw_subslices(device, 0); /* No zeros in any field. */ if (!user->slice_mask || !user->subslice_mask || @@ -1901,7 +1902,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt, if (user->slice_mask & ~device->slice_mask) return -EINVAL; - if (user->subslice_mask & ~device->subslice_mask[0]) + if (user->subslice_mask & ~dev_subslice_mask) return -EINVAL; if (user->max_eus_per_subslice > device->max_eus_per_subslice) @@ -1915,7 +1916,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt, /* Part specific restrictions. */ if (GRAPHICS_VER(i915) == 11) { unsigned int hw_s = hweight8(device->slice_mask); - unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]); + unsigned int hw_ss_per_s = hweight8(dev_subslice_mask); unsigned int req_s = hweight8(context->slice_mask); unsigned int req_ss = hweight8(context->subslice_mask); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 1adbf34c3632..f0acf8518a51 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -674,8 +674,8 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt) if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) return; - ccs_mask = intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu), - ss_per_ccs); + ccs_mask = intel_slicemask_from_xehp_dssmask(info->sseu.compute_subslice_mask, +ss_per_ccs); /* * If all DSS in a quadrant are fused off, th
[PATCH v5 2/6] drm/i915/xehp: Drop GETPARAM lookups of I915_PARAM_[SUB]SLICE_MASK
Slice/subslice/EU information should be obtained via the topology queries provided by the I915_QUERY interface; let's turn off support for the old GETPARAM lookups on Xe_HP and beyond where we can't return meaningful values. The slice mask lookup is meaningless since Xe_HP doesn't support traditional slices (and we make no attempt to return the various new units like gslices, cslices, mslices, etc.) here. The subslice mask lookup is even more problematic; given the distinct masks for geometry vs compute purposes, the combined mask returned here is likely not what userspace would want to act upon anyway. The value is also limited to 32-bits by the nature of the GETPARAM ioctl which is sufficient for the initial Xe_HP platforms, but is unable to convey the larger masks that will be needed on other upcoming platforms. Finally, the value returned here becomes even less meaningful when used on multi-tile platforms where each tile will have its own masks. Signed-off-by: Matt Roper Acked-by: Tvrtko Ursulin Acked-by: Lionel Landwerlin # mesa --- drivers/gpu/drm/i915/i915_getparam.c | 8 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index c12a0adefda5..ac9767c56619 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -148,11 +148,19 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, value = intel_engines_has_context_isolation(i915); break; case I915_PARAM_SLICE_MASK: + /* Not supported from Xe_HP onward; use topology queries */ + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + return -EINVAL; + value = sseu->slice_mask; if (!value) return -ENODEV; break; case I915_PARAM_SUBSLICE_MASK: + /* Not supported from Xe_HP onward; use topology queries */ + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + return -EINVAL; + /* Only copy bits from the first slice */ memcpy(&value, sseu->subslice_mask, min(sseu->ss_stride, (u8)sizeof(value))); -- 2.35.3
[PATCH v5 4/6] drm/i915/sseu: Don't try to store EU mask internally in UAPI format
Storing the EU mask internally in the same format the I915_QUERY topology queries use makes the final copy_to_user() a bit simpler, but makes the rest of the driver's SSEU more complicated and harder to follow. Let's switch to an internal representation that's more natural: Xe_HP platforms will be a simple array of u16 masks, whereas pre-Xe_HP platforms will be a two-dimensional array, indexed by [slice][subslice]. We'll convert to the uapi format only when the query uapi is called. v2: - Drop has_common_ss_eumask. We waste some space repeating identical EU masks for every single DSS, but the code is simpler without it. (Tvrtko) v3: - Mask down EUs passed to sseu_set_eus at the callsite rather than inside the function. (Tvrtko) - Eliminate sseu->eu_stride and calculate it when needed. (Tvrtko) Cc: Tvrtko Ursulin Signed-off-by: Matt Roper Acked-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gt/intel_sseu.c | 88 ++-- drivers/gpu/drm/i915/gt/intel_sseu.h | 10 +++- drivers/gpu/drm/i915/i915_query.c| 13 ++-- 3 files changed, 73 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 7e5222ad2f98..285cfd758bdc 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -19,8 +19,6 @@ void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices); GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE); - sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); - GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE); } unsigned int @@ -78,47 +76,77 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice) return hweight32(intel_sseu_get_subslices(sseu, slice)); } -static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice, - int subslice) -{ - int slice_stride = sseu->max_subslices * sseu->eu_stride; - - return slice * slice_stride + subslice * sseu->eu_stride; -} - static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice, int subslice) { - int i, offset = sseu_eu_idx(sseu, slice, subslice); - u16 eu_mask = 0; - - for (i = 0; i < sseu->eu_stride; i++) - eu_mask |= - ((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE); - - return eu_mask; + if (sseu->has_xehp_dss) { + WARN_ON(slice > 0); + return sseu->eu_mask.xehp[subslice]; + } else { + return sseu->eu_mask.hsw[slice][subslice]; + } } static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice, u16 eu_mask) { - int i, offset = sseu_eu_idx(sseu, slice, subslice); - - for (i = 0; i < sseu->eu_stride; i++) - sseu->eu_mask[offset + i] = - (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; + GEM_WARN_ON(eu_mask && __fls(eu_mask) >= sseu->max_eus_per_subslice); + if (sseu->has_xehp_dss) { + GEM_WARN_ON(slice > 0); + sseu->eu_mask.xehp[subslice] = eu_mask; + } else { + sseu->eu_mask.hsw[slice][subslice] = eu_mask; + } } static u16 compute_eu_total(const struct sseu_dev_info *sseu) { - u16 i, total = 0; + int s, ss, total = 0; - for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++) - total += hweight8(sseu->eu_mask[i]); + for (s = 0; s < sseu->max_slices; s++) + for (ss = 0; ss < sseu->max_subslices; ss++) + if (sseu->has_xehp_dss) + total += hweight16(sseu->eu_mask.xehp[ss]); + else + total += hweight16(sseu->eu_mask.hsw[s][ss]); return total; } +/** + * intel_sseu_copy_eumask_to_user - Copy EU mask into a userspace buffer + * @to: Pointer to userspace buffer to copy to + * @sseu: SSEU structure containing EU mask to copy + * + * Copies the EU mask to a userspace buffer in the format expected by + * the query ioctl's topology queries. + * + * Returns the result of the copy_to_user() operation. + */ +int intel_sseu_copy_eumask_to_user(void __user *to, + const struct sseu_dev_info *sseu) +{ + u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE] = {}; + int eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); + int len = sseu->max_slices * sseu->max_subslices * eu_stride; + int s, ss, i; + + for (s = 0; s < sseu->max_slices; s++) { + for (ss = 0; ss < sseu->max_subslices; ss++) { + int uapi_of
[PATCH v5 1/6] drm/i915/xehp: Use separate sseu init function
Xe_HP has enough fundamental differences from previous platforms that it makes sense to use a separate SSEU init function to keep things straightforward and easy to understand. We'll also add a has_xehp_dss flag to the SSEU structure that will be used by other upcoming changes. v2: - Add has_xehp_dss flag Signed-off-by: Matt Roper Acked-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gt/intel_sseu.c | 86 drivers/gpu/drm/i915/gt/intel_sseu.h | 5 ++ 2 files changed, 54 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index fdd25691beda..b5fd479a7b85 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -169,13 +169,43 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en, sseu->eu_total = compute_eu_total(sseu); } -static void gen12_sseu_info_init(struct intel_gt *gt) +static void xehp_sseu_info_init(struct intel_gt *gt) { struct sseu_dev_info *sseu = >->info.sseu; struct intel_uncore *uncore = gt->uncore; u32 g_dss_en, c_dss_en = 0; u16 eu_en = 0; u8 eu_en_fuse; + int eu; + + /* +* The concept of slice has been removed in Xe_HP. To be compatible +* with prior generations, assume a single slice across the entire +* device. Then calculate out the DSS for each workload type within +* that software slice. +*/ + intel_sseu_set_info(sseu, 1, 32, 16); + sseu->has_xehp_dss = 1; + + g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE); + c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE); + + eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK; + + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) + if (eu_en_fuse & BIT(eu)) + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); + + gen11_compute_sseu_info(sseu, 0x1, g_dss_en, c_dss_en, eu_en); +} + +static void gen12_sseu_info_init(struct intel_gt *gt) +{ + struct sseu_dev_info *sseu = >->info.sseu; + struct intel_uncore *uncore = gt->uncore; + u32 g_dss_en; + u16 eu_en = 0; + u8 eu_en_fuse; u8 s_en; int eu; @@ -183,43 +213,23 @@ static void gen12_sseu_info_init(struct intel_gt *gt) * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. * Instead of splitting these, provide userspace with an array * of DSS to more closely represent the hardware resource. -* -* In addition, the concept of slice has been removed in Xe_HP. -* To be compatible with prior generations, assume a single slice -* across the entire device. Then calculate out the DSS for each -* workload type within that software slice. */ - if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915)) - intel_sseu_set_info(sseu, 1, 32, 16); - else - intel_sseu_set_info(sseu, 1, 6, 16); + intel_sseu_set_info(sseu, 1, 6, 16); - /* -* As mentioned above, Xe_HP does not have the concept of a slice. -* Enable one for software backwards compatibility. -*/ - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) - s_en = 0x1; - else - s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & - GEN11_GT_S_ENA_MASK; + s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & + GEN11_GT_S_ENA_MASK; g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE); - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) - c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE); /* one bit per pair of EUs */ - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) - eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK; - else - eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & - GEN11_EU_DIS_MASK); + eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & + GEN11_EU_DIS_MASK); for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); - gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en); + gen11_compute_sseu_info(sseu, s_en, g_dss_en, 0, eu_en); /* TGL only supports slice-level power gating */ sseu->has_slice_pg = 1; @@ -574,18 +584,20 @@ void intel_sseu_info_init(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; - if (IS_HASWELL(i915)) - hsw_sseu_info_init(gt); - else if
[PATCH v5 0/6] i915: SSEU handling updates
v5: - Rebase to latest drm-tip (resolve trivial conflicts) - Move XEHP_BITMAP_BITS() to the header so that we can also replace a usage of I915_MAX_SS_FUSE_BITS in one of the inline functions. (Bala) - Change the local variable in intel_slicemask_from_xehp_dssmask() from u16 to 'unsigned long' to make it a bit more future-proof. - Incorporate ack's received from Tvrtko and Lionel. Cc: Tvrtko Ursulin Cc: Balasubramani Vivekanandan Matt Roper (6): drm/i915/xehp: Use separate sseu init function drm/i915/xehp: Drop GETPARAM lookups of I915_PARAM_[SUB]SLICE_MASK drm/i915/sseu: Simplify gen11+ SSEU handling drm/i915/sseu: Don't try to store EU mask internally in UAPI format drm/i915/sseu: Disassociate internal subslice mask representation from uapi drm/i915/pvc: Add SSEU changes drivers/gpu/drm/i915/gem/i915_gem_context.c | 5 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c| 4 +- drivers/gpu/drm/i915/gt/intel_gt.c | 12 +- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_sseu.c | 450 --- drivers/gpu/drm/i915/gt/intel_sseu.h | 92 ++-- drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c | 30 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 24 +- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_getparam.c | 11 +- drivers/gpu/drm/i915/i915_pci.c | 3 +- drivers/gpu/drm/i915/i915_query.c| 26 +- drivers/gpu/drm/i915/intel_device_info.h | 1 + 13 files changed, 397 insertions(+), 264 deletions(-) -- 2.35.3
[PATCH v5 6/6] drm/i915/pvc: Add SSEU changes
PVC splits the mask of enabled DSS over two registers. It also changes the meaning of the EU fuse register such that each bit represents a single EU rather than a pair of EUs. Signed-off-by: Matt Roper Acked-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_sseu.c | 31 ++-- drivers/gpu/drm/i915/gt/intel_sseu.h | 2 +- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_pci.c | 3 ++- drivers/gpu/drm/i915/intel_device_info.h | 1 + 6 files changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 58e9b464d564..6aa1ceaa8d27 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -561,6 +561,7 @@ #define GEN11_GT_VEBOX_DISABLE_MASK (0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT) #define GEN12_GT_COMPUTE_DSS_ENABLE_MMIO(0x9144) +#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT_MMIO(0x9148) #define GEN6_UCGCTL1 _MMIO(0x9400) #define GEN6_GAMUNIT_CLOCK_GATE_DISABLE (1 << 22) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 38069330d828..5cc1896a6cb9 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -210,27 +210,44 @@ static void xehp_sseu_info_init(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; u16 eu_en = 0; u8 eu_en_fuse; + int num_compute_regs, num_geometry_regs; int eu; + if (IS_PONTEVECCHIO(gt->i915)) { + num_geometry_regs = 0; + num_compute_regs = 2; + } else { + num_geometry_regs = 1; + num_compute_regs = 1; + } + /* * The concept of slice has been removed in Xe_HP. To be compatible * with prior generations, assume a single slice across the entire * device. Then calculate out the DSS for each workload type within * that software slice. */ - intel_sseu_set_info(sseu, 1, 32, 16); + intel_sseu_set_info(sseu, 1, + 32 * max(num_geometry_regs, num_compute_regs), + 16); sseu->has_xehp_dss = 1; - xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask, 1, + xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask, + num_geometry_regs, GEN12_GT_GEOMETRY_DSS_ENABLE); - xehp_load_dss_mask(uncore, &sseu->compute_subslice_mask, 1, - GEN12_GT_COMPUTE_DSS_ENABLE); + xehp_load_dss_mask(uncore, &sseu->compute_subslice_mask, + num_compute_regs, + GEN12_GT_COMPUTE_DSS_ENABLE, + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT); eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK; - for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) - if (eu_en_fuse & BIT(eu)) - eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); + if (HAS_ONE_EU_PER_FUSE_BIT(gt->i915)) + eu_en = eu_en_fuse; + else + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) + if (eu_en_fuse & BIT(eu)) + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); xehp_compute_sseu_info(sseu, eu_en); } diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 61abcab5a76b..b86f23f5b3a1 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -33,7 +33,7 @@ struct drm_printer; * Maximum number of 32-bit registers used by hardware to express the * enabled/disabled subslices. */ -#define I915_MAX_SS_FUSE_REGS 1 +#define I915_MAX_SS_FUSE_REGS 2 #define I915_MAX_SS_FUSE_BITS (I915_MAX_SS_FUSE_REGS * 32) /* Maximum number of EUs that can exist within a subslice or DSS. */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bc1a7ff19463..c3854b8a014f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1359,6 +1359,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_3D_PIPELINE(i915) (INTEL_INFO(i915)->has_3d_pipeline) +#define HAS_ONE_EU_PER_FUSE_BIT(i915) (INTEL_INFO(i915)->has_one_eu_per_fuse_bit) + /* i915_gem.c */ void i915_gem_init_early(struct drm_i915_private *dev_priv); void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 61386d1bb07b..047a6e326031 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915
Re: [PATCH v5 5/6] drm/i915/sseu: Disassociate internal subslice mask representation from uapi
On Wed, Jun 01, 2022 at 01:48:56PM +0530, Balasubramani Vivekanandan wrote: > On 23.05.2022 13:45, Matt Roper wrote: > > As with EU masks, it's easier to store subslice/DSS masks internally in > > a format that's more natural for the driver to work with, and then only > > covert into the u8[] uapi form when the query ioctl is invoked. Since > > the hardware design changed significantly with Xe_HP, we'll use a union > > to choose between the old "hsw-style" subslice masks or the newer xehp > > mask. HSW-style masks will be stored in an array of u8's, indexed by > > slice (there's never more than 6 subslices per slice on older > > platforms). For Xe_HP and beyond where slices no longer exist, we only > > need a single bitmask. However we already know that this mask is > > eventually going to grow too large for a simple u64 to hold, so we'll > > represent it in a manner that can be operated on by the utilities in > > linux/bitmap.h. > > > > v2: > > - Fix typo: BIT(s) -> BIT(ss) in gen9_sseu_device_status() > > > > v3: > > - Eliminate sseu->ss_stride and just calculate the stride while > >specifically handling uapi. (Tvrtko) > > - Use BITMAP_BITS() macro to refer to size of masks rather than > >passing I915_MAX_SS_FUSE_BITS directly. (Tvrtko) > > - Report compute/geometry DSS masks separately when dumping Xe_HP SSEU > >info. (Tvrtko) > > - Restore dropped range checks to intel_sseu_has_subslice(). (Tvrtko) > > > > v4: > > - Make the bitmap size macro check the size of the .xehp field rather > >than the containing union. (Tvrtko) > > - Don't add GEM_BUG_ON() intel_sseu_has_subslice()'s check for whether > >slice or subslice ID exceed sseu->max_[sub]slices; various loops > >in the driver are expected to exceed these, so we should just > >silently return 'false.' > > > > Cc: Tvrtko Ursulin > > Signed-off-by: Matt Roper > > --- > > drivers/gpu/drm/i915/gem/i915_gem_context.c | 5 +- > > drivers/gpu/drm/i915/gt/intel_engine_cs.c| 4 +- > > drivers/gpu/drm/i915/gt/intel_gt.c | 12 +- > > drivers/gpu/drm/i915/gt/intel_sseu.c | 261 +++ > > drivers/gpu/drm/i915/gt/intel_sseu.h | 76 -- > > drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c | 30 +-- > > drivers/gpu/drm/i915/gt/intel_workarounds.c | 24 +- > > drivers/gpu/drm/i915/i915_getparam.c | 3 +- > > drivers/gpu/drm/i915/i915_query.c| 13 +- > > 9 files changed, 241 insertions(+), 187 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > index ab4c5ab28e4d..a3bb73f5d53b 100644 > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > @@ -1875,6 +1875,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt, > > { > > const struct sseu_dev_info *device = >->info.sseu; > > struct drm_i915_private *i915 = gt->i915; > > + unsigned int dev_subslice_mask = intel_sseu_get_hsw_subslices(device, > > 0); > > > > /* No zeros in any field. */ > > if (!user->slice_mask || !user->subslice_mask || > > @@ -1901,7 +1902,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt, > > if (user->slice_mask & ~device->slice_mask) > > return -EINVAL; > > > > - if (user->subslice_mask & ~device->subslice_mask[0]) > > + if (user->subslice_mask & ~dev_subslice_mask) > > return -EINVAL; > > > > if (user->max_eus_per_subslice > device->max_eus_per_subslice) > > @@ -1915,7 +1916,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt, > > /* Part specific restrictions. */ > > if (GRAPHICS_VER(i915) == 11) { > > unsigned int hw_s = hweight8(device->slice_mask); > > - unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]); > > + unsigned int hw_ss_per_s = hweight8(dev_subslice_mask); > > unsigned int req_s = hweight8(context->slice_mask); > > unsigned int req_ss = hweight8(context->subslice_mask); > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > index 1adbf34c3632..f0acf8518a51 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > @@ -674,8 +674,8
Re: [PATCH] drm/i915/dg2: Catch and log more unexpected values in DG1_MSTR_TILE_INTR
On Thu, May 26, 2022 at 11:18:17AM +0100, Tvrtko Ursulin wrote: > > On 25/05/2022 19:05, Matt Roper wrote: > > On Wed, May 25, 2022 at 05:03:13PM +0100, Tvrtko Ursulin wrote: > > > > > > On 24/05/2022 18:51, Matt Roper wrote: > > > > On Tue, May 24, 2022 at 10:43:39AM +0100, Tvrtko Ursulin wrote: > > > > > From: Tvrtko Ursulin > > > > > > > > > > Catch and log any garbage in the register, including no tiles marked, > > > > > or > > > > > multiple tiles marked. > > > > > > > > > > Signed-off-by: Tvrtko Ursulin > > > > > Cc: Matt Roper > > > > > --- > > > > > We caught garbage in DG1_MSTR_TILE_INTR with DG2 (actual value > > > > > 0xF9D2C008) > > > > > during glmark and more badness. So I thought lets log all possible > > > > > failure > > > > > modes from here and also use per device logging. > > > > > --- > > > > >drivers/gpu/drm/i915/i915_irq.c | 33 > > > > > ++--- > > > > >drivers/gpu/drm/i915/i915_reg.h | 1 + > > > > >2 files changed, 23 insertions(+), 11 deletions(-) > > > > > > > > > > diff --git a/drivers/gpu/drm/i915/i915_irq.c > > > > > b/drivers/gpu/drm/i915/i915_irq.c > > > > > index 73cebc6aa650..79853d3fc1ed 100644 > > > > > --- a/drivers/gpu/drm/i915/i915_irq.c > > > > > +++ b/drivers/gpu/drm/i915/i915_irq.c > > > > > @@ -2778,24 +2778,30 @@ static irqreturn_t dg1_irq_handler(int irq, > > > > > void *arg) > > > > > u32 gu_misc_iir; > > > > > if (!intel_irqs_enabled(i915)) > > > > > - return IRQ_NONE; > > > > > + goto none; > > > > > master_tile_ctl = dg1_master_intr_disable(regs); > > > > > - if (!master_tile_ctl) { > > > > > - dg1_master_intr_enable(regs); > > > > > - return IRQ_NONE; > > > > > + if (!master_tile_ctl) > > > > > + goto enable_none; > > > > > + > > > > > + if (master_tile_ctl & ~(DG1_MSTR_IRQ | DG1_MSTR_TILE_MASK)) { > > > > > + drm_warn(&i915->drm, "Garbage in master_tile_ctl: > > > > > 0x%08x!\n", > > > > > + master_tile_ctl); > > > > > > > > I know we have a bunch of them already, but shouldn't we be avoiding > > > > printk-based stuff like this inside interrupt handlers? Should we be > > > > migrating all these error messages over to trace_printk or something > > > > similar that's safer to use? > > > > > > Not sure - I kind of think some really unexpected and worrying situations > > > should be loud and on by default. Risk is then spam if not ratelimited. > > > Maybe we should instead ratelimit most errors/warnings coming for irq > > > handlers? > > > > It's not the risk of spam that's the problem, but rather that > > printk-based stuff eventually calls into the console code to flush its > > buffers. That's way more overhead than you want in an interrupt handler > > so it's bad on its own, but if you're using something slow like a serial > > console, it becomes even more of a problem. > > Is it a problem for messages which we never expect to see? Kind of. While not as catastrophic, it's the same argument for why we don't use BUG() anymore...when the impossible does manage to happen there's unnecessary collateral damage on things outside of graphics. If we're adding huge delays inside an interrupt handler (while other interrupts are disabled) that impacts the system-wide usability, not just our own driver. I'd also argue that these messages actually are semi-expected. Random bits being set shouldn't happen, but in the world of dgpu's, we do occasionally see cases where the PCI link itself goes down for reasons outside our control and then all registers read back as 0x, which will probably trigger error messages here (as well as a bunch of other places). > > > While the unexpected bits in the master tile register are strange and > > may point to a bigger problem somewhere else, they're also harmless on > > their own since we should just ignore those bits and only process the > > valid tiles. > > Yes, I was expecting that a patch belonging to mult
Re: [Intel-gfx] [CI] Revert "drm/i915: Add relocation exceptions for two other platforms"
On Fri, May 27, 2022 at 01:10:46AM -0700, Lucas De Marchi wrote: > This reverts commit bcefacd359f07dee18795dd80f834acb47fd21eb. > > IGT conversion is ready, so these execptions can already be removed. > Once CI is clean, this commit will actually be removed from > topic/core-for-CI. > > Signed-off-by: Lucas De Marchi Acked-by: Matt Roper > --- > .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 21 +-- > 1 file changed, 5 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > index b279588c0672..c326bd2b444f 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > @@ -492,27 +492,16 @@ eb_unreserve_vma(struct eb_vma *ev) > ev->flags &= ~__EXEC_OBJECT_RESERVED; > } > > -static bool platform_has_relocs_enabled(const struct i915_execbuffer *eb) > -{ > - /* > - * Relocations are disallowed starting from gen12 with Tigerlake > - * as an exception. To unblock CI, we are temporarily allowing it > - * for Rocketlake and Alderlake. > - */ > - if (GRAPHICS_VER(eb->i915) < 12 || IS_TIGERLAKE(eb->i915) || > - IS_ROCKETLAKE(eb->i915) || IS_ALDERLAKE_S(eb->i915) || > - IS_ALDERLAKE_P(eb->i915)) > - return true; > - > - return false; > -} > - > static int > eb_validate_vma(struct i915_execbuffer *eb, > struct drm_i915_gem_exec_object2 *entry, > struct i915_vma *vma) > { > - if (entry->relocation_count && !platform_has_relocs_enabled(eb)) > + /* Relocations are disallowed for all platforms after TGL-LP. This > + * also covers all platforms with local memory. > + */ > + if (entry->relocation_count && > + GRAPHICS_VER(eb->i915) >= 12 && !IS_TIGERLAKE(eb->i915)) > return -EINVAL; > > if (unlikely(entry->flags & eb->invalid_flags)) > -- > 2.36.1 > -- Matt Roper Graphics Software Engineer VTT-OSGC Platform Enablement Intel Corporation
[PATCH 2/2] drm/i915/pvc: Add initial PVC workarounds
From: Stuart Summers Bspec: 64027 Signed-off-by: Stuart Summers Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_engine_regs.h | 5 +- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 61 ++--- drivers/gpu/drm/i915/intel_pm.c | 16 +- 4 files changed, 73 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index 75a0c55c5aa5..44de10cf7837 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -196,6 +196,7 @@ #define RING_CTX_TIMESTAMP(base) _MMIO((base) + 0x3a8) /* gen8+ */ #define RING_PREDICATE_RESULT(base)_MMIO((base) + 0x3b8) #define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4) +#define RING_FORCE_TO_NONPRIV_DENY REG_BIT(30) #define RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2) #define RING_FORCE_TO_NONPRIV_ACCESS_RW (0 << 28)/* CFL+ & Gen11+ */ #define RING_FORCE_TO_NONPRIV_ACCESS_RD (1 << 28) @@ -208,7 +209,9 @@ #define RING_FORCE_TO_NONPRIV_RANGE_64 (3 << 0) #define RING_FORCE_TO_NONPRIV_RANGE_MASK (3 << 0) #define RING_FORCE_TO_NONPRIV_MASK_VALID \ - (RING_FORCE_TO_NONPRIV_RANGE_MASK | RING_FORCE_TO_NONPRIV_ACCESS_MASK) + (RING_FORCE_TO_NONPRIV_RANGE_MASK | \ +RING_FORCE_TO_NONPRIV_ACCESS_MASK | \ +RING_FORCE_TO_NONPRIV_DENY) #define RING_MAX_NONPRIV_SLOTS 12 #define RING_EXECLIST_SQ_CONTENTS(base)_MMIO((base) + 0x510) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index b4642dcc192f..58e9b464d564 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1070,8 +1070,9 @@ #define GEN10_CACHE_MODE_SS_MMIO(0xe420) #define ENABLE_EU_COUNT_FOR_TDL_FLUSHREG_BIT(10) -#define ENABLE_PREFETCH_INTO_IC REG_BIT(3) +#define DISABLE_ECC REG_BIT(5) #define FLOAT_BLEND_OPTIMIZATION_ENABLE REG_BIT(4) +#define ENABLE_PREFETCH_INTO_IC REG_BIT(3) #define EU_PERF_CNTL0 _MMIO(0xe458) #define EU_PERF_CNTL4 _MMIO(0xe45c) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 73b59ea6fd3b..a604bc7c0701 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -776,7 +776,9 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) goto done; - if (IS_DG2(i915)) + if (IS_PONTEVECCHIO(i915)) + ; /* noop; none at this time */ + else if (IS_DG2(i915)) dg2_ctx_workarounds_init(engine, wal); else if (IS_XEHPSDV(i915)) ; /* noop; none at this time */ @@ -1494,7 +1496,9 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) { struct drm_i915_private *i915 = gt->i915; - if (IS_DG2(i915)) + if (IS_PONTEVECCHIO(i915)) + ; /* none yet */ + else if (IS_DG2(i915)) dg2_gt_workarounds_init(gt, wal); else if (IS_XEHPSDV(i915)) xehpsdv_gt_workarounds_init(gt, wal); @@ -1924,6 +1928,32 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine) } } +static void blacklist_trtt(struct intel_engine_cs *engine) +{ + struct i915_wa_list *w = &engine->whitelist; + + /* +* Prevent read/write access to [0x4400, 0x4600) which covers +* the TRTT range across all engines. Note that normally userspace +* cannot access the other engines' trtt control, but for simplicity +* we cover the entire range on each engine. +*/ + whitelist_reg_ext(w, _MMIO(0x4400), + RING_FORCE_TO_NONPRIV_DENY | + RING_FORCE_TO_NONPRIV_RANGE_64); + whitelist_reg_ext(w, _MMIO(0x4500), + RING_FORCE_TO_NONPRIV_DENY | + RING_FORCE_TO_NONPRIV_RANGE_64); +} + +static void pvc_whitelist_build(struct intel_engine_cs *engine) +{ + allow_read_ctx_timestamp(engine); + + /* Wa_16014440446:pvc */ + blacklist_trtt(engine); +} + void intel_engine_init_whitelist(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; @@ -1931,7 +1961,9 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) wa_init_start(w, "whitelist", engine->name); - if (IS_DG2(i915)) + if (IS_PONTEVECCHIO(i915)) + pvc_whitelist_build(engine); + else if (IS_DG2(i915))
[PATCH 1/2] drm/i915/pvc: Extract stepping information from PCI revid
For PVC, the base die and compute tile have separate stepping values that we need to track; we'll use the existing graphics_step field to represent the compute tile stepping and add a new 'basedie_step' field. Unlike past platforms, steppings for these components are represented by specific bitfields within the PCI revision ID, and we shouldn't make assumptions about the non-CT, non-BD bits staying 0. Let's update our stepping code accordingly. Bspec: 44484 Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/i915_drv.h | 13 ++ drivers/gpu/drm/i915/intel_step.c | 70 ++- drivers/gpu/drm/i915/intel_step.h | 4 +- 3 files changed, 85 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 74b3caccd839..ec1b3484fdaf 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -943,6 +943,7 @@ static inline struct intel_gt *to_gt(struct drm_i915_private *i915) #define INTEL_DISPLAY_STEP(__i915) (RUNTIME_INFO(__i915)->step.display_step) #define INTEL_GRAPHICS_STEP(__i915) (RUNTIME_INFO(__i915)->step.graphics_step) #define INTEL_MEDIA_STEP(__i915) (RUNTIME_INFO(__i915)->step.media_step) +#define INTEL_BASEDIE_STEP(__i915) (RUNTIME_INFO(__i915)->step.basedie_step) #define IS_DISPLAY_STEP(__i915, since, until) \ (drm_WARN_ON(&(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \ @@ -956,6 +957,10 @@ static inline struct intel_gt *to_gt(struct drm_i915_private *i915) (drm_WARN_ON(&(__i915)->drm, INTEL_MEDIA_STEP(__i915) == STEP_NONE), \ INTEL_MEDIA_STEP(__i915) >= (since) && INTEL_MEDIA_STEP(__i915) < (until)) +#define IS_BASEDIE_STEP(__i915, since, until) \ + (drm_WARN_ON(&(__i915)->drm, INTEL_BASEDIE_STEP(__i915) == STEP_NONE), \ +INTEL_BASEDIE_STEP(__i915) >= (since) && INTEL_BASEDIE_STEP(__i915) < (until)) + static __always_inline unsigned int __platform_mask_index(const struct intel_runtime_info *info, enum intel_platform p) @@ -1208,6 +1213,14 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_DG2(__i915) && \ IS_DISPLAY_STEP(__i915, since, until)) +#define IS_PVC_BD_STEP(__i915, since, until) \ + (IS_PONTEVECCHIO(__i915) && \ +IS_BASEDIE_STEP(__i915, since, until)) + +#define IS_PVC_CT_STEP(__i915, since, until) \ + (IS_PONTEVECCHIO(__i915) && \ +IS_GRAPHICS_STEP(__i915, since, until)) + #define IS_LP(dev_priv)(INTEL_INFO(dev_priv)->is_lp) #define IS_GEN9_LP(dev_priv) (GRAPHICS_VER(dev_priv) == 9 && IS_LP(dev_priv)) #define IS_GEN9_BC(dev_priv) (GRAPHICS_VER(dev_priv) == 9 && !IS_LP(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index 74e8e4680028..42b3133d8387 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -135,6 +135,8 @@ static const struct intel_step_info adlp_n_revids[] = { [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_D0 }, }; +static void pvc_step_init(struct drm_i915_private *i915, int pci_revid); + void intel_step_init(struct drm_i915_private *i915) { const struct intel_step_info *revids = NULL; @@ -142,7 +144,10 @@ void intel_step_init(struct drm_i915_private *i915) int revid = INTEL_REVID(i915); struct intel_step_info step = {}; - if (IS_DG2_G10(i915)) { + if (IS_PONTEVECCHIO(i915)) { + pvc_step_init(i915, revid); + return; + } else if (IS_DG2_G10(i915)) { revids = dg2_g10_revid_step_tbl; size = ARRAY_SIZE(dg2_g10_revid_step_tbl); } else if (IS_DG2_G11(i915)) { @@ -235,6 +240,69 @@ void intel_step_init(struct drm_i915_private *i915) RUNTIME_INFO(i915)->step = step; } +#define PVC_BD_REVID GENMASK(5, 3) +#define PVC_CT_REVID GENMASK(2, 0) + +static const int pvc_bd_subids[] = { + [0x0] = STEP_A0, + [0x3] = STEP_B0, + [0x4] = STEP_B1, + [0x5] = STEP_B3, +}; + +static const int pvc_ct_subids[] = { + [0x3] = STEP_A0, + [0x5] = STEP_B0, + [0x6] = STEP_B1, + [0x7] = STEP_C0, +}; + +static int +pvc_step_lookup(struct drm_i915_private *i915, const char *type, + const int *table, int size, int subid) +{ + if (subid < size && table[subid] != STEP_NONE) + return table[subid]; + + drm_warn(&i915->drm, "Unknown %s id 0x%02x\n", type, subid); + + /* +* As on other platforms, try to use the next higher ID if we land on a +* gap in the table. +*/ + while (subid < size && table[subid] == STEP_NONE) + subid++; + + if (subid < size) { + drm_dbg(&i915->dr
[PATCH 0/2] i915: PVC steppings and initial workarounds
Matt Roper (1): drm/i915/pvc: Extract stepping information from PCI revid Stuart Summers (1): drm/i915/pvc: Add initial PVC workarounds drivers/gpu/drm/i915/gt/intel_engine_regs.h | 5 +- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 61 +++--- drivers/gpu/drm/i915/i915_drv.h | 13 drivers/gpu/drm/i915/intel_pm.c | 16 - drivers/gpu/drm/i915/intel_step.c | 70 - drivers/gpu/drm/i915/intel_step.h | 4 +- 7 files changed, 158 insertions(+), 14 deletions(-) -- 2.35.3
Re: [PATCH] drm/i915/dg2: Catch and log more unexpected values in DG1_MSTR_TILE_INTR
On Wed, May 25, 2022 at 05:03:13PM +0100, Tvrtko Ursulin wrote: > > On 24/05/2022 18:51, Matt Roper wrote: > > On Tue, May 24, 2022 at 10:43:39AM +0100, Tvrtko Ursulin wrote: > > > From: Tvrtko Ursulin > > > > > > Catch and log any garbage in the register, including no tiles marked, or > > > multiple tiles marked. > > > > > > Signed-off-by: Tvrtko Ursulin > > > Cc: Matt Roper > > > --- > > > We caught garbage in DG1_MSTR_TILE_INTR with DG2 (actual value 0xF9D2C008) > > > during glmark and more badness. So I thought lets log all possible failure > > > modes from here and also use per device logging. > > > --- > > > drivers/gpu/drm/i915/i915_irq.c | 33 ++--- > > > drivers/gpu/drm/i915/i915_reg.h | 1 + > > > 2 files changed, 23 insertions(+), 11 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/i915_irq.c > > > b/drivers/gpu/drm/i915/i915_irq.c > > > index 73cebc6aa650..79853d3fc1ed 100644 > > > --- a/drivers/gpu/drm/i915/i915_irq.c > > > +++ b/drivers/gpu/drm/i915/i915_irq.c > > > @@ -2778,24 +2778,30 @@ static irqreturn_t dg1_irq_handler(int irq, void > > > *arg) > > > u32 gu_misc_iir; > > > if (!intel_irqs_enabled(i915)) > > > - return IRQ_NONE; > > > + goto none; > > > master_tile_ctl = dg1_master_intr_disable(regs); > > > - if (!master_tile_ctl) { > > > - dg1_master_intr_enable(regs); > > > - return IRQ_NONE; > > > + if (!master_tile_ctl) > > > + goto enable_none; > > > + > > > + if (master_tile_ctl & ~(DG1_MSTR_IRQ | DG1_MSTR_TILE_MASK)) { > > > + drm_warn(&i915->drm, "Garbage in master_tile_ctl: 0x%08x!\n", > > > + master_tile_ctl); > > > > I know we have a bunch of them already, but shouldn't we be avoiding > > printk-based stuff like this inside interrupt handlers? Should we be > > migrating all these error messages over to trace_printk or something > > similar that's safer to use? > > Not sure - I kind of think some really unexpected and worrying situations > should be loud and on by default. Risk is then spam if not ratelimited. > Maybe we should instead ratelimit most errors/warnings coming for irq > handlers? It's not the risk of spam that's the problem, but rather that printk-based stuff eventually calls into the console code to flush its buffers. That's way more overhead than you want in an interrupt handler so it's bad on its own, but if you're using something slow like a serial console, it becomes even more of a problem. While the unexpected bits in the master tile register are strange and may point to a bigger problem somewhere else, they're also harmless on their own since we should just ignore those bits and only process the valid tiles. > > In this particular case at least DRM_ERROR with no device info is the odd > one out in the entire file so I'd suggest changing at least that, if the > rest of my changes is of questionable benefit. Changing DRM_ERROR -> drm_err would probably be fine in the short term since it doesn't really make us any worse off. Changing to drm_warn might not be great since we're generating a lot more lines of output and probably multiplying the already bad overhead that shouldn't be happening in an interrupt handler. But if we could update the interrupt handler to just save away the details and do the actual drm_warn later, outside the interrupt handler code, that would be okay. We should probably work toward something like that for all of our interrupt handler warning/error messages. Matt > > Regards, > > Tvrtko > > > > > > > Matt > > > > > + goto enable_none; > > > } > > > /* FIXME: we only support tile 0 for now. */ > > > - if (master_tile_ctl & DG1_MSTR_TILE(0)) { > > > - master_ctl = raw_reg_read(regs, GEN11_GFX_MSTR_IRQ); > > > - raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, master_ctl); > > > - } else { > > > - DRM_ERROR("Tile not supported: 0x%08x\n", master_tile_ctl); > > > - dg1_master_intr_enable(regs); > > > - return IRQ_NONE; > > > + if (REG_FIELD_GET(DG1_MSTR_TILE_MASK, master_tile_ctl) != > > > + DG1_MSTR_TILE(0)) { > > > + drm_warn(&i915->drm, "Unexpected irq from tile %u!\n", > > > + ilog2(REG_FIELD_GET(DG1_MSTR_TILE_MASK, &
[PATCH] drm/i915/hwconfig: Future-proof platform checks
PVC also has a hwconfig table. Actually the current expectation is that all future platforms will have hwconfig, so let's just change the condition to an IP version check so that we don't need to keep updating this for each new platform that shows up. Cc: John Harrison Cc: Radhakrishna Sripada Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c index 5aaa3948de74..4781fccc2687 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c @@ -96,7 +96,7 @@ static bool has_table(struct drm_i915_private *i915) { if (IS_ALDERLAKE_P(i915) && !IS_ADLP_N(i915)) return true; - if (IS_DG2(i915)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) return true; return false; -- 2.35.3
Re: [PATCH] drm/i915/dg2: Catch and log more unexpected values in DG1_MSTR_TILE_INTR
On Tue, May 24, 2022 at 10:43:39AM +0100, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > Catch and log any garbage in the register, including no tiles marked, or > multiple tiles marked. > > Signed-off-by: Tvrtko Ursulin > Cc: Matt Roper > --- > We caught garbage in DG1_MSTR_TILE_INTR with DG2 (actual value 0xF9D2C008) > during glmark and more badness. So I thought lets log all possible failure > modes from here and also use per device logging. > --- > drivers/gpu/drm/i915/i915_irq.c | 33 ++--- > drivers/gpu/drm/i915/i915_reg.h | 1 + > 2 files changed, 23 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index 73cebc6aa650..79853d3fc1ed 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -2778,24 +2778,30 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) > u32 gu_misc_iir; > > if (!intel_irqs_enabled(i915)) > - return IRQ_NONE; > + goto none; > > master_tile_ctl = dg1_master_intr_disable(regs); > - if (!master_tile_ctl) { > - dg1_master_intr_enable(regs); > - return IRQ_NONE; > + if (!master_tile_ctl) > + goto enable_none; > + > + if (master_tile_ctl & ~(DG1_MSTR_IRQ | DG1_MSTR_TILE_MASK)) { > + drm_warn(&i915->drm, "Garbage in master_tile_ctl: 0x%08x!\n", > + master_tile_ctl); I know we have a bunch of them already, but shouldn't we be avoiding printk-based stuff like this inside interrupt handlers? Should we be migrating all these error messages over to trace_printk or something similar that's safer to use? Matt > + goto enable_none; > } > > /* FIXME: we only support tile 0 for now. */ > - if (master_tile_ctl & DG1_MSTR_TILE(0)) { > - master_ctl = raw_reg_read(regs, GEN11_GFX_MSTR_IRQ); > - raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, master_ctl); > - } else { > - DRM_ERROR("Tile not supported: 0x%08x\n", master_tile_ctl); > - dg1_master_intr_enable(regs); > - return IRQ_NONE; > + if (REG_FIELD_GET(DG1_MSTR_TILE_MASK, master_tile_ctl) != > + DG1_MSTR_TILE(0)) { > + drm_warn(&i915->drm, "Unexpected irq from tile %u!\n", > + ilog2(REG_FIELD_GET(DG1_MSTR_TILE_MASK, > + master_tile_ctl))); > + goto enable_none; > } > > + master_ctl = raw_reg_read(regs, GEN11_GFX_MSTR_IRQ); > + raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, master_ctl); > + > gen11_gt_irq_handler(gt, master_ctl); > > if (master_ctl & GEN11_DISPLAY_IRQ) > @@ -2810,6 +2816,11 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) > pmu_irq_stats(i915, IRQ_HANDLED); > > return IRQ_HANDLED; > + > +enable_none: > + dg1_master_intr_enable(regs); > +none: > + return IRQ_NONE; > } > > /* Called from drm generic code, passed 'crtc' which > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index d8579ab9384c..eefa301c6430 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -5774,6 +5774,7 @@ > > #define DG1_MSTR_TILE_INTR _MMIO(0x190008) > #define DG1_MSTR_IRQ REG_BIT(31) > +#define DG1_MSTR_TILE_MASK REG_GENMASK(3, 0) > #define DG1_MSTR_TILE(t) REG_BIT(t) > > #define GEN11_DISPLAY_INT_CTL_MMIO(0x44200) > -- > 2.32.0 > -- Matt Roper Graphics Software Engineer VTT-OSGC Platform Enablement Intel Corporation
Re: [PATCH] drm/i915/hwconfig: Report no hwconfig support on ADL-N
On Mon, May 23, 2022 at 01:21:16PM +0530, Balasubramani Vivekanandan wrote: > ADL-N being a subplatform of ADL-P, it lacks support for hwconfig > table. Explicit check added to skip ADL-N. > > Signed-off-by: Balasubramani Vivekanandan > Reviewed-by: Matt Roper > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c > index 79c66b6b51a3..5aaa3948de74 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c > @@ -94,7 +94,7 @@ static int guc_hwconfig_fill_buffer(struct intel_guc *guc, > struct intel_hwconfig > > static bool has_table(struct drm_i915_private *i915) > { > - if (IS_ALDERLAKE_P(i915)) > + if (IS_ALDERLAKE_P(i915) && !IS_ADLP_N(i915)) > return true; > if (IS_DG2(i915)) > return true; > -- > 2.25.1 > -- Matt Roper Graphics Software Engineer VTT-OSGC Platform Enablement Intel Corporation
[PATCH v5 5/6] drm/i915/sseu: Disassociate internal subslice mask representation from uapi
As with EU masks, it's easier to store subslice/DSS masks internally in a format that's more natural for the driver to work with, and then only covert into the u8[] uapi form when the query ioctl is invoked. Since the hardware design changed significantly with Xe_HP, we'll use a union to choose between the old "hsw-style" subslice masks or the newer xehp mask. HSW-style masks will be stored in an array of u8's, indexed by slice (there's never more than 6 subslices per slice on older platforms). For Xe_HP and beyond where slices no longer exist, we only need a single bitmask. However we already know that this mask is eventually going to grow too large for a simple u64 to hold, so we'll represent it in a manner that can be operated on by the utilities in linux/bitmap.h. v2: - Fix typo: BIT(s) -> BIT(ss) in gen9_sseu_device_status() v3: - Eliminate sseu->ss_stride and just calculate the stride while specifically handling uapi. (Tvrtko) - Use BITMAP_BITS() macro to refer to size of masks rather than passing I915_MAX_SS_FUSE_BITS directly. (Tvrtko) - Report compute/geometry DSS masks separately when dumping Xe_HP SSEU info. (Tvrtko) - Restore dropped range checks to intel_sseu_has_subslice(). (Tvrtko) v4: - Make the bitmap size macro check the size of the .xehp field rather than the containing union. (Tvrtko) - Don't add GEM_BUG_ON() intel_sseu_has_subslice()'s check for whether slice or subslice ID exceed sseu->max_[sub]slices; various loops in the driver are expected to exceed these, so we should just silently return 'false.' Cc: Tvrtko Ursulin Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 5 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c| 4 +- drivers/gpu/drm/i915/gt/intel_gt.c | 12 +- drivers/gpu/drm/i915/gt/intel_sseu.c | 261 +++ drivers/gpu/drm/i915/gt/intel_sseu.h | 76 -- drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c | 30 +-- drivers/gpu/drm/i915/gt/intel_workarounds.c | 24 +- drivers/gpu/drm/i915/i915_getparam.c | 3 +- drivers/gpu/drm/i915/i915_query.c| 13 +- 9 files changed, 241 insertions(+), 187 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index ab4c5ab28e4d..a3bb73f5d53b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1875,6 +1875,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt, { const struct sseu_dev_info *device = >->info.sseu; struct drm_i915_private *i915 = gt->i915; + unsigned int dev_subslice_mask = intel_sseu_get_hsw_subslices(device, 0); /* No zeros in any field. */ if (!user->slice_mask || !user->subslice_mask || @@ -1901,7 +1902,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt, if (user->slice_mask & ~device->slice_mask) return -EINVAL; - if (user->subslice_mask & ~device->subslice_mask[0]) + if (user->subslice_mask & ~dev_subslice_mask) return -EINVAL; if (user->max_eus_per_subslice > device->max_eus_per_subslice) @@ -1915,7 +1916,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt, /* Part specific restrictions. */ if (GRAPHICS_VER(i915) == 11) { unsigned int hw_s = hweight8(device->slice_mask); - unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]); + unsigned int hw_ss_per_s = hweight8(dev_subslice_mask); unsigned int req_s = hweight8(context->slice_mask); unsigned int req_ss = hweight8(context->subslice_mask); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 1adbf34c3632..f0acf8518a51 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -674,8 +674,8 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt) if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) return; - ccs_mask = intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu), - ss_per_ccs); + ccs_mask = intel_slicemask_from_xehp_dssmask(info->sseu.compute_subslice_mask, +ss_per_ccs); /* * If all DSS in a quadrant are fused off, the corresponding CCS * engine is not available for use. diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 034182f85501..2921f510642f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -133,13 +133,6 @@ static const struct intel_mmio_range dg2_lncf_steering_table
[PATCH v4 1/6] drm/i915/xehp: Use separate sseu init function
Xe_HP has enough fundamental differences from previous platforms that it makes sense to use a separate SSEU init function to keep things straightforward and easy to understand. We'll also add a has_xehp_dss flag to the SSEU structure that will be used by other upcoming changes. v2: - Add has_xehp_dss flag Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_sseu.c | 86 drivers/gpu/drm/i915/gt/intel_sseu.h | 5 ++ 2 files changed, 54 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index fdd25691beda..b5fd479a7b85 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -169,13 +169,43 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en, sseu->eu_total = compute_eu_total(sseu); } -static void gen12_sseu_info_init(struct intel_gt *gt) +static void xehp_sseu_info_init(struct intel_gt *gt) { struct sseu_dev_info *sseu = >->info.sseu; struct intel_uncore *uncore = gt->uncore; u32 g_dss_en, c_dss_en = 0; u16 eu_en = 0; u8 eu_en_fuse; + int eu; + + /* +* The concept of slice has been removed in Xe_HP. To be compatible +* with prior generations, assume a single slice across the entire +* device. Then calculate out the DSS for each workload type within +* that software slice. +*/ + intel_sseu_set_info(sseu, 1, 32, 16); + sseu->has_xehp_dss = 1; + + g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE); + c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE); + + eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK; + + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) + if (eu_en_fuse & BIT(eu)) + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); + + gen11_compute_sseu_info(sseu, 0x1, g_dss_en, c_dss_en, eu_en); +} + +static void gen12_sseu_info_init(struct intel_gt *gt) +{ + struct sseu_dev_info *sseu = >->info.sseu; + struct intel_uncore *uncore = gt->uncore; + u32 g_dss_en; + u16 eu_en = 0; + u8 eu_en_fuse; u8 s_en; int eu; @@ -183,43 +213,23 @@ static void gen12_sseu_info_init(struct intel_gt *gt) * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. * Instead of splitting these, provide userspace with an array * of DSS to more closely represent the hardware resource. -* -* In addition, the concept of slice has been removed in Xe_HP. -* To be compatible with prior generations, assume a single slice -* across the entire device. Then calculate out the DSS for each -* workload type within that software slice. */ - if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915)) - intel_sseu_set_info(sseu, 1, 32, 16); - else - intel_sseu_set_info(sseu, 1, 6, 16); + intel_sseu_set_info(sseu, 1, 6, 16); - /* -* As mentioned above, Xe_HP does not have the concept of a slice. -* Enable one for software backwards compatibility. -*/ - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) - s_en = 0x1; - else - s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & - GEN11_GT_S_ENA_MASK; + s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & + GEN11_GT_S_ENA_MASK; g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE); - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) - c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE); /* one bit per pair of EUs */ - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) - eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK; - else - eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & - GEN11_EU_DIS_MASK); + eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & + GEN11_EU_DIS_MASK); for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); - gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en); + gen11_compute_sseu_info(sseu, s_en, g_dss_en, 0, eu_en); /* TGL only supports slice-level power gating */ sseu->has_slice_pg = 1; @@ -574,18 +584,20 @@ void intel_sseu_info_init(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; - if (IS_HASWELL(i915)) - hsw_sseu_info_init(gt); - else if
[PATCH v4 4/6] drm/i915/sseu: Don't try to store EU mask internally in UAPI format
Storing the EU mask internally in the same format the I915_QUERY topology queries use makes the final copy_to_user() a bit simpler, but makes the rest of the driver's SSEU more complicated and harder to follow. Let's switch to an internal representation that's more natural: Xe_HP platforms will be a simple array of u16 masks, whereas pre-Xe_HP platforms will be a two-dimensional array, indexed by [slice][subslice]. We'll convert to the uapi format only when the query uapi is called. v2: - Drop has_common_ss_eumask. We waste some space repeating identical EU masks for every single DSS, but the code is simpler without it. (Tvrtko) v3: - Mask down EUs passed to sseu_set_eus at the callsite rather than inside the function. (Tvrtko) - Eliminate sseu->eu_stride and calculate it when needed. (Tvrtko) Cc: Tvrtko Ursulin Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_sseu.c | 88 ++-- drivers/gpu/drm/i915/gt/intel_sseu.h | 10 +++- drivers/gpu/drm/i915/i915_query.c| 13 ++-- 3 files changed, 73 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 7e5222ad2f98..285cfd758bdc 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -19,8 +19,6 @@ void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices); GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE); - sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); - GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE); } unsigned int @@ -78,47 +76,77 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice) return hweight32(intel_sseu_get_subslices(sseu, slice)); } -static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice, - int subslice) -{ - int slice_stride = sseu->max_subslices * sseu->eu_stride; - - return slice * slice_stride + subslice * sseu->eu_stride; -} - static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice, int subslice) { - int i, offset = sseu_eu_idx(sseu, slice, subslice); - u16 eu_mask = 0; - - for (i = 0; i < sseu->eu_stride; i++) - eu_mask |= - ((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE); - - return eu_mask; + if (sseu->has_xehp_dss) { + WARN_ON(slice > 0); + return sseu->eu_mask.xehp[subslice]; + } else { + return sseu->eu_mask.hsw[slice][subslice]; + } } static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice, u16 eu_mask) { - int i, offset = sseu_eu_idx(sseu, slice, subslice); - - for (i = 0; i < sseu->eu_stride; i++) - sseu->eu_mask[offset + i] = - (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; + GEM_WARN_ON(eu_mask && __fls(eu_mask) >= sseu->max_eus_per_subslice); + if (sseu->has_xehp_dss) { + GEM_WARN_ON(slice > 0); + sseu->eu_mask.xehp[subslice] = eu_mask; + } else { + sseu->eu_mask.hsw[slice][subslice] = eu_mask; + } } static u16 compute_eu_total(const struct sseu_dev_info *sseu) { - u16 i, total = 0; + int s, ss, total = 0; - for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++) - total += hweight8(sseu->eu_mask[i]); + for (s = 0; s < sseu->max_slices; s++) + for (ss = 0; ss < sseu->max_subslices; ss++) + if (sseu->has_xehp_dss) + total += hweight16(sseu->eu_mask.xehp[ss]); + else + total += hweight16(sseu->eu_mask.hsw[s][ss]); return total; } +/** + * intel_sseu_copy_eumask_to_user - Copy EU mask into a userspace buffer + * @to: Pointer to userspace buffer to copy to + * @sseu: SSEU structure containing EU mask to copy + * + * Copies the EU mask to a userspace buffer in the format expected by + * the query ioctl's topology queries. + * + * Returns the result of the copy_to_user() operation. + */ +int intel_sseu_copy_eumask_to_user(void __user *to, + const struct sseu_dev_info *sseu) +{ + u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE] = {}; + int eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); + int len = sseu->max_slices * sseu->max_subslices * eu_stride; + int s, ss, i; + + for (s = 0; s < sseu->max_slices; s++) { + for (ss = 0; ss < sseu->max_subslices; ss++) { + int uapi_offset = +
[PATCH v4 0/6] i915: SSEU handling updates
This series reworks i915's internal handling of slice/subslice/EU (SSEU) data to represent platforms like Xe_HP in a more natural manner and to prepare for future platforms where the masks will need to grow in size. One key idea of this series is that although we have a fixed ABI to convey SSEU data to userspace (i.e., multiple u8[] arrays with data stored at different strides), we don't need to use this cumbersome format for the driver's own internal storage. As long as we can convert into the uapi form properly when responding to the I915_QUERY ioctl, it's preferable to use an internal storage format that's easier for the driver to work with. Another key point here is that we're reaching the point where subslice (DSS) masks will soon not fit within simple u32/u64 integer values. Xe_HP SDV and DG2 platforms today have subslice (DSS) masks that are 32 bits, which maxes out the current storage of a u32. With PVC the masks are represented by a pair of 32-bit registers, requiring a bump up to at least 64-bits of storage internally. We could switch to u64 for that in the short term, but since we already know that upcoming architectures intend to provide DSS fuse bits via three or more registers it's best to switch to a representation that's more future-proof but still easy to work with in the driver code. To accomodate this, we start storing our subslice mask for Xe_HP and beyond in a new typedef that can be processed by the linux/bitmap.h operations. Finally, since no userspace for Xe_HP or beyond is using the legacy I915_GETPARAM ioctl lookups for I915_PARAM_SLICE_MASK and I915_PARAM_SUBSLICE_MASK (since they've migrated to the more flexible I915_QUERY ioctl that can return more than a simple u32 value), we take the opportunity to officially drop support for those GETPARAM lookups on modern platforms. Maintaining support for these GETPARAM lookups don't make sense for a number of reasons: * Traditional slices no longer exist, and newer ideas like gslices, cslices, mslices, etc. aren't something userspace needs to query since it can be inferred from other information. * The GETPARAM ioctl doesn't have a way to distinguish between geometry subslice masks and compute subslice masks, which are distinct on Xe_HP and beyond. * The I915_GETPARAM ioctl is limited to returning a 32-bit value, so when subslice masks begin to exceed 32-bits (on PVC), it simply can't return the entire mask. * The GETPARAM ioctl doesn't have a way to give sensible information for multi-tile devices. v2: - Switch to union of hsw/xehp formats to keep the representation in a natural format for different types of hardware. - Avoid accessing internals of intel_sseu_ss_mask_t directly outside of intel_sseu.[ch]. - Include PVC SSEU which needs the larger SS mask storage enabled by this series. v3: - Correct a BIT(s) typo that should have been BIT(ss), causing incorrect handling on gen9 platforms. v4: - Eliminate sseu->{ss,eu}_stride fields and just calculate the proper value in the UAPI code that needs them. - Handle unwanted ~u8 sign extension at the callsite instead of inside sseu_set_eus. - Use BITMAP_BITS() macro rather than passing I915_MAX_SS_FUSE_BITS around directly to bitmap operations. - Improved debugfs / dmesg reporting for Xe_HP dumps - Various assertion check improvements. Cc: Tvrtko Ursulin Matt Roper (6): drm/i915/xehp: Use separate sseu init function drm/i915/xehp: Drop GETPARAM lookups of I915_PARAM_[SUB]SLICE_MASK drm/i915/sseu: Simplify gen11+ SSEU handling drm/i915/sseu: Don't try to store EU mask internally in UAPI format drm/i915/sseu: Disassociate internal subslice mask representation from uapi drm/i915/pvc: Add SSEU changes drivers/gpu/drm/i915/gem/i915_gem_context.c | 5 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c| 4 +- drivers/gpu/drm/i915/gt/intel_gt.c | 12 +- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_sseu.c | 450 --- drivers/gpu/drm/i915/gt/intel_sseu.h | 94 ++-- drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c | 30 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 24 +- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_getparam.c | 11 +- drivers/gpu/drm/i915/i915_pci.c | 3 +- drivers/gpu/drm/i915/i915_query.c| 26 +- drivers/gpu/drm/i915/intel_device_info.h | 1 + 13 files changed, 398 insertions(+), 265 deletions(-) -- 2.35.3
[PATCH v4 6/6] drm/i915/pvc: Add SSEU changes
PVC splits the mask of enabled DSS over two registers. It also changes the meaning of the EU fuse register such that each bit represents a single EU rather than a pair of EUs. Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_sseu.c | 31 ++-- drivers/gpu/drm/i915/gt/intel_sseu.h | 2 +- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_pci.c | 3 ++- drivers/gpu/drm/i915/intel_device_info.h | 1 + 6 files changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 7246eb870c7e..3f2b46c884f9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -561,6 +561,7 @@ #define GEN11_GT_VEBOX_DISABLE_MASK (0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT) #define GEN12_GT_COMPUTE_DSS_ENABLE_MMIO(0x9144) +#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT_MMIO(0x9148) #define GEN6_UCGCTL1 _MMIO(0x9400) #define GEN6_GAMUNIT_CLOCK_GATE_DISABLE (1 << 22) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 6fbc2ac507b7..82e6f324b2ef 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -212,27 +212,44 @@ static void xehp_sseu_info_init(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; u16 eu_en = 0; u8 eu_en_fuse; + int num_compute_regs, num_geometry_regs; int eu; + if (IS_PONTEVECCHIO(gt->i915)) { + num_geometry_regs = 0; + num_compute_regs = 2; + } else { + num_geometry_regs = 1; + num_compute_regs = 1; + } + /* * The concept of slice has been removed in Xe_HP. To be compatible * with prior generations, assume a single slice across the entire * device. Then calculate out the DSS for each workload type within * that software slice. */ - intel_sseu_set_info(sseu, 1, 32, 16); + intel_sseu_set_info(sseu, 1, + 32 * max(num_geometry_regs, num_compute_regs), + 16); sseu->has_xehp_dss = 1; - xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask, 1, + xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask, + num_geometry_regs, GEN12_GT_GEOMETRY_DSS_ENABLE); - xehp_load_dss_mask(uncore, &sseu->compute_subslice_mask, 1, - GEN12_GT_COMPUTE_DSS_ENABLE); + xehp_load_dss_mask(uncore, &sseu->compute_subslice_mask, + num_compute_regs, + GEN12_GT_COMPUTE_DSS_ENABLE, + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT); eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK; - for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) - if (eu_en_fuse & BIT(eu)) - eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); + if (HAS_ONE_EU_PER_FUSE_BIT(gt->i915)) + eu_en = eu_en_fuse; + else + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) + if (eu_en_fuse & BIT(eu)) + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); xehp_compute_sseu_info(sseu, eu_en); } diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 0d3def55e770..79732c1af6c4 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -33,7 +33,7 @@ struct drm_printer; * Maximum number of 32-bit registers used by hardware to express the * enabled/disabled subslices. */ -#define I915_MAX_SS_FUSE_REGS 1 +#define I915_MAX_SS_FUSE_REGS 2 #define I915_MAX_SS_FUSE_BITS (I915_MAX_SS_FUSE_REGS * 32) /* Maximum number of EUs that can exist within a subslice or DSS. */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c5fc402d9c50..1f8422e9511b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1405,6 +1405,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_MBUS_JOINING(i915) (IS_ALDERLAKE_P(i915)) +#define HAS_ONE_EU_PER_FUSE_BIT(i915) (INTEL_INFO(i915)->has_one_eu_per_fuse_bit) + /* i915_gem.c */ void i915_gem_init_early(struct drm_i915_private *dev_priv); void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 5ad9884874c2..9cee3c69edde 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1079,7 +1079,8 @@ static const struct int
[PATCH v4 3/6] drm/i915/sseu: Simplify gen11+ SSEU handling
Although gen11 and gen12 architectures supported the concept of multiple slices, in practice all the platforms that were actually designed only had a single slice (i.e., note the parameters to 'intel_sseu_set_info' that we pass for each platform). We can simplify the code slightly by dropping the multi-slice logic from gen11+ platforms. v2: - Promote drm_dbg to drm_WARN_ON if the slice fuse register reports unexpected fusing. (Tvrtko) Cc: Tvrtko Ursulin Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/gt/intel_sseu.c | 76 +--- 1 file changed, 36 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index b5fd479a7b85..7e5222ad2f98 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -119,52 +119,37 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) return total; } -static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en) -{ - u32 ss_mask; - - ss_mask = ss_en >> (s * sseu->max_subslices); - ss_mask &= GENMASK(sseu->max_subslices - 1, 0); - - return ss_mask; -} - -static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en, +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u32 g_ss_en, u32 c_ss_en, u16 eu_en) { - int s, ss; + u32 valid_ss_mask = GENMASK(sseu->max_subslices - 1, 0); + int ss; /* g_ss_en/c_ss_en represent entire subslice mask across all slices */ GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > sizeof(g_ss_en) * BITS_PER_BYTE); - for (s = 0; s < sseu->max_slices; s++) { - if ((s_en & BIT(s)) == 0) - continue; + sseu->slice_mask |= BIT(0); - sseu->slice_mask |= BIT(s); - - /* -* XeHP introduces the concept of compute vs geometry DSS. To -* reduce variation between GENs around subslice usage, store a -* mask for both the geometry and compute enabled masks since -* userspace will need to be able to query these masks -* independently. Also compute a total enabled subslice count -* for the purposes of selecting subslices to use in a -* particular GEM context. -*/ - intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask, -get_ss_stride_mask(sseu, s, c_ss_en)); - intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask, -get_ss_stride_mask(sseu, s, g_ss_en)); - intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, -get_ss_stride_mask(sseu, s, - g_ss_en | c_ss_en)); + /* +* XeHP introduces the concept of compute vs geometry DSS. To reduce +* variation between GENs around subslice usage, store a mask for both +* the geometry and compute enabled masks since userspace will need to +* be able to query these masks independently. Also compute a total +* enabled subslice count for the purposes of selecting subslices to +* use in a particular GEM context. +*/ + intel_sseu_set_subslices(sseu, 0, sseu->compute_subslice_mask, +c_ss_en & valid_ss_mask); + intel_sseu_set_subslices(sseu, 0, sseu->geometry_subslice_mask, +g_ss_en & valid_ss_mask); + intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, +(g_ss_en | c_ss_en) & valid_ss_mask); + + for (ss = 0; ss < sseu->max_subslices; ss++) + if (intel_sseu_has_subslice(sseu, 0, ss)) + sseu_set_eus(sseu, 0, ss, eu_en); - for (ss = 0; ss < sseu->max_subslices; ss++) - if (intel_sseu_has_subslice(sseu, s, ss)) - sseu_set_eus(sseu, s, ss, eu_en); - } sseu->eu_per_subslice = hweight16(eu_en); sseu->eu_total = compute_eu_total(sseu); } @@ -196,7 +181,7 @@ static void xehp_sseu_info_init(struct intel_gt *gt) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); - gen11_compute_sseu_info(sseu, 0x1, g_dss_en, c_dss_en, eu_en); + gen11_compute_sseu_info(sseu, g_dss_en, c_dss_en, eu_en); } static void gen12_sseu_info_init(struct intel_gt *gt) @@ -216,8 +201,13 @@ static void gen12_sseu_info_init(struct intel_gt *gt) */ intel_sseu_set_info(sseu, 1, 6, 16); + /* +* Although gen12 architecture supp