Re: [Intel-gfx] [PATCH v5] drm/i915: Enable WaProgramMgsrForCorrectSliceSpecificMmioReads for Gen9

2021-03-23 Thread Rodrigo Vivi
On Thu, Mar 11, 2021 at 01:11:48PM +0200, Joonas Lahtinen wrote:
> Quoting Tvrtko Ursulin (2021-03-11 12:45:54)
> > 
> > On 05/03/2021 12:58, Cooper Chiou wrote:
> > > WaProgramMgsrForCorrectSliceSpecificMmioReads applies for Gen9 to
> > > resolve VP8 hardware encoding system hang up on GT1 sku for
> > > ChromiumOS projects
> > > 
> > > Slice specific MMIO read inaccurate so MGSR needs to be programmed
> > > appropriately to get correct reads from these slicet-related MMIOs.
> > > 
> > > It dictates that before any MMIO read into Slice/Subslice specific
> > > registers, MCR packet control register(0xFDC) needs to be programmed
> > > to point to any enabled slice/subslice pair, especially GT1 fused sku
> > > since this issue can be reproduced on VP8 hardware encoding via ffmpeg
> > > on ChromiumOS devices.
> > > When exit PC7, MGSR will reset so that we have to skip fused subslice ID.
> > > 
> > > Reference: HSD#1508045018,1405586840, BSID#0575
> > > 
> > > Cc: Ville Syrjälä 
> > > Cc: Rodrigo Vivi 
> > > Cc: Jani Nikula 
> > > Cc: Chris Wilson 
> > > Cc: Tvrtko Ursulin 
> > > Cc: William Tseng 
> > > Cc: Lee Shawn C 
> > > 
> > > Signed-off-by: Cooper Chiou 
> > > ---
> > >   drivers/gpu/drm/i915/gt/intel_workarounds.c | 37 +
> > >   1 file changed, 37 insertions(+)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> > > b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > > index 3b4a7da60f0b..eb2a587b06b8 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > > @@ -878,9 +878,46 @@ hsw_gt_workarounds_init(struct drm_i915_private 
> > > *i915, struct i915_wa_list *wal)
> > >   wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
> > >   }
> > >   
> > > +static void
> > > +gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
> > > +{
> > > + const struct sseu_dev_info *sseu = >gt.info.sseu;
> > > + unsigned int slice, subslice;
> > > + u32 mcr, mcr_mask;
> > > +
> > > + GEM_BUG_ON(INTEL_GEN(i915) < 9);
> > > +
> > > + /*
> > > +  * WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml
> > > +  * Before any MMIO read into slice/subslice specific registers, MCR
> > > +  * packet control register needs to be programmed to point to any
> > > +  * enabled s/ss pair. Otherwise, incorrect values will be returned.
> > > +  * This means each subsequent MMIO read will be forwarded to an
> > > +  * specific s/ss combination, but this is OK since these registers
> > > +  * are consistent across s/ss in almost all cases. In the rare
> > > +  * occasions, such as INSTDONE, where this value is dependent
> > > +  * on s/ss combo, the read should be done with read_subslice_reg.
> > > +  */
> > > + slice = ffs(sseu->slice_mask) - 1;
> > > + GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
> > > + subslice = ffs(intel_sseu_get_subslices(sseu, slice));
> > > + GEM_BUG_ON(!subslice);
> > > + subslice--;
> > > +
> > > + mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
> > > + mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
> > > +
> > > + drm_dbg(>drm, "MCR slice:%d/subslice:%d = %x\n", slice, 
> > > subslice, mcr);
> > > +
> > > + wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
> > > +}
> > > +
> > >   static void
> > >   gen9_gt_workarounds_init(struct drm_i915_private *i915, struct 
> > > i915_wa_list *wal)
> > >   {
> > > + /* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
> > > + gen9_wa_init_mcr(i915, wal);
> > > +
> > >   /* WaDisableKillLogic:bxt,skl,kbl */
> > >   if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
> > >   wa_write_or(wal,
> > > 
> > 
> > 1)
> > Patch mechanics are fine.
> > 
> > 2)
> > We have confirmation from the HW folks this actually needs doing on Gen9 
> > even if docs fail to mention it.
> > 
> > So even if the immediate fix is for VP8 encode, which is not fully open, 
> > this is the right thing to do in general and would have been done if the 
> > WA was properly documented from the start.
> > 
> > 3)
> > 3d performance regression cannot be reproduced on the machine where it 
> > was originally reported. (Or on other machines.)
> > 
> > So:
> > 
> > Reviewed-by: Tvrtko Ursulin 
> > 
> > + Joonas for ack to merge due the second point above.
> 
> If this does not effect any fully Open Source userspace, this needs to be
> carried downstream in the Chrome OS kernel tree.
> 
> Gen9 has been out there without this W/A for a long time. There is always
> potential for changing existing deployments' behaviour to the worse when
> adding W/As. If it had been implemented from the very beginning, then it
> would have undergone all the testing not to interfere with existing
> workloads. Merging it after the fact makes the risk much higher.
> 
> It's an unnecessary risk of regressions to 

Re: [Intel-gfx] [PATCH v5] drm/i915: Enable WaProgramMgsrForCorrectSliceSpecificMmioReads for Gen9

2021-03-11 Thread Joonas Lahtinen
Quoting Tvrtko Ursulin (2021-03-11 12:45:54)
> 
> On 05/03/2021 12:58, Cooper Chiou wrote:
> > WaProgramMgsrForCorrectSliceSpecificMmioReads applies for Gen9 to
> > resolve VP8 hardware encoding system hang up on GT1 sku for
> > ChromiumOS projects
> > 
> > Slice specific MMIO read inaccurate so MGSR needs to be programmed
> > appropriately to get correct reads from these slicet-related MMIOs.
> > 
> > It dictates that before any MMIO read into Slice/Subslice specific
> > registers, MCR packet control register(0xFDC) needs to be programmed
> > to point to any enabled slice/subslice pair, especially GT1 fused sku
> > since this issue can be reproduced on VP8 hardware encoding via ffmpeg
> > on ChromiumOS devices.
> > When exit PC7, MGSR will reset so that we have to skip fused subslice ID.
> > 
> > Reference: HSD#1508045018,1405586840, BSID#0575
> > 
> > Cc: Ville Syrjälä 
> > Cc: Rodrigo Vivi 
> > Cc: Jani Nikula 
> > Cc: Chris Wilson 
> > Cc: Tvrtko Ursulin 
> > Cc: William Tseng 
> > Cc: Lee Shawn C 
> > 
> > Signed-off-by: Cooper Chiou 
> > ---
> >   drivers/gpu/drm/i915/gt/intel_workarounds.c | 37 +
> >   1 file changed, 37 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> > b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > index 3b4a7da60f0b..eb2a587b06b8 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > @@ -878,9 +878,46 @@ hsw_gt_workarounds_init(struct drm_i915_private *i915, 
> > struct i915_wa_list *wal)
> >   wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
> >   }
> >   
> > +static void
> > +gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
> > +{
> > + const struct sseu_dev_info *sseu = >gt.info.sseu;
> > + unsigned int slice, subslice;
> > + u32 mcr, mcr_mask;
> > +
> > + GEM_BUG_ON(INTEL_GEN(i915) < 9);
> > +
> > + /*
> > +  * WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml
> > +  * Before any MMIO read into slice/subslice specific registers, MCR
> > +  * packet control register needs to be programmed to point to any
> > +  * enabled s/ss pair. Otherwise, incorrect values will be returned.
> > +  * This means each subsequent MMIO read will be forwarded to an
> > +  * specific s/ss combination, but this is OK since these registers
> > +  * are consistent across s/ss in almost all cases. In the rare
> > +  * occasions, such as INSTDONE, where this value is dependent
> > +  * on s/ss combo, the read should be done with read_subslice_reg.
> > +  */
> > + slice = ffs(sseu->slice_mask) - 1;
> > + GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
> > + subslice = ffs(intel_sseu_get_subslices(sseu, slice));
> > + GEM_BUG_ON(!subslice);
> > + subslice--;
> > +
> > + mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
> > + mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
> > +
> > + drm_dbg(>drm, "MCR slice:%d/subslice:%d = %x\n", slice, 
> > subslice, mcr);
> > +
> > + wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
> > +}
> > +
> >   static void
> >   gen9_gt_workarounds_init(struct drm_i915_private *i915, struct 
> > i915_wa_list *wal)
> >   {
> > + /* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
> > + gen9_wa_init_mcr(i915, wal);
> > +
> >   /* WaDisableKillLogic:bxt,skl,kbl */
> >   if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
> >   wa_write_or(wal,
> > 
> 
> 1)
> Patch mechanics are fine.
> 
> 2)
> We have confirmation from the HW folks this actually needs doing on Gen9 
> even if docs fail to mention it.
> 
> So even if the immediate fix is for VP8 encode, which is not fully open, 
> this is the right thing to do in general and would have been done if the 
> WA was properly documented from the start.
> 
> 3)
> 3d performance regression cannot be reproduced on the machine where it 
> was originally reported. (Or on other machines.)
> 
> So:
> 
> Reviewed-by: Tvrtko Ursulin 
> 
> + Joonas for ack to merge due the second point above.

If this does not effect any fully Open Source userspace, this needs to be
carried downstream in the Chrome OS kernel tree.

Gen9 has been out there without this W/A for a long time. There is always
potential for changing existing deployments' behaviour to the worse when
adding W/As. If it had been implemented from the very beginning, then it
would have undergone all the testing not to interfere with existing
workloads. Merging it after the fact makes the risk much higher.

It's an unnecessary risk of regressions to merge a W/A with potential
for regressions that gains nothing for the upstream driver perspective.

So in short, unless the VP8 encoding can be Open Sourced or lack of the W/A
impacts otherwise the fully open stack, there is no path forward to merge
this due to the DRM subsystem userspace requirement rules:

Re: [Intel-gfx] [PATCH v5] drm/i915: Enable WaProgramMgsrForCorrectSliceSpecificMmioReads for Gen9

2021-03-11 Thread Tvrtko Ursulin


On 05/03/2021 12:58, Cooper Chiou wrote:

WaProgramMgsrForCorrectSliceSpecificMmioReads applies for Gen9 to
resolve VP8 hardware encoding system hang up on GT1 sku for
ChromiumOS projects

Slice specific MMIO read inaccurate so MGSR needs to be programmed
appropriately to get correct reads from these slicet-related MMIOs.

It dictates that before any MMIO read into Slice/Subslice specific
registers, MCR packet control register(0xFDC) needs to be programmed
to point to any enabled slice/subslice pair, especially GT1 fused sku
since this issue can be reproduced on VP8 hardware encoding via ffmpeg
on ChromiumOS devices.
When exit PC7, MGSR will reset so that we have to skip fused subslice ID.

Reference: HSD#1508045018,1405586840, BSID#0575

Cc: Ville Syrjälä 
Cc: Rodrigo Vivi 
Cc: Jani Nikula 
Cc: Chris Wilson 
Cc: Tvrtko Ursulin 
Cc: William Tseng 
Cc: Lee Shawn C 

Signed-off-by: Cooper Chiou 
---
  drivers/gpu/drm/i915/gt/intel_workarounds.c | 37 +
  1 file changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 3b4a7da60f0b..eb2a587b06b8 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -878,9 +878,46 @@ hsw_gt_workarounds_init(struct drm_i915_private *i915, 
struct i915_wa_list *wal)
wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
  }
  
+static void

+gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+   const struct sseu_dev_info *sseu = >gt.info.sseu;
+   unsigned int slice, subslice;
+   u32 mcr, mcr_mask;
+
+   GEM_BUG_ON(INTEL_GEN(i915) < 9);
+
+   /*
+* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml
+* Before any MMIO read into slice/subslice specific registers, MCR
+* packet control register needs to be programmed to point to any
+* enabled s/ss pair. Otherwise, incorrect values will be returned.
+* This means each subsequent MMIO read will be forwarded to an
+* specific s/ss combination, but this is OK since these registers
+* are consistent across s/ss in almost all cases. In the rare
+* occasions, such as INSTDONE, where this value is dependent
+* on s/ss combo, the read should be done with read_subslice_reg.
+*/
+   slice = ffs(sseu->slice_mask) - 1;
+   GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
+   subslice = ffs(intel_sseu_get_subslices(sseu, slice));
+   GEM_BUG_ON(!subslice);
+   subslice--;
+
+   mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
+   mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
+
+   drm_dbg(>drm, "MCR slice:%d/subslice:%d = %x\n", slice, subslice, 
mcr);
+
+   wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
+}
+
  static void
  gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list 
*wal)
  {
+   /* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
+   gen9_wa_init_mcr(i915, wal);
+
/* WaDisableKillLogic:bxt,skl,kbl */
if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
wa_write_or(wal,



1)
Patch mechanics are fine.

2)
We have confirmation from the HW folks this actually needs doing on Gen9 
even if docs fail to mention it.


So even if the immediate fix is for VP8 encode, which is not fully open, 
this is the right thing to do in general and would have been done if the 
WA was properly documented from the start.


3)
3d performance regression cannot be reproduced on the machine where it 
was originally reported. (Or on other machines.)


So:

Reviewed-by: Tvrtko Ursulin 

+ Joonas for ack to merge due the second point above.

Regards,

Tvrtko

P.S. Many thanks for patiently dealing with requests to test on many 
platforms.


P.P.S. Sadly we are still not able to explain the whole details around 
0xfdc behaviour on Gen9 vs Gen11.

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v5] drm/i915: Enable WaProgramMgsrForCorrectSliceSpecificMmioReads for Gen9

2021-03-09 Thread Chiou, Cooper
I've tested on GLK, KBL, CFL Intel NUC devices and got the following 
performance results, there is no performance regression per my testing.
and CI build test has passed as well.

Patch: [v5] drm/i915: Enable WaProgramMgsrForCorrectSliceSpecificMmioReads for 
Gen9
Test suite: 
phoronix-test-suite.supertuxkart.1024x768.Fullscreen.Ultimate.1.GranParadisoIsland.frames_per_second
Kernel version: 5.12.0-rc1 (drm-tip)

a. Device: Intel NUC kit NUC7JY Gemini Lake Celeron J4005 @2.7GHz (2 Cores)
Without patch, fps=57.45
With patch, fps=57.49
b. Device: Intel NUC kit NUC8BEH Coffee Lake Core i3-8109U @3.6GHz(4 Cores)
Without patch, fps=117.23
With patch, fps=117.27
c. Device: Intel NUC kit NUC7i3BNH Kaby Lake Core i3-7100U @2.4GHz(4 Cores)
Without patch, fps=114.05
With patch, fps=114.34

Meanwhile, Intel lkp team has validated performance test on lkp-kbl-nuc1 and no 
regression as the following.
f69d02e37a85645a  d912096c40cdc3bc9364966971 testcase/testparams/testbox
  -- ---
  %stddev  change %stddev
  \  |\
  29.79   29.67
phoronix-test-suite/performance-true-Fullscreen-Ultimate-1-Gran_Paradiso_Island__Approxima-supertuxkart-1.5.2-ucode=0xde/lkp-kbl-nuc1
  29.79   29.67GEO-MEAN 
phoronix-test-suite.supertuxkart.1280x1024.Fullscreen.Ultimate.1.GranParadisoIsland.frames_per_second

Hi Tvrtko,
Please help to review this patch since this is urgent for Google Chrome 
KBL/CML/GLK projects.

Best Regards,
Cooper
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v5] drm/i915: Enable WaProgramMgsrForCorrectSliceSpecificMmioReads for Gen9

2021-03-05 Thread Chiou, Cooper
Hi Rong,
Please help to trigger 3D performance test on several Gen9 CI test boxes which 
different fusing sku with/without “patch v5”, and share the results. Thanks,

Best Regards,
Cooper
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v5] drm/i915: Enable WaProgramMgsrForCorrectSliceSpecificMmioReads for Gen9

2021-03-05 Thread Cooper Chiou
WaProgramMgsrForCorrectSliceSpecificMmioReads applies for Gen9 to
resolve VP8 hardware encoding system hang up on GT1 sku for
ChromiumOS projects

Slice specific MMIO read inaccurate so MGSR needs to be programmed
appropriately to get correct reads from these slicet-related MMIOs.

It dictates that before any MMIO read into Slice/Subslice specific
registers, MCR packet control register(0xFDC) needs to be programmed
to point to any enabled slice/subslice pair, especially GT1 fused sku
since this issue can be reproduced on VP8 hardware encoding via ffmpeg
on ChromiumOS devices.
When exit PC7, MGSR will reset so that we have to skip fused subslice ID.

Reference: HSD#1508045018,1405586840, BSID#0575

Cc: Ville Syrjälä 
Cc: Rodrigo Vivi 
Cc: Jani Nikula 
Cc: Chris Wilson 
Cc: Tvrtko Ursulin 
Cc: William Tseng 
Cc: Lee Shawn C 

Signed-off-by: Cooper Chiou 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 37 +
 1 file changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 3b4a7da60f0b..eb2a587b06b8 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -878,9 +878,46 @@ hsw_gt_workarounds_init(struct drm_i915_private *i915, 
struct i915_wa_list *wal)
wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
 }
 
+static void
+gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+   const struct sseu_dev_info *sseu = >gt.info.sseu;
+   unsigned int slice, subslice;
+   u32 mcr, mcr_mask;
+
+   GEM_BUG_ON(INTEL_GEN(i915) < 9);
+
+   /*
+* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml
+* Before any MMIO read into slice/subslice specific registers, MCR
+* packet control register needs to be programmed to point to any
+* enabled s/ss pair. Otherwise, incorrect values will be returned.
+* This means each subsequent MMIO read will be forwarded to an
+* specific s/ss combination, but this is OK since these registers
+* are consistent across s/ss in almost all cases. In the rare
+* occasions, such as INSTDONE, where this value is dependent
+* on s/ss combo, the read should be done with read_subslice_reg.
+*/
+   slice = ffs(sseu->slice_mask) - 1;
+   GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
+   subslice = ffs(intel_sseu_get_subslices(sseu, slice));
+   GEM_BUG_ON(!subslice);
+   subslice--;
+
+   mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
+   mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
+
+   drm_dbg(>drm, "MCR slice:%d/subslice:%d = %x\n", slice, subslice, 
mcr);
+
+   wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
+}
+
 static void
 gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list 
*wal)
 {
+   /* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
+   gen9_wa_init_mcr(i915, wal);
+
/* WaDisableKillLogic:bxt,skl,kbl */
if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
wa_write_or(wal,
-- 
2.17.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx