Re: [Intel-gfx] [RFC PATCH 3/3] drm/i915: Enabling WD Transcoder

2022-04-27 Thread Kandpal, Suraj
++Laurent ,Dmitry, Abhinav and Rob
> Adding support for writeback transcoder to start capturing frames using
> interrupt mechanism
> 
> Signed-off-by: Suraj Kandpal 
> ---
>  drivers/gpu/drm/i915/Makefile |   1 +
>  drivers/gpu/drm/i915/display/intel_acpi.c |   1 +
>  drivers/gpu/drm/i915/display/intel_display.c  |  89 +-
>  drivers/gpu/drm/i915/display/intel_display.h  |   9 +
>  .../drm/i915/display/intel_display_types.h|  13 +
>  drivers/gpu/drm/i915/display/intel_dpll.c |   3 +
>  drivers/gpu/drm/i915/display/intel_opregion.c |   3 +
>  drivers/gpu/drm/i915/display/intel_wd.c   | 978 ++
>  drivers/gpu/drm/i915/display/intel_wd.h   |  82 ++
>  drivers/gpu/drm/i915/i915_drv.h   |   2 +
>  drivers/gpu/drm/i915/i915_irq.c   |   8 +-
>  drivers/gpu/drm/i915/i915_pci.c   |   7 +-
>  drivers/gpu/drm/i915/i915_reg.h   | 137 +++
>  13 files changed, 1330 insertions(+), 3 deletions(-)  create mode 100644
> drivers/gpu/drm/i915/display/intel_wd.c
>  create mode 100644 drivers/gpu/drm/i915/display/intel_wd.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 087bd9d1b397..5ee32513a945 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -287,6 +287,7 @@ i915-y += \
>   display/intel_vdsc.o \
>   display/intel_vrr.o \
>   display/intel_wb_connector.o\
> + display/intel_wd.o\
>   display/vlv_dsi.o \
>   display/vlv_dsi_pll.o
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_acpi.c
> b/drivers/gpu/drm/i915/display/intel_acpi.c
> index e78430001f07..ae08db164f73 100644
> --- a/drivers/gpu/drm/i915/display/intel_acpi.c
> +++ b/drivers/gpu/drm/i915/display/intel_acpi.c
> @@ -247,6 +247,7 @@ static u32 acpi_display_type(struct intel_connector
> *connector)
>   case DRM_MODE_CONNECTOR_LVDS:
>   case DRM_MODE_CONNECTOR_eDP:
>   case DRM_MODE_CONNECTOR_DSI:
> + case DRM_MODE_CONNECTOR_WRITEBACK:
>   display_type = ACPI_DISPLAY_TYPE_INTERNAL_DIGITAL;
>   break;
>   case DRM_MODE_CONNECTOR_Unknown:
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c
> b/drivers/gpu/drm/i915/display/intel_display.c
> index eb49973621f0..6dedc7921f54 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -111,6 +111,7 @@
>  #include "intel_sprite.h"
>  #include "intel_tc.h"
>  #include "intel_vga.h"
> +#include "intel_wd.h"
>  #include "i9xx_plane.h"
>  #include "skl_scaler.h"
>  #include "skl_universal_plane.h"
> @@ -1544,6 +1545,72 @@ static void
> intel_encoders_update_complete(struct intel_atomic_state *state)
>   }
>  }
> 
> +static void intel_queue_writeback_job(struct intel_atomic_state *state,
> + struct intel_crtc *intel_crtc, struct intel_crtc_state
> *crtc_state) {
> + struct drm_connector_state *new_conn_state;
> + struct drm_connector *connector;
> + struct drm_i915_private *i915 = to_i915(intel_crtc->base.dev);
> + struct intel_wd *intel_wd;
> + struct intel_connector *intel_connector;
> + struct intel_digital_connector_state *intel_conn_state;
> + struct intel_encoder *encoder;
> + int i;
> +
> + for_each_intel_encoder_with_wd(>drm, encoder) {
> + intel_wd = enc_to_intel_wd(encoder);
> +
> + if (intel_wd->wd_crtc != intel_crtc)
> + return;
> +
> + }
> +
> + for_each_new_connector_in_state(>base, connector,
> new_conn_state,
> + i) {
> + intel_conn_state =
> to_intel_digital_connector_state(new_conn_state);
> + if (!intel_conn_state->job)
> + continue;
> + intel_connector = to_intel_connector(connector);
> + intel_writeback_queue_job(_connector->wb_conn,
> new_conn_state);
> + drm_dbg_kms(>drm, "queueing writeback job\n");
> + }
> +}
> +
> +static void intel_find_writeback_connector(struct intel_atomic_state
> *state,
> + struct intel_crtc *intel_crtc, struct intel_crtc_state
> *crtc_state) {
> + struct drm_connector_state *new_conn_state;
> + struct drm_connector *connector;
> + struct drm_i915_private *i915 = to_i915(intel_crtc->base.dev);
> + struct intel_wd *intel_wd;
> + struct intel_encoder *encoder;
> + int i;
> +
> + for_each_intel_encoder_with_wd(>drm, encoder) {
> + intel_wd = enc_to_intel_wd(encoder);
> +
> + if (intel_wd->wd_crtc != intel_crtc)
> + return;
> +
> + }
> +
> + for_each_new_connector_in_state(>base, connector,
> new_conn_state,
> + i) {
> + struct intel_connector *intel_connector;
> +
> + intel_connector = to_intel_connector(connector);
> + drm_dbg_kms(>drm, "[CONNECTOR:%d:%s]: status:
> %s\n",
> +

Re: [Intel-gfx] [RFC PATCH 2/3] drm/i915: Define WD trancoder for i915

2022-04-27 Thread Kandpal, Suraj
++Laurent ,Dmitry, Abhinav and Rob

> -Original Message-
> From: Kandpal, Suraj 
> Sent: Thursday, April 21, 2022 10:38 AM
> To: intel-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
> Cc: Nikula, Jani ; ville.syrj...@linux.intel.com;
> Murthy, Arun R ; Kandpal, Suraj
> 
> Subject: [RFC PATCH 2/3] drm/i915: Define WD trancoder for i915
> 
> Adding WD Types, WD transcoder to enum list and WD Transcoder offsets
> 
> Signed-off-by: Suraj Kandpal 
> ---
>  drivers/gpu/drm/i915/display/intel_display.h   | 6 ++
>  drivers/gpu/drm/i915/display/intel_display_types.h | 1 +
>  drivers/gpu/drm/i915/i915_reg.h| 2 ++
>  3 files changed, 9 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_display.h
> b/drivers/gpu/drm/i915/display/intel_display.h
> index 8513703086b7..8c93a5de8e07 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.h
> +++ b/drivers/gpu/drm/i915/display/intel_display.h
> @@ -119,6 +119,8 @@ enum transcoder {
>   TRANSCODER_DSI_1,
>   TRANSCODER_DSI_A = TRANSCODER_DSI_0,/* legacy DSI */
>   TRANSCODER_DSI_C = TRANSCODER_DSI_1,/* legacy DSI */
> + TRANSCODER_WD_0,
> + TRANSCODER_WD_1,
> 
>   I915_MAX_TRANSCODERS
>  };
> @@ -140,6 +142,10 @@ static inline const char *transcoder_name(enum
> transcoder transcoder)
>   return "DSI A";
>   case TRANSCODER_DSI_C:
>   return "DSI C";
> + case TRANSCODER_WD_0:
> + return "WD 0";
> + case TRANSCODER_WD_1:
> + return "WD 1";
>   default:
>   return "";
>   }
> diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h
> b/drivers/gpu/drm/i915/display/intel_display_types.h
> index 7a96ecba73c0..dcb4ad43cf88 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_types.h
> +++ b/drivers/gpu/drm/i915/display/intel_display_types.h
> @@ -79,6 +79,7 @@ enum intel_output_type {
>   INTEL_OUTPUT_DSI = 9,
>   INTEL_OUTPUT_DDI = 10,
>   INTEL_OUTPUT_DP_MST = 11,
> + INTEL_OUTPUT_WD = 12,
>  };
> 
>  enum hdmi_force_audio {
> diff --git a/drivers/gpu/drm/i915/i915_reg.h
> b/drivers/gpu/drm/i915/i915_reg.h index ddbc7a685a50..6396afd77209
> 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -2023,6 +2023,8 @@
>  #define TRANSCODER_EDP_OFFSET 0x6f000
>  #define TRANSCODER_DSI0_OFFSET   0x6b000
>  #define TRANSCODER_DSI1_OFFSET   0x6b800
> +#define TRANSCODER_WD0_OFFSET0x6e000
> +#define TRANSCODER_WD1_OFFSET0x6e800
> 
>  #define HTOTAL(trans)_MMIO_TRANS2(trans, _HTOTAL_A)
>  #define HBLANK(trans)_MMIO_TRANS2(trans, _HBLANK_A)
> --
> 2.35.1



Re: [Intel-gfx] [PATCH 0/2] Initial GuC firmware release for DG2

2022-04-27 Thread Lucas De Marchi

On Wed, Apr 27, 2022 at 03:14:16PM -0700, John Harrison wrote:

On 4/27/2022 11:24, Timo Aaltonen wrote:

john.c.harri...@intel.com kirjoitti 27.4.2022 klo 19.55:

From: John Harrison 

Add GuC firmware for DG2.

Note that an older version of this patch exists in the CI topic
branch. Hence this set includes a revert of that patch before applying
the new version. When merging, the revert would simply be dropped and
the corresponding patch in the topic branch would also be dropped.

Signed-off-by: John Harrison 


John Harrison (2):
   Revert "drm/i915/dg2: Define GuC firmware version for DG2"
   drm/i915/dg2: Define GuC firmware version for DG2

  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)



The firmware is not public yet, though? Shouldn't it have been sent 
upstream already? Same complaint applies to DMC.



Not sure about the DMC team, but for i915 we upload the firmware to an 
FDO repo so that the CI system can find it and run the pre-merge 
testing with it. However, we don't send the final pull request for the 
real linux firmware repo until we have merged the i915 patch to 
drm-gt-intel-next and it is definitely going upstream. Otherwise, we 
might end up pushing firmwares to the linux repo that never get used.


we don't want to risk sending a pull request to drm if the firmware is
not in linux-firmware repo yet though, so we need to be careful with
this workflow.

We still have some weeks, which should be sufficient time if it's sent
to linux-firmware asap.

2nd patch pushed to drm-intel-gt-next.
1st patch I removed from topic/core-for-CI.

Thanks
Lucas De Marchi



John.



Re: [Intel-gfx] [RFC PATCH 0/3] i915 writeback private framework

2022-04-27 Thread Kandpal, Suraj
++Laurent ,Dmitry, and Abhinav

Hi,
Can you have a look at the private implementation i915 is currently going with
until we can figure out how to work with the drm core?

Regards,
Suraj Kandpal
> A patch series was floated on the drm mailing list which aimed to change the
> drm_connector and drm_encoder fields into pointers in the
> drm_writeback_connector structure. This received huge pushback from the
> community. However, i915 expects each connector present in the drm_device
> list to be an intel_connector, which the drm_connector embedded in the
> drm_writeback framework is not.
> [1] https://patchwork.kernel.org/project/dri-
> devel/patch/20220202081702.22119-1-suraj.kand...@intel.com/
> [2] https://patchwork.kernel.org/project/dri-
> devel/patch/20220202085429.22261-6-suraj.kand...@intel.com/
> This forces us to use a drm_connector which is not embedded in
> intel_connector, so the current drm_writeback framework becomes infeasible
> for us: it would mean a lot of checks in a lot of places to take the above
> issue into account. Since no one had an issue with the encoder field being
> changed into a pointer, it was decided to split the connector and encoder
> pointer changes into two different series. The encoder field changes are
> currently being worked on by Abhinav Kumar
> [3]https://patchwork.kernel.org/project/dri-devel/list/?series=633565
> In the meantime, for i915 to start using the writeback functionality, we came
> up with an interim solution: our own writeback pipeline, bypassing the one
> provided by drm, which is what these patches do.
> Note: these are temp patches till we figure out how we can either change
> drm core writeback to work with our intel_connector structure or find a
> different solution which allows us to work with the current drm_writeback
> framework
> 
> Suraj Kandpal (3):
>   drm/i915: Creating writeback pipeline to bypass drm_writeback
> framework
>   drm/i915: Define WD trancoder for i915
>   drm/i915: Enabling WD Transcoder
> 
>  drivers/gpu/drm/i915/Makefile |   2 +
>  drivers/gpu/drm/i915/display/intel_acpi.c |   1 +
>  drivers/gpu/drm/i915/display/intel_display.c  |  89 +-
> drivers/gpu/drm/i915/display/intel_display.h  |  15 +
>  .../drm/i915/display/intel_display_types.h|  18 +
>  drivers/gpu/drm/i915/display/intel_dpll.c |   3 +
>  drivers/gpu/drm/i915/display/intel_opregion.c |   3 +
>  .../gpu/drm/i915/display/intel_wb_connector.c | 296 ++
> .../gpu/drm/i915/display/intel_wb_connector.h |  99 ++
>  drivers/gpu/drm/i915/display/intel_wd.c   | 978 ++
>  drivers/gpu/drm/i915/display/intel_wd.h   |  82 ++
>  drivers/gpu/drm/i915/i915_drv.h   |   5 +
>  drivers/gpu/drm/i915/i915_irq.c   |   8 +-
>  drivers/gpu/drm/i915/i915_pci.c   |   7 +-
>  drivers/gpu/drm/i915/i915_reg.h   | 139 +++
>  15 files changed, 1742 insertions(+), 3 deletions(-)  create mode 100644
> drivers/gpu/drm/i915/display/intel_wb_connector.c
>  create mode 100644 drivers/gpu/drm/i915/display/intel_wb_connector.h
>  create mode 100644 drivers/gpu/drm/i915/display/intel_wd.c
>  create mode 100644 drivers/gpu/drm/i915/display/intel_wd.h
> 
> --
> 2.35.1



Re: [Intel-gfx] [PATCH] drm/i915: Support Async Flip on Linear buffers

2022-04-27 Thread Murthy, Arun R
> > > It's supported earlier than that. But IIRC there was some kind of
> > > GTT alignment vs. async flip vs. FBC restriction that we weren't handling.
> > >
> > Should I enable it for earlier Gens also, or is it fine to keep it starting
> > with Gen 12?
> > The only restriction that I see in Bspec is that during async flip
> > changes to stride, pixel format, compression, FBC etc is not allowed
> > and I see this is already taken care of. Am I missing anything?
> 
> There is that GTT alignment restriction that should be mentioned
> somewhere. Can't quite remember where it was, maybe in PLANE_SURF.
> 
I checked the BSpec and did not find any such restriction specific to async flip.
I also cross verified with the hardware team.

> But I guess the bigger question is what is the actual use case for this?
This feature is a requirement for LNL.

Thanks and Regards,
Arun R Murthy



[Intel-gfx] ✓ Fi.CI.BAT: success for i915: Turn on compute engine support (rev4)

2022-04-27 Thread Patchwork
== Series Details ==

Series: i915: Turn on compute engine support (rev4)
URL   : https://patchwork.freedesktop.org/series/103011/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11550 -> Patchwork_103011v4


Summary
---

  **SUCCESS**

  No regressions found.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103011v4/index.html

Participating hosts (43 -> 43)
--

  Additional (3): fi-icl-u2 bat-dg1-6 bat-adlp-4 
  Missing(3): bat-rpls-1 bat-rpls-2 fi-bsw-cyan 

New tests
-

  New tests have been introduced between CI_DRM_11550 and Patchwork_103011v4:

### New IGT tests (24) ###

  * igt@gem_exec_basic@basic@ccs0-lmem0:
- Statuses : 1 pass(s)
- Exec time: [0.0] s

  * igt@gem_exec_basic@basic@ccs0-smem:
- Statuses : 1 pass(s)
- Exec time: [0.0] s

  * igt@gem_exec_basic@basic@ccs1-lmem0:
- Statuses : 1 pass(s)
- Exec time: [0.0] s

  * igt@gem_exec_basic@basic@ccs1-smem:
- Statuses : 1 pass(s)
- Exec time: [0.0] s

  * igt@gem_exec_basic@basic@ccs2-lmem0:
- Statuses : 1 pass(s)
- Exec time: [0.0] s

  * igt@gem_exec_basic@basic@ccs2-smem:
- Statuses : 1 pass(s)
- Exec time: [0.0] s

  * igt@gem_exec_basic@basic@ccs3-lmem0:
- Statuses : 1 pass(s)
- Exec time: [0.0] s

  * igt@gem_exec_basic@basic@ccs3-smem:
- Statuses : 1 pass(s)
- Exec time: [0.0] s

  * igt@gem_exec_fence@basic-await@ccs0:
- Statuses : 1 pass(s)
- Exec time: [0.09] s

  * igt@gem_exec_fence@basic-await@ccs1:
- Statuses : 1 pass(s)
- Exec time: [0.09] s

  * igt@gem_exec_fence@basic-await@ccs2:
- Statuses : 1 pass(s)
- Exec time: [0.09] s

  * igt@gem_exec_fence@basic-await@ccs3:
- Statuses : 1 pass(s)
- Exec time: [0.09] s

  * igt@gem_exec_fence@basic-busy@ccs0:
- Statuses : 1 pass(s)
- Exec time: [0.03] s

  * igt@gem_exec_fence@basic-busy@ccs1:
- Statuses : 1 pass(s)
- Exec time: [0.04] s

  * igt@gem_exec_fence@basic-busy@ccs2:
- Statuses : 1 pass(s)
- Exec time: [0.04] s

  * igt@gem_exec_fence@basic-busy@ccs3:
- Statuses : 1 pass(s)
- Exec time: [0.03] s

  * igt@gem_exec_fence@basic-wait@ccs0:
- Statuses : 1 pass(s)
- Exec time: [0.03] s

  * igt@gem_exec_fence@basic-wait@ccs1:
- Statuses : 1 pass(s)
- Exec time: [0.03] s

  * igt@gem_exec_fence@basic-wait@ccs2:
- Statuses : 1 pass(s)
- Exec time: [0.04] s

  * igt@gem_exec_fence@basic-wait@ccs3:
- Statuses : 1 pass(s)
- Exec time: [0.03] s

  * igt@gem_exec_fence@nb-await@ccs0:
- Statuses : 1 pass(s)
- Exec time: [0.07] s

  * igt@gem_exec_fence@nb-await@ccs1:
- Statuses : 1 pass(s)
- Exec time: [0.10] s

  * igt@gem_exec_fence@nb-await@ccs2:
- Statuses : 1 pass(s)
- Exec time: [0.08] s

  * igt@gem_exec_fence@nb-await@ccs3:
- Statuses : 1 pass(s)
- Exec time: [0.08] s

  

Known issues


  Here are the changes found in Patchwork_103011v4 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@gem_exec_suspend@basic-s0@smem:
- bat-dg1-6:  NOTRUN -> [INCOMPLETE][1] ([i915#5827])
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103011v4/bat-dg1-6/igt@gem_exec_suspend@basic-s0@smem.html

  * igt@gem_huc_copy@huc-copy:
- fi-rkl-11600:   NOTRUN -> [SKIP][2] ([i915#2190])
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103011v4/fi-rkl-11600/igt@gem_huc_copy@huc-copy.html
- fi-icl-u2:  NOTRUN -> [SKIP][3] ([i915#2190])
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103011v4/fi-icl-u2/igt@gem_huc_copy@huc-copy.html

  * igt@gem_lmem_swapping@basic:
- fi-rkl-11600:   NOTRUN -> [SKIP][4] ([i915#4613]) +3 similar issues
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103011v4/fi-rkl-11600/igt@gem_lmem_swapping@basic.html
- bat-adlp-4: NOTRUN -> [SKIP][5] ([i915#4613]) +3 similar issues
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103011v4/bat-adlp-4/igt@gem_lmem_swapping@basic.html

  * igt@gem_lmem_swapping@parallel-random-engines:
- fi-icl-u2:  NOTRUN -> [SKIP][6] ([i915#4613]) +3 similar issues
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103011v4/fi-icl-u2/igt@gem_lmem_swapping@parallel-random-engines.html

  * igt@gem_tiled_pread_basic:
- bat-adlp-4: NOTRUN -> [SKIP][7] ([i915#3282])
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103011v4/bat-adlp-4/igt@gem_tiled_pread_basic.html
- fi-rkl-11600:   NOTRUN -> [SKIP][8] ([i915#3282])
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103011v4/fi-rkl-11600/igt@gem_tiled_pread_basic.html

  * igt@i915_pm_backlight@basic-brightness:
- fi-rkl-11600:   NOTRUN -> [SKIP][9] ([i915#3012])
   [9]: 

[Intel-gfx] ✗ Fi.CI.SPARSE: warning for i915: Turn on compute engine support (rev4)

2022-04-27 Thread Patchwork
== Series Details ==

Series: i915: Turn on compute engine support (rev4)
URL   : https://patchwork.freedesktop.org/series/103011/
State : warning

== Summary ==

Error: dim sparse failed
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.




[Intel-gfx] [PATCH v2 3/4] drm/i915/xehp: Add compute engine ABI

2022-04-27 Thread Matt Roper
We're now ready to start exposing compute engines to userspace.

v2:
 - Move kerneldoc for other engine classes to a separate patch.  (Andi)

Cc: Daniele Ceraolo Spurio 
Cc: Tvrtko Ursulin 
Cc: Vinay Belgaumkar 
Cc: Jordan Justen 
Cc: Szymon Morek 
UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14395
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_engine_user.c | 2 +-
 drivers/gpu/drm/i915/i915_drm_client.c  | 1 +
 drivers/gpu/drm/i915/i915_drm_client.h  | 2 +-
 include/uapi/drm/i915_drm.h | 9 +
 4 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 0f6cd96b459f..46a174f8aa00 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
[COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
[VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
[VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
-   /* TODO: Add COMPUTE_CLASS mapping once ABI is available */
+   [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
 };
 
 static int engine_cmp(void *priv, const struct list_head *A,
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c 
b/drivers/gpu/drm/i915/i915_drm_client.c
index 475a6f824cad..18d38cb59923 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -81,6 +81,7 @@ static const char * const uabi_class_names[] = {
[I915_ENGINE_CLASS_COPY] = "copy",
[I915_ENGINE_CLASS_VIDEO] = "video",
[I915_ENGINE_CLASS_VIDEO_ENHANCE] = "video-enhance",
+   [I915_ENGINE_CLASS_COMPUTE] = "compute",
 };
 
 static u64 busy_add(struct i915_gem_context *ctx, unsigned int class)
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h 
b/drivers/gpu/drm/i915/i915_drm_client.h
index 5f5b02b01ba0..f796c5e8e060 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -13,7 +13,7 @@
 
 #include "gt/intel_engine_types.h"
 
-#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
+#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
 
 struct drm_i915_private;
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index ec000fc6c879..a2def7b27009 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -203,6 +203,15 @@ enum drm_i915_gem_engine_class {
 */
I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
 
+   /**
+* @I915_ENGINE_CLASS_COMPUTE:
+*
+* Compute engines support a subset of the instructions available
+* on render engines:  compute engines support Compute (GPGPU) and
+* programmable media workloads, but do not support the 3D pipeline.
+*/
+   I915_ENGINE_CLASS_COMPUTE   = 4,
+
/* Values in this enum should be kept compact. */
 
/**
-- 
2.35.1



[Intel-gfx] [PATCH v2 1/4] drm/i915/uapi: Add kerneldoc for engine class enum

2022-04-27 Thread Matt Roper
We'll be adding a new type of engine soon.  Let's document the existing
engine classes first to help make it clear what each type of engine is
used for.

Cc: Andi Shyti 
Signed-off-by: Matt Roper 
---
 include/uapi/drm/i915_drm.h | 53 -
 1 file changed, 47 insertions(+), 6 deletions(-)

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 35ca528803fd..ec000fc6c879 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -154,21 +154,62 @@ enum i915_mocs_table_index {
I915_MOCS_CACHED,
 };
 
-/*
+/**
+ * enum drm_i915_gem_engine_class - uapi engine type enumeration
+ *
  * Different engines serve different roles, and there may be more than one
- * engine serving each role. enum drm_i915_gem_engine_class provides a
- * classification of the role of the engine, which may be used when requesting
- * operations to be performed on a certain subset of engines, or for providing
- * information about that group.
+ * engine serving each role.  This enum provides a classification of the role
+ * of the engine, which may be used when requesting operations to be performed
+ * on a certain subset of engines, or for providing information about that
+ * group.
  */
 enum drm_i915_gem_engine_class {
+   /**
+* @I915_ENGINE_CLASS_RENDER:
+*
+* Render engines support instructions used for 3D, Compute (GPGPU),
+* and programmable media workloads.  These instructions fetch data and
+* dispatch individual work items to threads that operate in parallel.
+* The threads run small programs (called "kernels" or "shaders") on
+* the GPU's execution units (EUs).
+*/
I915_ENGINE_CLASS_RENDER= 0,
+
+   /**
+* @I915_ENGINE_CLASS_COPY:
+*
+* Copy engines (also referred to as "blitters") support instructions
+* that move blocks of data from one location in memory to another,
+* or that fill a specified location of memory with fixed data.
+* Copy engines can perform pre-defined logical or bitwise operations
+* on the source, destination, or pattern data.
+*/
I915_ENGINE_CLASS_COPY  = 1,
+
+   /**
+* @I915_ENGINE_CLASS_VIDEO:
+*
+* Video engines (also referred to as "bit stream decode" (BSD) or
+* "vdbox") support instructions that perform fixed-function media
+* decode and encode.
+*/
I915_ENGINE_CLASS_VIDEO = 2,
+
+   /**
+* @I915_ENGINE_CLASS_VIDEO_ENHANCE:
+*
+* Video enhancement engines (also referred to as "vebox") support
+* instructions related to image enhancement.
+*/
I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
 
-   /* should be kept compact */
+   /* Values in this enum should be kept compact. */
 
+   /**
+* @I915_ENGINE_CLASS_INVALID:
+*
+* Placeholder value to represent an invalid engine class assignment.
+*/
I915_ENGINE_CLASS_INVALID   = -1
 };
 
-- 
2.35.1



[Intel-gfx] [PATCH v2 4/4] drm/i915: Xe_HP SDV and DG2 have up to 4 CCS engines

2022-04-27 Thread Matt Roper
From: Daniele Ceraolo Spurio 

Cc: Vinay Belgaumkar 
Signed-off-by: Daniele Ceraolo Spurio 
Signed-off-by: Matt Roper 
Reviewed-by: Matt Roper 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/i915_pci.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index b60492826478..7739d6c33481 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1037,7 +1037,8 @@ static const struct intel_device_info xehpsdv_info = {
BIT(RCS0) | BIT(BCS0) |
BIT(VECS0) | BIT(VECS1) | BIT(VECS2) | BIT(VECS3) |
BIT(VCS0) | BIT(VCS1) | BIT(VCS2) | BIT(VCS3) |
-   BIT(VCS4) | BIT(VCS5) | BIT(VCS6) | BIT(VCS7),
+   BIT(VCS4) | BIT(VCS5) | BIT(VCS6) | BIT(VCS7) |
+   BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3),
.require_force_probe = 1,
 };
 
@@ -1056,7 +1057,8 @@ static const struct intel_device_info xehpsdv_info = {
.platform_engine_mask = \
BIT(RCS0) | BIT(BCS0) | \
BIT(VECS0) | BIT(VECS1) | \
-   BIT(VCS0) | BIT(VCS2)
+   BIT(VCS0) | BIT(VCS2) | \
+   BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3)
 
 static const struct intel_device_info dg2_info = {
DG2_FEATURES,
-- 
2.35.1



[Intel-gfx] [PATCH v2 2/4] drm/i915/xehp: Add register for compute engine's MMIO-based TLB invalidation

2022-04-27 Thread Matt Roper
Compute engines have a separate register that the driver should use to
perform MMIO-based TLB invalidation.

Note that the term "context" in this register's bspec description is
used to refer to the engine instance (in the same way "context" is used
on bspec 46167).

Bspec: 43930
Cc: Prathap Kumar Valsan 
Cc: Tvrtko Ursulin 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt.c  | 1 +
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 92394f13b42f..53307ca0eed0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -1175,6 +1175,7 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
[VIDEO_DECODE_CLASS]= GEN12_VD_TLB_INV_CR,
[VIDEO_ENHANCEMENT_CLASS]   = GEN12_VE_TLB_INV_CR,
[COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
+   [COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR,
};
struct drm_i915_private *i915 = gt->i915;
struct intel_uncore *uncore = gt->uncore;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index a39718a40cc3..a0a49c16babd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1007,6 +1007,7 @@
 #define GEN12_VD_TLB_INV_CR   _MMIO(0xcedc)
 #define GEN12_VE_TLB_INV_CR   _MMIO(0xcee0)
 #define GEN12_BLT_TLB_INV_CR   _MMIO(0xcee4)
+#define GEN12_COMPCTX_TLB_INV_CR   _MMIO(0xcf04)
 
 #define GEN12_MERT_MOD_CTRL   _MMIO(0xcf28)
 #define RENDER_MOD_CTRL   _MMIO(0xcf2c)
-- 
2.35.1



[Intel-gfx] [PATCH v2 0/4] i915: Turn on compute engine support

2022-04-27 Thread Matt Roper
Now that the necessary GuC-based hardware workarounds have landed, we're
finally ready to actually enable compute engines for use by userspace.
All of the "under-the-hood" heavy lifting already landed a while back in
other series so all that remains now is to add I915_ENGINE_CLASS_COMPUTE
to the uapi enum and add the CCS engines to the engine lists for the
Xe_HP SDV and DG2.

Userspace (Mesa) is linked in the ABI patch.  Existing IGT tests (e.g.,
i915_hangman) provide test coverage for general engine behavior since compute
engines should follow the same general rules as other engines.  We've also
recently added some additional subtests like
igt@gem_reset_stats@shared-reset-domain to cover the user-visible impacts of
the compute engines sharing the same hardware reset domain as the render
engine.

v2:
 - Update TLB invalidation register for compute engines and move it to a
   separate patch since it isn't related to the new uapi.  (Tvrtko,
   Prathap)
 - Move new kerneldoc for pre-existing engine classes to a separate
   patch.  (Andi)
 - Drop the compute UMD merge request link for now because it also
   included some additional multi-tile uapi that we're not ready to
   upstream just yet.  Even if they don't have a disentangled MR ready
   for reference, we still have the Mesa MR as a key userspace consumer.
   (Tvrtko)

Cc: Lucas De Marchi 
Cc: Tvrtko Ursulin 

Daniele Ceraolo Spurio (1):
  drm/i915: Xe_HP SDV and DG2 have up to 4 CCS engines

Matt Roper (3):
  drm/i915/uapi: Add kerneldoc for engine class enum
  drm/i915/xehp: Add register for compute engine's MMIO-based TLB
invalidation
  drm/i915/xehp: Add compute engine ABI

 drivers/gpu/drm/i915/gt/intel_engine_user.c |  2 +-
 drivers/gpu/drm/i915/gt/intel_gt.c  |  1 +
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  1 +
 drivers/gpu/drm/i915/i915_drm_client.c  |  1 +
 drivers/gpu/drm/i915/i915_drm_client.h  |  2 +-
 drivers/gpu/drm/i915/i915_pci.c |  6 +-
 include/uapi/drm/i915_drm.h | 62 +++--
 7 files changed, 65 insertions(+), 10 deletions(-)

-- 
2.35.1



Re: [Intel-gfx] [PATCH 1/2] drm/i915/xehp: Add compute engine ABI

2022-04-27 Thread Matt Roper
On Mon, Apr 25, 2022 at 11:41:36AM +0100, Tvrtko Ursulin wrote:
> 
> On 22/04/2022 20:50, Matt Roper wrote:
> > We're now ready to start exposing compute engines to userspace.
> > 
> > While we're at it, let's extend the kerneldoc description for the other
> > engine types as well.
> > 
> > Cc: Daniele Ceraolo Spurio 
> > Cc: Tvrtko Ursulin 
> > Cc: Vinay Belgaumkar 
> > Cc: Jordan Justen 
> > Cc: Szymon Morek 
> > UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14395
> > UMD (compute): https://github.com/intel/compute-runtime/pull/451
> 
> The compute one points to a commit named "Add compute engine class for xehp"
> but content of which seems more about engine query, including the yet
> non-existent distance query (and more)?! It certainly does not appear to be
> adding a definition of I915_ENGINE_CLASS_COMPUTE. This needs clarifying.
> 

Hi Szymon, any updates on the compute UMD merge request here?  Is there
a different merge request we should reference for now that just uses the
I915_ENGINE_CLASS_COMPUTE without also relying on the
DRM_I915_QUERY_DISTANCE_INFO that we aren't upstreaming just yet?

I believe distance info is only useful for multi-tile platforms and
isn't necessary for general use of compute engines on a single tile
platform.

Thanks.


Matt

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation
(916) 356-2795


[Intel-gfx] ✗ Fi.CI.IGT: failure for i915: SSEU handling updates (rev2)

2022-04-27 Thread Patchwork
== Series Details ==

Series: i915: SSEU handling updates (rev2)
URL   : https://patchwork.freedesktop.org/series/103244/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_11550_full -> Patchwork_103244v2_full


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_103244v2_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_103244v2_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Participating hosts (10 -> 13)
--

  Additional (3): shard-rkl shard-dg1 shard-tglu 

Possible new issues
---

  Here are the unknown changes that may have been introduced in 
Patchwork_103244v2_full:

### IGT changes ###

 Possible regressions 

  * igt@i915_pm_sseu@full-enable:
- shard-kbl:  [PASS][1] -> [FAIL][2] +3 similar issues
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-kbl3/igt@i915_pm_s...@full-enable.html
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/shard-kbl4/igt@i915_pm_s...@full-enable.html

  
 Warnings 

  * igt@gem_eio@unwedge-stress:
- shard-tglb: [FAIL][3] ([i915#232]) -> [FAIL][4] +1 similar issue
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-tglb5/igt@gem_...@unwedge-stress.html
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/shard-tglb1/igt@gem_...@unwedge-stress.html

  
 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@gem_exec_flush@basic-batch-kernel-default-wb:
- {shard-rkl}:NOTRUN -> [DMESG-WARN][5]
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/shard-rkl-5/igt@gem_exec_fl...@basic-batch-kernel-default-wb.html

  * igt@kms_vblank@pipe-b-ts-continuation-dpms-suspend:
- {shard-dg1}:NOTRUN -> [INCOMPLETE][6] +1 similar issue
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/shard-dg1-19/igt@kms_vbl...@pipe-b-ts-continuation-dpms-suspend.html

  * igt@perf_pmu@module-unload:
- {shard-rkl}:NOTRUN -> [INCOMPLETE][7]
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/shard-rkl-5/igt@perf_...@module-unload.html

  
Known issues


  Here are the changes found in Patchwork_103244v2_full that come from known 
issues:

### CI changes ###

 Possible fixes 

  * boot:
- shard-skl:  ([PASS][8], [PASS][9], [PASS][10], [PASS][11], 
[PASS][12], [PASS][13], [PASS][14], [PASS][15], [PASS][16], [PASS][17], 
[PASS][18], [PASS][19], [FAIL][20], [PASS][21], [PASS][22], [PASS][23], 
[PASS][24], [PASS][25], [PASS][26], [PASS][27], [PASS][28], [PASS][29], 
[PASS][30], [PASS][31]) ([i915#5032]) -> ([PASS][32], [PASS][33], [PASS][34], 
[PASS][35], [PASS][36], [PASS][37], [PASS][38], [PASS][39], [PASS][40], 
[PASS][41], [PASS][42], [PASS][43], [PASS][44], [PASS][45], [PASS][46], 
[PASS][47], [PASS][48], [PASS][49], [PASS][50], [PASS][51], [PASS][52], 
[PASS][53], [PASS][54], [PASS][55], [PASS][56])
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl9/boot.html
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl9/boot.html
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl8/boot.html
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl8/boot.html
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl7/boot.html
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl7/boot.html
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl6/boot.html
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl6/boot.html
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl6/boot.html
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl5/boot.html
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl5/boot.html
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl5/boot.html
   [20]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl5/boot.html
   [21]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl4/boot.html
   [22]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl4/boot.html
   [23]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl4/boot.html
   [24]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl3/boot.html
   [25]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl3/boot.html
   [26]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl2/boot.html
   [27]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl2/boot.html
   

[Intel-gfx] ✓ Fi.CI.BAT: success for i915: SSEU handling updates (rev2)

2022-04-27 Thread Patchwork
== Series Details ==

Series: i915: SSEU handling updates (rev2)
URL   : https://patchwork.freedesktop.org/series/103244/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11550 -> Patchwork_103244v2


Summary
---

  **SUCCESS**

  No regressions found.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/index.html

Participating hosts (43 -> 46)
--

  Additional (5): bat-dg1-6 bat-dg2-8 bat-adlm-1 fi-icl-u2 bat-adlp-4 
  Missing(2): bat-rpls-2 fi-bsw-cyan 

Possible new issues
---

  Here are the unknown changes that may have been introduced in 
Patchwork_103244v2:

### IGT changes ###

 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@dmabuf@all@dma_fence_chain:
- {bat-rpls-1}:   NOTRUN -> [INCOMPLETE][1]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/bat-rpls-1/igt@dmabuf@all@dma_fence_chain.html

  
Known issues


  Here are the changes found in Patchwork_103244v2 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@gem_exec_suspend@basic-s0@smem:
- bat-dg1-6:  NOTRUN -> [INCOMPLETE][2] ([i915#5827])
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/bat-dg1-6/igt@gem_exec_suspend@basic...@smem.html

  * igt@gem_huc_copy@huc-copy:
- fi-icl-u2:  NOTRUN -> [SKIP][3] ([i915#2190])
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/fi-icl-u2/igt@gem_huc_c...@huc-copy.html

  * igt@gem_lmem_swapping@basic:
- bat-adlp-4: NOTRUN -> [SKIP][4] ([i915#4613]) +3 similar issues
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/bat-adlp-4/igt@gem_lmem_swapp...@basic.html

  * igt@gem_lmem_swapping@parallel-random-engines:
- fi-icl-u2:  NOTRUN -> [SKIP][5] ([i915#4613]) +3 similar issues
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/fi-icl-u2/igt@gem_lmem_swapp...@parallel-random-engines.html

  * igt@gem_tiled_pread_basic:
- bat-adlp-4: NOTRUN -> [SKIP][6] ([i915#3282])
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/bat-adlp-4/igt@gem_tiled_pread_basic.html

  * igt@i915_selftest@live@mman:
- fi-bdw-5557u:   NOTRUN -> [INCOMPLETE][7] ([i915#5704])
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/fi-bdw-5557u/igt@i915_selftest@l...@mman.html

  * igt@kms_chamelium@dp-crc-fast:
- bat-adlp-4: NOTRUN -> [SKIP][8] ([fdo#111827]) +8 similar issues
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/bat-adlp-4/igt@kms_chamel...@dp-crc-fast.html
- fi-bdw-5557u:   NOTRUN -> [SKIP][9] ([fdo#109271] / [fdo#111827]) +8 
similar issues
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/fi-bdw-5557u/igt@kms_chamel...@dp-crc-fast.html

  * igt@kms_chamelium@hdmi-hpd-fast:
- fi-icl-u2:  NOTRUN -> [SKIP][10] ([fdo#111827]) +8 similar issues
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/fi-icl-u2/igt@kms_chamel...@hdmi-hpd-fast.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy:
- fi-icl-u2:  NOTRUN -> [SKIP][11] ([fdo#109278]) +2 similar issues
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/fi-icl-u2/igt@kms_cursor_leg...@basic-busy-flip-before-cursor-legacy.html
- bat-adlp-4: NOTRUN -> [SKIP][12] ([i915#4103]) +1 similar issue
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/bat-adlp-4/igt@kms_cursor_leg...@basic-busy-flip-before-cursor-legacy.html

  * igt@kms_force_connector_basic@force-load-detect:
- fi-icl-u2:  NOTRUN -> [SKIP][13] ([fdo#109285])
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/fi-icl-u2/igt@kms_force_connector_ba...@force-load-detect.html

  * igt@kms_force_connector_basic@prune-stale-modes:
- bat-adlp-4: NOTRUN -> [SKIP][14] ([i915#4093]) +3 similar issues
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/bat-adlp-4/igt@kms_force_connector_ba...@prune-stale-modes.html

  * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-b:
- fi-cfl-8109u:   [PASS][15] -> [DMESG-WARN][16] ([i915#62]) +11 
similar issues
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/fi-cfl-8109u/igt@kms_pipe_crc_ba...@compare-crc-sanitycheck-pipe-b.html
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/fi-cfl-8109u/igt@kms_pipe_crc_ba...@compare-crc-sanitycheck-pipe-b.html

  * igt@kms_setmode@basic-clone-single-crtc:
- fi-icl-u2:  NOTRUN -> [SKIP][17] ([i915#3555])
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v2/fi-icl-u2/igt@kms_setm...@basic-clone-single-crtc.html
- fi-bdw-5557u:   NOTRUN -> [SKIP][18] ([fdo#109271]) +14 

[Intel-gfx] ✗ Fi.CI.SPARSE: warning for i915: SSEU handling updates (rev2)

2022-04-27 Thread Patchwork
== Series Details ==

Series: i915: SSEU handling updates (rev2)
URL   : https://patchwork.freedesktop.org/series/103244/
State : warning

== Summary ==

Error: dim sparse failed
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.




[Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for i915: SSEU handling updates (rev2)

2022-04-27 Thread Patchwork
== Series Details ==

Series: i915: SSEU handling updates (rev2)
URL   : https://patchwork.freedesktop.org/series/103244/
State : warning

== Summary ==

Error: dim checkpatch failed
721ef81dbce9 drm/i915/sseu: Don't try to store EU mask internally in UAPI format
91b82b1f6352 drm/i915/xehp: Drop GETPARAM lookups of I915_PARAM_[SUB]SLICE_MASK
b1d7cb90b2f3 drm/i915/xehp: Use separate sseu init function
24f87a81e92f drm/i915/sseu: Simplify gen11+ SSEU handling
0a3eeddf1db0 drm/i915/sseu: Disassociate internal subslice mask representation 
from uapi
-:485: WARNING:NEW_TYPEDEFS: do not add new typedefs
#485: FILE: drivers/gpu/drm/i915/gt/intel_sseu.h:62:
+typedef struct {

total: 0 errors, 1 warnings, 0 checks, 690 lines checked




Re: [Intel-gfx] ✗ Fi.CI.IGT: failure for drm/i915/gt: Fix memory leaks in per-gt sysfs

2022-04-27 Thread Dixit, Ashutosh
On Wed, 27 Apr 2022 15:45:40 -0700, Patchwork wrote:
>
> Possible regressions
>
> * igt@kms_flip@flip-vs-suspend-interruptible@a-edp1:
>
>  * shard-skl: PASS -> INCOMPLETE
>
> * igt@syncobj_timeline@wait-all-for-submit-snapshot:
>
>  * shard-skl: PASS -> FAIL
>
> Warnings
>
> * igt@gem_eio@unwedge-stress:
>
>  * shard-tglb: FAIL (i915#232) -> FAIL +1 similar issue

These failures are unrelated, the patch is related only to per-gt sysfs.


Re: [Intel-gfx] [PATCH 1/2] drm/i915/xehp: Add compute engine ABI

2022-04-27 Thread Kumar Valsan, Prathap
On Mon, Apr 25, 2022 at 11:41:36AM +0100, Tvrtko Ursulin wrote:
> 
> On 22/04/2022 20:50, Matt Roper wrote:
> > We're now ready to start exposing compute engines to userspace.
> > 
> > While we're at it, let's extend the kerneldoc description for the other
> > engine types as well.
> > 
> > Cc: Daniele Ceraolo Spurio 
> > Cc: Tvrtko Ursulin 
> > Cc: Vinay Belgaumkar 
> > Cc: Jordan Justen 
> > Cc: Szymon Morek 
> > UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14395
> > UMD (compute): https://github.com/intel/compute-runtime/pull/451
> 
> The compute one points to a commit named "Add compute engine class for xehp"
> but content of which seems more about engine query, including the yet
> non-existent distance query (and more)?! It certainly does not appear to be
> adding a definition of I915_ENGINE_CLASS_COMPUTE. This needs clarifying.
> 
> > Signed-off-by: Matt Roper 
> > ---
> >   drivers/gpu/drm/i915/gt/intel_engine_user.c |  2 +-
> >   drivers/gpu/drm/i915/gt/intel_gt.c  |  1 +
> >   drivers/gpu/drm/i915/i915_drm_client.c  |  1 +
> >   drivers/gpu/drm/i915/i915_drm_client.h  |  2 +-
> >   include/uapi/drm/i915_drm.h | 62 +++--
> >   5 files changed, 60 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
> > b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > index 0f6cd96b459f..46a174f8aa00 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > @@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
> > [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
> > [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
> > [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
> > -   /* TODO: Add COMPUTE_CLASS mapping once ABI is available */
> > +   [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
> >   };
> >   static int engine_cmp(void *priv, const struct list_head *A,
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
> > b/drivers/gpu/drm/i915/gt/intel_gt.c
> > index 92394f13b42f..c96e123496a5 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> > @@ -1175,6 +1175,7 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
> > [VIDEO_DECODE_CLASS]= GEN12_VD_TLB_INV_CR,
> > [VIDEO_ENHANCEMENT_CLASS]   = GEN12_VE_TLB_INV_CR,
> > [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
> > +   [COMPUTE_CLASS] = GEN12_GFX_TLB_INV_CR,
> 
> Do you know what 0xcf04 is?
The mmio 0xcf04 is the one we should use for compute class. 
And the context bit in 0xcf04 represents engine instance.

GEN12_GFX_TLB_INV_CR is for render class.

Thanks,
Prathap
> 
> Or if GEN12_GFX_TLB_INV_CR is correct then I think get_reg_and_bit() might
> need adjusting to always select bit 0 for any compute engine instance. Not
> sure how hardware would behave if value other than '1' would be written into
> 0xced8.
> 
> Regards,
> 
> Tvrtko
> 
> > };
> > struct drm_i915_private *i915 = gt->i915;
> > struct intel_uncore *uncore = gt->uncore;
> > diff --git a/drivers/gpu/drm/i915/i915_drm_client.c 
> > b/drivers/gpu/drm/i915/i915_drm_client.c
> > index 475a6f824cad..18d38cb59923 100644
> > --- a/drivers/gpu/drm/i915/i915_drm_client.c
> > +++ b/drivers/gpu/drm/i915/i915_drm_client.c
> > @@ -81,6 +81,7 @@ static const char * const uabi_class_names[] = {
> > [I915_ENGINE_CLASS_COPY] = "copy",
> > [I915_ENGINE_CLASS_VIDEO] = "video",
> > [I915_ENGINE_CLASS_VIDEO_ENHANCE] = "video-enhance",
> > +   [I915_ENGINE_CLASS_COMPUTE] = "compute",
> >   };
> >   static u64 busy_add(struct i915_gem_context *ctx, unsigned int class)
> > diff --git a/drivers/gpu/drm/i915/i915_drm_client.h 
> > b/drivers/gpu/drm/i915/i915_drm_client.h
> > index 5f5b02b01ba0..f796c5e8e060 100644
> > --- a/drivers/gpu/drm/i915/i915_drm_client.h
> > +++ b/drivers/gpu/drm/i915/i915_drm_client.h
> > @@ -13,7 +13,7 @@
> >   #include "gt/intel_engine_types.h"
> > -#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
> > +#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
> >   struct drm_i915_private;
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 35ca528803fd..a2def7b27009 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -154,21 +154,71 @@ enum i915_mocs_table_index {
> > I915_MOCS_CACHED,
> >   };
> > -/*
> > +/**
> > + * enum drm_i915_gem_engine_class - uapi engine type enumeration
> > + *
> >* Different engines serve different roles, and there may be more than one
> > - * engine serving each role. enum drm_i915_gem_engine_class provides a
> > - * classification of the role of the engine, which may be used when 
> > requesting
> > - * operations to be performed on a certain subset of engines, or for 
> > providing
> > - * information about that group.
> > 

Re: [Intel-gfx] ✓ Fi.CI.IGT: success for i915: Upstream initial DG2 PCI IDs

2022-04-27 Thread Matt Roper
On Wed, Apr 27, 2022 at 04:53:12AM +, Patchwork wrote:
> == Series Details ==
> 
> Series: i915: Upstream initial DG2 PCI IDs
> URL   : https://patchwork.freedesktop.org/series/103098/
> State : success
> 
> == Summary ==
> 
> CI Bug Log - changes from CI_DRM_11550_full -> Patchwork_103098v1_full
> 
> 
> Summary
> ---
> 
>   **SUCCESS**
> 
>   No regressions found.
> 

One patch applied to drm-intel-next, one applied to topic/core-for-CI.
Rebuilding drm-tip required adding one cat-to-fixup to the drm-rerere
branch to properly resolve the conflicts.

Thanks for the review.


Matt

>   
> 
> Participating hosts (10 -> 13)
> --
> 
>   Additional (3): shard-rkl shard-dg1 shard-tglu 
> 
> Possible new issues
> ---
> 
>   Here are the unknown changes that may have been introduced in 
> Patchwork_103098v1_full:
> 
> ### CI changes ###
> 
>  Suppressed 
> 
>   The following results come from untrusted machines, tests, or statuses.
>   They do not affect the overall result.
> 
>   * boot:
> - {shard-rkl}:NOTRUN -> ([PASS][1], [PASS][2], [PASS][3], 
> [PASS][4], [FAIL][5], [PASS][6], [PASS][7], [PASS][8], [PASS][9], [PASS][10], 
> [PASS][11], [PASS][12], [PASS][13], [PASS][14], [PASS][15], [PASS][16], 
> [PASS][17], [PASS][18], [PASS][19], [PASS][20], [PASS][21])
>[1]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-6/boot.html
>[2]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-6/boot.html
>[3]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-6/boot.html
>[4]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-5/boot.html
>[5]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-5/boot.html
>[6]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-5/boot.html
>[7]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-4/boot.html
>[8]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-3/boot.html
>[9]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-3/boot.html
>[10]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-2/boot.html
>[11]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-2/boot.html
>[12]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-2/boot.html
>[13]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-2/boot.html
>[14]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-2/boot.html
>[15]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-2/boot.html
>[16]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-1/boot.html
>[17]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-1/boot.html
>[18]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-1/boot.html
>[19]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-1/boot.html
>[20]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-1/boot.html
>[21]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-rkl-1/boot.html
> 
>   
> 
> ### IGT changes ###
> 
>  Suppressed 
> 
>   The following results come from untrusted machines, tests, or statuses.
>   They do not affect the overall result.
> 
>   * {igt@kms_concurrent@pipe-d@hdmi-a-1}:
> - {shard-dg1}:NOTRUN -> [CRASH][22]
>[22]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-dg1-16/igt@kms_concurrent@pip...@hdmi-a-1.html
> 
>   
> New tests
> -
> 
>   New tests have been introduced between CI_DRM_11550_full and 
> Patchwork_103098v1_full:
> 
> ### New IGT tests (1) ###
> 
>   * igt@kms_sequence@get-forked-busy@hdmi-a-1-pipe-d:
> - Statuses : 1 pass(s)
> - Exec time: [1.24] s
> 
>   
> 
> Known issues
> 
> 
>   Here are the changes found in Patchwork_103098v1_full that come from known 
> issues:
> 
> ### IGT changes ###
> 
>  Issues hit 
> 
>   * igt@gem_eio@in-flight-1us:
> - shard-skl:  [PASS][23] -> [TIMEOUT][24] ([i915#3063])
>[23]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl7/igt@gem_...@in-flight-1us.html
>[24]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-skl4/igt@gem_...@in-flight-1us.html
> 
>   * igt@gem_exec_fair@basic-flow@rcs0:
> - shard-skl:  NOTRUN -> [SKIP][25] ([fdo#109271]) +155 similar 
> issues
>[25]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103098v1/shard-skl10/igt@gem_exec_fair@basic-f...@rcs0.html
> 
>   * igt@gem_exec_fair@basic-none-share@rcs0:
> - shard-iclb: NOTRUN -> [FAIL][26] ([i915#2842])
>[26]: 
> 

Re: [Intel-gfx] [PATCH 2/2] drm/i915/dg2: Define GuC firmware version for DG2

2022-04-27 Thread Ceraolo Spurio, Daniele




On 4/27/2022 9:55 AM, john.c.harri...@intel.com wrote:

From: John Harrison 

First release of GuC for DG2.


Reviewed-by: Daniele Ceraolo Spurio 

Daniele



Signed-off-by: John Harrison 
CC: Tomasz Mistat 
CC: Ramalingam C 
CC: Daniele Ceraolo Spurio 
---
  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index a876d39e6bcf..d078f884b5e3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -53,6 +53,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
   * firmware as TGL.
   */
  #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
+   fw_def(DG2,  0, guc_def(dg2,  70, 1, 2)) \
fw_def(ALDERLAKE_P,  0, guc_def(adlp, 70, 1, 1)) \
fw_def(ALDERLAKE_S,  0, guc_def(tgl,  70, 1, 1)) \
fw_def(DG1,  0, guc_def(dg1,  70, 1, 1)) \




Re: [Intel-gfx] ✗ Fi.CI.IGT: failure for Initial GuC firmware release for DG2

2022-04-27 Thread John Harrison

On 4/27/2022 11:59, Patchwork wrote:

Project List - Patchwork *Patch Details*
*Series:*   Initial GuC firmware release for DG2
*URL:*  https://patchwork.freedesktop.org/series/103230/
*State:*failure
*Details:* 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/index.html



  CI Bug Log - changes from CI_DRM_11550_full -> Patchwork_103230v1_full


Summary

*FAILURE*

Serious unknown changes coming with Patchwork_103230v1_full absolutely
need to be verified manually.

If you think the reported changes have nothing to do with the changes
introduced in Patchwork_103230v1_full, please notify your bug team to
allow them to document this new failure mode, which will reduce false
positives in CI.



Participating hosts (10 -> 13)

Additional (3): shard-rkl shard-dg1 shard-tglu


Possible new issues

Here are the unknown changes that may have been introduced in 
Patchwork_103230v1_full:



  IGT changes


Possible regressions

  * igt@gem_exec_parallel@engines@fds:
  o shard-skl: PASS


-> INCOMPLETE




Warnings

  * igt@gem_eio@unwedge-stress:
  o shard-tglb: FAIL


(i915#232
) -> FAIL


+1 similar issue


Suppressed

The following results come from untrusted machines, tests, or statuses.
They do not affect the overall result.

 *

igt@kms_rotation_crc@primary-rotation-270:

  o {shard-rkl}: NOTRUN -> INCOMPLETE


 *

{igt@kms_sequence@queue-idle@edp-1-pipe-a}:

  o shard-skl: PASS


-> FAIL



None of the above are DG2 and this patch only changes the GuC firmware 
version used on DG2. Therefore, they are not related to this change.


John.



 *


Known issues

Here are the changes found in Patchwork_103230v1_full that come from 
known issues:



  CI changes


Issues hit

  * boot:
  o shard-apl: (PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS

,
PASS


[Intel-gfx] ✗ Fi.CI.BAT: failure for i915: SSEU handling updates

2022-04-27 Thread Patchwork
== Series Details ==

Series: i915: SSEU handling updates
URL   : https://patchwork.freedesktop.org/series/103244/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_11550 -> Patchwork_103244v1


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_103244v1 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_103244v1, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v1/index.html

Participating hosts (43 -> 44)
--

  Additional (4): bat-dg2-8 fi-icl-u2 bat-dg1-6 bat-adlp-4 
  Missing(3): fi-bsw-cyan bat-rpls-2 bat-jsl-2 

Possible new issues
---

  Here are the unknown changes that may have been introduced in 
Patchwork_103244v1:

### IGT changes ###

 Possible regressions 

  * igt@i915_selftest@live:
- bat-adlp-4: NOTRUN -> [INCOMPLETE][1]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v1/bat-adlp-4/igt@i915_selft...@live.html

  
 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_selftest@live@execlists:
- {bat-rpls-1}:   NOTRUN -> [INCOMPLETE][2]
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v1/bat-rpls-1/igt@i915_selftest@l...@execlists.html

  
Known issues


  Here are the changes found in Patchwork_103244v1 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@gem_exec_suspend@basic-s0@smem:
- bat-dg1-6:  NOTRUN -> [INCOMPLETE][3] ([i915#5827])
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v1/bat-dg1-6/igt@gem_exec_suspend@basic...@smem.html

  * igt@gem_huc_copy@huc-copy:
- fi-icl-u2:  NOTRUN -> [SKIP][4] ([i915#2190])
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v1/fi-icl-u2/igt@gem_huc_c...@huc-copy.html

  * igt@gem_lmem_swapping@basic:
- bat-adlp-4: NOTRUN -> [SKIP][5] ([i915#4613]) +3 similar issues
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v1/bat-adlp-4/igt@gem_lmem_swapp...@basic.html

  * igt@gem_lmem_swapping@parallel-random-engines:
- fi-icl-u2:  NOTRUN -> [SKIP][6] ([i915#4613]) +3 similar issues
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v1/fi-icl-u2/igt@gem_lmem_swapp...@parallel-random-engines.html

  * igt@gem_tiled_pread_basic:
- bat-adlp-4: NOTRUN -> [SKIP][7] ([i915#3282])
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v1/bat-adlp-4/igt@gem_tiled_pread_basic.html

  * igt@kms_chamelium@dp-crc-fast:
- bat-adlp-4: NOTRUN -> [SKIP][8] ([fdo#111827]) +8 similar issues
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v1/bat-adlp-4/igt@kms_chamel...@dp-crc-fast.html

  * igt@kms_chamelium@hdmi-hpd-fast:
- fi-icl-u2:  NOTRUN -> [SKIP][9] ([fdo#111827]) +8 similar issues
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v1/fi-icl-u2/igt@kms_chamel...@hdmi-hpd-fast.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy:
- fi-icl-u2:  NOTRUN -> [SKIP][10] ([fdo#109278]) +2 similar issues
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v1/fi-icl-u2/igt@kms_cursor_leg...@basic-busy-flip-before-cursor-legacy.html
- bat-adlp-4: NOTRUN -> [SKIP][11] ([i915#4103]) +1 similar issue
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v1/bat-adlp-4/igt@kms_cursor_leg...@basic-busy-flip-before-cursor-legacy.html

  * igt@kms_flip@basic-flip-vs-modeset@a-edp1:
- bat-adlp-4: NOTRUN -> [DMESG-WARN][12] ([i915#3576])
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v1/bat-adlp-4/igt@kms_flip@basic-flip-vs-mode...@a-edp1.html

  * igt@kms_force_connector_basic@force-load-detect:
- fi-icl-u2:  NOTRUN -> [SKIP][13] ([fdo#109285])
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v1/fi-icl-u2/igt@kms_force_connector_ba...@force-load-detect.html

  * igt@kms_force_connector_basic@prune-stale-modes:
- bat-adlp-4: NOTRUN -> [SKIP][14] ([i915#4093]) +3 similar issues
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v1/bat-adlp-4/igt@kms_force_connector_ba...@prune-stale-modes.html

  * igt@kms_setmode@basic-clone-single-crtc:
- fi-icl-u2:  NOTRUN -> [SKIP][15] ([i915#3555])
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103244v1/fi-icl-u2/igt@kms_setm...@basic-clone-single-crtc.html
- bat-adlp-4: NOTRUN -> [SKIP][16] ([i915#3555])
   [16]: 

[Intel-gfx] ✗ Fi.CI.SPARSE: warning for i915: SSEU handling updates

2022-04-27 Thread Patchwork
== Series Details ==

Series: i915: SSEU handling updates
URL   : https://patchwork.freedesktop.org/series/103244/
State : warning

== Summary ==

Error: dim sparse failed
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.




[Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for i915: SSEU handling updates

2022-04-27 Thread Patchwork
== Series Details ==

Series: i915: SSEU handling updates
URL   : https://patchwork.freedesktop.org/series/103244/
State : warning

== Summary ==

Error: dim checkpatch failed
55dde4d186fd drm/i915/sseu: Don't try to store EU mask internally in UAPI format
313094aca5e5 drm/i915/xehp: Drop GETPARAM lookups of I915_PARAM_[SUB]SLICE_MASK
706b98dc34e8 drm/i915/xehp: Use separate sseu init function
d03e472adee0 drm/i915/sseu: Simplify gen11+ SSEU handling
d98bc53c851d drm/i915/sseu: Disassociate internal subslice mask representation 
from uapi
-:485: WARNING:NEW_TYPEDEFS: do not add new typedefs
#485: FILE: drivers/gpu/drm/i915/gt/intel_sseu.h:62:
+typedef struct {

total: 0 errors, 1 warnings, 0 checks, 690 lines checked




[Intel-gfx] [PATCH 2/5] drm/i915/xehp: Drop GETPARAM lookups of I915_PARAM_[SUB]SLICE_MASK

2022-04-27 Thread Matt Roper
Slice/subslice/EU information should be obtained via the topology
queries provided by the I915_QUERY interface; let's turn off support for
the old GETPARAM lookups on Xe_HP and beyond where we can't return
meaningful values.

The slice mask lookup is meaningless since Xe_HP doesn't support
traditional slices (and we make no attempt to return the various new
units like gslices, cslices, mslices, etc.) here.

The subslice mask lookup is even more problematic; given the distinct
masks for geometry vs compute purposes, the combined mask returned here
is likely not what userspace would want to act upon anyway.  The value
is also limited to 32-bits by the nature of the GETPARAM ioctl which is
sufficient for the initial Xe_HP platforms, but is unable to convey the
larger masks that will be needed on other upcoming platforms.  Finally,
the value returned here becomes even less meaningful when used on
multi-tile platforms where each tile will have its own masks.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/i915_getparam.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_getparam.c 
b/drivers/gpu/drm/i915/i915_getparam.c
index c12a0adefda5..ac9767c56619 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -148,11 +148,19 @@ int i915_getparam_ioctl(struct drm_device *dev, void 
*data,
value = intel_engines_has_context_isolation(i915);
break;
case I915_PARAM_SLICE_MASK:
+   /* Not supported from Xe_HP onward; use topology queries */
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+   return -EINVAL;
+
value = sseu->slice_mask;
if (!value)
return -ENODEV;
break;
case I915_PARAM_SUBSLICE_MASK:
+   /* Not supported from Xe_HP onward; use topology queries */
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+   return -EINVAL;
+
/* Only copy bits from the first slice */
memcpy(&value, sseu->subslice_mask,
   min(sseu->ss_stride, (u8)sizeof(value)));
-- 
2.35.1



[Intel-gfx] [PATCH 5/5] drm/i915/sseu: Disassociate internal subslice mask representation from uapi

2022-04-27 Thread Matt Roper
Rather than storing subslice masks internally as u8[] (inside the sseu
structure) and u32 (everywhere else), let's move over to using an
intel_sseu_ss_mask_t typedef compatible with the operations in
linux/bitmap.h.  We're soon going to start adding code for a new
platform where subslice masks are spread across two 32-bit registers
(requiring 64 bits to represent), and we expect future platforms will
likely take this even farther, requiring bitmask storage larger than a
simple u64 can hold.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c  |   4 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c|   2 +-
 drivers/gpu/drm/i915/gt/intel_gt.c   |  14 +-
 drivers/gpu/drm/i915/gt/intel_sseu.c | 197 +++
 drivers/gpu/drm/i915/gt/intel_sseu.h |  48 ++---
 drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c |  28 +--
 drivers/gpu/drm/i915/gt/intel_workarounds.c  |  28 ++-
 drivers/gpu/drm/i915/i915_getparam.c |   2 +-
 drivers/gpu/drm/i915/i915_query.c|   8 +-
 9 files changed, 183 insertions(+), 148 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index ab4c5ab28e4d..ea012ee3a8de 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1901,7 +1901,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
if (user->slice_mask & ~device->slice_mask)
return -EINVAL;
 
-   if (user->subslice_mask & ~device->subslice_mask[0])
+   if (user->subslice_mask & ~device->subslice_mask.b[0])
return -EINVAL;
 
if (user->max_eus_per_subslice > device->max_eus_per_subslice)
@@ -1915,7 +1915,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
/* Part specific restrictions. */
if (GRAPHICS_VER(i915) == 11) {
unsigned int hw_s = hweight8(device->slice_mask);
-   unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
+   unsigned int hw_ss_per_s = hweight8(device->subslice_mask.b[0]);
unsigned int req_s = hweight8(context->slice_mask);
unsigned int req_ss = hweight8(context->subslice_mask);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 14c6ddbbfde8..39c09963b3c7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -610,7 +610,7 @@ static void engine_mask_apply_compute_fuses(struct intel_gt 
*gt)
if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
return;
 
-   ccs_mask = 
intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu),
+   ccs_mask = 
intel_slicemask_from_dssmask(info->sseu.compute_subslice_mask,
ss_per_ccs);
/*
 * If all DSS in a quadrant are fused off, the corresponding CCS
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 92394f13b42f..cc03512d59ba 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -133,13 +133,6 @@ static const struct intel_mmio_range 
dg2_lncf_steering_table[] = {
{},
 };
 
-static u16 slicemask(struct intel_gt *gt, int count)
-{
-   u64 dss_mask = intel_sseu_get_subslices(&gt->info.sseu, 0);
-
-   return intel_slicemask_from_dssmask(dss_mask, count);
-}
-
 int intel_gt_init_mmio(struct intel_gt *gt)
 {
struct drm_i915_private *i915 = gt->i915;
@@ -153,11 +146,14 @@ int intel_gt_init_mmio(struct intel_gt *gt)
 * An mslice is unavailable only if both the meml3 for the slice is
 * disabled *and* all of the DSS in the slice (quadrant) are disabled.
 */
-   if (HAS_MSLICES(i915))
+   if (HAS_MSLICES(i915)) {
gt->info.mslice_mask =
-   slicemask(gt, GEN_DSS_PER_MSLICE) |
+   
intel_slicemask_from_dssmask(gt->info.sseu.subslice_mask,
+GEN_DSS_PER_MSLICE);
+   gt->info.mslice_mask |=
(intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
 GEN12_MEML3_EN_MASK);
+   }
 
if (IS_DG2(i915)) {
gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c 
b/drivers/gpu/drm/i915/gt/intel_sseu.c
index f7ff6a9f67b0..466505d6bd18 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -28,56 +28,49 @@ void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 
max_slices,
 unsigned int
 intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
 {
-   unsigned int i, total = 0;
-
-   for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
-   total += hweight8(sseu->subslice_mask[i]);
-
-   return total;
+   return 

[Intel-gfx] [PATCH 0/5] i915: SSEU handling updates

2022-04-27 Thread Matt Roper
This series makes a handful of updates to i915's internal handling of
slice/subslice/EU (SSEU) data to handle recent platforms like Xe_HP in a
more natural manner and to prepare for some additional upcoming
platforms we have in the pipeline (the first of which I'll probably
start sending patches for in the next week or two).  One key idea of
this series is that although we have a fixed ABI to convey SSEU data to
userspace (i.e., multiple u8[] arrays with data stored at different
strides), we don't need to use this cumbersome format for the driver's
own internal storage.  As long as we can convert into the uapi form
properly when responding to the I915_QUERY ioctl, it's preferable to use
an internal storage format that's easier for the driver to work with.
Doing so can also save us some storage space on modern platforms since
we don't always need to replicate a bunch of data that's architecturally
guaranteed to be identical.

Another key point here is that Xe_HP platforms today have subslice (DSS)
masks that are 32 bits, which maxes out the storage of a u32.  On future
platforms the architecture design is going to start spreading their DSS
masks over multiple 32-bit fuse registers.  So even for platforms where
the total number of DSS doesn't actually go up, we're going to need
larger storage than just a u32 to express the mask properly.  To
accommodate this, we start storing our subslice mask in a new typedef
that can be processed by the linux/bitmap.h operations.

Finally, since no userspace for Xe_HP or beyond is using the legacy
I915_GETPARAM ioctl lookups for I915_PARAM_SLICE_MASK and
I915_PARAM_SUBSLICE_MASK (since they've migrated to the more flexible
I915_QUERY ioctl that can return more than a simple u32 value), we take
the opportunity to officially drop support for those GETPARAM lookups on
modern platforms.  Maintaining support for these GETPARAM lookups doesn't
make sense for a number of reasons:

 * Traditional slices no longer exist, and newer ideas like gslices,
   cslices, mslices, etc. aren't something userspace needs to query
   since it can be inferred from other information.
 * The GETPARAM ioctl doesn't have a way to distinguish between geometry
   subslice masks and compute subslice masks, which are distinct on
   Xe_HP and beyond.
 * The I915_GETPARAM ioctl is limited to returning a 32-bit value, so
   when subslice masks begin to exceed 32-bits, it simply can't return
   the entire mask.
 * The GETPARAM ioctl doesn't have a way to give sensible information
   for multi-tile devices.


Cc: Tvrtko Ursulin 

Matt Roper (5):
  drm/i915/sseu: Don't try to store EU mask internally in UAPI format
  drm/i915/xehp: Drop GETPARAM lookups of I915_PARAM_[SUB]SLICE_MASK
  drm/i915/xehp: Use separate sseu init function
  drm/i915/sseu: Simplify gen11+ SSEU handling
  drm/i915/sseu: Disassociate internal subslice mask representation from
uapi

 drivers/gpu/drm/i915/gem/i915_gem_context.c  |   4 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c|   2 +-
 drivers/gpu/drm/i915/gt/intel_gt.c   |  14 +-
 drivers/gpu/drm/i915/gt/intel_sseu.c | 371 +++
 drivers/gpu/drm/i915/gt/intel_sseu.h |  69 ++--
 drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c |  28 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c  |  28 +-
 drivers/gpu/drm/i915/i915_getparam.c |  10 +-
 drivers/gpu/drm/i915/i915_query.c|  16 +-
 9 files changed, 323 insertions(+), 219 deletions(-)

-- 
2.35.1



[Intel-gfx] [PATCH 1/5] drm/i915/sseu: Don't try to store EU mask internally in UAPI format

2022-04-27 Thread Matt Roper
Storing the EU mask internally in the same format the I915_QUERY
topology queries use makes the final copy_to_user() a bit simpler, but
makes the rest of the driver's SSEU more complicated.  Given that modern
platforms (gen11 and beyond) are architecturally guaranteed to have
equivalent EU masks for every subslice, it also wastes quite a bit of
space since we're storing a duplicate copy of the EU mask for every
single subslice where we really only need to store one instance.

Let's add a has_common_ss_eumask flag to the SSEU structure to determine
which type of hardware we're working on.  For the older pre-gen11
platforms the various subslices can have different EU masks so we use an
array of u16[] to store each subslice's copy.  For gen11 and beyond
we'll only use index [0] of the array and not worry about copying the
repeated value, except when converting into uapi form for the I915_QUERY
ioctl.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_sseu.c | 66 +---
 drivers/gpu/drm/i915/gt/intel_sseu.h | 21 -
 drivers/gpu/drm/i915/i915_query.c|  8 ++--
 3 files changed, 73 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c 
b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 9881a6790574..13387b4024ea 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -91,36 +91,70 @@ static int sseu_eu_idx(const struct sseu_dev_info *sseu, 
int slice,
 static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
int subslice)
 {
-   int i, offset = sseu_eu_idx(sseu, slice, subslice);
-   u16 eu_mask = 0;
-
-   for (i = 0; i < sseu->eu_stride; i++)
-   eu_mask |=
-   ((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);
+   if (!intel_sseu_has_subslice(sseu, slice, subslice))
+   return 0;
 
-   return eu_mask;
+   if (sseu->has_common_ss_eumask)
+   return sseu->eu_mask[0];
+   else
+   return sseu->eu_mask[slice * sseu->max_subslices + subslice];
 }
 
 static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
 u16 eu_mask)
 {
-   int i, offset = sseu_eu_idx(sseu, slice, subslice);
+   WARN_ON(sseu->has_common_ss_eumask);
+   WARN_ON(sseu->max_eus_per_subslice > sizeof(sseu->eu_mask[0]) * 
BITS_PER_BYTE);
 
-   for (i = 0; i < sseu->eu_stride; i++)
-   sseu->eu_mask[offset + i] =
-   (eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
+   sseu->eu_mask[slice * sseu->max_subslices + subslice] =
+   eu_mask & GENMASK(sseu->max_eus_per_subslice - 1, 0);
 }
 
 static u16 compute_eu_total(const struct sseu_dev_info *sseu)
 {
u16 i, total = 0;
 
+   if (sseu->has_common_ss_eumask)
+   return intel_sseu_subslices_per_slice(sseu, 0) *
+   hweight16(sseu->eu_mask[0]);
+
for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
-   total += hweight8(sseu->eu_mask[i]);
+   total += hweight16(sseu->eu_mask[i]);
 
return total;
 }
 
+/**
+ * intel_sseu_copy_eumask_to_user - Copy EU mask into a userspace buffer
+ * @to: Pointer to userspace buffer to copy to
+ * @sseu: SSEU structure containing EU mask to copy
+ *
+ * Copies the EU mask to a userspace buffer in the format expected by
+ * the query ioctl's topology queries.
+ *
+ * Returns the result of the copy_to_user() operation.
+ */
+int intel_sseu_copy_eumask_to_user(void __user *to,
+  const struct sseu_dev_info *sseu)
+{
+   u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE] = {};
+   int len = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
+   int s, ss, i;
+
+   for (s = 0; s < sseu->max_slices; s++) {
+   for (ss = 0; ss < sseu->max_subslices; ss++) {
+   int offset = sseu_eu_idx(sseu, s, ss);
+   u16 mask = sseu_get_eus(sseu, s, ss);
+
+   for (i = 0; i < sseu->eu_stride; i++)
+   eu_mask[offset + i] =
+   (mask >> (BITS_PER_BYTE * i)) & 0xff;
+   }
+   }
+
+   return copy_to_user(to, eu_mask, len);
+}
+
 static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
 {
u32 ss_mask;
@@ -134,7 +168,7 @@ static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, 
u8 s, u32 ss_en)
 static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
u32 g_ss_en, u32 c_ss_en, u16 eu_en)
 {
-   int s, ss;
+   int s;
 
/* g_ss_en/c_ss_en represent entire subslice mask across all slices */
GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
@@ -162,11 +196,9 @@ static void gen11_compute_sseu_info(struct sseu_dev_info 
*sseu, u8 s_en,
intel_sseu_set_subslices(sseu, 

[Intel-gfx] [PATCH 4/5] drm/i915/sseu: Simplify gen11+ SSEU handling

2022-04-27 Thread Matt Roper
Although gen11 and gen12 architectures supported the concept of multiple
slices, in practice all the platforms that were actually designed only
had a single slice (i.e., note the parameters to 'intel_sseu_set_info'
that we pass for each platform).  We can simplify the code slightly by
dropping the multi-slice logic from gen11+ platforms.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_sseu.c | 73 ++--
 1 file changed, 36 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c 
b/drivers/gpu/drm/i915/gt/intel_sseu.c
index ef66c2b8861a..f7ff6a9f67b0 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -155,48 +155,32 @@ int intel_sseu_copy_eumask_to_user(void __user *to,
return copy_to_user(to, eu_mask, len);
 }
 
-static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
-{
-   u32 ss_mask;
-
-   ss_mask = ss_en >> (s * sseu->max_subslices);
-   ss_mask &= GENMASK(sseu->max_subslices - 1, 0);
-
-   return ss_mask;
-}
-
-static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
+static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
u32 g_ss_en, u32 c_ss_en, u16 eu_en)
 {
-   int s;
+   u32 valid_ss_mask = GENMASK(sseu->max_subslices - 1, 0);
 
/* g_ss_en/c_ss_en represent entire subslice mask across all slices */
GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
   sizeof(g_ss_en) * BITS_PER_BYTE);
 
-   for (s = 0; s < sseu->max_slices; s++) {
-   if ((s_en & BIT(s)) == 0)
-   continue;
+   sseu->slice_mask |= BIT(0);
+
+   /*
+* XeHP introduces the concept of compute vs geometry DSS. To reduce
+* variation between GENs around subslice usage, store a mask for both
+* the geometry and compute enabled masks since userspace will need to
+* be able to query these masks independently.  Also compute a total
+* enabled subslice count for the purposes of selecting subslices to
+* use in a particular GEM context.
+*/
+   intel_sseu_set_subslices(sseu, 0, sseu->compute_subslice_mask,
+c_ss_en & valid_ss_mask);
+   intel_sseu_set_subslices(sseu, 0, sseu->geometry_subslice_mask,
+g_ss_en & valid_ss_mask);
+   intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask,
+(g_ss_en | c_ss_en) & valid_ss_mask);
 
-   sseu->slice_mask |= BIT(s);
-
-   /*
-* XeHP introduces the concept of compute vs geometry DSS. To
-* reduce variation between GENs around subslice usage, store a
-* mask for both the geometry and compute enabled masks since
-* userspace will need to be able to query these masks
-* independently.  Also compute a total enabled subslice count
-* for the purposes of selecting subslices to use in a
-* particular GEM context.
-*/
-   intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
-get_ss_stride_mask(sseu, s, c_ss_en));
-   intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
-get_ss_stride_mask(sseu, s, g_ss_en));
-   intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
-get_ss_stride_mask(sseu, s,
-   g_ss_en | c_ss_en));
-   }
sseu->has_common_ss_eumask = 1;
sseu->eu_mask[0] = eu_en;
sseu->eu_per_subslice = hweight16(eu_en);
@@ -229,7 +213,7 @@ static void xehp_sseu_info_init(struct intel_gt *gt)
if (eu_en_fuse & BIT(eu))
eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
 
-   gen11_compute_sseu_info(sseu, 0x1, g_dss_en, c_dss_en, eu_en);
+   gen11_compute_sseu_info(sseu, g_dss_en, c_dss_en, eu_en);
 }
 
 static void gen12_sseu_info_init(struct intel_gt *gt)
@@ -249,8 +233,15 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
 */
intel_sseu_set_info(sseu, 1, 6, 16);
 
+   /*
+* Although gen12 architecture supported multiple slices, TGL, RKL,
+* DG1, and ADL only had a single slice.
+*/
s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
GEN11_GT_S_ENA_MASK;
+   if (s_en != 0x1)
+   drm_dbg(&gt->i915->drm, "Slice mask %#x is not the expected 
0x1!\n",
+   s_en);
 
g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
 
@@ -262,7 +253,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
if (eu_en_fuse & BIT(eu))
eu_en |= BIT(eu * 2) | BIT(eu 

[Intel-gfx] [PATCH 3/5] drm/i915/xehp: Use separate sseu init function

2022-04-27 Thread Matt Roper
Xe_HP has enough fundamental differences from previous platforms that it
makes sense to use a separate SSEU init function to keep things
straightforward and easy to understand.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_sseu.c | 85 
 1 file changed, 48 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c 
b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 13387b4024ea..ef66c2b8861a 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -203,13 +203,42 @@ static void gen11_compute_sseu_info(struct sseu_dev_info 
*sseu, u8 s_en,
sseu->eu_total = compute_eu_total(sseu);
 }
 
-static void gen12_sseu_info_init(struct intel_gt *gt)
+static void xehp_sseu_info_init(struct intel_gt *gt)
 {
+   struct sseu_dev_info *sseu = &gt->info.sseu;
struct intel_uncore *uncore = gt->uncore;
u32 g_dss_en, c_dss_en = 0;
u16 eu_en = 0;
u8 eu_en_fuse;
+   int eu;
+
+   /*
+* The concept of slice has been removed in Xe_HP.  To be compatible
+* with prior generations, assume a single slice across the entire
+* device. Then calculate out the DSS for each workload type within
+* that software slice.
+*/
+   intel_sseu_set_info(sseu, 1, 32, 16);
+
+   g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
+   c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);
+
+   eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & 
XEHP_EU_ENA_MASK;
+
+   for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
+   if (eu_en_fuse & BIT(eu))
+   eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
+
+   gen11_compute_sseu_info(sseu, 0x1, g_dss_en, c_dss_en, eu_en);
+}
+
+static void gen12_sseu_info_init(struct intel_gt *gt)
+{
+   struct sseu_dev_info *sseu = &gt->info.sseu;
+   struct intel_uncore *uncore = gt->uncore;
+   u32 g_dss_en;
+   u16 eu_en = 0;
+   u8 eu_en_fuse;
u8 s_en;
int eu;
 
@@ -217,43 +246,23 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
 * Instead of splitting these, provide userspace with an array
 * of DSS to more closely represent the hardware resource.
-*
-* In addition, the concept of slice has been removed in Xe_HP.
-* To be compatible with prior generations, assume a single slice
-* across the entire device. Then calculate out the DSS for each
-* workload type within that software slice.
 */
-   if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915))
-   intel_sseu_set_info(sseu, 1, 32, 16);
-   else
-   intel_sseu_set_info(sseu, 1, 6, 16);
+   intel_sseu_set_info(sseu, 1, 6, 16);
 
-   /*
-* As mentioned above, Xe_HP does not have the concept of a slice.
-* Enable one for software backwards compatibility.
-*/
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-   s_en = 0x1;
-   else
-   s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
-  GEN11_GT_S_ENA_MASK;
+   s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
+   GEN11_GT_S_ENA_MASK;
 
g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-   c_dss_en = intel_uncore_read(uncore, 
GEN12_GT_COMPUTE_DSS_ENABLE);
 
/* one bit per pair of EUs */
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-   eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & 
XEHP_EU_ENA_MASK;
-   else
-   eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
-  GEN11_EU_DIS_MASK);
+   eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
+  GEN11_EU_DIS_MASK);
 
for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
if (eu_en_fuse & BIT(eu))
eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
 
-   gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);
+   gen11_compute_sseu_info(sseu, s_en, g_dss_en, 0, eu_en);
 
/* TGL only supports slice-level power gating */
sseu->has_slice_pg = 1;
@@ -608,18 +617,20 @@ void intel_sseu_info_init(struct intel_gt *gt)
 {
struct drm_i915_private *i915 = gt->i915;
 
-   if (IS_HASWELL(i915))
-   hsw_sseu_info_init(gt);
-   else if (IS_CHERRYVIEW(i915))
-   cherryview_sseu_info_init(gt);
-   else if (IS_BROADWELL(i915))
-   bdw_sseu_info_init(gt);
-   else if (GRAPHICS_VER(i915) == 9)
-   gen9_sseu_info_init(gt);
-   else if (GRAPHICS_VER(i915) == 11)
-   gen11_sseu_info_init(gt);
+   if (GRAPHICS_VER_FULL(i915) 

[Intel-gfx] ✗ Fi.CI.IGT: failure for drm/i915/gt: Fix memory leaks in per-gt sysfs

2022-04-27 Thread Patchwork
== Series Details ==

Series: drm/i915/gt: Fix memory leaks in per-gt sysfs
URL   : https://patchwork.freedesktop.org/series/103236/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_11550_full -> Patchwork_103236v1_full


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_103236v1_full absolutely need 
to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_103236v1_full, please notify your bug team to allow 
them
  to document this new failure mode, which will reduce false positives in CI.

  

Participating hosts (10 -> 13)
--

  Additional (3): shard-rkl shard-dg1 shard-tglu 

Possible new issues
---

  Here are the unknown changes that may have been introduced in 
Patchwork_103236v1_full:

### IGT changes ###

 Possible regressions 

  * igt@kms_flip@flip-vs-suspend-interruptible@a-edp1:
- shard-skl:  [PASS][1] -> [INCOMPLETE][2]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl7/igt@kms_flip@flip-vs-suspend-interrupti...@a-edp1.html
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/shard-skl10/igt@kms_flip@flip-vs-suspend-interrupti...@a-edp1.html

  * igt@syncobj_timeline@wait-all-for-submit-snapshot:
- shard-skl:  [PASS][3] -> [FAIL][4]
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl6/igt@syncobj_timel...@wait-all-for-submit-snapshot.html
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/shard-skl1/igt@syncobj_timel...@wait-all-for-submit-snapshot.html

  
 Warnings 

  * igt@gem_eio@unwedge-stress:
- shard-tglb: [FAIL][5] ([i915#232]) -> [FAIL][6] +1 similar issue
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-tglb5/igt@gem_...@unwedge-stress.html
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/shard-tglb2/igt@gem_...@unwedge-stress.html

  
 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_pm_dc@dc6-dpms:
- {shard-rkl}:NOTRUN -> [INCOMPLETE][7] +2 similar issues
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/shard-rkl-5/igt@i915_pm...@dc6-dpms.html

  * {igt@kms_concurrent@pipe-d@hdmi-a-3}:
- {shard-dg1}:NOTRUN -> [CRASH][8]
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/shard-dg1-18/igt@kms_concurrent@pip...@hdmi-a-3.html

  
New tests
-

  New tests have been introduced between CI_DRM_11550_full and 
Patchwork_103236v1_full:

### New IGT tests (4) ###

  * igt@kms_sequence@get-busy@hdmi-a-3-pipe-a:
- Statuses : 1 pass(s)
- Exec time: [2.42] s

  * igt@kms_sequence@get-busy@hdmi-a-3-pipe-b:
- Statuses : 1 pass(s)
- Exec time: [2.38] s

  * igt@kms_sequence@get-busy@hdmi-a-3-pipe-c:
- Statuses : 1 pass(s)
- Exec time: [2.38] s

  * igt@kms_sequence@get-busy@hdmi-a-3-pipe-d:
- Statuses : 1 pass(s)
- Exec time: [2.39] s

  

Known issues


  Here are the changes found in Patchwork_103236v1_full that come from known 
issues:

### CI changes ###

 Possible fixes 

  * boot:
- shard-skl:  ([PASS][9], [PASS][10], [PASS][11], [PASS][12], 
[PASS][13], [PASS][14], [PASS][15], [PASS][16], [PASS][17], [PASS][18], 
[PASS][19], [PASS][20], [FAIL][21], [PASS][22], [PASS][23], [PASS][24], 
[PASS][25], [PASS][26], [PASS][27], [PASS][28], [PASS][29], [PASS][30], 
[PASS][31], [PASS][32]) ([i915#5032]) -> ([PASS][33], [PASS][34], [PASS][35], 
[PASS][36], [PASS][37], [PASS][38], [PASS][39], [PASS][40], [PASS][41], 
[PASS][42], [PASS][43], [PASS][44], [PASS][45], [PASS][46], [PASS][47], 
[PASS][48], [PASS][49], [PASS][50], [PASS][51], [PASS][52], [PASS][53], 
[PASS][54], [PASS][55], [PASS][56], [PASS][57])
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl9/boot.html
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl9/boot.html
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl8/boot.html
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl8/boot.html
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl7/boot.html
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl7/boot.html
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl6/boot.html
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl6/boot.html
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl6/boot.html
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl5/boot.html
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl5/boot.html
   [20]: 

Re: [Intel-gfx] [PATCH 0/2] Initial GuC firmware release for DG2

2022-04-27 Thread John Harrison

On 4/27/2022 11:24, Timo Aaltonen wrote:

john.c.harri...@intel.com kirjoitti 27.4.2022 klo 19.55:

From: John Harrison 

Add GuC firmware for DG2.

Note that an older version of this patch exists in the CI topic
branch. Hence this set includes a revert of that patch before applying
the new version. When merging, the revert would simply be dropped and
the corresponding patch in the topic branch would also be dropped.

Signed-off-by: John Harrison 


John Harrison (2):
   Revert "drm/i915/dg2: Define GuC firmware version for DG2"
   drm/i915/dg2: Define GuC firmware version for DG2

  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)



The firmware is not public yet, though? Shouldn't it have been sent 
upstream already? Same complaint applies to DMC.



Not sure about the DMC team, but for i915 we upload the firmware to an 
FDO repo so that the CI system can find it and run the pre-merge testing 
with it. However, we don't send the final pull request for the real 
linux firmware repo until we have merged the i915 patch to 
drm-gt-intel-next and it is definitely going upstream. Otherwise, we 
might end up pushing firmwares to the linux repo that never get used.


John.



[Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [1/2] drm/i915/gvt: Make intel_gvt_match_device() static

2022-04-27 Thread Patchwork
== Series Details ==

Series: series starting with [1/2] drm/i915/gvt: Make intel_gvt_match_device() 
static
URL   : https://patchwork.freedesktop.org/series/103237/
State : failure

== Summary ==

Error: patch 
https://patchwork.freedesktop.org/api/1.0/series/103237/revisions/1/mbox/ not 
applied
Applying: drm/i915/gvt: Make intel_gvt_match_device() static
Applying: drm/i915/gvt: Fix the compiling error when 
CONFIG_DRM_I915_DEBUG_RUNTIME_PM=n
Using index info to reconstruct a base tree...
M   drivers/gpu/drm/i915/intel_gvt.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/intel_gvt.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/intel_gvt.c
error: Failed to merge in the changes.
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0002 drm/i915/gvt: Fix the compiling error when 
CONFIG_DRM_I915_DEBUG_RUNTIME_PM=n
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".




[Intel-gfx] [PATCH 1/2] drm/i915/gvt: Make intel_gvt_match_device() static

2022-04-27 Thread Zhi Wang
After the GVT-g refactor, intel_gvt_match_device() is only referenced
in handlers.c. Make it static to avoid a compiler warning about a
missing prototype.

Cc: Jason Gunthorpe 
Cc: Jani Nikula 
Cc: Robert Beckett 
Signed-off-by: Zhi Wang 
---
 drivers/gpu/drm/i915/gvt/handlers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c 
b/drivers/gpu/drm/i915/gvt/handlers.c
index cf00398c2870..a93f8fd423c2 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -72,7 +72,7 @@ unsigned long intel_gvt_get_device_type(struct intel_gvt *gvt)
return 0;
 }
 
-bool intel_gvt_match_device(struct intel_gvt *gvt,
+static bool intel_gvt_match_device(struct intel_gvt *gvt,
unsigned long device)
 {
return intel_gvt_get_device_type(gvt) & device;
-- 
2.17.1



[Intel-gfx] [PATCH 2/2] drm/i915/gvt: Fix the compiling error when CONFIG_DRM_I915_DEBUG_RUNTIME_PM=n

2022-04-27 Thread Zhi Wang
A compile error was reported when CONFIG_DRM_I915_DEBUG_RUNTIME_PM=n.
Fix the problem by using the pre-defined macro.

Cc: Jason Gunthorpe 
Cc: Jani Nikula 
Signed-off-by: Zhi Wang 
---
 drivers/gpu/drm/i915/intel_gvt.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c
index 24bc693439e8..e98b6d69a91a 100644
--- a/drivers/gpu/drm/i915/intel_gvt.c
+++ b/drivers/gpu/drm/i915/intel_gvt.c
@@ -309,7 +309,9 @@ EXPORT_SYMBOL_NS_GPL(__intel_context_do_pin, I915_GVT);
 EXPORT_SYMBOL_NS_GPL(__intel_context_do_unpin, I915_GVT);
 EXPORT_SYMBOL_NS_GPL(intel_ring_begin, I915_GVT);
 EXPORT_SYMBOL_NS_GPL(intel_runtime_pm_get, I915_GVT);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
 EXPORT_SYMBOL_NS_GPL(intel_runtime_pm_put, I915_GVT);
+#endif
 EXPORT_SYMBOL_NS_GPL(intel_runtime_pm_put_unchecked, I915_GVT);
 EXPORT_SYMBOL_NS_GPL(intel_uncore_forcewake_for_reg, I915_GVT);
 EXPORT_SYMBOL_NS_GPL(intel_uncore_forcewake_get, I915_GVT);
-- 
2.17.1



[Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915/gt: Fix memory leaks in per-gt sysfs

2022-04-27 Thread Patchwork
== Series Details ==

Series: drm/i915/gt: Fix memory leaks in per-gt sysfs
URL   : https://patchwork.freedesktop.org/series/103236/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11550 -> Patchwork_103236v1


Summary
---

  **SUCCESS**

  No regressions found.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/index.html

Participating hosts (43 -> 47)
--

  Additional (5): bat-dg1-6 bat-dg2-8 bat-adlm-1 fi-icl-u2 bat-adlp-4 
  Missing(1): fi-bsw-cyan 

Known issues


  Here are the changes found in Patchwork_103236v1 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@gem_exec_suspend@basic-s0@smem:
- bat-dg1-6:  NOTRUN -> [INCOMPLETE][1] ([i915#5827])
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/bat-dg1-6/igt@gem_exec_suspend@basic...@smem.html

  * igt@gem_huc_copy@huc-copy:
- fi-icl-u2:  NOTRUN -> [SKIP][2] ([i915#2190])
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/fi-icl-u2/igt@gem_huc_c...@huc-copy.html

  * igt@gem_lmem_swapping@basic:
- bat-adlp-4: NOTRUN -> [SKIP][3] ([i915#4613]) +3 similar issues
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/bat-adlp-4/igt@gem_lmem_swapp...@basic.html

  * igt@gem_lmem_swapping@parallel-random-engines:
- fi-icl-u2:  NOTRUN -> [SKIP][4] ([i915#4613]) +3 similar issues
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/fi-icl-u2/igt@gem_lmem_swapp...@parallel-random-engines.html

  * igt@gem_tiled_pread_basic:
- bat-adlp-4: NOTRUN -> [SKIP][5] ([i915#3282])
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/bat-adlp-4/igt@gem_tiled_pread_basic.html

  * igt@kms_chamelium@dp-crc-fast:
- bat-adlp-4: NOTRUN -> [SKIP][6] ([fdo#111827]) +8 similar issues
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/bat-adlp-4/igt@kms_chamel...@dp-crc-fast.html

  * igt@kms_chamelium@hdmi-hpd-fast:
- fi-icl-u2:  NOTRUN -> [SKIP][7] ([fdo#111827]) +8 similar issues
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/fi-icl-u2/igt@kms_chamel...@hdmi-hpd-fast.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy:
- fi-icl-u2:  NOTRUN -> [SKIP][8] ([fdo#109278]) +2 similar issues
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/fi-icl-u2/igt@kms_cursor_leg...@basic-busy-flip-before-cursor-legacy.html
- bat-adlp-4: NOTRUN -> [SKIP][9] ([i915#4103]) +1 similar issue
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/bat-adlp-4/igt@kms_cursor_leg...@basic-busy-flip-before-cursor-legacy.html

  * igt@kms_flip@basic-plain-flip@a-edp1:
- bat-adlp-4: NOTRUN -> [DMESG-WARN][10] ([i915#3576]) +3 similar 
issues
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/bat-adlp-4/igt@kms_flip@basic-plain-f...@a-edp1.html

  * igt@kms_force_connector_basic@force-load-detect:
- fi-icl-u2:  NOTRUN -> [SKIP][11] ([fdo#109285])
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/fi-icl-u2/igt@kms_force_connector_ba...@force-load-detect.html

  * igt@kms_force_connector_basic@prune-stale-modes:
- bat-adlp-4: NOTRUN -> [SKIP][12] ([i915#4093]) +3 similar issues
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/bat-adlp-4/igt@kms_force_connector_ba...@prune-stale-modes.html

  * igt@kms_setmode@basic-clone-single-crtc:
- fi-icl-u2:  NOTRUN -> [SKIP][13] ([i915#3555])
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/fi-icl-u2/igt@kms_setm...@basic-clone-single-crtc.html
- bat-adlp-4: NOTRUN -> [SKIP][14] ([i915#3555])
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/bat-adlp-4/igt@kms_setm...@basic-clone-single-crtc.html

  * igt@prime_vgem@basic-fence-read:
- bat-adlp-4: NOTRUN -> [SKIP][15] ([i915#3291] / [i915#3708]) +2 
similar issues
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/bat-adlp-4/igt@prime_v...@basic-fence-read.html

  * igt@prime_vgem@basic-userptr:
- fi-icl-u2:  NOTRUN -> [SKIP][16] ([i915#3301])
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/fi-icl-u2/igt@prime_v...@basic-userptr.html
- bat-adlp-4: NOTRUN -> [SKIP][17] ([i915#3301] / [i915#3708])
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103236v1/bat-adlp-4/igt@prime_v...@basic-userptr.html

  
 Possible fixes 

  * igt@i915_module_load@reload:
- {bat-rpls-2}:   [DMESG-WARN][18] ([i915#5537]) -> [PASS][19]
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/bat-rpls-2/igt@i915_module_l...@reload.html
   [19]: 

Re: [Intel-gfx] [PATCH 1/4] drm/i915/huc: check HW directly for HuC auth status

2022-04-27 Thread Ceraolo Spurio, Daniele




On 4/26/2022 5:26 PM, Daniele Ceraolo Spurio wrote:

The huc_is_authenticated function return is based on our SW tracking of
the HuC auth status. However, around suspend/resume and reset this can
go out of sync with the actual HW state, which is why we use
huc_check_state() to look at the actual HW state. Instead of having this
duality, just make huc_is_authenticated() return the HW state and use it
everywhere we need to know if HuC is running.

Signed-off-by: Daniele Ceraolo Spurio 
---
  drivers/gpu/drm/i915/gt/uc/intel_huc.c | 23 ++-
  drivers/gpu/drm/i915/gt/uc/intel_huc.h |  5 -
  2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index 556829de9c172..773020e69589a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -80,6 +80,18 @@ void intel_huc_fini(struct intel_huc *huc)
intel_uc_fw_fini(>fw);
  }
  
+static bool huc_is_authenticated(struct intel_huc *huc)

+{
+   struct intel_gt *gt = huc_to_gt(huc);
+   intel_wakeref_t wakeref;
+   u32 status = 0;
+
+   with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+   status = intel_uncore_read(gt->uncore, huc->status.reg);
+
+   return (status & huc->status.mask) == huc->status.value;
+}
+
  /**
   * intel_huc_auth() - Authenticate HuC uCode
   * @huc: intel_huc structure
@@ -96,7 +108,7 @@ int intel_huc_auth(struct intel_huc *huc)
struct intel_guc *guc = >uc.guc;
int ret;
  
-	GEM_BUG_ON(intel_huc_is_authenticated(huc));

+   GEM_BUG_ON(huc_is_authenticated(huc));


It looks like even on gen9 HuC is surviving the reset, so this BUG_ON is 
now being triggered. I'm going to just change this to a BUG_ON on 
intel_uc_fw_is_running() for now, which would be equivalent to what we 
have right now, and in the meantime I'll follow up with the HuC team to 
see if we can just skip this (and the huc_fw_upload) if HuC shows up as 
already authenticated.


Daniele

  
  	if (!intel_uc_fw_is_loaded(>fw))

return -ENOEXEC;
@@ -150,10 +162,6 @@ int intel_huc_auth(struct intel_huc *huc)
   */
  int intel_huc_check_status(struct intel_huc *huc)
  {
-   struct intel_gt *gt = huc_to_gt(huc);
-   intel_wakeref_t wakeref;
-   u32 status = 0;
-
switch (__intel_uc_fw_status(>fw)) {
case INTEL_UC_FIRMWARE_NOT_SUPPORTED:
return -ENODEV;
@@ -167,10 +175,7 @@ int intel_huc_check_status(struct intel_huc *huc)
break;
}
  
-	with_intel_runtime_pm(gt->uncore->rpm, wakeref)

-   status = intel_uncore_read(gt->uncore, huc->status.reg);
-
-   return (status & huc->status.mask) == huc->status.value;
+   return huc_is_authenticated(huc);
  }
  
  /**

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index 73ec670800f2b..77d813840d76c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -50,11 +50,6 @@ static inline bool intel_huc_is_used(struct intel_huc *huc)
return intel_uc_fw_is_available(>fw);
  }
  
-static inline bool intel_huc_is_authenticated(struct intel_huc *huc)

-{
-   return intel_uc_fw_is_running(>fw);
-}
-
  void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p);
  
  #endif




[Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915/gt: Fix memory leaks in per-gt sysfs

2022-04-27 Thread Patchwork
== Series Details ==

Series: drm/i915/gt: Fix memory leaks in per-gt sysfs
URL   : https://patchwork.freedesktop.org/series/103236/
State : warning

== Summary ==

Error: dim sparse failed
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.




Re: [Intel-gfx] [PATCH 7/9] drm/i915/gt: Fix memory leaks in per-gt sysfs

2022-04-27 Thread Dixit, Ashutosh
On Wed, 27 Apr 2022 04:45:03 -0700, Andi Shyti wrote:
>
> Hi Ashutosh,

Hi Andi,

> > > > -static struct kobj_type kobj_gt_type = {
> > > > -   .release = kobj_gt_release,
> > > > +static struct kobj_type kobj_gtn_type = {
> > >
> > > what does it mean GTN? Or is it GTn? Please use just GT, gtn is
> > > confusing.
> > >
> > > Same for all the rest of the gtn's you have used below.
> >
> > I didn't like gtn either. But a sysfs_gt kobject is already part of 'struct
> > drm_i915_private' so I thought I'll put sysfs_gtn (for gt/gtN) in 'struct
> > intel_gt'. Otherwise browsing the code etc. gets confusing.
>
> we can even use 'gt_n' if the 'n' is really necessary.

I decided to just go with sysfs_gt in v2 as you had suggested. The total
number of instances of sysfs_gt is very small so it didn't seem too bad to
have the same member name in the two structs.

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 7/9] drm/i915/gt: Fix memory leaks in per-gt sysfs

2022-04-27 Thread Dixit, Ashutosh
On Sun, 24 Apr 2022 15:36:23 -0700, Andi Shyti wrote:
>
> Hi Andrzej and Ashutosh,
>
> > > > > b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> > > > > index 937b2e1a305e..4c72b4f983a6 100644
> > > > > --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> > > > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> > > > > @@ -222,6 +222,9 @@ struct intel_gt {
> > > > >   } mocs;
> > > > >   struct intel_pxp pxp;
> > > > > +
> > > > > + /* gt/gtN sysfs */
> > > > > + struct kobject sysfs_gtn;
> > > > If you put kobject as a part of intel_gt what assures you that lifetime 
> > > > of
> > > > kobject is shorter than intel_gt? Ie its refcounter is 0 on removal of
> > > > intel_gt?
> > > Because we are explicitly doing a kobject_put() in
> > > intel_gt_sysfs_unregister(). Which is exactly what we are *not* doing in
> > > the previous code.
> > >
> > > Let me explain a bit about the previous code (but feel free to skip since
> > > the patch should speak for itself):
> > > * Previously we kzalloc a 'struct kobj_gt'
> > > * But we don't save a pointer to the 'struct kobj_gt' so we don't have the
> > >pointer to the kobject to be able to do a kobject_put() on it later
> > > * Therefore we need to store the pointer in 'struct intel_gt'
> > > * But if we have to put the pointer in 'struct intel_gt' we might as well
> > >put the kobject as part of 'struct intel_gt' and that also removes the
> > >need to have a 'struct kobj_gt' (kobj_to_gt() can just use 
> > > container_of()
> > >to get gt from kobj).
> > > > * So I think this patch is simpler/cleaner than the original code if you take
> > >the requirement for kobject_put() into account.
>
> This is my oversight. This was something I completely forgot to
> fix but it was my intention to do and actually I had some fixes
> ongoing. But because this patch took too long to get in I
> completely forgot about it (Sujaritha was actually the first who
> pointed this out).
>
> Thanks, Ashutosh for taking this.
>
> > I fully agree that previous code is incorrect but I am not convinced current
> > code is correct.
> > If some objects are kref-counted it means usually they can have multiple
> > concurrent users and kobject_put does not work as traditional
> > destructor/cleanup/unregister.
> > So in this particular case after calling kobject_init_and_add sysfs core can
> > get multiple references on the object. Later, during driver unregistration
> > kobject_put is called, but if the object is still in use by sysfs core, the
> > object will not be destroyed/released. If the driver unregistration
> > continues memory will be freed, leaving sysfs-core (or other users) with
> > dangling pointers. Unless there is some additional synchronization mechanism
> > I am not aware of.
>
> Thanks Andrzej for summarizing this and what you said is actually
> what happens. I had a similar solution developed and I had wrong
> pointer reference happening.

Hi Andrzej/Andi,

I did do some research into kobject's and such before writing this patch
and based on that I believe the patch is correct. Presenting some evidence
below.

The patch is verified by:

a. Putting a printk in the release() method when it exists (it does for
   sysfs_gtn kobject)
b. Enabling dynamic prints for lib/kobject.c

For example, with the following:

# echo 'file kobject.c +p' > /sys/kernel/debug/dynamic_debug/control
# echo -n "0000:03:00.0" > /sys/bus/pci/drivers/i915/unbind

We see this in dmesg (see kobject_cleanup() called from kobject_put()):

[ 1034.930007] kobject: '.defaults' (88817130a640): kobject_cleanup, parent 
8882262b5778
[ 1034.930020] kobject: '.defaults' (88817130a640): auto cleanup kobject_del
[ 1034.930336] kobject: '.defaults' (88817130a640): calling ktype release
[ 1034.930340] kobject: (88817130a640): dynamic_kobj_release
[ 1034.930354] kobject: '.defaults': free name
[ 1034.930366] kobject: 'gt0' (8882262b5778): kobject_cleanup, parent 
88817130a240
[ 1034.930371] kobject: 'gt0' (8882262b5778): auto cleanup kobject_del
[ 1034.931930] kobject: 'gt0' (8882262b5778): calling ktype release
[ 1034.931936] kobject: 'gt0': free name
[ 1034.958004] kobject: 'i915__03_00.0' (88810e1f8800): fill_kobj_path: 
path = '/devices/i915__03_00.0'
[ 1034.958155] kobject: 'i915__03_00.0' (88810e1f8800): 
kobject_cleanup, parent 
[ 1034.958162] kobject: 'i915__03_00.0' (88810e1f8800): calling ktype 
release
[ 1034.958188] kobject: 'i915__03_00.0': free name
[ 1034.958729] kobject: 'gt' (88817130a240): kobject_cleanup, parent 
8881160c5000
[ 1034.958736] kobject: 'gt' (88817130a240): auto cleanup kobject_del
[ 1034.958762] kobject: 'gt' (88817130a240): calling ktype release
[ 1034.958767] kobject: (88817130a240): dynamic_kobj_release
[ 1034.958778] kobject: 'gt': free name

We have the following directory structure (one of the patches is creating
/sys/class/drm/card0/gt/gt0/.defaults):

 

[Intel-gfx] [PATCH] drm/i915/gt: Fix memory leaks in per-gt sysfs

2022-04-27 Thread Ashutosh Dixit
All kmalloc'd kobjects need a kobject_put() to free memory. For example in
previous code, kobj_gt_release() never gets called. The requirement of
kobject_put() now results in a slightly different code organization.

v2: s/gtn/gt/ (Andi)

Cc: Andi Shyti 
Cc: Andrzej Hajda 
Fixes: b770bcfae9ad ("drm/i915/gt: create per-tile sysfs interface")
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/gt/intel_gt.c   |  1 +
 drivers/gpu/drm/i915/gt/intel_gt_sysfs.c | 29 ++--
 drivers/gpu/drm/i915/gt/intel_gt_sysfs.h |  6 +
 drivers/gpu/drm/i915/gt/intel_gt_types.h |  3 +++
 drivers/gpu/drm/i915/i915_sysfs.c|  2 ++
 5 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 92394f13b42f..9aede288eb86 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -785,6 +785,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
 {
intel_wakeref_t wakeref;
 
+   intel_gt_sysfs_unregister(gt);
intel_rps_driver_unregister(>rps);
intel_gsc_fini(>gsc);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
index 8ec8bc660c8c..9e4ebf53379b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
@@ -24,7 +24,7 @@ bool is_object_gt(struct kobject *kobj)
 
 static struct intel_gt *kobj_to_gt(struct kobject *kobj)
 {
-   return container_of(kobj, struct kobj_gt, base)->gt;
+   return container_of(kobj, struct intel_gt, sysfs_gt);
 }
 
 struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
@@ -72,9 +72,9 @@ static struct attribute *id_attrs[] = {
 };
 ATTRIBUTE_GROUPS(id);
 
+/* A kobject needs a release() method even if it does nothing */
 static void kobj_gt_release(struct kobject *kobj)
 {
-   kfree(kobj);
 }
 
 static struct kobj_type kobj_gt_type = {
@@ -85,8 +85,6 @@ static struct kobj_type kobj_gt_type = {
 
 void intel_gt_sysfs_register(struct intel_gt *gt)
 {
-   struct kobj_gt *kg;
-
/*
 * We need to make things right with the
 * ABI compatibility. The files were originally
@@ -98,25 +96,22 @@ void intel_gt_sysfs_register(struct intel_gt *gt)
if (gt_is_root(gt))
intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt));
 
-   kg = kzalloc(sizeof(*kg), GFP_KERNEL);
-   if (!kg)
+   /* init and xfer ownership to sysfs tree */
+   if (kobject_init_and_add(>sysfs_gt, _gt_type,
+gt->i915->sysfs_gt, "gt%d", gt->info.id))
goto exit_fail;
 
-   kobject_init(>base, _gt_type);
-   kg->gt = gt;
-
-   /* xfer ownership to sysfs tree */
-   if (kobject_add(>base, gt->i915->sysfs_gt, "gt%d", gt->info.id))
-   goto exit_kobj_put;
-
-   intel_gt_sysfs_pm_init(gt, >base);
+   intel_gt_sysfs_pm_init(gt, >sysfs_gt);
 
return;
 
-exit_kobj_put:
-   kobject_put(>base);
-
 exit_fail:
+   kobject_put(>sysfs_gt);
drm_warn(>i915->drm,
 "failed to initialize gt%d sysfs root\n", gt->info.id);
 }
+
+void intel_gt_sysfs_unregister(struct intel_gt *gt)
+{
+   kobject_put(>sysfs_gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
index 9471b26752cf..a99aa7e8b01a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
@@ -13,11 +13,6 @@
 
 struct intel_gt;
 
-struct kobj_gt {
-   struct kobject base;
-   struct intel_gt *gt;
-};
-
 bool is_object_gt(struct kobject *kobj);
 
 struct drm_i915_private *kobj_to_i915(struct kobject *kobj);
@@ -28,6 +23,7 @@ intel_gt_create_kobj(struct intel_gt *gt,
 const char *name);
 
 void intel_gt_sysfs_register(struct intel_gt *gt);
+void intel_gt_sysfs_unregister(struct intel_gt *gt);
 struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
const char *name);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index b06611c1d4ad..edd7a3cf5f5f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -224,6 +224,9 @@ struct intel_gt {
} mocs;
 
struct intel_pxp pxp;
+
+   /* gt/gtN sysfs */
+   struct kobject sysfs_gt;
 };
 
 enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c 
b/drivers/gpu/drm/i915/i915_sysfs.c
index 8521daba212a..3f06106cdcf5 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -259,4 +259,6 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
 
device_remove_bin_file(kdev,  _attrs_1);
device_remove_bin_file(kdev,  _attrs);
+
+   kobject_put(dev_priv->sysfs_gt);
 }
-- 
2.34.1



[Intel-gfx] ✗ Fi.CI.BUILD: failure for RFC: nested AVIC (rev2)

2022-04-27 Thread Patchwork
== Series Details ==

Series: RFC: nested AVIC (rev2)
URL   : https://patchwork.freedesktop.org/series/100904/
State : failure

== Summary ==

Error: patch 
https://patchwork.freedesktop.org/api/1.0/series/100904/revisions/2/mbox/ not 
applied
Applying: KVM: x86: document AVIC/APICv inhibit reasons
Applying: KVM: x86: inhibit APICv/AVIC when the guest and/or host changes apic 
id/base from the defaults.
Applying: KVM: x86: SVM: remove avic's broken code that updated APIC ID
Applying: KVM: x86: mmu: allow to enable write tracking externally
Applying: x86: KVMGT: use kvm_page_track_write_tracking_enable
Applying: KVM: x86: mmu: add gfn_in_memslot helper
Applying: KVM: x86: mmu: tweak fast path for emulation of access to nested NPT 
pages
error: sha1 information is lacking or useless (arch/x86/kvm/mmu/mmu.c).
error: could not build fake ancestor
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0007 KVM: x86: mmu: tweak fast path for emulation of access to 
nested NPT pages
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".




[Intel-gfx] [RFC PATCH v3 19/19] KVM: x86: nSVM: expose the nested AVIC to the guest

2022-04-27 Thread Maxim Levitsky
This patch enables and exposes to the nested guest
the support for the nested AVIC.

Signed-off-by: Maxim Levitsky 
---
 arch/x86/kvm/svm/svm.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 099329711ad13..431281ccc40ef 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4087,6 +4087,9 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu 
*vcpu)
if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC))
kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_X2APIC);
}
+
+   svm->avic_enabled = enable_apicv && guest_cpuid_has(vcpu, 
X86_FEATURE_AVIC);
+
init_vmcb_after_set_cpuid(vcpu);
 }
 
@@ -4827,6 +4830,9 @@ static __init void svm_set_cpu_caps(void)
if (vgif)
kvm_cpu_cap_set(X86_FEATURE_VGIF);
 
+   if (enable_apicv)
+   kvm_cpu_cap_set(X86_FEATURE_AVIC);
+
/* Nested VM can receive #VMEXIT instead of triggering #GP */
kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
}
-- 
2.26.3



[Intel-gfx] [RFC PATCH v3 18/19] KVM: x86: SVM/nSVM: add optional non strict AVIC doorbell mode

2022-04-27 Thread Maxim Levitsky
By default, peers of a vCPU can send it doorbell messages
only when that vCPU is assigned to (loaded on) a physical CPU.

However, when doorbell messages are not allowed, this causes all of
the vCPU's peers to get VM exits, which is suboptimal when this
vCPU is not halted, and therefore is just temporarily not running
in the guest mode due to being scheduled out and/or
having a userspace VM exit.

In this case peers can't make this vCPU enter guest mode faster,
and thus the VM exits they get don't do anything good.

Therefore this patch introduces a new non-strict mode (disabled by
default; enabled by setting the avic_doorbell_strict
kvm_amd module param to 0). When this mode is enabled
and a vCPU is scheduled out but not halted, its peers can continue
sending doorbell messages to the physical CPU where the vCPU was
last running.

Security-wise, a malicious guest with a compromised guest kernel
can, in this mode, in some cases slow down whatever is
running on the last physical CPU where a vCPU was running
by spamming it with doorbell messages (hammering on ICR)
from another of its vCPUs.

Thus this mode is disabled by default.

However if admin policy is to have 1:1 vCPU/pCPU mapping,
this mode can be useful to avoid VM exits when a vCPU has
a userspace VM exit and such.

Signed-off-by: Maxim Levitsky 
---
 arch/x86/kvm/svm/avic.c | 16 +---
 arch/x86/kvm/svm/svm.c  | 25 +
 2 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 149df26e17462..4bf0f00f13c12 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -1704,7 +1704,7 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, 
int cpu, bool r)
 
 void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-   u64 entry;
+   u64 old_entry, new_entry;
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -1723,14 +1723,16 @@ void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (kvm_vcpu_is_blocking(vcpu))
return;
 
-   entry = READ_ONCE(*(svm->avic_physical_id_cache));
-   WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
+   old_entry = READ_ONCE(*(svm->avic_physical_id_cache));
+   new_entry = old_entry;
 
-   entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
-   entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
-   entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+   new_entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
+   new_entry |= (h_physical_id & 
AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
+   new_entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+
+   if (old_entry != new_entry)
+   WRITE_ONCE(*(svm->avic_physical_id_cache), new_entry);
 
-   WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
 }
 
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b31bab832360e..099329711ad13 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -191,6 +191,10 @@ module_param(avic, bool, 0444);
 static bool force_avic;
 module_param_unsafe(force_avic, bool, 0444);
 
+static bool avic_doorbell_strict = true;
+module_param(avic_doorbell_strict, bool, 0444);
+
+
 bool __read_mostly dump_invalid_vmcb;
 module_param(dump_invalid_vmcb, bool, 0644);
 
@@ -1402,10 +1406,23 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int 
cpu)
 
 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 {
-   if (kvm_vcpu_apicv_active(vcpu))
-   __avic_vcpu_put(vcpu);
-
-   __nested_avic_put(vcpu);
+   /*
+* Forbid this vCPU's peers to send doorbell messages.
+* Unless non strict doorbell mode is used.
+*
+* In this mode, doorbell messages are forbidden only when a vCPU
+* blocks, since for correctness only in this case it is needed
+* to intercept an IPI to wake up a vCPU.
+*
+* However this reduces the isolation of the guest since flood of
+* spurious doorbell messages can slow a CPU running another task
+* while this vCPU is scheduled out.
+*/
+   if (avic_doorbell_strict) {
+   if (kvm_vcpu_apicv_active(vcpu))
+   __avic_vcpu_put(vcpu);
+   __nested_avic_put(vcpu);
+   }
 
svm_prepare_host_switch(vcpu);
 
-- 
2.26.3



[Intel-gfx] [RFC PATCH v3 17/19] KVM: x86: nSVM: implement nested AVIC doorbell emulation

2022-04-27 Thread Maxim Levitsky
This patch implements the doorbell msr emulation
for nested AVIC.

Signed-off-by: Maxim Levitsky 
---
 arch/x86/kvm/svm/avic.c | 49 +
 arch/x86/kvm/svm/svm.c  |  2 ++
 arch/x86/kvm/svm/svm.h  |  1 +
 3 files changed, 52 insertions(+)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index e8c53fd77f0b1..149df26e17462 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -1165,6 +1165,55 @@ unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct 
kvm_vcpu *vcpu)
return 0;
 }
 
+int avic_emulate_doorbell_write(struct kvm_vcpu *vcpu, u64 data)
+{
+   int source_l1_apicid = vcpu->vcpu_id;
+   int target_l1_apicid = data & AVIC_DOORBELL_PHYSICAL_ID_MASK;
+   bool target_running, target_nested;
+   struct kvm_vcpu *target;
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   if (!svm->avic_enabled || (data & ~AVIC_DOORBELL_PHYSICAL_ID_MASK))
+   return 1;
+
+   target = avic_vcpu_by_l1_apicid(vcpu->kvm, target_l1_apicid);
+   if (!target)
+   /* Guest bug: targeting invalid APIC ID. */
+   return 0;
+
+   target_running = READ_ONCE(target->mode) == IN_GUEST_MODE;
+   target_nested = is_guest_mode(target);
+
+   trace_kvm_avic_nested_doorbell(source_l1_apicid, target_l1_apicid,
+  target_nested, target_running);
+
+   /*
+* Target is not in the nested mode, thus the doorbell doesn't affect 
it.
+* If it just became nested after is_guest_mode was checked,
+* it means that it just processed AVIC state and KVM doesn't need
+* to send it another doorbell.
+*/
+   if (!target_nested)
+   return 0;
+
+   /*
+* If the target vCPU is in guest mode, kick the real doorbell.
+* Otherwise KVM needs to try to wake it up if it was sleeping.
+*
+* If the target is not longer in guest mode (just exited it),
+* it will either halt and before that it will notice pending IRR
+* bits, and cancel halting, or it will enter the guest mode again,
+* and notice the IRR bits as well.
+*/
+   if (target_running)
+   wrmsr(MSR_AMD64_SVM_AVIC_DOORBELL,
+ kvm_cpu_get_apicid(READ_ONCE(target->cpu)), 0);
+   else
+   kvm_vcpu_wake_up(target);
+
+   return 0;
+}
+
 static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool 
flat)
 {
struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d96a73931d1e5..b31bab832360e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2772,6 +2772,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr)
u32 ecx = msr->index;
u64 data = msr->data;
switch (ecx) {
+   case MSR_AMD64_SVM_AVIC_DOORBELL:
+   return avic_emulate_doorbell_write(vcpu, data);
case MSR_AMD64_TSC_RATIO:
 
if (!svm->tsc_scaling_enabled) {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 93fd9d6f5fd85..14e2c5c451cad 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -714,6 +714,7 @@ unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct 
kvm_vcpu *vcpu);
 void avic_reload_apic_pages(struct kvm_vcpu *vcpu);
 void avic_free_nested(struct kvm_vcpu *vcpu);
 bool avic_nested_has_interrupt(struct kvm_vcpu *vcpu);
+int avic_emulate_doorbell_write(struct kvm_vcpu *vcpu, u64 data);
 
 struct avic_physid_table *
 avic_physid_shadow_table_get(struct kvm_vcpu *vcpu, gfn_t gfn);
-- 
2.26.3



[Intel-gfx] [RFC PATCH v3 16/19] KVM: x86: nSVM: implement support for nested AVIC vmexits

2022-04-27 Thread Maxim Levitsky
* SVM_EXIT_AVIC_UNACCELERATED_ACCESS is always forwarded to the L1

* SVM_EXIT_AVIC_INCOMPLETE_IPI is hidden from the guest if:

   - is_running was false in shadow physid page because L1's vCPU
 was scheduled out - in this case, the vCPU is woken up,
 and it will process nested AVIC on next VM entry

  - invalid physical address of avic backing page was present
in the guest's physid page, which KVM translates to
valid physical address of a dummy page and is_running=false.

If this condition happens,
the AVIC_IPI_FAILURE_INVALID_BACKING_PAGE VM exit is injected to
the nested hypervisor.

* Note that it is possible for an SVM_EXIT_AVIC_INCOMPLETE_IPI
  VM exit to happen due to both host and guest related reasons
  at the same time:

  For example if a broadcast IPI was attempted and some shadow
  physid entries had 'is_running=false' set by the guest,
  and some had it set to false due to scheduled out L1 vCPUs.

  To support this case, all relevant entries of guest's physical
  and logical id tables are checked, and both host related actions
  (e.g wakeup) and guest vm exit reflection are done.

Signed-off-by: Maxim Levitsky 
---
 arch/x86/kvm/svm/avic.c   | 204 +-
 arch/x86/kvm/svm/nested.c |  14 +++
 2 files changed, 216 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index f13ca1e7b2845..e8c53fd77f0b1 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -917,6 +917,164 @@ static void avic_kick_target_vcpus(struct kvm *kvm, 
struct kvm_lapic *source,
}
 }
 
+static void
+avic_kick_target_vcpu_nested_physical(struct vcpu_svm *svm,
+ int target_l2_apic_id,
+ int *index,
+ bool *invalid_page)
+{
+   u64 gentry, sentry;
+   int target_l1_apicid;
+   struct avic_physid_table *t = svm->nested.l2_physical_id_table;
+
+   if (WARN_ON_ONCE(!t))
+   return;
+
+   /*
+* This shouldn't normally happen because this condition
+* should cause AVIC_IPI_FAILURE_INVALID_TARGET vmexit,
+* however the guest can change the page and trigger this.
+*/
+   if (target_l2_apic_id >= t->nentries)
+   return;
+
+   gentry = t->entries[target_l2_apic_id].gentry;
+   sentry = *t->entries[target_l2_apic_id].sentry;
+
+   /* Same reasoning as above  */
+   if (!(gentry & AVIC_PHYSICAL_ID_ENTRY_VALID_MASK))
+   return;
+
+   /*
+* This races against the guest updating is_running bit.
+*
+* Race itself happens on real hardware as well, and the guest
+* must use the correct means to avoid it.
+*
+* AVIC hardware already set IRR and should have done memory
+* barrier, and then found out that is_running is false
+* in shadow physid table.
+*
+* We are doing another is_running check (in the guest physid table),
+* completing it, thus don't need additional memory barrier.
+*/
+
+   target_l1_apicid = physid_entry_get_apicid(gentry);
+
+   if (target_l1_apicid == -1) {
+
+   /* is_running is false, need to vmexit to the guest */
+   if (*index == -1) {
+   u64 backing_page_phys = 
physid_entry_get_backing_table(sentry);
+
+   *index = target_l2_apic_id;
+   if (backing_page_phys == t->dummy_page_hpa)
+   *invalid_page = true;
+   }
+   } else {
+   /* Wake up the target vCPU and hide the VM exit from the guest 
*/
+   struct kvm_vcpu *target = avic_vcpu_by_l1_apicid(svm->vcpu.kvm, 
target_l1_apicid);
+
+   if (target && target != >vcpu)
+   kvm_vcpu_wake_up(target);
+   }
+
+   trace_kvm_avic_nested_kick_vcpu(svm->vcpu.vcpu_id,
+   target_l2_apic_id,
+   target_l1_apicid);
+}
+
+static void
+avic_kick_target_vcpus_nested_logical(struct vcpu_svm *svm, unsigned long dest,
+ int *index, bool *invalid_page)
+{
+   int logical_id;
+   u8 cluster = 0;
+   u64 *logical_id_table = (u64 *)svm->nested.l2_logical_id_table.hva;
+   int physical_index = -1;
+
+   if (WARN_ON_ONCE(!logical_id_table))
+   return;
+
+   if (nested_avic_get_reg(>vcpu, APIC_DFR) == APIC_DFR_CLUSTER) {
+   if (dest >= 0x40)
+   return;
+   cluster = dest & 0x3C;
+   dest &= 0x3;
+   }
+
+   for_each_set_bit(logical_id, , 8) {
+   int logical_index = cluster | logical_id;
+   u64 log_gentry = logical_id_table[logical_index];
+   int l2_apicid = logid_get_physid(log_gentry);
+
+   /* Should 

[Intel-gfx] [RFC PATCH v3 15/19] KVM: x86: nSVM: add code to reload AVIC physid table when it is invalidated

2022-04-27 Thread Maxim Levitsky
An AVIC table invalidation is not supposed to happen often, and can
only happen when the guest does something suspicious such as:

  - It places physid page in a memslot that is enabled/disabled and memslot
flushing happens.

  - It tries to update apic backing page addresses - guest has no
reason to touch this, and doing so on real hardware will likely
lead to unpredictable results.

  - It writes to reserved bits of a tracked page.


  - It write floods a physid table while no vCPU is using it
(the page is likely reused at that point to contain something else)


All of the above cause a KVM_REQ_APIC_PAGE_RELOAD request to be raised
on all vCPUs, which kicks them out of guest mode; the first vCPU
to reach the handler then re-creates the entries of
the physid page, and the others notice this and do nothing.

Signed-off-by: Maxim Levitsky 
---
 arch/x86/kvm/svm/avic.c | 13 +
 arch/x86/kvm/svm/svm.c  |  1 +
 arch/x86/kvm/svm/svm.h  |  1 +
 3 files changed, 15 insertions(+)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index e6ec525a88625..f13ca1e7b2845 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -379,6 +379,7 @@ static void avic_physid_shadow_table_invalidate(struct kvm 
*kvm,
struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
 
lockdep_assert_held(_svm->avic.tables_lock);
+   kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
avic_physid_shadow_table_erase(kvm, t);
 }
 
@@ -1638,3 +1639,15 @@ bool avic_nested_has_interrupt(struct kvm_vcpu *vcpu)
return true;
return false;
 }
+
+void avic_reload_apic_pages(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_svm *vcpu_svm = to_svm(vcpu);
+   struct avic_physid_table *t = vcpu_svm->nested.l2_physical_id_table;
+
+   int nentries = vcpu_svm->nested.ctl.avic_physical_id &
+   AVIC_PHYSICAL_ID_TABLE_SIZE_MASK;
+
+   if (t && is_guest_mode(vcpu) && nested_avic_in_use(vcpu))
+   avic_physid_shadow_table_sync(vcpu, t, nentries);
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index a39bb0b27a51d..d96a73931d1e5 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4677,6 +4677,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.enable_nmi_window = svm_enable_nmi_window,
.enable_irq_window = svm_enable_irq_window,
.update_cr8_intercept = svm_update_cr8_intercept,
+   .reload_apic_pages = avic_reload_apic_pages,
.refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl,
.check_apicv_inhibit_reasons = avic_check_apicv_inhibit_reasons,
.apicv_post_state_restore = avic_apicv_post_state_restore,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 17fcc09cf4be1..93fd9d6f5fd85 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -711,6 +711,7 @@ void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
 void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
 void avic_ring_doorbell(struct kvm_vcpu *vcpu);
 unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu);
+void avic_reload_apic_pages(struct kvm_vcpu *vcpu);
 void avic_free_nested(struct kvm_vcpu *vcpu);
 bool avic_nested_has_interrupt(struct kvm_vcpu *vcpu);
 
-- 
2.26.3



[Intel-gfx] [RFC PATCH v3 14/19] KVM: x86: rename .set_apic_access_page_addr to .reload_apic_pages

2022-04-27 Thread Maxim Levitsky
This will be used on SVM to reload the shadow page of the AVIC physid table.

No functional change intended

Signed-off-by: Maxim Levitsky 
---
 arch/x86/include/asm/kvm-x86-ops.h | 2 +-
 arch/x86/include/asm/kvm_host.h| 3 +--
 arch/x86/kvm/vmx/vmx.c | 8 
 arch/x86/kvm/x86.c | 6 +++---
 4 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h 
b/arch/x86/include/asm/kvm-x86-ops.h
index 96e4e9842dfc6..997edb7453ac2 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -82,7 +82,7 @@ KVM_X86_OP_OPTIONAL(hwapic_isr_update)
 KVM_X86_OP_OPTIONAL_RET0(guest_apic_has_interrupt)
 KVM_X86_OP_OPTIONAL(load_eoi_exitmap)
 KVM_X86_OP_OPTIONAL(set_virtual_apic_mode)
-KVM_X86_OP_OPTIONAL(set_apic_access_page_addr)
+KVM_X86_OP_OPTIONAL(reload_apic_pages)
 KVM_X86_OP(deliver_interrupt)
 KVM_X86_OP_OPTIONAL(sync_pir_to_irr)
 KVM_X86_OP_OPTIONAL_RET0(set_tss_addr)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fc7df778a3d71..52fa04c3108b1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1436,7 +1436,7 @@ struct kvm_x86_ops {
bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
-   void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
+   void (*reload_apic_pages)(struct kvm_vcpu *vcpu);
void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode,
  int trig_mode, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
@@ -1909,7 +1909,6 @@ int kvm_cpu_has_extint(struct kvm_vcpu *v);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
-
 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cf8581978bce3..7defd31703c61 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6339,7 +6339,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
vmx_update_msr_bitmap_x2apic(vcpu);
 }
 
-static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
+static void vmx_reload_apic_access_page(struct kvm_vcpu *vcpu)
 {
struct page *page;
 
@@ -,7 +,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.enable_irq_window = vmx_enable_irq_window,
.update_cr8_intercept = vmx_update_cr8_intercept,
.set_virtual_apic_mode = vmx_set_virtual_apic_mode,
-   .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
+   .reload_apic_pages = vmx_reload_apic_access_page,
.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
.load_eoi_exitmap = vmx_load_eoi_exitmap,
.apicv_post_state_restore = vmx_apicv_post_state_restore,
@@ -7940,12 +7940,12 @@ static __init int hardware_setup(void)
enable_vnmi = 0;
 
/*
-* set_apic_access_page_addr() is used to reload apic access
+* kvm_vcpu_reload_apic_pages() is used to reload apic access
 * page upon invalidation.  No need to do anything if not
 * using the APIC_ACCESS_ADDR VMCS field.
 */
if (!flexpriority_enabled)
-   vmx_x86_ops.set_apic_access_page_addr = NULL;
+   vmx_x86_ops.reload_apic_pages = NULL;
 
if (!cpu_has_vmx_tpr_shadow())
vmx_x86_ops.update_cr8_intercept = NULL;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d2f73ce87a1e3..ad744ab99734c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9949,12 +9949,12 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm 
*kvm,
kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
 }
 
-static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
+static void kvm_vcpu_reload_apic_pages(struct kvm_vcpu *vcpu)
 {
if (!lapic_in_kernel(vcpu))
return;
 
-   static_call_cond(kvm_x86_set_apic_access_page_addr)(vcpu);
+   static_call_cond(kvm_x86_reload_apic_pages)(vcpu);
 }
 
 void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
@@ -10071,7 +10071,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
vcpu_load_eoi_exitmap(vcpu);
if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
-   kvm_vcpu_reload_apic_access_page(vcpu);
+   kvm_vcpu_reload_apic_pages(vcpu);
if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
vcpu->run->exit_reason = 

[Intel-gfx] [RFC PATCH v3 13/19] KVM: x86: nSVM: wire nested AVIC to nested guest entry/exit

2022-04-27 Thread Maxim Levitsky
  * Passthrough guest's avic pages that can be passed through
 - logical id table
 - avic backing page

  * Passthrough AVIC's mmio range
 - nested guest is responsible for marking it RW
   in its NPT tables.

  * Write track physical id page
 - all peer's avic backing pages are pinned
   as long as the shadow table is not invalidated/
   freed.

  * Cache guest AVIC settings.

  * Add SDM mandated changes to emulated VM enter/exit.

Note that nested AVIC still can't be enabled, thus this
code has no effect yet.

Signed-off-by: Maxim Levitsky 
---
 arch/x86/kvm/svm/avic.c   |  51 ++-
 arch/x86/kvm/svm/nested.c | 127 +-
 arch/x86/kvm/svm/svm.c|   2 +
 arch/x86/kvm/svm/svm.h|  24 +++
 4 files changed, 199 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 34da9fabd5194..e6ec525a88625 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -59,6 +59,18 @@ static inline struct kvm_vcpu *avic_vcpu_by_l1_apicid(struct 
kvm *kvm,
return kvm_get_vcpu_by_id(kvm, l1_apicid);
 }
 
+static u32 nested_avic_get_reg(struct kvm_vcpu *vcpu, int reg_off)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   void *nested_apic_regs = svm->nested.l2_apic_access_page.hva;
+
+   if (WARN_ON_ONCE(!nested_apic_regs))
+   return 0;
+
+   return *((u32 *) (nested_apic_regs + reg_off));
+}
+
 static void avic_physid_shadow_entry_set_vcpu(struct kvm *kvm,
  struct avic_physid_table *t,
  int n,
@@ -531,6 +543,20 @@ static void avic_physid_shadow_table_flush_memslot(struct 
kvm *kvm,
mutex_unlock(_svm->avic.tables_lock);
 }
 
+void avic_free_nested(struct kvm_vcpu *vcpu)
+{
+   struct avic_physid_table *t;
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   t = svm->nested.l2_physical_id_table;
+   if (t) {
+   avic_physid_shadow_table_put(vcpu->kvm, t);
+   svm->nested.l2_physical_id_table = NULL;
+   }
+
+   kvm_vcpu_unmap(vcpu, >nested.l2_apic_access_page, true);
+   kvm_vcpu_unmap(vcpu, >nested.l2_logical_id_table, true);
+}
 
 /*
  * This is a wrapper of struct amd_iommu_ir_data.
@@ -586,10 +612,18 @@ void avic_vm_destroy(struct kvm *kvm)
 {
unsigned long flags;
struct kvm_svm_avic *avic = _kvm_svm(kvm)->avic;
+   unsigned long i;
+   struct kvm_vcpu *vcpu;
 
if (!enable_apicv)
return;
 
+   kvm_for_each_vcpu(i, vcpu, kvm) {
+   vcpu_load(vcpu);
+   avic_free_nested(vcpu);
+   vcpu_put(vcpu);
+   }
+
if (avic->logical_id_table_page)
__free_page(avic->logical_id_table_page);
if (avic->physical_id_table_page)
@@ -1501,7 +1535,7 @@ void __nested_avic_load(struct kvm_vcpu *vcpu, int cpu)
if (kvm_vcpu_is_blocking(vcpu))
return;
 
-   if (svm->nested.initialized)
+   if (svm->nested.initialized && svm->avic_enabled)
avic_update_peer_physid_entries(vcpu, cpu);
 }
 
@@ -1511,7 +1545,7 @@ void __nested_avic_put(struct kvm_vcpu *vcpu)
 
lockdep_assert_preemption_disabled();
 
-   if (svm->nested.initialized)
+   if (svm->nested.initialized && svm->avic_enabled)
avic_update_peer_physid_entries(vcpu, -1);
 }
 
@@ -1591,3 +1625,16 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
 
nested_avic_load(vcpu);
 }
+
+bool avic_nested_has_interrupt(struct kvm_vcpu *vcpu)
+{
+   int off;
+
+   if (!nested_avic_in_use(vcpu))
+   return false;
+
+   for (off = 0x10; off < 0x80; off += 0x10)
+   if (nested_avic_get_reg(vcpu, APIC_IRR + off))
+   return true;
+   return false;
+}
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index bed5e1692cef0..eb5e9b600e052 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -387,6 +387,14 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu 
*vcpu,
memcpy(to->reserved_sw, from->reserved_sw,
   sizeof(struct hv_enlightenments));
}
+
+   /* copy avic related settings only when it is enabled */
+   if (from->int_ctl & AVIC_ENABLE_MASK) {
+   to->avic_vapic_bar  = from->avic_vapic_bar;
+   to->avic_backing_page   = from->avic_backing_page;
+   to->avic_logical_id = from->avic_logical_id;
+   to->avic_physical_id= from->avic_physical_id;
+   }
 }
 
 void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
@@ -539,6 +547,79 @@ void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
 }
 
+
+static bool nested_vmcb02_prepare_avic(struct vcpu_svm *svm)
+{
+   struct vmcb *vmcb02 = 

[Intel-gfx] [RFC PATCH v3 12/19] KVM: x86: nSVM: make nested AVIC physid write tracking be aware of the host scheduling

2022-04-27 Thread Maxim Levitsky
For each vCPU
  - store a linked list of all shadow physical id entries
which address it.

  - Update those entries when this vCPU is scheduled
in/out

  - update this list, when physid tables are modified by
other means (guest write and/or table sync)

To avoid races vs vcpu schedule, use a spinlock.

Signed-off-by: Maxim Levitsky 
---
 arch/x86/kvm/svm/avic.c | 113 +---
 arch/x86/kvm/svm/svm.c  |   7 +++
 arch/x86/kvm/svm/svm.h  |  10 
 3 files changed, 122 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index f462b7e48e3ca..34da9fabd5194 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -67,8 +67,12 @@ static void avic_physid_shadow_entry_set_vcpu(struct kvm 
*kvm,
struct avic_physid_entry_descr *e = >entries[n];
u64 sentry = READ_ONCE(*e->sentry);
u64 old_sentry = sentry;
+   struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
struct kvm_vcpu *new_vcpu = NULL;
int l0_apicid = -1;
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(_svm->avic.table_entries_lock, flags);
 
WARN_ON(!test_bit(n, t->valid_entires));
 
@@ -79,6 +83,9 @@ static void avic_physid_shadow_entry_set_vcpu(struct kvm *kvm,
new_vcpu = avic_vcpu_by_l1_apicid(kvm, new_l1_apicid);
 
if (new_vcpu)
+   list_add_tail(>link, 
_svm(new_vcpu)->nested.physid_ref_entries);
+
+   if (new_vcpu && to_svm(new_vcpu)->nested_avic_active)
l0_apicid = kvm_cpu_get_apicid(new_vcpu->cpu);
 
physid_entry_set_apicid(, l0_apicid);
@@ -87,6 +94,8 @@ static void avic_physid_shadow_entry_set_vcpu(struct kvm *kvm,
 
if (sentry != old_sentry)
WRITE_ONCE(*e->sentry, sentry);
+
+   raw_spin_unlock_irqrestore(_svm->avic.table_entries_lock, flags);
 }
 
 static void avic_physid_shadow_entry_create(struct kvm *kvm,
@@ -131,7 +140,11 @@ static void avic_physid_shadow_entry_remove(struct kvm 
*kvm,
   int n)
 {
struct avic_physid_entry_descr *e = >entries[n];
+   struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
hpa_t backing_page_hpa;
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(_svm->avic.table_entries_lock, flags);
 
if (!test_and_clear_bit(n, t->valid_entires))
WARN_ON(1);
@@ -147,8 +160,49 @@ static void avic_physid_shadow_entry_remove(struct kvm 
*kvm,
 
e->gentry = 0;
*e->sentry = 0;
+
+   raw_spin_unlock_irqrestore(_svm->avic.table_entries_lock, flags);
 }
 
+static void avic_update_peer_physid_entries(struct kvm_vcpu *vcpu, int cpu)
+{
+   /*
+* Update all shadow physid tables which contain entries
+* which reference this vCPU with its new physical location
+*/
+   struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
+   struct vcpu_svm *vcpu_svm = to_svm(vcpu);
+   struct avic_physid_entry_descr *e;
+   int updated_nentries = 0;
+   int l0_apicid = -1;
+   unsigned long flags;
+   bool new_active = cpu != -1;
+
+   if (cpu != -1)
+   l0_apicid = kvm_cpu_get_apicid(cpu);
+
+   raw_spin_lock_irqsave(_svm->avic.table_entries_lock, flags);
+
+   list_for_each_entry(e, _svm->nested.physid_ref_entries, link) {
+   u64 sentry = READ_ONCE(*e->sentry);
+   u64 old_sentry = sentry;
+
+   physid_entry_set_apicid(, l0_apicid);
+
+   if (sentry != old_sentry) {
+   updated_nentries++;
+   WRITE_ONCE(*e->sentry, sentry);
+   }
+   }
+
+   if (updated_nentries)
+   trace_kvm_avic_physid_update_vcpu_host(vcpu->vcpu_id,
+  l0_apicid, 
updated_nentries);
+
+   vcpu_svm->nested_avic_active = new_active;
+
+   raw_spin_unlock_irqrestore(_svm->avic.table_entries_lock, flags);
+}
 
 static bool
 avic_physid_shadow_table_setup_write_tracking(struct kvm *kvm,
@@ -603,6 +657,7 @@ int avic_vm_init(struct kvm *kvm)
hash_add(svm_vm_data_hash, >hnode, avic->vm_id);
spin_unlock_irqrestore(_vm_data_hash_lock, flags);
 
+   raw_spin_lock_init(>table_entries_lock);
mutex_init(>tables_lock);
INIT_LIST_HEAD(>physid_tables);
 
@@ -1428,9 +1483,51 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu)
 static void avic_vcpu_put(struct kvm_vcpu *vcpu)
 {
preempt_disable();
-
__avic_vcpu_put(vcpu);
+   preempt_enable();
+}
+
 
+void __nested_avic_load(struct kvm_vcpu *vcpu, int cpu)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   lockdep_assert_preemption_disabled();
+
+   /*
+* For the same reason as in __avic_vcpu_load there is no
+* need to load nested AVIC when this vCPU is blocking
+*/
+   if (kvm_vcpu_is_blocking(vcpu))
+   return;
+
+   if 

[Intel-gfx] [RFC PATCH v3 11/19] KVM: x86: nSVM: implement shadowing of AVIC's physical id table

2022-04-27 Thread Maxim Levitsky
Implement the shadow physical id table and its
write tracking code which will be soon used for the nested AVIC.

Signed-off-by: Maxim Levitsky 
---
 arch/x86/kvm/svm/avic.c | 461 +++-
 arch/x86/kvm/svm/svm.h  |  71 +++
 2 files changed, 524 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index e5cbbb97fbab6..f462b7e48e3ca 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -51,6 +51,433 @@ static u32 next_vm_id = 0;
 static bool next_vm_id_wrapped = 0;
 static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
 
+
+static inline struct kvm_vcpu *avic_vcpu_by_l1_apicid(struct kvm *kvm,
+ int l1_apicid)
+{
+   WARN_ON(l1_apicid == -1);
+   return kvm_get_vcpu_by_id(kvm, l1_apicid);
+}
+
+static void avic_physid_shadow_entry_set_vcpu(struct kvm *kvm,
+ struct avic_physid_table *t,
+ int n,
+ int new_l1_apicid)
+{
+   struct avic_physid_entry_descr *e = >entries[n];
+   u64 sentry = READ_ONCE(*e->sentry);
+   u64 old_sentry = sentry;
+   struct kvm_vcpu *new_vcpu = NULL;
+   int l0_apicid = -1;
+
+   WARN_ON(!test_bit(n, t->valid_entires));
+
+   if (!list_empty(>link))
+   list_del_init(>link);
+
+   if (new_l1_apicid != -1)
+   new_vcpu = avic_vcpu_by_l1_apicid(kvm, new_l1_apicid);
+
+   if (new_vcpu)
+   l0_apicid = kvm_cpu_get_apicid(new_vcpu->cpu);
+
+   physid_entry_set_apicid(, l0_apicid);
+
+   trace_kvm_avic_physid_update_vcpu_guest(new_l1_apicid, l0_apicid);
+
+   if (sentry != old_sentry)
+   WRITE_ONCE(*e->sentry, sentry);
+}
+
+static void avic_physid_shadow_entry_create(struct kvm *kvm,
+   struct avic_physid_table *t,
+   int n,
+   u64 gentry)
+{
+   struct avic_physid_entry_descr *e = >entries[n];
+   struct page *backing_page;
+   u64 backing_page_gpa = physid_entry_get_backing_table(gentry);
+   int l1_apic_id = physid_entry_get_apicid(gentry);
+   hpa_t backing_page_hpa;
+   u64 sentry = 0;
+
+
+   if (backing_page_gpa == INVALID_BACKING_PAGE)
+   return;
+
+   /* Pin the APIC backing page */
+   backing_page = gfn_to_page(kvm, gpa_to_gfn(backing_page_gpa));
+
+   if (is_error_page(backing_page))
+   /* Invalid GPA in the guest entry - point to a dummy entry */
+   backing_page_hpa = t->dummy_page_hpa;
+   else
+   backing_page_hpa = page_to_phys(backing_page);
+
+   physid_entry_set_backing_table(, backing_page_hpa);
+
+   e->gentry = gentry;
+   *e->sentry = sentry;
+
+   if (test_and_set_bit(n, t->valid_entires))
+   WARN_ON(1);
+
+   if (backing_page_hpa != t->dummy_page_hpa)
+   avic_physid_shadow_entry_set_vcpu(kvm, t, n, l1_apic_id);
+}
+
+static void avic_physid_shadow_entry_remove(struct kvm *kvm,
+  struct avic_physid_table *t,
+  int n)
+{
+   struct avic_physid_entry_descr *e = >entries[n];
+   hpa_t backing_page_hpa;
+
+   if (!test_and_clear_bit(n, t->valid_entires))
+   WARN_ON(1);
+
+   /* Release the APIC backing page */
+   backing_page_hpa = physid_entry_get_backing_table(*e->sentry);
+
+   if (backing_page_hpa != t->dummy_page_hpa)
+   kvm_release_pfn_dirty(backing_page_hpa >> PAGE_SHIFT);
+
+   if (!list_empty(>link))
+   list_del_init(>link);
+
+   e->gentry = 0;
+   *e->sentry = 0;
+}
+
+
+static bool
+avic_physid_shadow_table_setup_write_tracking(struct kvm *kvm,
+ struct avic_physid_table *t,
+ bool enable)
+{
+   struct kvm_memory_slot *slot;
+
+   write_lock(>mmu_lock);
+   slot = gfn_to_memslot(kvm, t->gfn);
+   if (!slot) {
+   write_unlock(>mmu_lock);
+   return false;
+   }
+
+   if (enable)
+   kvm_slot_page_track_add_page(kvm, slot, t->gfn, 
KVM_PAGE_TRACK_WRITE);
+   else
+   kvm_slot_page_track_remove_page(kvm, slot, t->gfn, 
KVM_PAGE_TRACK_WRITE);
+   write_unlock(>mmu_lock);
+   return true;
+}
+
+static void
+avic_physid_shadow_table_erase(struct kvm *kvm, struct avic_physid_table *t)
+{
+   int i;
+
+   if (!t->nentries)
+   return;
+
+   avic_physid_shadow_table_setup_write_tracking(kvm, t, false);
+
+   for_each_set_bit(i, t->valid_entires, AVIC_MAX_PHYSICAL_ID_COUNT)
+   avic_physid_shadow_entry_remove(kvm, t, i);
+
+   t->nentries = 0;
+   t->flood_count = 

[Intel-gfx] [RFC PATCH v3 10/19] KVM: x86: nSVM: implement AVIC's physid/logid table access helpers

2022-04-27 Thread Maxim Levitsky
This implements a few helpers that help manipulate the AVIC's
physical and logical id table entries.

Signed-off-by: Maxim Levitsky 
---
 arch/x86/kvm/svm/svm.h | 45 ++
 1 file changed, 45 insertions(+)

diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 6fcb164a6ee4a..dfca4c06e2071 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -628,6 +628,51 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
 void avic_ring_doorbell(struct kvm_vcpu *vcpu);
 unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu);
 
+#define INVALID_BACKING_PAGE   (~(u64)0)
+
+static inline u64 physid_entry_get_backing_table(u64 entry)
+{
+   if (!(entry & AVIC_PHYSICAL_ID_ENTRY_VALID_MASK))
+   return INVALID_BACKING_PAGE;
+   return entry & AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK;
+}
+
+static inline int physid_entry_get_apicid(u64 entry)
+{
+   if (!(entry & AVIC_PHYSICAL_ID_ENTRY_VALID_MASK))
+   return -1;
+   if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
+   return -1;
+
+   return entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
+}
+
+static inline int logid_get_physid(u64 entry)
+{
+   if (!(entry & AVIC_LOGICAL_ID_ENTRY_VALID_BIT))
+   return -1;
+   return entry & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
+}
+
+static inline void physid_entry_set_backing_table(u64 *entry, u64 value)
+{
+   *entry &= ~AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK;
+   *entry |= (AVIC_PHYSICAL_ID_ENTRY_VALID_MASK | value);
+}
+
+static inline void physid_entry_set_apicid(u64 *entry, int value)
+{
+   WARN_ON(!(*entry & AVIC_PHYSICAL_ID_ENTRY_VALID_MASK));
+
+   *entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
+
+   if (value == -1)
+   *entry &= ~(AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
+   else
+   *entry |= (AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK | value);
+}
+
+
 /* sev.c */
 
 #define GHCB_VERSION_MAX   1ULL
-- 
2.26.3



[Intel-gfx] [RFC PATCH v3 09/19] KVM: x86: nSVM: add nested AVIC tracepoints

2022-04-27 Thread Maxim Levitsky
This patch adds a few tracepoints that will be used
to debug/profile the nested AVIC.

Signed-off-by: Maxim Levitsky 
---
 arch/x86/kvm/trace.h | 157 ++-
 arch/x86/kvm/x86.c   |  13 
 2 files changed, 169 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index de47625175692..f7ddba5ae06a5 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1385,7 +1385,7 @@ TRACE_EVENT(kvm_apicv_accept_irq,
 );
 
 /*
- * Tracepoint for AMD AVIC
+ * Tracepoints for AMD AVIC
  */
 TRACE_EVENT(kvm_avic_incomplete_ipi,
TP_PROTO(u32 vcpu, u32 icrh, u32 icrl, u32 id, u32 index),
@@ -1479,6 +1479,161 @@ TRACE_EVENT(kvm_avic_kick_vcpu_slowpath,
  __entry->icrh, __entry->icrl, __entry->index)
 );
 
+TRACE_EVENT(kvm_avic_physid_table_alloc,
+   TP_PROTO(u64 gpa),
+   TP_ARGS(gpa),
+
+   TP_STRUCT__entry(
+   __field(u64, gpa)
+   ),
+
+   TP_fast_assign(
+   __entry->gpa = gpa;
+   ),
+
+   TP_printk("table at gpa 0x%llx",
+ __entry->gpa)
+);
+
+
+TRACE_EVENT(kvm_avic_physid_table_free,
+   TP_PROTO(u64 gpa),
+   TP_ARGS(gpa),
+
+   TP_STRUCT__entry(
+   __field(u64, gpa)
+   ),
+
+   TP_fast_assign(
+   __entry->gpa = gpa;
+   ),
+
+   TP_printk("table at gpa 0x%llx",
+ __entry->gpa)
+);
+
+TRACE_EVENT(kvm_avic_physid_table_reload,
+   TP_PROTO(u64 gpa, int nentries, int new_nentires),
+   TP_ARGS(gpa, nentries, new_nentires),
+
+   TP_STRUCT__entry(
+   __field(u64, gpa)
+   __field(int, nentries)
+   __field(int, new_nentires)
+   ),
+
+   TP_fast_assign(
+   __entry->gpa = gpa;
+   __entry->nentries = nentries;
+   __entry->new_nentires = new_nentires;
+   ),
+
+   TP_printk("table at gpa 0x%llx, nentires %d -> %d",
+ __entry->gpa, __entry->nentries, __entry->new_nentires)
+);
+
+TRACE_EVENT(kvm_avic_physid_table_write,
+   TP_PROTO(u64 gpa, int bytes),
+   TP_ARGS(gpa, bytes),
+
+   TP_STRUCT__entry(
+   __field(u64, gpa)
+   __field(int, bytes)
+   ),
+
+   TP_fast_assign(
+   __entry->gpa = gpa;
+   __entry->bytes = bytes;
+   ),
+
+   TP_printk("gpa 0x%llx, write of %d bytes",
+ __entry->gpa, __entry->bytes)
+);
+
+TRACE_EVENT(kvm_avic_physid_update_vcpu_host,
+   TP_PROTO(int vcpu_id, int cpu_id, int n),
+   TP_ARGS(vcpu_id, cpu_id, n),
+
+   TP_STRUCT__entry(
+   __field(int, vcpu_id)
+   __field(int, cpu_id)
+   __field(int, n)
+   ),
+
+   TP_fast_assign(
+   __entry->vcpu_id = vcpu_id;
+   __entry->cpu_id = cpu_id;
+   __entry->n = n;
+   ),
+
+   TP_printk("l1 vcpu %d -> l0 cpu %d (%d entries)",
+ __entry->vcpu_id, __entry->cpu_id, __entry->n)
+);
+
+TRACE_EVENT(kvm_avic_physid_update_vcpu_guest,
+   TP_PROTO(int vcpu_id, int cpu_id),
+   TP_ARGS(vcpu_id, cpu_id),
+
+   TP_STRUCT__entry(
+   __field(int, vcpu_id)
+   __field(int, cpu_id)
+   ),
+
+   TP_fast_assign(
+   __entry->vcpu_id = vcpu_id;
+   __entry->cpu_id = cpu_id;
+   ),
+
+   TP_printk("l1 vcpu %d -> l0 cpu %d",
+ __entry->vcpu_id, __entry->cpu_id)
+);
+
+TRACE_EVENT(kvm_avic_nested_doorbell,
+   TP_PROTO(int source_l1_apicid, int target_l1_apicid, bool 
target_nested,
+   bool target_running),
+   TP_ARGS(source_l1_apicid, target_l1_apicid, target_nested,
+   target_running),
+
+   TP_STRUCT__entry(
+   __field(int, source_l1_apicid)
+   __field(int, target_l1_apicid)
+   __field(bool, target_nested)
+   __field(bool, target_running)
+   ),
+
+   TP_fast_assign(
+   __entry->source_l1_apicid = source_l1_apicid;
+   __entry->target_l1_apicid = target_l1_apicid;
+   __entry->target_nested = target_nested;
+   __entry->target_running = target_running;
+   ),
+
+   TP_printk("source %d target %d (nested: %d, running %d)",
+ __entry->source_l1_apicid, __entry->target_l1_apicid,
+ __entry->target_nested, __entry->target_running)
+);
+
+TRACE_EVENT(kvm_avic_nested_kick_vcpu,
+   TP_PROTO(int source_l1_apic_id, int target_l2_apic_id, int 
target_l1_apic_id),
+   TP_ARGS(source_l1_apic_id, target_l2_apic_id, target_l1_apic_id),
+
+   TP_STRUCT__entry(
+   __field(int, source_l1_apic_id)
+   __field(int, target_l2_apic_id)
+   __field(int, target_l1_apic_id)
+   ),
+
+   TP_fast_assign(

[Intel-gfx] [RFC PATCH v3 08/19] KVM: x86: SVM: move avic state to separate struct

2022-04-27 Thread Maxim Levitsky
This will make the code a bit easier to read when nested AVIC support
is added.

No functional change intended.

Signed-off-by: Maxim Levitsky 
---
 arch/x86/kvm/svm/avic.c | 51 +++--
 arch/x86/kvm/svm/svm.h  | 14 ++-
 2 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 1102421668a11..e5cbbb97fbab6 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -69,6 +69,8 @@ int avic_ga_log_notifier(u32 ga_tag)
unsigned long flags;
struct kvm_svm *kvm_svm;
struct kvm_vcpu *vcpu = NULL;
+   struct kvm_svm_avic *avic;
+
u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
 
@@ -76,9 +78,13 @@ int avic_ga_log_notifier(u32 ga_tag)
trace_kvm_avic_ga_log(vm_id, vcpu_id);
 
spin_lock_irqsave(_vm_data_hash_lock, flags);
-   hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
-   if (kvm_svm->avic_vm_id != vm_id)
+   hash_for_each_possible(svm_vm_data_hash, avic, hnode, vm_id) {
+
+
+   if (avic->vm_id != vm_id)
continue;
+
+   kvm_svm = container_of(avic, struct kvm_svm, avic);
vcpu = kvm_get_vcpu_by_id(_svm->kvm, vcpu_id);
break;
}
@@ -98,18 +104,18 @@ int avic_ga_log_notifier(u32 ga_tag)
 void avic_vm_destroy(struct kvm *kvm)
 {
unsigned long flags;
-   struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
+   struct kvm_svm_avic *avic = _kvm_svm(kvm)->avic;
 
if (!enable_apicv)
return;
 
-   if (kvm_svm->avic_logical_id_table_page)
-   __free_page(kvm_svm->avic_logical_id_table_page);
-   if (kvm_svm->avic_physical_id_table_page)
-   __free_page(kvm_svm->avic_physical_id_table_page);
+   if (avic->logical_id_table_page)
+   __free_page(avic->logical_id_table_page);
+   if (avic->physical_id_table_page)
+   __free_page(avic->physical_id_table_page);
 
spin_lock_irqsave(_vm_data_hash_lock, flags);
-   hash_del(_svm->hnode);
+   hash_del(>hnode);
spin_unlock_irqrestore(_vm_data_hash_lock, flags);
 }
 
@@ -117,10 +123,9 @@ int avic_vm_init(struct kvm *kvm)
 {
unsigned long flags;
int err = -ENOMEM;
-   struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
-   struct kvm_svm *k2;
struct page *p_page;
struct page *l_page;
+   struct kvm_svm_avic *avic = _kvm_svm(kvm)->avic;
u32 vm_id;
 
if (!enable_apicv)
@@ -131,14 +136,14 @@ int avic_vm_init(struct kvm *kvm)
if (!p_page)
goto free_avic;
 
-   kvm_svm->avic_physical_id_table_page = p_page;
+   avic->physical_id_table_page = p_page;
 
/* Allocating logical APIC ID table (4KB) */
l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!l_page)
goto free_avic;
 
-   kvm_svm->avic_logical_id_table_page = l_page;
+   avic->logical_id_table_page = l_page;
 
spin_lock_irqsave(_vm_data_hash_lock, flags);
  again:
@@ -149,13 +154,15 @@ int avic_vm_init(struct kvm *kvm)
}
/* Is it still in use? Only possible if wrapped at least once */
if (next_vm_id_wrapped) {
-   hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
-   if (k2->avic_vm_id == vm_id)
+   struct kvm_svm_avic *avic2;
+
+   hash_for_each_possible(svm_vm_data_hash, avic2, hnode, vm_id) {
+   if (avic2->vm_id == vm_id)
goto again;
}
}
-   kvm_svm->avic_vm_id = vm_id;
-   hash_add(svm_vm_data_hash, _svm->hnode, kvm_svm->avic_vm_id);
+   avic->vm_id = vm_id;
+   hash_add(svm_vm_data_hash, >hnode, avic->vm_id);
spin_unlock_irqrestore(_vm_data_hash_lock, flags);
 
return 0;
@@ -169,8 +176,8 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
 {
struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
-   phys_addr_t lpa = 
__sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
-   phys_addr_t ppa = 
__sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));
+   phys_addr_t lpa = 
__sme_set(page_to_phys(kvm_svm->avic.logical_id_table_page));
+   phys_addr_t ppa = 
__sme_set(page_to_phys(kvm_svm->avic.physical_id_table_page));
 
vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
@@ -193,7 +200,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu 
*vcpu,
if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
return NULL;
 
-   avic_physical_id_table = 
page_address(kvm_svm->avic_physical_id_table_page);
+   avic_physical_id_table = 

[Intel-gfx] [RFC PATCH v3 07/19] KVM: x86: mmu: tweak fast path for emulation of access to nested NPT pages

2022-04-27 Thread Maxim Levitsky
If a non leaf mmu page is write tracked externally for some reason,
which can in theory happen if it was used for nested avic physid page
before, then this code will enter an endless loop of page faults because
unprotecting the mmu page will not remove write tracking, nor will the
write tracker callback be called, because there is no mmu page at
this address.

Fix this by only invoking the fast path if we succeeded in zapping the
mmu page.

Fixes: 147277540bbc5 ("kvm: svm: Add support for additional SVM NPF error 
codes")
Signed-off-by: Maxim Levitsky 
---
 arch/x86/kvm/mmu/mmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 633a3138d68e1..8f77d41e7fd80 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5341,8 +5341,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t 
cr2_or_gpa, u64 error_code,
 */
if (vcpu->arch.mmu->root_role.direct &&
(error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
-   kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa));
-   return 1;
+   if (kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)))
+   return 1;
}
 
/*
-- 
2.26.3



[Intel-gfx] [RFC PATCH v3 06/19] KVM: x86: mmu: add gfn_in_memslot helper

2022-04-27 Thread Maxim Levitsky
This is a tiny refactoring, and can be useful to check
if a GPA/GFN is within a memslot a bit more cleanly.

Signed-off-by: Maxim Levitsky 
---
 include/linux/kvm_host.h | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 252ee4a61b58b..12e261559070b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1580,6 +1580,13 @@ int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
 
+
+static inline bool gfn_in_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+   return (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages);
+}
+
+
 /*
  * Returns a pointer to the memslot if it contains gfn.
  * Otherwise returns NULL.
@@ -1590,12 +1597,13 @@ try_get_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
if (!slot)
return NULL;
 
-   if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages)
+   if (gfn_in_memslot(slot, gfn))
return slot;
else
return NULL;
 }
 
+
 /*
  * Returns a pointer to the memslot that contains gfn. Otherwise returns NULL.
  *
-- 
2.26.3



[Intel-gfx] [RFC PATCH v3 05/19] x86: KVMGT: use kvm_page_track_write_tracking_enable

2022-04-27 Thread Maxim Levitsky
This allows write tracking to be enabled only when KVMGT is
actually used, so it doesn't carry any penalty otherwise.

Tested by booting a VM with a kvmgt mdev device.

Signed-off-by: Maxim Levitsky 
---
 arch/x86/kvm/Kconfig | 3 ---
 arch/x86/kvm/mmu/mmu.c   | 2 +-
 drivers/gpu/drm/i915/Kconfig | 1 -
 drivers/gpu/drm/i915/gvt/kvmgt.c | 5 +
 4 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index e3cbd77061364..41341905d3734 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -126,7 +126,4 @@ config KVM_XEN
 
  If in doubt, say "N".
 
-config KVM_EXTERNAL_WRITE_TRACKING
-   bool
-
 endif # VIRTUALIZATION
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index fb744616bf7df..633a3138d68e1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5753,7 +5753,7 @@ int kvm_mmu_init_vm(struct kvm *kvm)
node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
kvm_page_track_register_notifier(kvm, node);
 
-   if (IS_ENABLED(CONFIG_KVM_EXTERNAL_WRITE_TRACKING) || !tdp_enabled)
+   if (!tdp_enabled)
mmu_enable_write_tracking(kvm);
 
return 0;
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 98c5450b8eacc..7d8346f4bae11 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -130,7 +130,6 @@ config DRM_I915_GVT_KVMGT
depends on DRM_I915_GVT
depends on KVM
depends on VFIO_MDEV
-   select KVM_EXTERNAL_WRITE_TRACKING
default n
help
  Choose this option if you want to enable KVMGT support for
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 057ec44901045..4c62ab3ef245d 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1933,6 +1933,7 @@ static int kvmgt_guest_init(struct mdev_device *mdev)
struct intel_vgpu *vgpu;
struct kvmgt_vdev *vdev;
struct kvm *kvm;
+   int ret;
 
vgpu = mdev_get_drvdata(mdev);
if (handle_valid(vgpu->handle))
@@ -1948,6 +1949,10 @@ static int kvmgt_guest_init(struct mdev_device *mdev)
if (__kvmgt_vgpu_exist(vgpu, kvm))
return -EEXIST;
 
+   ret = kvm_page_track_write_tracking_enable(kvm);
+   if (ret)
+   return ret;
+
info = vzalloc(sizeof(struct kvmgt_guest_info));
if (!info)
return -ENOMEM;
-- 
2.26.3



[Intel-gfx] [RFC PATCH v3 04/19] KVM: x86: mmu: allow to enable write tracking externally

2022-04-27 Thread Maxim Levitsky
This will be used to enable write tracking from nested AVIC code
and can also be used to enable write tracking in GVT-g module
when it actually uses it as opposed to always enabling it,
when the module is compiled in the kernel.

No functional change intended.

Signed-off-by: Maxim Levitsky 
---
 arch/x86/include/asm/kvm_host.h   |  2 +-
 arch/x86/include/asm/kvm_page_track.h |  1 +
 arch/x86/kvm/mmu.h|  8 +---
 arch/x86/kvm/mmu/mmu.c| 17 ++---
 arch/x86/kvm/mmu/page_track.c | 10 --
 5 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 636df87542555..fc7df778a3d71 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1254,7 +1254,7 @@ struct kvm_arch {
 * is used as one input when determining whether certain memslot
 * related allocations are necessary.
 */
-   bool shadow_root_allocated;
+   bool mmu_page_tracking_enabled;
 
 #if IS_ENABLED(CONFIG_HYPERV)
hpa_t   hv_root_tdp;
diff --git a/arch/x86/include/asm/kvm_page_track.h 
b/arch/x86/include/asm/kvm_page_track.h
index eb186bc57f6a9..955a5ae07b10e 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -50,6 +50,7 @@ int kvm_page_track_init(struct kvm *kvm);
 void kvm_page_track_cleanup(struct kvm *kvm);
 
 bool kvm_page_track_write_tracking_enabled(struct kvm *kvm);
+int kvm_page_track_write_tracking_enable(struct kvm *kvm);
 int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot);
 
 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 671cfeccf04e9..44d15551f7156 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -269,7 +269,7 @@ int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
 int kvm_mmu_post_init_vm(struct kvm *kvm);
 void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
 
-static inline bool kvm_shadow_root_allocated(struct kvm *kvm)
+static inline bool mmu_page_tracking_enabled(struct kvm *kvm)
 {
/*
 * Read shadow_root_allocated before related pointers. Hence, threads
@@ -277,9 +277,11 @@ static inline bool kvm_shadow_root_allocated(struct kvm 
*kvm)
 * see the pointers. Pairs with smp_store_release in
 * mmu_first_shadow_root_alloc.
 */
-   return smp_load_acquire(>arch.shadow_root_allocated);
+   return smp_load_acquire(>arch.mmu_page_tracking_enabled);
 }
 
+int mmu_enable_write_tracking(struct kvm *kvm);
+
 #ifdef CONFIG_X86_64
 static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return 
kvm->arch.tdp_mmu_enabled; }
 #else
@@ -288,7 +290,7 @@ static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { 
return false; }
 
 static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
 {
-   return !is_tdp_mmu_enabled(kvm) || kvm_shadow_root_allocated(kvm);
+   return !is_tdp_mmu_enabled(kvm) || mmu_page_tracking_enabled(kvm);
 }
 
 static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 904f0faff2186..fb744616bf7df 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3389,7 +3389,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
return r;
 }
 
-static int mmu_first_shadow_root_alloc(struct kvm *kvm)
+int mmu_enable_write_tracking(struct kvm *kvm)
 {
struct kvm_memslots *slots;
struct kvm_memory_slot *slot;
@@ -3399,21 +3399,20 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm)
 * Check if this is the first shadow root being allocated before
 * taking the lock.
 */
-   if (kvm_shadow_root_allocated(kvm))
+   if (mmu_page_tracking_enabled(kvm))
return 0;
 
mutex_lock(>slots_arch_lock);
 
/* Recheck, under the lock, whether this is the first shadow root. */
-   if (kvm_shadow_root_allocated(kvm))
+   if (mmu_page_tracking_enabled(kvm))
goto out_unlock;
 
/*
 * Check if anything actually needs to be allocated, e.g. all metadata
 * will be allocated upfront if TDP is disabled.
 */
-   if (kvm_memslots_have_rmaps(kvm) &&
-   kvm_page_track_write_tracking_enabled(kvm))
+   if (kvm_memslots_have_rmaps(kvm) && mmu_page_tracking_enabled(kvm))
goto out_success;
 
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
@@ -3443,7 +3442,7 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm)
 * all the related pointers are set.
 */
 out_success:
-   smp_store_release(>arch.shadow_root_allocated, true);
+   smp_store_release(>arch.mmu_page_tracking_enabled, true);
 
 out_unlock:
mutex_unlock(>slots_arch_lock);
@@ -3480,7 +3479,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
}
}
 
- 

[Intel-gfx] [RFC PATCH v3 03/19] KVM: x86: SVM: remove avic's broken code that updated APIC ID

2022-04-27 Thread Maxim Levitsky
AVIC is now inhibited if the guest changes apic id, thus remove
that broken code.

Signed-off-by: Maxim Levitsky 
---
 arch/x86/kvm/svm/avic.c | 35 ---
 1 file changed, 35 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 54fe03714f8a6..1102421668a11 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -508,35 +508,6 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
return ret;
 }
 
-static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
-{
-   u64 *old, *new;
-   struct vcpu_svm *svm = to_svm(vcpu);
-   u32 id = kvm_xapic_id(vcpu->arch.apic);
-
-   if (vcpu->vcpu_id == id)
-   return 0;
-
-   old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
-   new = avic_get_physical_id_entry(vcpu, id);
-   if (!new || !old)
-   return 1;
-
-   /* We need to move physical_id_entry to new offset */
-   *new = *old;
-   *old = 0ULL;
-   to_svm(vcpu)->avic_physical_id_cache = new;
-
-   /*
-* Also update the guest physical APIC ID in the logical
-* APIC ID table entry if already setup the LDR.
-*/
-   if (svm->ldr_reg)
-   avic_handle_ldr_update(vcpu);
-
-   return 0;
-}
-
 static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
@@ -555,10 +526,6 @@ static int avic_unaccel_trap_write(struct kvm_vcpu *vcpu)
AVIC_UNACCEL_ACCESS_OFFSET_MASK;
 
switch (offset) {
-   case APIC_ID:
-   if (avic_handle_apic_id_update(vcpu))
-   return 0;
-   break;
case APIC_LDR:
if (avic_handle_ldr_update(vcpu))
return 0;
@@ -650,8 +617,6 @@ int avic_init_vcpu(struct vcpu_svm *svm)
 
 void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 {
-   if (avic_handle_apic_id_update(vcpu) != 0)
-   return;
avic_handle_dfr_update(vcpu);
avic_handle_ldr_update(vcpu);
 }
-- 
2.26.3



[Intel-gfx] [RFC PATCH v3 02/19] KVM: x86: inhibit APICv/AVIC when the guest and/or host changes apic id/base from the defaults.

2022-04-27 Thread Maxim Levitsky
Neither of these settings should be changed by the guest and it is
a burden to support it in the acceleration code, so just inhibit
it instead.

Also add a boolean 'apic_id_changed' to indicate if apic id ever changed.

Signed-off-by: Maxim Levitsky 
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/lapic.c| 25 ++---
 arch/x86/kvm/lapic.h|  8 
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 63eae00625bda..636df87542555 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1070,6 +1070,8 @@ enum kvm_apicv_inhibit {
APICV_INHIBIT_REASON_ABSENT,
/* AVIC is disabled because SEV doesn't support it */
APICV_INHIBIT_REASON_SEV,
+   /* APIC ID and/or APIC base was changed by the guest */
+   APICV_INHIBIT_REASON_RO_SETTINGS,
 };
 
 struct kvm_arch {
@@ -1258,6 +1260,7 @@ struct kvm_arch {
hpa_t   hv_root_tdp;
spinlock_t hv_root_tdp_lock;
 #endif
+   bool apic_id_changed;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 66b0eb0bda94e..8996675b3ef4c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2038,6 +2038,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic 
*apic, u32 lvt0_val)
}
 }
 
+static void kvm_lapic_check_initial_apic_id(struct kvm_lapic *apic)
+{
+   if (kvm_apic_has_initial_apic_id(apic))
+   return;
+
+   pr_warn_once("APIC ID change is unsupported by KVM");
+
+   kvm_set_apicv_inhibit(apic->vcpu->kvm,
+   APICV_INHIBIT_REASON_RO_SETTINGS);
+
+   apic->vcpu->kvm->arch.apic_id_changed = true;
+}
+
 static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 {
int ret = 0;
@@ -2046,9 +2059,11 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, 
u32 reg, u32 val)
 
switch (reg) {
case APIC_ID:   /* Local APIC ID */
-   if (!apic_x2apic_mode(apic))
+   if (!apic_x2apic_mode(apic)) {
+
kvm_apic_set_xapic_id(apic, val >> 24);
-   else
+   kvm_lapic_check_initial_apic_id(apic);
+   } else
ret = 1;
break;
 
@@ -2335,8 +2350,11 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 MSR_IA32_APICBASE_BASE;
 
if ((value & MSR_IA32_APICBASE_ENABLE) &&
-apic->base_address != APIC_DEFAULT_PHYS_BASE)
+apic->base_address != APIC_DEFAULT_PHYS_BASE) {
+   kvm_set_apicv_inhibit(apic->vcpu->kvm,
+   APICV_INHIBIT_REASON_RO_SETTINGS);
pr_warn_once("APIC base relocation is unsupported by KVM");
+   }
 }
 
 void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
@@ -2649,6 +2667,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
}
}
 
+   kvm_lapic_check_initial_apic_id(vcpu->arch.apic);
return 0;
 }
 
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 4e4f8a22754f9..b9c406d383080 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -252,4 +252,12 @@ static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
 }
 
+static inline bool kvm_apic_has_initial_apic_id(struct kvm_lapic *apic)
+{
+   if (apic_x2apic_mode(apic))
+   return true;
+
+   return kvm_xapic_id(apic) == apic->vcpu->vcpu_id;
+}
+
 #endif
-- 
2.26.3



[Intel-gfx] [RFC PATCH v3 01/19] KVM: x86: document AVIC/APICv inhibit reasons

2022-04-27 Thread Maxim Levitsky
These days there are too many AVIC/APICv inhibit
reasons, and it doesn't hurt to have some documentation
for them.

Signed-off-by: Maxim Levitsky 
---
 arch/x86/include/asm/kvm_host.h | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f164c6c1514a4..63eae00625bda 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1046,14 +1046,29 @@ struct kvm_x86_msr_filter {
 };
 
 enum kvm_apicv_inhibit {
+   /* APICv/AVIC is disabled by module param and/or not supported in 
hardware */
APICV_INHIBIT_REASON_DISABLE,
+   /* APICv/AVIC is inhibited because AutoEOI feature is being used by a 
HyperV guest*/
APICV_INHIBIT_REASON_HYPERV,
+   /* AVIC is inhibited on a CPU because it runs a nested guest */
APICV_INHIBIT_REASON_NESTED,
+   /* AVIC is inhibited due to wait for an irq window (AVIC doesn't 
support this) */
APICV_INHIBIT_REASON_IRQWIN,
+   /*
+* AVIC is inhibited because i8254 're-inject' mode is used
+* which needs EOI intercept which AVIC doesn't support
+*/
APICV_INHIBIT_REASON_PIT_REINJ,
+   /* AVIC is inhibited because the guest has x2apic in its CPUID*/
APICV_INHIBIT_REASON_X2APIC,
+   /* AVIC/APICv is inhibited because KVM_GUESTDBG_BLOCKIRQ was enabled */
APICV_INHIBIT_REASON_BLOCKIRQ,
+   /*
+* AVIC/APICv is inhibited because the guest didn't yet
+* enable kernel/split irqchip
+*/
APICV_INHIBIT_REASON_ABSENT,
+   /* AVIC is disabled because SEV doesn't support it */
APICV_INHIBIT_REASON_SEV,
 };
 
-- 
2.26.3



[Intel-gfx] [RFC PATCH v3 00/19] RFC: nested AVIC

2022-04-27 Thread Maxim Levitsky
This is V3 of my nested AVIC patches.

I fixed a few more bugs, and I also split the code into smaller patches.

Review is welcome!

Best regards,
Maxim Levitsky

Maxim Levitsky (19):
  KVM: x86: document AVIC/APICv inhibit reasons
  KVM: x86: inhibit APICv/AVIC when the guest and/or host changes apic
id/base from the defaults.
  KVM: x86: SVM: remove avic's broken code that updated APIC ID
  KVM: x86: mmu: allow to enable write tracking externally
  x86: KVMGT: use kvm_page_track_write_tracking_enable
  KVM: x86: mmu: add gfn_in_memslot helper
  KVM: x86: mmu: tweak fast path for emulation of access to nested NPT
pages
  KVM: x86: SVM: move avic state to separate struct
  KVM: x86: nSVM: add nested AVIC tracepoints
  KVM: x86: nSVM: implement AVIC's physid/logid table access helpers
  KVM: x86: nSVM: implement shadowing of AVIC's physical id table
  KVM: x86: nSVM: make nested AVIC physid write tracking be aware of the
host scheduling
  KVM: x86: nSVM: wire nested AVIC to nested guest entry/exit
  KVM: x86: rename .set_apic_access_page_addr to reload_apic_access_page
  KVM: x86: nSVM: add code to reload AVIC physid table when it is
invalidated
  KVM: x86: nSVM: implement support for nested AVIC vmexits
  KVM: x86: nSVM: implement nested AVIC doorbell emulation
  KVM: x86: SVM/nSVM: add optional non strict AVIC doorbell mode
  KVM: x86: nSVM: expose the nested AVIC to the guest

 arch/x86/include/asm/kvm-x86-ops.h|   2 +-
 arch/x86/include/asm/kvm_host.h   |  23 +-
 arch/x86/include/asm/kvm_page_track.h |   1 +
 arch/x86/kvm/Kconfig  |   3 -
 arch/x86/kvm/lapic.c  |  25 +-
 arch/x86/kvm/lapic.h  |   8 +
 arch/x86/kvm/mmu.h|   8 +-
 arch/x86/kvm/mmu/mmu.c|  21 +-
 arch/x86/kvm/mmu/page_track.c |  10 +-
 arch/x86/kvm/svm/avic.c   | 985 +++---
 arch/x86/kvm/svm/nested.c | 141 +++-
 arch/x86/kvm/svm/svm.c|  39 +-
 arch/x86/kvm/svm/svm.h| 166 -
 arch/x86/kvm/trace.h  | 157 +++-
 arch/x86/kvm/vmx/vmx.c|   8 +-
 arch/x86/kvm/x86.c|  19 +-
 drivers/gpu/drm/i915/Kconfig  |   1 -
 drivers/gpu/drm/i915/gvt/kvmgt.c  |   5 +
 include/linux/kvm_host.h  |  10 +-
 19 files changed, 1507 insertions(+), 125 deletions(-)

-- 
2.26.3




Re: [Intel-gfx] [PATCH v2 1/4] drm/i915/gt: GEM_BUG_ON unexpected NULL at scatterlist walking

2022-04-27 Thread Matthew Auld

On 25/04/2022 17:24, Ramalingam C wrote:

While locating the start of the ccs scatterlist in the smem scatterlist, the
smem scatterlist has to be the size of the lmem obj + the corresponding ccs
data size. Report a bug if the scatterlist terminates before that length.

Signed-off-by: Ramalingam C 
---
  drivers/gpu/drm/i915/gt/intel_migrate.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c 
b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 9d552f30b627..29d761da02c4 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -687,6 +687,12 @@ static void get_ccs_sg_sgt(struct sgt_dma *it, u32 
bytes_to_cpy)
bytes_to_cpy -= len;
  
  		it->sg = __sg_next(it->sg);

+
+   /*
+* scatterlist supposed to be the size of
+* bytes_to_cpy + GET_CCS_BYTES(bytes_to_copy).
+*/
+   GEM_BUG_ON(!it->sg);


It will crash and burn anyway, with the below NULL deref. Not sure if 
BUG_ON() is really much better, but I guess with the additional comment,

Reviewed-by: Matthew Auld 


it->dma = sg_dma_address(it->sg);
it->max = it->dma + sg_dma_len(it->sg);
} while (bytes_to_cpy);


Re: [Intel-gfx] [PATCH v2 2/4] drm/i915/gt: optimize the ccs_sz calculation per chunk

2022-04-27 Thread Matthew Auld

On 25/04/2022 17:24, Ramalingam C wrote:

Calculate the ccs_sz that needs to be emitted based on the src
and dst pages emitted per chunk. And handle the return value of emit_pte
for the ccs pages.

Signed-off-by: Ramalingam C 
---
  drivers/gpu/drm/i915/gt/intel_migrate.c | 36 +
  1 file changed, 12 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c 
b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 29d761da02c4..463a6a14b5f9 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -647,17 +647,9 @@ static int scatter_list_length(struct scatterlist *sg)
  
  static void

  calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem,
-  int *src_sz, int *ccs_sz, u32 bytes_to_cpy,
-  u32 ccs_bytes_to_cpy)
+  int *src_sz, u32 bytes_to_cpy, u32 ccs_bytes_to_cpy)
  {
if (ccs_bytes_to_cpy) {
-   /*
-* We can only copy the ccs data corresponding to
-* the CHUNK_SZ of lmem which is
-* GET_CCS_BYTES(i915, CHUNK_SZ))
-*/
-   *ccs_sz = min_t(int, ccs_bytes_to_cpy, GET_CCS_BYTES(i915, 
CHUNK_SZ));
-
if (!src_is_lmem)
/*
 * When CHUNK_SZ is passed all the pages upto CHUNK_SZ
@@ -713,10 +705,10 @@ intel_context_migrate_copy(struct intel_context *ce,
struct drm_i915_private *i915 = ce->engine->i915;
u32 ccs_bytes_to_cpy = 0, bytes_to_cpy;
enum i915_cache_level ccs_cache_level;
-   int src_sz, dst_sz, ccs_sz;
u32 src_offset, dst_offset;
u8 src_access, dst_access;
struct i915_request *rq;
+   int src_sz, dst_sz;
bool ccs_is_src;
int err;
  
@@ -770,7 +762,7 @@ intel_context_migrate_copy(struct intel_context *ce,

}
  
  	do {

-   int len;
+   int len, ccs_sz;


This could be moved into the reduced scope below.

Reviewed-by: Matthew Auld 

  
  		rq = i915_request_create(ce);

if (IS_ERR(rq)) {
@@ -797,7 +789,7 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
  
-		calculate_chunk_sz(i915, src_is_lmem, _sz, _sz,

+   calculate_chunk_sz(i915, src_is_lmem, _sz,
   bytes_to_cpy, ccs_bytes_to_cpy);
  
  		len = emit_pte(rq, _src, src_cache_level, src_is_lmem,

@@ -835,33 +827,29 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
  
+			ccs_sz = GET_CCS_BYTES(i915, len);

err = emit_pte(rq, _ccs, ccs_cache_level, false,
   ccs_is_src ? src_offset : dst_offset,
   ccs_sz);
+   if (err < 0)
+   goto out_rq;
+   if (err < ccs_sz) {
+   err = -EINVAL;
+   goto out_rq;
+   }
  
  			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);

if (err)
goto out_rq;
  
-			/*

-* Using max of src_sz and dst_sz, as we need to
-* pass the lmem size corresponding to the ccs
-* blocks we need to handle.
-*/
-   ccs_sz = max_t(int, ccs_is_src ? ccs_sz : src_sz,
-  ccs_is_src ? dst_sz : ccs_sz);
-
err = emit_copy_ccs(rq, dst_offset, dst_access,
-   src_offset, src_access, ccs_sz);
+   src_offset, src_access, len);
if (err)
goto out_rq;
  
  			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);

if (err)
goto out_rq;
-
-   /* Converting back to ccs bytes */
-   ccs_sz = GET_CCS_BYTES(rq->engine->i915, ccs_sz);
ccs_bytes_to_cpy -= ccs_sz;
}
  


[Intel-gfx] ✗ Fi.CI.IGT: failure for Initial GuC firmware release for DG2

2022-04-27 Thread Patchwork
== Series Details ==

Series: Initial GuC firmware release for DG2
URL   : https://patchwork.freedesktop.org/series/103230/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_11550_full -> Patchwork_103230v1_full


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_103230v1_full absolutely need 
to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_103230v1_full, please notify your bug team to allow 
them
  to document this new failure mode, which will reduce false positives in CI.

  

Participating hosts (10 -> 13)
--

  Additional (3): shard-rkl shard-dg1 shard-tglu 

Possible new issues
---

  Here are the unknown changes that may have been introduced in 
Patchwork_103230v1_full:

### IGT changes ###

 Possible regressions 

  * igt@gem_exec_parallel@engines@fds:
- shard-skl:  [PASS][1] -> [INCOMPLETE][2]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl2/igt@gem_exec_parallel@engi...@fds.html
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/shard-skl2/igt@gem_exec_parallel@engi...@fds.html

  
 Warnings 

  * igt@gem_eio@unwedge-stress:
- shard-tglb: [FAIL][3] ([i915#232]) -> [FAIL][4] +1 similar issue
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-tglb5/igt@gem_...@unwedge-stress.html
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/shard-tglb2/igt@gem_...@unwedge-stress.html

  
 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@kms_rotation_crc@primary-rotation-270:
- {shard-rkl}:NOTRUN -> [INCOMPLETE][5]
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/shard-rkl-5/igt@kms_rotation_...@primary-rotation-270.html

  * {igt@kms_sequence@queue-idle@edp-1-pipe-a}:
- shard-skl:  [PASS][6] -> [FAIL][7]
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-skl4/igt@kms_sequence@queue-i...@edp-1-pipe-a.html
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/shard-skl5/igt@kms_sequence@queue-i...@edp-1-pipe-a.html

  
Known issues


  Here are the changes found in Patchwork_103230v1_full that come from known 
issues:

### CI changes ###

 Issues hit 

  * boot:
- shard-apl:  ([PASS][8], [PASS][9], [PASS][10], [PASS][11], 
[PASS][12], [PASS][13], [PASS][14], [PASS][15], [PASS][16], [PASS][17], 
[PASS][18], [PASS][19], [PASS][20], [PASS][21], [PASS][22], [PASS][23], 
[PASS][24], [PASS][25], [PASS][26], [PASS][27], [PASS][28], [PASS][29], 
[PASS][30], [PASS][31], [PASS][32]) -> ([PASS][33], [PASS][34], [PASS][35], 
[PASS][36], [PASS][37], [PASS][38], [PASS][39], [PASS][40], [PASS][41], 
[PASS][42], [PASS][43], [PASS][44], [PASS][45], [PASS][46], [PASS][47], 
[PASS][48], [PASS][49], [PASS][50], [PASS][51], [PASS][52], [PASS][53], 
[PASS][54], [PASS][55], [PASS][56], [FAIL][57]) ([i915#4386])
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl3/boot.html
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl4/boot.html
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl4/boot.html
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl3/boot.html
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl4/boot.html
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl4/boot.html
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl6/boot.html
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl6/boot.html
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl6/boot.html
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl3/boot.html
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl7/boot.html
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl3/boot.html
   [20]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl2/boot.html
   [21]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl2/boot.html
   [22]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl7/boot.html
   [23]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl2/boot.html
   [24]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl7/boot.html
   [25]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl7/boot.html
   [26]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl8/boot.html
   [27]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl8/boot.html
   [28]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/shard-apl2/boot.html
   [29]: 

Re: [Intel-gfx] [PATCH v3] drm/i915: Don't show engine information in fdinfo with GuC submission

2022-04-27 Thread Dixit, Ashutosh
On Wed, 27 Apr 2022 10:16:03 -0700, Tvrtko Ursulin wrote:
>
>
> On 27/04/2022 16:43, Dixit, Ashutosh wrote:
> > On Wed, 27 Apr 2022 02:15:35 -0700, Tvrtko Ursulin wrote:
> >>
> >> On 15/04/2022 01:25, Ashutosh Dixit wrote:
> >>> At present i915 does not fetch busyness information from GuC, resulting in
> >>> incorrect busyness values in fdinfo. Because engine information is coupled
> >>> with busyness in fdinfo, skip showing client engine information in fdinfo
> >>> with GuC submission till fetching busyness is supported in the i915 GuC
> >>> submission backend.
> >>>
> >>> v2 (Daniele):
> >>> Make commit title and description more precise
> >>> Add FIXME with brief description at code change
> >>> s/intel_guc_submission_is_used/intel_uc_uses_guc_submission/
> >>>
> >>> v3 (Daniele):
> >>> Drop FIXME in comment
> >>>
> >>> Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/5564
> >>> Fixes: 055634e4b62f ("drm/i915: Expose client engine utilisation via 
> >>> fdinfo")
> >>> Cc: Daniele Ceraolo Spurio  >>> Cc: Umesh Nerlige Ramappa 
> >>> Signed-off-by: Ashutosh Dixit 
> >>> Reviewed-by: Daniele Ceraolo Spurio 
> >>> ---
> >>>drivers/gpu/drm/i915/i915_drm_client.c | 6 +-
> >>>1 file changed, 5 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_drm_client.c 
> >>> b/drivers/gpu/drm/i915/i915_drm_client.c
> >>> index e539f6b23060..475a6f824cad 100644
> >>> --- a/drivers/gpu/drm/i915/i915_drm_client.c
> >>> +++ b/drivers/gpu/drm/i915/i915_drm_client.c
> >>> @@ -145,7 +145,11 @@ void i915_drm_client_fdinfo(struct seq_file *m, 
> >>> struct file *f)
> >>>  PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
> >>>   seq_printf(m, "drm-client-id:\t%u\n", client->id);
> >>>-  if (GRAPHICS_VER(i915) < 8)
> >>> + /*
> >>> +  * Temporarily skip showing client engine information with GuC 
> >>> submission till
> >>> +  * fetching engine busyness is implemented in the GuC submission backend
> >>> +  */
> >>> + if (GRAPHICS_VER(i915) < 8 || 
> >>> intel_uc_uses_guc_submission(&i915->gt0.uc))
> >>>   return;
> >>>   for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
> >>
> >> Thanks for fixing this while I was away. It was a simple miss, nothing
> >> sinister. In terms of mention of "garbage" numbers being reported - were
> >> they actually garbage or simply always zero?
> >
> > Ah, you are referring to what I wrote in the bug. Actually I didn't check
> > the values myself but was told we were displaying "garbage" values (or at
> > least I interpreted it that way, and garbage meaning not just zero). But
> > looking now at IGT outputs from that time appears the values were just zero
> > :/
> >
> > https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11503/re-adlp-pub1/igt@drm_fdi...@all-busy-idle-check-all.html
> >
> > I think we could even have left zero values as is except that we'd have to
> > fix the IGT failure.
> >
> > Sorry for the confusion.
>
> Np. One could even say that there is little difference between skip and
> fail, given both need manual handling in cibuglog to be marked as known
> until GuC support gets added. At least if I still remember how it works
> and that unexplained skips still count as fails.

True, I just updated
https://gitlab.freedesktop.org/drm/intel/-/issues/5563. I think in
retrospect we should have just changed the IGT check to ignore 0 busyness
values :/



Re: [Intel-gfx] [PATCH 0/2] Initial GuC firmware release for DG2

2022-04-27 Thread Timo Aaltonen

john.c.harri...@intel.com kirjoitti 27.4.2022 klo 19.55:

From: John Harrison 

Add GuC firmware for DG2.

Note that an older version of this patch exists in the CI topic
branch. Hence this set includes a revert of that patch before applying
the new version. When merging, the revert would simply be dropped and
the corresponding patch in the topic branch would also be dropped.

Signed-off-by: John Harrison 


John Harrison (2):
   Revert "drm/i915/dg2: Define GuC firmware version for DG2"
   drm/i915/dg2: Define GuC firmware version for DG2

  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)



The firmware is not public yet, though? Shouldn't it have been sent 
upstream already? Same complaint applies to DMC.



--
t


[Intel-gfx] ✓ Fi.CI.BAT: success for Initial GuC firmware release for DG2

2022-04-27 Thread Patchwork
== Series Details ==

Series: Initial GuC firmware release for DG2
URL   : https://patchwork.freedesktop.org/series/103230/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11550 -> Patchwork_103230v1


Summary
---

  **SUCCESS**

  No regressions found.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/index.html

Participating hosts (43 -> 47)
--

  Additional (5): bat-dg1-6 bat-dg2-8 bat-adlm-1 fi-icl-u2 bat-adlp-4 
  Missing(1): fi-bsw-cyan 

Known issues


  Here are the changes found in Patchwork_103230v1 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@gem_exec_suspend@basic-s0@smem:
- bat-dg1-6:  NOTRUN -> [INCOMPLETE][1] ([i915#5827])
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/bat-dg1-6/igt@gem_exec_suspend@basic...@smem.html

  * igt@gem_huc_copy@huc-copy:
- fi-icl-u2:  NOTRUN -> [SKIP][2] ([i915#2190])
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/fi-icl-u2/igt@gem_huc_c...@huc-copy.html

  * igt@gem_lmem_swapping@basic:
- bat-adlp-4: NOTRUN -> [SKIP][3] ([i915#4613]) +3 similar issues
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/bat-adlp-4/igt@gem_lmem_swapp...@basic.html

  * igt@gem_lmem_swapping@parallel-random-engines:
- fi-icl-u2:  NOTRUN -> [SKIP][4] ([i915#4613]) +3 similar issues
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/fi-icl-u2/igt@gem_lmem_swapp...@parallel-random-engines.html

  * igt@gem_tiled_pread_basic:
- bat-adlp-4: NOTRUN -> [SKIP][5] ([i915#3282])
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/bat-adlp-4/igt@gem_tiled_pread_basic.html

  * igt@kms_busy@basic@flip:
- bat-adlp-4: NOTRUN -> [DMESG-WARN][6] ([i915#3576])
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/bat-adlp-4/igt@kms_busy@ba...@flip.html

  * igt@kms_chamelium@dp-crc-fast:
- bat-adlp-4: NOTRUN -> [SKIP][7] ([fdo#111827]) +8 similar issues
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/bat-adlp-4/igt@kms_chamel...@dp-crc-fast.html

  * igt@kms_chamelium@hdmi-hpd-fast:
- fi-icl-u2:  NOTRUN -> [SKIP][8] ([fdo#111827]) +8 similar issues
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/fi-icl-u2/igt@kms_chamel...@hdmi-hpd-fast.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy:
- fi-icl-u2:  NOTRUN -> [SKIP][9] ([fdo#109278]) +2 similar issues
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/fi-icl-u2/igt@kms_cursor_leg...@basic-busy-flip-before-cursor-legacy.html
- bat-adlp-4: NOTRUN -> [SKIP][10] ([i915#4103]) +1 similar issue
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/bat-adlp-4/igt@kms_cursor_leg...@basic-busy-flip-before-cursor-legacy.html

  * igt@kms_force_connector_basic@force-load-detect:
- fi-icl-u2:  NOTRUN -> [SKIP][11] ([fdo#109285])
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/fi-icl-u2/igt@kms_force_connector_ba...@force-load-detect.html

  * igt@kms_force_connector_basic@prune-stale-modes:
- bat-adlp-4: NOTRUN -> [SKIP][12] ([i915#4093]) +3 similar issues
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/bat-adlp-4/igt@kms_force_connector_ba...@prune-stale-modes.html

  * igt@kms_setmode@basic-clone-single-crtc:
- fi-icl-u2:  NOTRUN -> [SKIP][13] ([i915#3555])
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/fi-icl-u2/igt@kms_setm...@basic-clone-single-crtc.html
- bat-adlp-4: NOTRUN -> [SKIP][14] ([i915#3555])
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/bat-adlp-4/igt@kms_setm...@basic-clone-single-crtc.html

  * igt@prime_vgem@basic-fence-read:
- bat-adlp-4: NOTRUN -> [SKIP][15] ([i915#3291] / [i915#3708]) +2 
similar issues
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/bat-adlp-4/igt@prime_v...@basic-fence-read.html

  * igt@prime_vgem@basic-userptr:
- fi-icl-u2:  NOTRUN -> [SKIP][16] ([i915#3301])
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/fi-icl-u2/igt@prime_v...@basic-userptr.html
- bat-adlp-4: NOTRUN -> [SKIP][17] ([i915#3301] / [i915#3708])
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/bat-adlp-4/igt@prime_v...@basic-userptr.html

  
 Possible fixes 

  * igt@i915_selftest@live@gt_heartbeat:
- fi-cfl-guc: [DMESG-FAIL][18] ([i915#5334]) -> [PASS][19]
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/fi-cfl-guc/igt@i915_selftest@live@gt_heartbeat.html
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_103230v1/fi-cfl-guc/igt@i915_selftest@live@gt_heartbeat.html

  * 

Re: [Intel-gfx] [PATCH v2] drm/doc: add rfc section for small BAR uapi

2022-04-27 Thread Matthew Auld

On 27/04/2022 09:36, Tvrtko Ursulin wrote:


On 20/04/2022 18:13, Matthew Auld wrote:

Add an entry for the new uapi needed for small BAR on DG2+.

v2:
   - Some spelling fixes and other small tweaks. (Akeem & Thomas)
   - Rework error capture interactions, including no longer needing
 NEEDS_CPU_ACCESS for objects marked for capture. (Thomas)
   - Add probed_cpu_visible_size. (Lionel)

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Lionel Landwerlin 
Cc: Jon Bloomfield 
Cc: Daniel Vetter 
Cc: Jordan Justen 
Cc: Kenneth Graunke 
Cc: Akeem G Abodunrin 
Cc: mesa-...@lists.freedesktop.org
---
  Documentation/gpu/rfc/i915_small_bar.h   | 190 +++
  Documentation/gpu/rfc/i915_small_bar.rst |  58 +++
  Documentation/gpu/rfc/index.rst  |   4 +
  3 files changed, 252 insertions(+)
  create mode 100644 Documentation/gpu/rfc/i915_small_bar.h
  create mode 100644 Documentation/gpu/rfc/i915_small_bar.rst

diff --git a/Documentation/gpu/rfc/i915_small_bar.h 
b/Documentation/gpu/rfc/i915_small_bar.h

new file mode 100644
index 000000000000..7bfd0cf44d35
--- /dev/null
+++ b/Documentation/gpu/rfc/i915_small_bar.h
@@ -0,0 +1,190 @@
+/**
+ * struct __drm_i915_memory_region_info - Describes one region as 
known to the

+ * driver.
+ *
+ * Note this is using both struct drm_i915_query_item and struct 
drm_i915_query.
+ * For this new query we are adding the new query id 
DRM_I915_QUERY_MEMORY_REGIONS

+ * at &drm_i915_query_item.query_id.
+ */
+struct __drm_i915_memory_region_info {
+    /** @region: The class:instance pair encoding */
+    struct drm_i915_gem_memory_class_instance region;
+
+    /** @rsvd0: MBZ */
+    __u32 rsvd0;
+
+    /** @probed_size: Memory probed by the driver (-1 = unknown) */
+    __u64 probed_size;
+
+    /** @unallocated_size: Estimate of memory remaining (-1 = 
unknown) */

+    __u64 unallocated_size;
+
+    union {
+    /** @rsvd1: MBZ */
+    __u64 rsvd1[8];
+    struct {
+    /**
+ * @probed_cpu_visible_size: Memory probed by the driver
+ * that is CPU accessible. (-1 = unknown).
+ *
+ * This will always be <= @probed_size, and the
+ * remainder(if there is any) will not be CPU
+ * accessible.
+ */
+    __u64 probed_cpu_visible_size;


Would unallocated_cpu_visible_size be useful, to follow the total 
unallocated_size?


Makes sense. But I don't think unallocated_size has actually been 
properly wired up yet. It still just gives the same value as 
probed_size. IIRC for unallocated_size we still need a real 
user/usecase/umd, before wiring that up for real with the existing avail 
tracking. Once we have that we can also add unallocated_cpu_visible_size.




Btw, have we ever considered whether unallocated_size should require 
CAP_SYS_ADMIN/PERFMON or something?


Not sure. But just in case we do add it for real at some point, why the 
added restriction?





+    };
+    };
+};
+
+/**
+ * struct __drm_i915_gem_create_ext - Existing gem_create behaviour, 
with added

+ * extension support using struct i915_user_extension.
+ *
+ * Note that new buffer flags should be added here, at least for the 
stuff that
+ * is immutable. Previously we would have two ioctls, one to create 
the object
+ * with gem_create, and another to apply various parameters, however 
this
+ * creates some ambiguity for the params which are considered 
immutable. Also in

+ * general we're phasing out the various SET/GET ioctls.
+ */
+struct __drm_i915_gem_create_ext {
+    /**
+ * @size: Requested size for the object.
+ *
+ * The (page-aligned) allocated size for the object will be 
returned.

+ *
+ * Note that for some devices we might have further minimum
+ * page-size restrictions(larger than 4K), like for device 
local-memory.

+ * However in general the final size here should always reflect any
+ * rounding up, if for example using the 
I915_GEM_CREATE_EXT_MEMORY_REGIONS

+ * extension to place the object in device local-memory.
+ */
+    __u64 size;
+    /**
+ * @handle: Returned handle for the object.
+ *
+ * Object handles are nonzero.
+ */
+    __u32 handle;
+    /**
+ * @flags: Optional flags.
+ *
+ * Supported values:
+ *
+ * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the 
kernel that

+ * the object will need to be accessed via the CPU.
+ *
+ * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and
+ * only strictly required on platforms where only some of the device
+ * memory is directly visible or mappable through the CPU, like 
on DG2+.

+ *
+ * One of the placements MUST also be I915_MEMORY_CLASS_SYSTEM, to
+ * ensure we can always spill the allocation to system memory, if we
+ * can't place the object in the mappable part of
+ * I915_MEMORY_CLASS_DEVICE.
+ *
+ * Note that since the kernel only supports 

Re: [Intel-gfx] [PATCH v3] drm/i915: Don't show engine information in fdinfo with GuC submission

2022-04-27 Thread Tvrtko Ursulin



On 27/04/2022 16:43, Dixit, Ashutosh wrote:

On Wed, 27 Apr 2022 02:15:35 -0700, Tvrtko Ursulin wrote:


On 15/04/2022 01:25, Ashutosh Dixit wrote:

At present i915 does not fetch busyness information from GuC, resulting in
incorrect busyness values in fdinfo. Because engine information is coupled
with busyness in fdinfo, skip showing client engine information in fdinfo
with GuC submission till fetching busyness is supported in the i915 GuC
submission backend.

v2 (Daniele):
Make commit title and description more precise
Add FIXME with brief description at code change
s/intel_guc_submission_is_used/intel_uc_uses_guc_submission/

v3 (Daniele):
Drop FIXME in comment

Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/5564
Fixes: 055634e4b62f ("drm/i915: Expose client engine utilisation via fdinfo")
Cc: Daniele Ceraolo Spurio 
Signed-off-by: Ashutosh Dixit 
Reviewed-by: Daniele Ceraolo Spurio 
---
   drivers/gpu/drm/i915/i915_drm_client.c | 6 +-
   1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drm_client.c 
b/drivers/gpu/drm/i915/i915_drm_client.c
index e539f6b23060..475a6f824cad 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -145,7 +145,11 @@ void i915_drm_client_fdinfo(struct seq_file *m, struct 
file *f)
   PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
seq_printf(m, "drm-client-id:\t%u\n", client->id);
   -if (GRAPHICS_VER(i915) < 8)
+   /*
+* Temporarily skip showing client engine information with GuC 
submission till
+* fetching engine busyness is implemented in the GuC submission backend
+*/
+   if (GRAPHICS_VER(i915) < 8 || 
intel_uc_uses_guc_submission(&i915->gt0.uc))
return;
for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)


Thanks for fixing this while I was away. It was a simple miss, nothing
sinister. In terms of mention of "garbage" numbers being reported - were
they actually garbage or simply always zero?


Ah, you are referring to what I wrote in the bug. Actually I didn't check
the values myself but was told we were displaying "garbage" values (or at
least I interpreted it that way, and garbage meaning not just zero). But
looking now at IGT outputs from that time appears the values were just zero
:/

https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11503/re-adlp-pub1/igt@drm_fdi...@all-busy-idle-check-all.html

I think we could even have left zero values as is except that we'd have to
fix the IGT failure.

Sorry for the confusion.


Np. One could even say that there is little difference between skip and 
fail, given both need manual handling in cibuglog to be marked as known 
until GuC support gets added. At least if I still remember how it works 
and that unexplained skips still count as fails.


Regards,

Tvrtko


Re: [Intel-gfx] [PATCH v2 4/4] uapi/drm/i915: Document memory residency and Flat-CCS capability of obj

2022-04-27 Thread Matthew Auld

On 25/04/2022 17:24, Ramalingam C wrote:

Capture the impact of memory region preference list of an object, on
their memory residency and Flat-CCS capability of the objects.

v2:
   Fix the Flat-CCS capability of an obj with {lmem, smem} preference
   list [Thomas]

Signed-off-by: Ramalingam C 
cc: Matthew Auld 
cc: Thomas Hellstrom 
---
  include/uapi/drm/i915_drm.h | 18 ++
  1 file changed, 18 insertions(+)

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 35ca528803fd..ad191ed6547c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3393,6 +3393,24 @@ struct drm_i915_gem_create_ext {
   * At which point we get the object handle in &drm_i915_gem_create_ext.handle,
   * along with the final object size in &drm_i915_gem_create_ext.size, which
   * should account for any rounding up, if required.
+ *
+ * Objects with multiple memory regions in the preference list will be backed
+ * by one of the memory regions mentioned in the preference list. Though I915
+ * tries to honour the order of the memory regions in the preference list,
+ * based on the memory pressure of the regions, objects' backing region
+ * will be selected.
+ *
+ * Userspace has no means of knowing the backing region for such objects.


"Note that userspace has no means of knowing the current backing region 
for objects where @num_regions is larger than one. The kernel will only 
ensure that the priority order of the @regions array is honoured, either 
when initially placing the object, or when moving memory around due to 
memory pressure."



+ *
+ * On Flat-CCS capable HW, compression is supported for the objects residing
+ * in I915_MEMORY_CLASS_DEVICE. When such objects (compressed) has other
+ * memory class in preference list and migrated (by I915, due to memory
+ * constrain) to the non I915_MEMORY_CLASS_DEVICE region, then I915 needs to
+ * decompress the content. But I915 dont have the required information to


"doesn't", also prefer @regions etc instead of "preference list"

Anyway,
Reviewed-by: Matthew Auld 


+ * decompress the userspace compressed objects.
+ *
+ * So I915 supports Flat-CCS, only on the objects which can reside only on
+ * I915_MEMORY_CLASS_DEVICE regions.
   */
  struct drm_i915_gem_create_ext_memory_regions {
/** @base: Extension link. See struct i915_user_extension. */


[Intel-gfx] [PATCH 2/2] drm/i915/dg2: Define GuC firmware version for DG2

2022-04-27 Thread John . C . Harrison
From: John Harrison 

First release of GuC for DG2.

Signed-off-by: John Harrison 
CC: Tomasz Mistat 
CC: Ramalingam C 
CC: Daniele Ceraolo Spurio 
---
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index a876d39e6bcf..d078f884b5e3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -53,6 +53,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
  * firmware as TGL.
  */
 #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
+   fw_def(DG2,  0, guc_def(dg2,  70, 1, 2)) \
fw_def(ALDERLAKE_P,  0, guc_def(adlp, 70, 1, 1)) \
fw_def(ALDERLAKE_S,  0, guc_def(tgl,  70, 1, 1)) \
fw_def(DG1,  0, guc_def(dg1,  70, 1, 1)) \
-- 
2.25.1



[Intel-gfx] [PATCH 1/2] Revert "drm/i915/dg2: Define GuC firmware version for DG2"

2022-04-27 Thread John . C . Harrison
From: John Harrison 

This reverts commit 55c7f980e48e56861496526e02ed5bbfdac49ede.

The CI topic branch within drm-tip contains an old patch for
supporting GuC on DG2. That needs to be dropped and an updated patch
merged to drm-gt-next. Hence this patch reverts it so the new patch
can be sent in its correct form for CI testing.

Signed-off-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index cb5dd16421d0..a876d39e6bcf 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -53,7 +53,6 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
  * firmware as TGL.
  */
 #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
-   fw_def(DG2,  0, guc_def(dg2,  70, 1, 1)) \
fw_def(ALDERLAKE_P,  0, guc_def(adlp, 70, 1, 1)) \
fw_def(ALDERLAKE_S,  0, guc_def(tgl,  70, 1, 1)) \
fw_def(DG1,  0, guc_def(dg1,  70, 1, 1)) \
-- 
2.25.1



[Intel-gfx] [PATCH 0/2] Initial GuC firmware release for DG2

2022-04-27 Thread John . C . Harrison
From: John Harrison 

Add GuC firmware for DG2.

Note that an older version of this patch exists in the CI topic
branch. Hence this set includes a revert of that patch before applying
the new version. When merging, the revert would simply be dropped and
the corresponding patch in the topic branch would also be dropped.

Signed-off-by: John Harrison 


John Harrison (2):
  Revert "drm/i915/dg2: Define GuC firmware version for DG2"
  drm/i915/dg2: Define GuC firmware version for DG2

 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

-- 
2.25.1



Re: [Intel-gfx] [PATCH v2 3/4] drm/i915/gt: Document the eviction of the Flat-CCS objects

2022-04-27 Thread Matthew Auld

On 25/04/2022 17:24, Ramalingam C wrote:

Capture the eviction details for Flat-CCS capable, lmem objects.

v2:
   Fix the Flat-ccs capability of lmem obj with smem residency
   possibility [Thomas]

Signed-off-by: Ramalingam C 
cc: Thomas Hellstrom 
cc: Matthew Auld 
---
  drivers/gpu/drm/i915/gt/intel_migrate.c | 23 ++-
  1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c 
b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 463a6a14b5f9..930e0fd9795f 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -485,16 +485,21 @@ static bool wa_1209644611_applies(int ver, u32 size)
   * And CCS data can be copied in and out of CCS region through
   * XY_CTRL_SURF_COPY_BLT. CPU can't access the CCS data directly.
   *
- * When we exhaust the lmem, if the object's placements support smem, then we 
can
- * directly decompress the compressed lmem object into smem and start using it
- * from smem itself.
+ * I915 supports Flat-CCS on lmem only objects. When an objects has the smem in


"When an object has smem in"


+ * its preference list, on memory pressure, i915 needs to migarte the lmem


"migrate"


+ * content into smem. If the lmem object is Flat-CCS compressed by userspace,
+ * then i915 needs to decompress it. But I915 lack the required information
+ * for such decompression. Hence I915 supports Flat-CCS only on lmem only 
objects.
   *
- * But when we need to swapout the compressed lmem object into a smem region
- * though objects' placement doesn't support smem, then we copy the lmem 
content
- * as it is into smem region along with ccs data (using XY_CTRL_SURF_COPY_BLT).
- * When the object is referred, lmem content will be swaped in along with
- * restoration of the CCS data (using XY_CTRL_SURF_COPY_BLT) at corresponding
- * location.
+ * when we exhaust the lmem, Flat-CCS capable objects' lmem backing memory can


"When"

Otherwise,
Reviewed-by: Matthew Auld 


+ * be temporarily evicted to smem, along with the auxiliary CCS state, where
+ * it can be potentially swapped-out at a later point, if required.
+ * If userspace later touches the evicted pages, then we always move
+ * the backing memory back to lmem, which includes restoring the saved CCS 
state,
+ * and potentially performing any required swap-in.
+ *
+ * For the migration of the lmem objects with smem in placement list, such as
+ * {lmem, smem}, objects are treated as non Flat-CCS capable objects.
   */
  
  static inline u32 *i915_flush_dw(u32 *cmd, u32 flags)


Re: [Intel-gfx] [PATCH v3] drm/i915: Don't show engine information in fdinfo with GuC submission

2022-04-27 Thread Dixit, Ashutosh
On Wed, 27 Apr 2022 02:15:35 -0700, Tvrtko Ursulin wrote:
>
> On 15/04/2022 01:25, Ashutosh Dixit wrote:
> > At present i915 does not fetch busyness information from GuC, resulting in
> > incorrect busyness values in fdinfo. Because engine information is coupled
> > with busyness in fdinfo, skip showing client engine information in fdinfo
> > with GuC submission till fetching busyness is supported in the i915 GuC
> > submission backend.
> >
> > v2 (Daniele):
> >Make commit title and description more precise
> >Add FIXME with brief description at code change
> >s/intel_guc_submission_is_used/intel_uc_uses_guc_submission/
> >
> > v3 (Daniele):
> >Drop FIXME in comment
> >
> > Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/5564
> > Fixes: 055634e4b62f ("drm/i915: Expose client engine utilisation via 
> > fdinfo")
> > Cc: Daniele Ceraolo Spurio
> > Cc: Umesh Nerlige Ramappa
> > Signed-off-by: Ashutosh Dixit 
> > Reviewed-by: Daniele Ceraolo Spurio 
> > ---
> >   drivers/gpu/drm/i915/i915_drm_client.c | 6 +-
> >   1 file changed, 5 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drm_client.c 
> > b/drivers/gpu/drm/i915/i915_drm_client.c
> > index e539f6b23060..475a6f824cad 100644
> > --- a/drivers/gpu/drm/i915/i915_drm_client.c
> > +++ b/drivers/gpu/drm/i915/i915_drm_client.c
> > @@ -145,7 +145,11 @@ void i915_drm_client_fdinfo(struct seq_file *m, struct 
> > file *f)
> >PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
> > seq_printf(m, "drm-client-id:\t%u\n", client->id);
> >   - if (GRAPHICS_VER(i915) < 8)
> > +   /*
> > +* Temporarily skip showing client engine information with GuC 
> > submission till
> > +* fetching engine busyness is implemented in the GuC submission backend
> > +*/
> > +   if (GRAPHICS_VER(i915) < 8 || 
> > intel_uc_uses_guc_submission(&i915->gt0.uc))
> > return;
> > for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
>
> Thanks for fixing this while I was away. It was a simple miss, nothing
> sinister. In terms of mention of "garbage" numbers being reported - were
> they actually garbage or simply always zero?

Ah, you are referring to what I wrote in the bug. Actually I didn't check
the values myself but was told we were displaying "garbage" values (or at
least I interpreted it that way, and garbage meaning not just zero). But
looking now at IGT outputs from that time appears the values were just zero
:/

https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11503/re-adlp-pub1/igt@drm_fdi...@all-busy-idle-check-all.html

I think we could even have left zero values as is except that we'd have to
fix the IGT failure.

Sorry for the confusion.

Thanks.
--
Ashutosh


Re: [Intel-gfx] [RFC v2 1/2] drm/doc/rfc: VM_BIND feature design document

2022-04-27 Thread Niranjana Vishwanathapura

On Wed, Apr 20, 2022 at 03:45:25PM -0700, Niranjana Vishwanathapura wrote:

On Thu, Mar 31, 2022 at 10:28:48AM +0200, Daniel Vetter wrote:

Adding a pile of people who've expressed interest in vm_bind for their
drivers.

Also note to the intel folks: This is largely written with me having my
subsystem co-maintainer hat on, i.e. what I think is the right thing to do
here for the subsystem at large. There is substantial rework involved
here, but it's not any different from i915 adopting ttm or i915 adopting
drm/sched, and I do think this stuff needs to happen in one form or
another.

On Mon, Mar 07, 2022 at 12:31:45PM -0800, Niranjana Vishwanathapura wrote:

VM_BIND design document with description of intended use cases.

Signed-off-by: Niranjana Vishwanathapura 
---
Documentation/gpu/rfc/i915_vm_bind.rst | 210 +
Documentation/gpu/rfc/index.rst|   4 +
2 files changed, 214 insertions(+)
create mode 100644 Documentation/gpu/rfc/i915_vm_bind.rst

diff --git a/Documentation/gpu/rfc/i915_vm_bind.rst 
b/Documentation/gpu/rfc/i915_vm_bind.rst
new file mode 100644
index 000000000000..cdc6bb25b942
--- /dev/null
+++ b/Documentation/gpu/rfc/i915_vm_bind.rst
@@ -0,0 +1,210 @@
+==
+I915 VM_BIND feature design and use cases
+==
+
+VM_BIND feature
+
+DRM_I915_GEM_VM_BIND/UNBIND ioctls allows UMD to bind/unbind GEM buffer
+objects (BOs) or sections of a BOs at specified GPU virtual addresses on
+a specified address space (VM).
+
+These mappings (also referred to as persistent mappings) will be persistent
+across multiple GPU submissions (execbuff) issued by the UMD, without user
+having to provide a list of all required mappings during each submission
+(as required by older execbuff mode).
+
+VM_BIND ioctl defers binding the mappings until next execbuff submission
+where it will be required, or immediately if I915_GEM_VM_BIND_IMMEDIATE
+flag is set (useful if mapping is required for an active context).


So this is a screw-up I've done, and for upstream I think we need to fix
it: Implicit sync is bad, and it's also still a bad idea for vm_bind, and
I was wrong suggesting we should do this a few years back when we kicked
this off internally :-(

What I think we need is just always VM_BIND_IMMEDIATE mode, and then a few
things on top:
- in and out fences, like with execbuf, to allow userspace to sync with
execbuf as needed
- for compute-mode context this means userspace memory fences
- for legacy context this means a timeline syncobj in drm_syncobj

No sync_file or anything else like this at all. This means a bunch of
work, but also it'll have benefits because it means we should be able to
use exactly the same code paths and logic for both compute and for legacy
context, because drm_syncobj support future fence semantics.



Thanks Daniel,
Ok, will update



I had a long conversation with Daniel on some of the points discussed here.
Thanks to Daniel for clarifying many points here.

Here is the summary of the discussion.

1) A prep patch is needed to update documentation of some existing uapi and this
  new VM_BIND uapi can update/refer to that.
  I will include this prep patch in the next revision of this RFC series.
  Will also include the uapi header file in the rst file so that it gets 
rendered.

2) Will update documentation here with proper use of dma_resv_usage while adding
  fences to vm_bind objects. It is going to be, DMA_RESV_USAGE_BOOKKEEP by 
default
  if not overridden with execlist in execbuff path.

3) Add extension to execbuff ioctl to specify batch buffer as GPU virtual 
address
  instead of having to pass it as a BO handle in execlist. This will also make 
the
  execlist usage solely for implicit sync setting which is further discussed 
below.

4) Need to look into when Jason's dma-buf fence import/export ioctl 
support will
  land and whether it will be used both for vl and gl. Need to sync with Jason 
on this.
  Probably the better option here would be to not support execlist in execbuff 
path in
  vm_bind mode for initial vm_bind support (hoping Jason's dma-buf fence 
import/export
  ioctl will be enough). We can add support for execlist in execbuff for 
vm_bind mode
  later if required (say for gl).

5) There are a lot of things in execbuff path that don't apply in VM_BIND mode 
(like
  relocations, implicit sync etc). Separate them out by using function pointers 
wherever
  the functionality differs between current design and the newer VM_BIND design.

6) Separate out i915_vma active reference counting in the execbuff path and do not use it in
  VM_BIND mode. Instead use dma-resv fence checking for VM_BIND mode. This should be easier
  to get working with the current TTM backend (which initial VM_BIND support will use).
  And remove i915_vma active reference counting fully while supporting the TTM backend for igfx.

7) As we support compute mode contexts only with GuC 

Re: [Intel-gfx] [PATCH v2] drm/doc: add rfc section for small BAR uapi

2022-04-27 Thread Lionel Landwerlin

On 27/04/2022 18:18, Matthew Auld wrote:

On 27/04/2022 07:48, Lionel Landwerlin wrote:
One question though, how do we detect that this flag 
(I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) is accepted on a given 
kernel?
I assume older kernels are going to reject object creation if we use 
this flag?


From some offline discussion with Lionel, the plan here is to just do 
a dummy gem_create_ext to check if the kernel throws an error with the 
new flag or not.




I didn't plan to use __drm_i915_query_vma_info, but isn't it 
inconsistent to select the placement on the GEM object and then query 
whether it's mappable by address?
You made a comment stating this is racy, wouldn't querying on the GEM 
object prevent this?


Since mesa at this time doesn't currently have a use for this one, 
then I guess we should maybe just drop this part of the uapi, in this 
version at least, if no objections.



Just repeating what we discussed (maybe I missed some other discussion 
and that's why I was confused) :



The way I was planning to use this is to have 3 heaps in Vulkan :

    - heap0: local only, no cpu visible

    - heap1: system, cpu visible

    - heap2: local & cpu visible


With heap2 having the reported probed_cpu_visible_size size.

It is an error for the application to map from heap0 [1].


With that said, it means if we created a GEM BO without 
I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS, we'll never mmap it.


So why the query?

I guess it would be useful when we import a buffer from another 
application. But in that case, why not have the query on the BO?



-Lionel


[1] : 
https://www.khronos.org/registry/vulkan/specs/1.3-extensions/man/html/vkMapMemory.html 
(VUID-vkMapMemory-memory-00682)






Thanks,

-Lionel

On 27/04/2022 09:35, Lionel Landwerlin wrote:

Hi Matt,


The proposal looks good to me.

Looking forward to try it on drm-tip.


-Lionel

On 20/04/2022 20:13, Matthew Auld wrote:

Add an entry for the new uapi needed for small BAR on DG2+.

v2:
   - Some spelling fixes and other small tweaks. (Akeem & Thomas)
   - Rework error capture interactions, including no longer needing
 NEEDS_CPU_ACCESS for objects marked for capture. (Thomas)
   - Add probed_cpu_visible_size. (Lionel)

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Lionel Landwerlin 
Cc: Jon Bloomfield 
Cc: Daniel Vetter 
Cc: Jordan Justen 
Cc: Kenneth Graunke 
Cc: Akeem G Abodunrin 
Cc: mesa-...@lists.freedesktop.org
---
  Documentation/gpu/rfc/i915_small_bar.h   | 190 
+++

  Documentation/gpu/rfc/i915_small_bar.rst |  58 +++
  Documentation/gpu/rfc/index.rst  |   4 +
  3 files changed, 252 insertions(+)
  create mode 100644 Documentation/gpu/rfc/i915_small_bar.h
  create mode 100644 Documentation/gpu/rfc/i915_small_bar.rst

diff --git a/Documentation/gpu/rfc/i915_small_bar.h 
b/Documentation/gpu/rfc/i915_small_bar.h

new file mode 100644
index 000000000000..7bfd0cf44d35
--- /dev/null
+++ b/Documentation/gpu/rfc/i915_small_bar.h
@@ -0,0 +1,190 @@
+/**
+ * struct __drm_i915_memory_region_info - Describes one region as 
known to the

+ * driver.
+ *
+ * Note this is using both struct drm_i915_query_item and struct 
drm_i915_query.
+ * For this new query we are adding the new query id 
DRM_I915_QUERY_MEMORY_REGIONS

+ * at _i915_query_item.query_id.
+ */
+struct __drm_i915_memory_region_info {
+    /** @region: The class:instance pair encoding */
+    struct drm_i915_gem_memory_class_instance region;
+
+    /** @rsvd0: MBZ */
+    __u32 rsvd0;
+
+    /** @probed_size: Memory probed by the driver (-1 = unknown) */
+    __u64 probed_size;
+
+    /** @unallocated_size: Estimate of memory remaining (-1 = 
unknown) */

+    __u64 unallocated_size;
+
+    union {
+    /** @rsvd1: MBZ */
+    __u64 rsvd1[8];
+    struct {
+    /**
+ * @probed_cpu_visible_size: Memory probed by the driver
+ * that is CPU accessible. (-1 = unknown).
+ *
+ * This will be always be <= @probed_size, and the
+ * remainder(if there is any) will not be CPU
+ * accessible.
+ */
+    __u64 probed_cpu_visible_size;
+    };
+    };
+};
+
+/**
+ * struct __drm_i915_gem_create_ext - Existing gem_create 
behaviour, with added

+ * extension support using struct i915_user_extension.
+ *
+ * Note that new buffer flags should be added here, at least for 
the stuff that
+ * is immutable. Previously we would have two ioctls, one to 
create the object
+ * with gem_create, and another to apply various parameters, 
however this
+ * creates some ambiguity for the params which are considered 
immutable. Also in

+ * general we're phasing out the various SET/GET ioctls.
+ */
+struct __drm_i915_gem_create_ext {
+    /**
+ * @size: Requested size for the object.
+ *
+ * The (page-aligned) allocated size for the object will be 
returned.

+ *
+ * Note that for some devices we have might have further minimum
+ 

Re: [Intel-gfx] [PATCH v2] drm/doc: add rfc section for small BAR uapi

2022-04-27 Thread Matthew Auld

On 27/04/2022 07:48, Lionel Landwerlin wrote:
One question though, how do we detect that this flag 
(I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) is accepted on a given kernel?
I assume older kernels are going to reject object creation if we use 
this flag?


From some offline discussion with Lionel, the plan here is to just do a 
dummy gem_create_ext to check if the kernel throws an error with the new 
flag or not.




I didn't plan to use __drm_i915_query_vma_info, but isn't it 
inconsistent to select the placement on the GEM object and then query 
whether it's mappable by address?
You made a comment stating this is racy, wouldn't querying on the GEM 
object prevent this?


Since mesa at this time doesn't currently have a use for this one, then 
I guess we should maybe just drop this part of the uapi, in this version 
at least, if no objections.




Thanks,

-Lionel

On 27/04/2022 09:35, Lionel Landwerlin wrote:

Hi Matt,


The proposal looks good to me.

Looking forward to try it on drm-tip.


-Lionel

On 20/04/2022 20:13, Matthew Auld wrote:

Add an entry for the new uapi needed for small BAR on DG2+.

v2:
   - Some spelling fixes and other small tweaks. (Akeem & Thomas)
   - Rework error capture interactions, including no longer needing
 NEEDS_CPU_ACCESS for objects marked for capture. (Thomas)
   - Add probed_cpu_visible_size. (Lionel)

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Lionel Landwerlin 
Cc: Jon Bloomfield 
Cc: Daniel Vetter 
Cc: Jordan Justen 
Cc: Kenneth Graunke 
Cc: Akeem G Abodunrin 
Cc: mesa-...@lists.freedesktop.org
---
  Documentation/gpu/rfc/i915_small_bar.h   | 190 +++
  Documentation/gpu/rfc/i915_small_bar.rst |  58 +++
  Documentation/gpu/rfc/index.rst  |   4 +
  3 files changed, 252 insertions(+)
  create mode 100644 Documentation/gpu/rfc/i915_small_bar.h
  create mode 100644 Documentation/gpu/rfc/i915_small_bar.rst

diff --git a/Documentation/gpu/rfc/i915_small_bar.h 
b/Documentation/gpu/rfc/i915_small_bar.h

new file mode 100644
index 000000000000..7bfd0cf44d35
--- /dev/null
+++ b/Documentation/gpu/rfc/i915_small_bar.h
@@ -0,0 +1,190 @@
+/**
+ * struct __drm_i915_memory_region_info - Describes one region as 
known to the

+ * driver.
+ *
+ * Note this is using both struct drm_i915_query_item and struct 
drm_i915_query.
+ * For this new query we are adding the new query id 
DRM_I915_QUERY_MEMORY_REGIONS

+ * at _i915_query_item.query_id.
+ */
+struct __drm_i915_memory_region_info {
+    /** @region: The class:instance pair encoding */
+    struct drm_i915_gem_memory_class_instance region;
+
+    /** @rsvd0: MBZ */
+    __u32 rsvd0;
+
+    /** @probed_size: Memory probed by the driver (-1 = unknown) */
+    __u64 probed_size;
+
+    /** @unallocated_size: Estimate of memory remaining (-1 = 
unknown) */

+    __u64 unallocated_size;
+
+    union {
+    /** @rsvd1: MBZ */
+    __u64 rsvd1[8];
+    struct {
+    /**
+ * @probed_cpu_visible_size: Memory probed by the driver
+ * that is CPU accessible. (-1 = unknown).
+ *
+ * This will be always be <= @probed_size, and the
+ * remainder(if there is any) will not be CPU
+ * accessible.
+ */
+    __u64 probed_cpu_visible_size;
+    };
+    };
+};
+
+/**
+ * struct __drm_i915_gem_create_ext - Existing gem_create behaviour, 
with added

+ * extension support using struct i915_user_extension.
+ *
+ * Note that new buffer flags should be added here, at least for the 
stuff that
+ * is immutable. Previously we would have two ioctls, one to create 
the object
+ * with gem_create, and another to apply various parameters, however 
this
+ * creates some ambiguity for the params which are considered 
immutable. Also in

+ * general we're phasing out the various SET/GET ioctls.
+ */
+struct __drm_i915_gem_create_ext {
+    /**
+ * @size: Requested size for the object.
+ *
+ * The (page-aligned) allocated size for the object will be 
returned.

+ *
+ * Note that for some devices we have might have further minimum
+ * page-size restrictions(larger than 4K), like for device 
local-memory.

+ * However in general the final size here should always reflect any
+ * rounding up, if for example using the 
I915_GEM_CREATE_EXT_MEMORY_REGIONS

+ * extension to place the object in device local-memory.
+ */
+    __u64 size;
+    /**
+ * @handle: Returned handle for the object.
+ *
+ * Object handles are nonzero.
+ */
+    __u32 handle;
+    /**
+ * @flags: Optional flags.
+ *
+ * Supported values:
+ *
+ * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the 
kernel that

+ * the object will need to be accessed via the CPU.
+ *
+ * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and
+ * only strictly required on platforms where only some of the 
device
+ * memory is directly visible or 

Re: [Intel-gfx] [PATCH v2] drm/doc: add rfc section for small BAR uapi

2022-04-27 Thread Daniel Vetter
On Wed, Apr 27, 2022 at 08:55:07AM +0200, Christian König wrote:
> Well usually we increment the drm minor version when adding some new flags
> on amdgpu.
> 
> Additional to that just one comment from our experience with that: You don't
> just need one flag, but two. The first one is a hint which says "CPU access
> needed" and the second is a promise which says "CPU access never needed".
> 
> The background is that on a whole bunch of buffers you can 100% certain say
> that you will never ever need CPU access.
> 
> Then at least we have a whole bunch of buffers where we might need CPU
> access, but can't tell for sure.
> 
> And last we have stuff like transfer buffers you can be 100% sure that you
> need CPU access.
> 
> Separating it like this helped a lot with performance on small BAR systems.

So my assumption was that for transfer buffers you'd fill them with the
cpu first anyway, so no need for the extra flag.

I guess this is for transfer buffers for gpu -> cpu transfers, where it
would result in costly bo move and stalls and it's better to make sure
it's cpu accessible from the start? At least on the current gpus we have, where
there's no coherent interconnect, those buffers have to be in system
memory or your cpu access will be a disaster, so again they're naturally
cpu accessible.

What's the use-case for the "cpu access required" flag where "cpu access
before gpu access" isn't a good enough hint already to get the same perf
benefits?

Also for scanout my idea at least is that we just fail mmap when you
haven't set the flag and the scanout is pinned to unmappable, for two
reasons:
- 4k buffers are big, if we force them all into mappable things are
  non-pretty.
- You need mesa anyway to access tiled buffers, and mesa knows how to use
  a transfer buffer. That should work even when you do desktop switching
  and fastboot and stuff like that with the getfb2 ioctl should all work
  (and without getfb2 it's doomed to garbage anyway).

So only dumb kms buffers (which are linear) would ever get the
NEEDS_CPU_ACCESS flag, and only those we'd ever pin into cpu accessible
range for scanout. Is there a hole in that plan?

Cheers, Daniel

> 
> Regards,
> Christian.
> 
> Am 27.04.22 um 08:48 schrieb Lionel Landwerlin:
> > One question though, how do we detect that this flag
> > (I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) is accepted on a given
> > kernel?
> > I assume older kernels are going to reject object creation if we use
> > this flag?
> > 
> > I didn't plan to use __drm_i915_query_vma_info, but isn't it
> > inconsistent to select the placement on the GEM object and then query
> > whether it's mappable by address?
> > You made a comment stating this is racy, wouldn't querying on the GEM
> > object prevent this?
> > 
> > Thanks,
> > 
> > -Lionel
> > 
> > On 27/04/2022 09:35, Lionel Landwerlin wrote:
> > > Hi Matt,
> > > 
> > > 
> > > The proposal looks good to me.
> > > 
> > > Looking forward to try it on drm-tip.
> > > 
> > > 
> > > -Lionel
> > > 
> > > On 20/04/2022 20:13, Matthew Auld wrote:
> > > > Add an entry for the new uapi needed for small BAR on DG2+.
> > > > 
> > > > v2:
> > > >    - Some spelling fixes and other small tweaks. (Akeem & Thomas)
> > > >    - Rework error capture interactions, including no longer needing
> > > >  NEEDS_CPU_ACCESS for objects marked for capture. (Thomas)
> > > >    - Add probed_cpu_visible_size. (Lionel)
> > > > 
> > > > Signed-off-by: Matthew Auld 
> > > > Cc: Thomas Hellström 
> > > > Cc: Lionel Landwerlin 
> > > > Cc: Jon Bloomfield 
> > > > Cc: Daniel Vetter 
> > > > Cc: Jordan Justen 
> > > > Cc: Kenneth Graunke 
> > > > Cc: Akeem G Abodunrin 
> > > > Cc: mesa-...@lists.freedesktop.org
> > > > ---
> > > >   Documentation/gpu/rfc/i915_small_bar.h   | 190
> > > > +++
> > > >   Documentation/gpu/rfc/i915_small_bar.rst |  58 +++
> > > >   Documentation/gpu/rfc/index.rst  |   4 +
> > > >   3 files changed, 252 insertions(+)
> > > >   create mode 100644 Documentation/gpu/rfc/i915_small_bar.h
> > > >   create mode 100644 Documentation/gpu/rfc/i915_small_bar.rst
> > > > 
> > > > diff --git a/Documentation/gpu/rfc/i915_small_bar.h
> > > > b/Documentation/gpu/rfc/i915_small_bar.h
> > > > new file mode 100644
> > > > index ..7bfd0cf44d35
> > > > --- /dev/null
> > > > +++ b/Documentation/gpu/rfc/i915_small_bar.h
> > > > @@ -0,0 +1,190 @@
> > > > +/**
> > > > + * struct __drm_i915_memory_region_info - Describes one region
> > > > as known to the
> > > > + * driver.
> > > > + *
> > > > + * Note this is using both struct drm_i915_query_item and
> > > > struct drm_i915_query.
> > > > + * For this new query we are adding the new query id
> > > > DRM_I915_QUERY_MEMORY_REGIONS
> > > > + * at _i915_query_item.query_id.
> > > > + */
> > > > +struct __drm_i915_memory_region_info {
> > > > +    /** @region: The class:instance pair encoding */
> > > > +    struct drm_i915_gem_memory_class_instance region;
> > > > +
> > > > +    /** @rsvd0: 

Re: [Intel-gfx] [PATCH v2] drm/doc: add rfc section for small BAR uapi

2022-04-27 Thread Matthew Auld

On 27/04/2022 07:55, Christian König wrote:
Well usually we increment the drm minor version when adding some new 
flags on amdgpu.


Additional to that just one comment from our experience with that: You 
don't just need one flag, but two. The first one is a hint which says 
"CPU access needed" and the second is a promise which says "CPU access 
never needed".


The background is that on a whole bunch of buffers you can say with 100% 
certainty that you will never ever need CPU access.


Then at least we have a whole bunch of buffers where we might need CPU 
access, but can't tell for sure.


And last we have stuff like transfer buffers where you can be 100% sure that 
you need CPU access.


Separating it like this helped a lot with performance on small BAR systems.


Thanks for the comments. For the "CPU access never needed" flag, what 
extra stuff does that do on the kernel side vs not specifying any 
flag/hint? I assume it still prioritizes using the non-CPU visible 
portion first? What else does it do?




Regards,
Christian.

Am 27.04.22 um 08:48 schrieb Lionel Landwerlin:
One question though, how do we detect that this flag 
(I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) is accepted on a given 
kernel?
I assume older kernels are going to reject object creation if we use 
this flag?


I didn't plan to use __drm_i915_query_vma_info, but isn't it 
inconsistent to select the placement on the GEM object and then query 
whether it's mappable by address?
You made a comment stating this is racy, wouldn't querying on the GEM 
object prevent this?


Thanks,

-Lionel

On 27/04/2022 09:35, Lionel Landwerlin wrote:

Hi Matt,


The proposal looks good to me.

Looking forward to try it on drm-tip.


-Lionel

On 20/04/2022 20:13, Matthew Auld wrote:

Add an entry for the new uapi needed for small BAR on DG2+.

v2:
   - Some spelling fixes and other small tweaks. (Akeem & Thomas)
   - Rework error capture interactions, including no longer needing
 NEEDS_CPU_ACCESS for objects marked for capture. (Thomas)
   - Add probed_cpu_visible_size. (Lionel)

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Lionel Landwerlin 
Cc: Jon Bloomfield 
Cc: Daniel Vetter 
Cc: Jordan Justen 
Cc: Kenneth Graunke 
Cc: Akeem G Abodunrin 
Cc: mesa-...@lists.freedesktop.org
---
  Documentation/gpu/rfc/i915_small_bar.h   | 190 
+++

  Documentation/gpu/rfc/i915_small_bar.rst |  58 +++
  Documentation/gpu/rfc/index.rst  |   4 +
  3 files changed, 252 insertions(+)
  create mode 100644 Documentation/gpu/rfc/i915_small_bar.h
  create mode 100644 Documentation/gpu/rfc/i915_small_bar.rst

diff --git a/Documentation/gpu/rfc/i915_small_bar.h 
b/Documentation/gpu/rfc/i915_small_bar.h

new file mode 100644
index 000000000000..7bfd0cf44d35
--- /dev/null
+++ b/Documentation/gpu/rfc/i915_small_bar.h
@@ -0,0 +1,190 @@
+/**
+ * struct __drm_i915_memory_region_info - Describes one region as 
known to the

+ * driver.
+ *
+ * Note this is using both struct drm_i915_query_item and struct 
drm_i915_query.
+ * For this new query we are adding the new query id 
DRM_I915_QUERY_MEMORY_REGIONS

+ * at _i915_query_item.query_id.
+ */
+struct __drm_i915_memory_region_info {
+    /** @region: The class:instance pair encoding */
+    struct drm_i915_gem_memory_class_instance region;
+
+    /** @rsvd0: MBZ */
+    __u32 rsvd0;
+
+    /** @probed_size: Memory probed by the driver (-1 = unknown) */
+    __u64 probed_size;
+
+    /** @unallocated_size: Estimate of memory remaining (-1 = 
unknown) */

+    __u64 unallocated_size;
+
+    union {
+    /** @rsvd1: MBZ */
+    __u64 rsvd1[8];
+    struct {
+    /**
+ * @probed_cpu_visible_size: Memory probed by the driver
+ * that is CPU accessible. (-1 = unknown).
+ *
+ * This will be always be <= @probed_size, and the
+ * remainder(if there is any) will not be CPU
+ * accessible.
+ */
+    __u64 probed_cpu_visible_size;
+    };
+    };
+};
+
+/**
+ * struct __drm_i915_gem_create_ext - Existing gem_create 
behaviour, with added

+ * extension support using struct i915_user_extension.
+ *
+ * Note that new buffer flags should be added here, at least for 
the stuff that
+ * is immutable. Previously we would have two ioctls, one to create 
the object
+ * with gem_create, and another to apply various parameters, 
however this
+ * creates some ambiguity for the params which are considered 
immutable. Also in

+ * general we're phasing out the various SET/GET ioctls.
+ */
+struct __drm_i915_gem_create_ext {
+    /**
+ * @size: Requested size for the object.
+ *
+ * The (page-aligned) allocated size for the object will be 
returned.

+ *
+ * Note that for some devices we have might have further minimum
+ * page-size restrictions(larger than 4K), like for device 
local-memory.
+ * However in general the final size here should always reflect 
any
+ * rounding up, 

Re: [Intel-gfx] [PATCH] drm/i915/uc: use io memcpy functions for device memory copy

2022-04-27 Thread Siva Mullati
LGTM

Acked-by: Siva Mullati 

On 06/04/22 14:48, Vivekanandan, Balasubramani wrote:
> When copying RSA use io memcpy functions if the destination address
> contains a GPU local memory address. Considering even the source
> address can be on local memory, a bounce buffer is used to copy from io
> to io.
> The intention of this patch is to make i915 portable outside x86 mainly
> on ARM64.
>
> Signed-off-by: Balasubramani Vivekanandan 
> 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 23 +--
>  1 file changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> index bb864655c495..06d30670e15c 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> @@ -589,7 +589,7 @@ static int uc_fw_rsa_data_create(struct intel_uc_fw 
> *uc_fw)
>   struct intel_gt *gt = __uc_fw_to_gt(uc_fw);
>   struct i915_vma *vma;
>   size_t copied;
> - void *vaddr;
> + void *vaddr, *bounce;
>   int err;
>  
>   err = i915_inject_probe_error(gt->i915, -ENXIO);
> @@ -621,7 +621,26 @@ static int uc_fw_rsa_data_create(struct intel_uc_fw 
> *uc_fw)
>   goto unpin_out;
>   }
>  
> - copied = intel_uc_fw_copy_rsa(uc_fw, vaddr, vma->size);
> + if (i915_gem_object_is_lmem(vma->obj)) {
> + /* When vma is allocated from the GPU local memmory, it means
> +  * the destination address contains an io memory and we need to
> +  * use memcpy function for io memory for copying, to ensure
> +  * i915 portability outside x86. It is most likely the RSA will
> +  * also be on local memory and so the source of copy will also
> +  * be an io address. Since we cannot directly copy from io to
> +  * io, we use a bounce buffer to copy.
> +  */
> + copied = 0;
> + bounce = kmalloc(vma->size, GFP_KERNEL);
> + if (likely(bounce)) {
> + copied = intel_uc_fw_copy_rsa(uc_fw, bounce, vma->size);
> + memcpy_toio((void __iomem *)vaddr, bounce, copied);
> + kfree(bounce);
> + }
> + } else {
> + copied = intel_uc_fw_copy_rsa(uc_fw, vaddr, vma->size);
> + }
> +
>   i915_gem_object_unpin_map(vma->obj);
>  
>   if (copied < uc_fw->rsa_size) {


[Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915/bios: Rework BDB block handling and PNPID->panel_type matching (rev8)

2022-04-27 Thread Patchwork
== Series Details ==

Series: drm/i915/bios: Rework BDB block handling and PNPID->panel_type matching 
(rev8)
URL   : https://patchwork.freedesktop.org/series/102213/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_11550 -> Patchwork_102213v8


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_102213v8 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_102213v8, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_102213v8/index.html

Participating hosts (43 -> 43)
--

  Additional (3): bat-dg2-8 bat-dg1-6 bat-adlp-4 
  Missing(3): fi-bsw-cyan fi-cfl-8700k fi-ivb-3770 

Possible new issues
---

  Here are the unknown changes that may have been introduced in 
Patchwork_102213v8:

### IGT changes ###

 Possible regressions 

  * igt@runner@aborted:
- fi-bxt-dsi: NOTRUN -> [FAIL][1]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_102213v8/fi-bxt-dsi/igt@run...@aborted.html

  
 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_pm_rpm@module-reload:
- {bat-rpls-2}:   [DMESG-WARN][2] ([i915#4391]) -> [WARN][3]
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/bat-rpls-2/igt@i915_pm_...@module-reload.html
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_102213v8/bat-rpls-2/igt@i915_pm_...@module-reload.html

  
Known issues


  Here are the changes found in Patchwork_102213v8 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@gem_exec_suspend@basic-s0@smem:
- bat-dg1-6:  NOTRUN -> [INCOMPLETE][4] ([i915#5827])
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_102213v8/bat-dg1-6/igt@gem_exec_suspend@basic...@smem.html

  * igt@gem_lmem_swapping@basic:
- bat-adlp-4: NOTRUN -> [SKIP][5] ([i915#4613]) +3 similar issues
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_102213v8/bat-adlp-4/igt@gem_lmem_swapp...@basic.html

  * igt@gem_tiled_pread_basic:
- bat-adlp-4: NOTRUN -> [SKIP][6] ([i915#3282])
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_102213v8/bat-adlp-4/igt@gem_tiled_pread_basic.html

  * igt@kms_chamelium@dp-crc-fast:
- bat-adlp-4: NOTRUN -> [SKIP][7] ([fdo#111827]) +8 similar issues
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_102213v8/bat-adlp-4/igt@kms_chamel...@dp-crc-fast.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy:
- bat-adlp-4: NOTRUN -> [SKIP][8] ([i915#4103]) +1 similar issue
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_102213v8/bat-adlp-4/igt@kms_cursor_leg...@basic-busy-flip-before-cursor-legacy.html

  * igt@kms_flip@basic-flip-vs-modeset@a-edp1:
- bat-adlp-4: NOTRUN -> [DMESG-WARN][9] ([i915#3576])
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_102213v8/bat-adlp-4/igt@kms_flip@basic-flip-vs-mode...@a-edp1.html

  * igt@kms_force_connector_basic@prune-stale-modes:
- bat-adlp-4: NOTRUN -> [SKIP][10] ([i915#4093]) +3 similar issues
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_102213v8/bat-adlp-4/igt@kms_force_connector_ba...@prune-stale-modes.html

  * igt@kms_setmode@basic-clone-single-crtc:
- bat-adlp-4: NOTRUN -> [SKIP][11] ([i915#3555])
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_102213v8/bat-adlp-4/igt@kms_setm...@basic-clone-single-crtc.html

  * igt@prime_vgem@basic-fence-read:
- bat-adlp-4: NOTRUN -> [SKIP][12] ([i915#3291] / [i915#3708]) +2 
similar issues
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_102213v8/bat-adlp-4/igt@prime_v...@basic-fence-read.html

  * igt@prime_vgem@basic-userptr:
- bat-adlp-4: NOTRUN -> [SKIP][13] ([i915#3301] / [i915#3708])
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_102213v8/bat-adlp-4/igt@prime_v...@basic-userptr.html

  
 Possible fixes 

  * igt@i915_module_load@reload:
- {bat-rpls-2}:   [DMESG-WARN][14] ([i915#5537]) -> [PASS][15]
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/bat-rpls-2/igt@i915_module_l...@reload.html
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_102213v8/bat-rpls-2/igt@i915_module_l...@reload.html

  * igt@i915_selftest@live@gt_heartbeat:
- fi-cfl-guc: [DMESG-FAIL][16] ([i915#5334]) -> [PASS][17]
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11550/fi-cfl-guc/igt@i915_selftest@live@gt_heartbeat.html
   [17]: 

Re: [Intel-gfx] [RFC v2 1/2] drm/doc/rfc: VM_BIND feature design document

2022-04-27 Thread Daniel Vetter
On Wed, Apr 20, 2022 at 03:50:00PM -0700, Niranjana Vishwanathapura wrote:
> On Thu, Mar 31, 2022 at 01:37:08PM +0200, Daniel Vetter wrote:
> > One thing I've forgotten, since it's only hinted at here: If/when we
> > switch tlb flushing from the current dumb implementation
> > we now have in i915 in upstream to one with batching using dma_fence,
> > then I think that should be something which is done with a small
> > helper library of shared code too. The batching is somewhat tricky,
> > and you need to make sure you put the fence into the right
> > dma_resv_usage slot, and the trick with replace the vm fence with a
> > tlb flush fence is also a good reason to share the code so we only
> > have it one.
> > 
> > Christian's recent work also has some prep work for this already with
> > the fence replacing trick.
> 
> Sure, but this optimization is not required for initial vm_bind support
> to land right? We can look at it soon after that. Is that ok?
> I have made a reference to this TLB flush batching work in the rst file.

Yeah for now we can just rely on the tlb flush we do on vma unbinding,
which also means there's no need for any separate tlb flushing in vm_bind
related code. This was just a thought I dropped on here to make sure we
have a complete picture.
-Daniel


> 
> Niranjana
> 
> > -Daniel
> > 
> > On Thu, 31 Mar 2022 at 10:28, Daniel Vetter  wrote:
> > > Adding a pile of people who've expressed interest in vm_bind for their
> > > drivers.
> > > 
> > > Also note to the intel folks: This is largely written with me having my
> > > subsystem co-maintainer hat on, i.e. what I think is the right thing to do
> > > here for the subsystem at large. There is substantial rework involved
> > > here, but it's not any different from i915 adopting ttm or i915 adpoting
> > > drm/sched, and I do think this stuff needs to happen in one form or
> > > another.
> > > 
> > > On Mon, Mar 07, 2022 at 12:31:45PM -0800, Niranjana Vishwanathapura wrote:
> > > > VM_BIND design document with description of intended use cases.
> > > >
> > > > Signed-off-by: Niranjana Vishwanathapura 
> > > > 
> > > > ---
> > > >  Documentation/gpu/rfc/i915_vm_bind.rst | 210 +
> > > >  Documentation/gpu/rfc/index.rst|   4 +
> > > >  2 files changed, 214 insertions(+)
> > > >  create mode 100644 Documentation/gpu/rfc/i915_vm_bind.rst
> > > >
> > > > diff --git a/Documentation/gpu/rfc/i915_vm_bind.rst 
> > > > b/Documentation/gpu/rfc/i915_vm_bind.rst
> > > > new file mode 100644
> > > > index ..cdc6bb25b942
> > > > --- /dev/null
> > > > +++ b/Documentation/gpu/rfc/i915_vm_bind.rst
> > > > @@ -0,0 +1,210 @@
> > > > +==
> > > > +I915 VM_BIND feature design and use cases
> > > > +==
> > > > +
> > > > +VM_BIND feature
> > > > +
> > > > +DRM_I915_GEM_VM_BIND/UNBIND ioctls allows UMD to bind/unbind GEM buffer
> > > > +objects (BOs) or sections of a BOs at specified GPU virtual addresses 
> > > > on
> > > > +a specified address space (VM).
> > > > +
> > > > +These mappings (also referred to as persistent mappings) will be 
> > > > persistent
> > > > +across multiple GPU submissions (execbuff) issued by the UMD, without 
> > > > user
> > > > +having to provide a list of all required mappings during each 
> > > > submission
> > > > +(as required by older execbuff mode).
> > > > +
> > > > +VM_BIND ioctl deferes binding the mappings until next execbuff 
> > > > submission
> > > > +where it will be required, or immediately if I915_GEM_VM_BIND_IMMEDIATE
> > > > +flag is set (useful if mapping is required for an active context).
> > > 
> > > So this is a screw-up I've done, and for upstream I think we need to fix
> > > it: Implicit sync is bad, and it's also still a bad idea for vm_bind, and
> > > I was wrong suggesting we should do this a few years back when we kicked
> > > this off internally :-(
> > > 
> > > What I think we need is just always VM_BIND_IMMEDIATE mode, and then a few
> > > things on top:
> > > - in and out fences, like with execbuf, to allow userspace to sync with
> > >   execbuf as needed
> > > - for compute-mode context this means userspace memory fences
> > > - for legacy context this means a timeline syncobj in drm_syncobj
> > > 
> > > No sync_file or anything else like this at all. This means a bunch of
> > > work, but also it'll have benefits because it means we should be able to
> > > use exactly the same code paths and logic for both compute and for legacy
> > > context, because drm_syncobj support future fence semantics.
> > > 
> > > Also on the implementation side we still need to install dma_fence to the
> > > various dma_resv, and for this we need the new dma_resv_usage series from
> > > Christian König first. vm_bind fences can then use the USAGE_BOOKKEEPING
> > > flag to make sure they never result in an oversync issue with execbuf. I
> > > don't think trying to land vm_bind without that 

[Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915/bios: Rework BDB block handling and PNPID->panel_type matching (rev8)

2022-04-27 Thread Patchwork
== Series Details ==

Series: drm/i915/bios: Rework BDB block handling and PNPID->panel_type matching 
(rev8)
URL   : https://patchwork.freedesktop.org/series/102213/
State : warning

== Summary ==

Error: dim sparse failed
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.




[Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915/bios: Rework BDB block handling and PNPID->panel_type matching (rev8)

2022-04-27 Thread Patchwork
== Series Details ==

Series: drm/i915/bios: Rework BDB block handling and PNPID->panel_type matching 
(rev8)
URL   : https://patchwork.freedesktop.org/series/102213/
State : warning

== Summary ==

Error: dim checkpatch failed
e059b3ad1acf drm/i915/bios: Reorder panel DTD parsing
0daf03385bab drm/i915/bios: Generate LFP data table pointers if the VBT lacks 
them
-:45: CHECK:SPACING: spaces preferred around that '+' (ctx:VxV)
#45: FILE: drivers/gpu/drm/i915/display/intel_bios.c:319:
+   if (data[i] == 0xff && data[i+1] == 0xff)
 ^

-:133: CHECK:SPACING: spaces preferred around that '-' (ctx:VxV)
#133: FILE: drivers/gpu/drm/i915/display/intel_bios.c:407:
+   next_lfp_data_ptr(>ptr[i].fp_timing, 
>ptr[i-1].fp_timing, size);
   ^

-:134: CHECK:SPACING: spaces preferred around that '-' (ctx:VxV)
#134: FILE: drivers/gpu/drm/i915/display/intel_bios.c:408:
+   next_lfp_data_ptr(>ptr[i].dvo_timing, 
>ptr[i-1].dvo_timing, size);
^

-:135: CHECK:SPACING: spaces preferred around that '-' (ctx:VxV)
#135: FILE: drivers/gpu/drm/i915/display/intel_bios.c:409:
+   next_lfp_data_ptr(>ptr[i].panel_pnp_id, 
>ptr[i-1].panel_pnp_id, size);
  ^

total: 0 errors, 0 warnings, 4 checks, 161 lines checked
1474ba848018 drm/i915/bios: Get access to the tail end of the LFP data block
1a1e2dbf1e29 drm/i915/bios: Document the mess around the LFP data tables
69293b53dad4 drm/i915/bios: Assume panel_type==0 if the VBT has bogus data
22d9a51d0e9b drm/i915/bios: Split parse_driver_features() into two parts
04dc76c9e730 drm/i915/bios: Split VBT parsing to global vs. panel specific parts
acc3e7825cf2 drm/i915/bios: Don't parse some panel specific data multiple times
95cff1eb5e1d drm/i915/pps: Split PPS init+sanitize in two
f2d496895332 drm/i915/pps: Reinit PPS delays after VBT has been fully parsed
b01feedc5f58 drm/i915/bios: Do panel specific VBT parsing later
3e5e12e89c0a drm/i915/bios: Extract get_panel_type()
20e816239ebc drm/i915/bios: Refactor panel_type code
f89016f8d594 drm/i915/bios: Determine panel type via PNPID match
45ca4d81977d drm/i915/bios: Parse the seamless DRRS min refresh rate
2a04c7ddbb0a drm/i915: Respect VBT seamless DRRS min refresh rate
fcdc28cfed8d drm/edid: Extract drm_edid_decode_mfg_id()
4f6667441d9a drm/i915/bios: Dump PNPID and panel name




Re: [Intel-gfx] [PATCH 4/4] drm/i915/huc: Don't fail the probe if HuC init fails

2022-04-27 Thread Rodrigo Vivi
On Tue, Apr 26, 2022 at 05:26:17PM -0700, Daniele Ceraolo Spurio wrote:
> The previous patch introduced new failure cases in the HuC init flow
> that can be hit by simply changing the config, so we want to avoid
> failing the probe in those scenarios. HuC load failure is already
> considered a non-fatal error and we have a way to report to userspace
> if the HuC is not available via a dedicated getparam, so no changes
> in expectation there.
> The error message in the HuC init code has also been lowered to info to
> avoid throwing error message for an expected behavior.
> 
> Signed-off-by: Daniele Ceraolo Spurio 

Reviewed-by: Rodrigo Vivi 

> ---
>  drivers/gpu/drm/i915/gt/uc/intel_huc.c |  2 +-
>  drivers/gpu/drm/i915/gt/uc/intel_uc.c  | 11 ++-
>  2 files changed, 3 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> index 76a7df7f136fc..3d2e7a6d7c1b7 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> @@ -113,7 +113,7 @@ int intel_huc_init(struct intel_huc *huc)
>   return 0;
>  
>  out:
> - i915_probe_error(i915, "failed with %d\n", err);
> + drm_info(>drm, "HuC init failed with %d\n", err);
>   return err;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index 0dce94f896a8c..ecf149c5fdb02 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -323,17 +323,10 @@ static int __uc_init(struct intel_uc *uc)
>   if (ret)
>   return ret;
>  
> - if (intel_uc_uses_huc(uc)) {
> - ret = intel_huc_init(huc);
> - if (ret)
> - goto out_guc;
> - }
> + if (intel_uc_uses_huc(uc))
> + intel_huc_init(huc);
>  
>   return 0;
> -
> -out_guc:
> - intel_guc_fini(guc);
> - return ret;
>  }
>  
>  static void __uc_fini(struct intel_uc *uc)
> -- 
> 2.25.1
> 


Re: [Intel-gfx] [PATCH] drm/i915/dmc: Add MMIO range restrictions

2022-04-27 Thread Andi Shyti
[...]

> + if (!dmc_mmio_addr_sanity_check(dmc, mmioaddr, mmio_count, 
> dmc_header->header_ver, dmc_id))
> + drm_err(>drm, "DMC firmware has Wrong MMIO Addresses\n");
> + return 0;
> +

mh? :)


Re: [Intel-gfx] [PATCH 1/4] drm/i915/huc: check HW directly for HuC auth status

2022-04-27 Thread Rodrigo Vivi
On Tue, Apr 26, 2022 at 05:26:14PM -0700, Daniele Ceraolo Spurio wrote:
> The huc_is_authenticated function return is based on our SW tracking of
> the HuC auth status. However, around suspend/resume and reset this can
> go out of sync with the actual HW state, which is why we use
> huc_check_state() to look at the actual HW state. Instead of having this
> duality, just make huc_is_authenticated() return the HW state and use it
> everywhere we need to know if HuC is running.
> 
> Signed-off-by: Daniele Ceraolo Spurio 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_huc.c | 23 ++-
>  drivers/gpu/drm/i915/gt/uc/intel_huc.h |  5 -
>  2 files changed, 14 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> index 556829de9c172..773020e69589a 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> @@ -80,6 +80,18 @@ void intel_huc_fini(struct intel_huc *huc)
>   intel_uc_fw_fini(>fw);
>  }
>  
> +static bool huc_is_authenticated(struct intel_huc *huc)
> +{
> + struct intel_gt *gt = huc_to_gt(huc);
> + intel_wakeref_t wakeref;
> + u32 status = 0;
> +
> + with_intel_runtime_pm(gt->uncore->rpm, wakeref)
> + status = intel_uncore_read(gt->uncore, huc->status.reg);
> +
> + return (status & huc->status.mask) == huc->status.value;
> +}
> +
>  /**
>   * intel_huc_auth() - Authenticate HuC uCode
>   * @huc: intel_huc structure
> @@ -96,7 +108,7 @@ int intel_huc_auth(struct intel_huc *huc)
>   struct intel_guc *guc = >uc.guc;
>   int ret;
>  
> - GEM_BUG_ON(intel_huc_is_authenticated(huc));
> + GEM_BUG_ON(huc_is_authenticated(huc));
>  
>   if (!intel_uc_fw_is_loaded(>fw))
>   return -ENOEXEC;
> @@ -150,10 +162,6 @@ int intel_huc_auth(struct intel_huc *huc)
>   */
>  int intel_huc_check_status(struct intel_huc *huc)
>  {
> - struct intel_gt *gt = huc_to_gt(huc);
> - intel_wakeref_t wakeref;
> - u32 status = 0;
> -
>   switch (__intel_uc_fw_status(>fw)) {
>   case INTEL_UC_FIRMWARE_NOT_SUPPORTED:
>   return -ENODEV;
> @@ -167,10 +175,7 @@ int intel_huc_check_status(struct intel_huc *huc)
>   break;
>   }
>  
> - with_intel_runtime_pm(gt->uncore->rpm, wakeref)
> - status = intel_uncore_read(gt->uncore, huc->status.reg);
> -
> - return (status & huc->status.mask) == huc->status.value;

oh, these variable names look so generic, while it looks like the only usage
for them is for mask = loaded and value = loaded...

But anyway, this indirection with a generic name is better than duplicating
the definition per platform in here...

so:

Reviewed-by: Rodrigo Vivi 



> + return huc_is_authenticated(huc);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h 
> b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
> index 73ec670800f2b..77d813840d76c 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
> @@ -50,11 +50,6 @@ static inline bool intel_huc_is_used(struct intel_huc *huc)
>   return intel_uc_fw_is_available(>fw);
>  }
>  
> -static inline bool intel_huc_is_authenticated(struct intel_huc *huc)
> -{
> - return intel_uc_fw_is_running(>fw);
> -}
> -
>  void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p);
>  
>  #endif
> -- 
> 2.25.1
> 


Re: [Intel-gfx] [PATCH] drm/i915: Support Async Flip on Linear buffers

2022-04-27 Thread Ville Syrjälä
On Wed, Apr 27, 2022 at 02:58:09AM +, Murthy, Arun R wrote:
> > On Tue, Apr 26, 2022 at 05:34:07PM +0530, Arun R Murthy wrote:
> > > Starting from Gen12 Async Flip is supported on linear buffers.
> > 
> > It's supported earlier than that. But IIRC there was some kind of GTT
> > alignment vs. async flip vs. FBC restriction that we weren't handling.
> > 
> Should I enable it for earlier Gen also, or is it fine to keep it with 
> starting Gen 12.
> The only restriction that I see in Bspec is that during async flip changes
> to stride, pixel format, compression, FBC etc is not allowed and I see
> this is already taken care of. Am I missing anything?

There is that GTT alignment restriction that should be mentioned
somewhere. Can't quite remember where it was, maybe in PLANE_SURF.

But I guess the bigger question is what is the actual use case for
this?

-- 
Ville Syrjälä
Intel


Re: [Intel-gfx] [PATCH v12] drm/amdgpu: add drm buddy support to amdgpu

2022-04-27 Thread Mike Lothian
On Tue, 26 Apr 2022 at 17:36, Christian König  wrote:
>
> Hi Mike,
>
> sounds like somehow stitching together the SG table for PRIME doesn't
> work any more with this patch.
>
> Can you try with P2P DMA disabled?

-CONFIG_PCI_P2PDMA=y
+# CONFIG_PCI_P2PDMA is not set

If that's what you mean, then there's no difference. I'll upload
my dmesg to the gitlab issue.

>
> Apart from that can you take a look Arun?
>
> Thanks,
> Christian.


Re: [Intel-gfx] [PATCH 7/9] drm/i915/gt: Fix memory leaks in per-gt sysfs

2022-04-27 Thread Andi Shyti
Hi Ashutosh,

> > > -static struct kobj_type kobj_gt_type = {
> > > - .release = kobj_gt_release,
> > > +static struct kobj_type kobj_gtn_type = {
> >
> > what does it mean GTN? Or is it GTn? Please use just GT, gtn is
> > confusing.
> >
> > Same for all the rest of the gtn's you have used below.
> 
> I didn't like gtn either. But a sysfs_gt kobject is already part of 'struct
> drm_i915_private' so I thought I'll put sysfs_gtn (for gt/gtN) in 'struct
> intel_gt'. Otherwise browsing the code etc. gets confusing.

we can even use 'gt_n' if the 'n' is really necessary.

Andi


[Intel-gfx] [PULL] drm-intel-gt-next

2022-04-27 Thread Tvrtko Ursulin


Hi Dave, Daniel,

Here goes the first drm-intel-gt-next PR towards 5.19.

A lot of stuff here across the board in terms of new features, new platform
support and bug fixes. For bug fixes the most interesting are:

 * a fix for out of bounds kernel access in mmap ops due to incorrect object
   bounds checking;
 * a fix for a GPU hang triggered by usage of multiple media engines on
   Tigerlake and above;
 * correcting the sequence for doing engine resets on Tigerlake;
 * fix for split frame workloads in GuC mode;
 * fix for mmap of prime imported local memory objects;
 * fix for a VM refcounting bug;
 * fix fbdev setup to avoid potential out of bounds access.

Another two nice user visible improvements are one in frame buffer pinning logic
which enables Weston to reach 60fps on 8K displays and resurrection of the
ability to monitor per client GPU usage using intel_gpu_top (consuming data
exported via proc fdinfo). The latter contains a common DRM format specification
and will hopefully gain more widespread adoption in the future.

GuC backend has finally achieved feature parity in terms of being able to
provide error capture state after GPU hangs. The captured data is compatible
with existing error parsing tools (IGT and Mesa) and also contains a new GuC
log segment which does not currently have an open source parser.

Also in the GuC area there was a bunch of refactoring to prepare for new
firmware API and also lay the ground work for supporting new platforms like DG2.

For the latter a bunch of GuC based workarounds were added, together with support
for small PCI BAR setups, eviction of compressed objects and general platform
bring-up like code refactoring to deal with steered register writes.

DG2 also needs two new bits of UAPI. One is the hwconfig query which is a new
way of obtaining a binary table describing the GPU configuration directly from
the firmware blob. The second is a new query exposing the geometry subslices,
needed because the geometry and compute slice configuration is no longer
uniform.

Also on the UAPI front there is sysfs support for multi-tile platforms which
exports the existing controls such as frequency and similar but duplicated for
each tile.

Graphics System Controller (GSC) support was added for discrete platforms as
well, which is required both for firmware management and protected media path.
Access to this is via the existing MEI character device.

Work has also started refactoring the codebase to allow driver builds outside
x86.

Finally, there were two drm-next backmerges to unblock feature development.

Regards,

Tvrtko

drm-intel-gt-next-2022-04-27:
UAPI Changes:

- GuC hwconfig support and query (John Harrison, Rodrigo Vivi, Tvrtko Ursulin)
- Sysfs support for multi-tile devices (Andi Shyti, Sujaritha Sundaresan)
- Per client GPU utilisation via fdinfo (Tvrtko Ursulin, Ashutosh Dixit)
- Add DRM_I915_QUERY_GEOMETRY_SUBSLICES (Matt Atwood)

Cross-subsystem Changes:

- Add GSC as a MEI auxiliary device (Tomas Winkler, Alexander Usyskin)

Core Changes:

- Document fdinfo format specification (Tvrtko Ursulin)

Driver Changes:

- Fix prime_mmap to work when using LMEM (Gwan-gyeong Mun)
- Fix vm open count and remove vma refcount (Thomas Hellström)
- Fixup setting screen_size (Matthew Auld)
- Opportunistically apply ALLOC_CONTIGIOUS (Matthew Auld)
- Limit where we apply TTM_PL_FLAG_CONTIGUOUS (Matthew Auld)
- Drop aux table invalidation on FlatCCS platforms (Matt Roper)
- Add missing boundary check in vm_access (Mastan Katragadda)
- Update topology dumps for Xe_HP (Matt Roper)
- Add support for steered register writes (Matt Roper)
- Add steering info to GuC register save/restore list (Daniele Ceraolo Spurio)
- Small PCI BAR enabling (Matthew Auld, Akeem G Abodunrin, CQ Tang)
- Add preemption changes for Wa_14015141709 (Akeem G Abodunrin)
- Add logical mapping for video decode engines (Matthew Brost)
- Don't evict unmappable VMAs when pinning with PIN_MAPPABLE (v2) (Vivek 
Kasireddy)
- GuC error capture support (Alan Previn, Daniele Ceraolo Spurio)
- avoid concurrent writes to aux_inv (Fei Yang)
- Add Wa_22014226127 (José Roberto de Souza)
- Sunset igpu legacy mmap support based on GRAPHICS_VER_FULL (Matt Roper)
- Evict and restore of compressed objects (Ramalingam C)
- Update to GuC version 70.1.1 (John Harrison)
- Add Wa_22011802037 force cs halt (Tilak Tangudu)
- Enable Wa_22011802037 for gen12 GuC based platforms (Umesh Nerlige Ramappa)
- GuC based workarounds for DG2 (Vinay Belgaumkar, John Harrison, Matthew 
Brost, José Roberto de Souza)
- consider min_page_size when migrating (Matthew Auld)

- Prep work for next GuC firmware release (John Harrison)
- Support platforms with CCS engines but no RCS (Matt Roper, Stuart Summers)
- Don't overallocate subslice storage (Matt Roper)
- Reduce stack usage in debugfs due to SSEU (John Harrison)
- Report steering details in debugfs (Matt Roper)
- Refactor some x86-ism out to prepare for non-x86 builds (Michael Cheng)
- 

Re: [Intel-gfx] [PATCH v2 4/5] drm/i915: ttm backend dont provide mmap_offset for kernel buffers

2022-04-27 Thread Thomas Hellström
Sorry for the late reply,

On Thu, 2022-04-14 at 17:13 +0100, Robert Beckett wrote:
> 
> 
> On 14/04/2022 15:05, Thomas Hellström wrote:
> > On Tue, 2022-04-12 at 15:18 +, Robert Beckett wrote:
> > > stolen/kernel buffers should not be mmapable by userland.
> > > do not provide callbacks to facilitate this for these buffers.
> > > 
> > > Signed-off-by: Robert Beckett 
> > > ---
> > >   drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 32
> > > +--
> > > --
> > >   1 file changed, 27 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > index a878910a563c..b20f81836c54 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > @@ -1092,8 +1092,8 @@ static void i915_ttm_unmap_virtual(struct
> > > drm_i915_gem_object *obj)
> > >  ttm_bo_unmap_virtual(i915_gem_to_ttm(obj));
> > >   }
> > >   
> > > -static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops
> > > = {
> > > -   .name = "i915_gem_object_ttm",
> > > +static const struct drm_i915_gem_object_ops
> > > i915_gem_ttm_user_obj_ops = {
> > > +   .name = "i915_gem_object_ttm_user",
> > >  .flags = I915_GEM_OBJECT_IS_SHRINKABLE |
> > >   I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST,
> > >   
> > > @@ -,6 +,21 @@ static const struct
> > > drm_i915_gem_object_ops
> > > i915_gem_ttm_obj_ops = {
> > >  .mmap_ops = _ops_ttm,
> > >   };
> > >   
> > > +static const struct drm_i915_gem_object_ops
> > > i915_gem_ttm_kern_obj_ops = {
> > > +   .name = "i915_gem_object_ttm_kern",
> > > +   .flags = I915_GEM_OBJECT_IS_SHRINKABLE |
> > > +    I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST,
> > > +
> > > +   .get_pages = i915_ttm_get_pages,
> > > +   .put_pages = i915_ttm_put_pages,
> > > +   .truncate = i915_ttm_truncate,
> > > +   .shrink = i915_ttm_shrink,
> > > +
> > > +   .adjust_lru = i915_ttm_adjust_lru,
> > > +   .delayed_free = i915_ttm_delayed_free,
> > > +   .migrate = i915_ttm_migrate,
> > > +};
> > 
> > Do we really need two different ops here?
> > 
> > Since if we don't have mmap ops, basically that tells GEM it should
> > do
> > the mmapping rather than TTM.
> > 
> > That might of course come in handy for the shmem backend, but I
> > don't
> > fully follow why we need this for stolen.
> 
> the main rationale for doing this was to avoid 
> drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c:can_mmap()
> presuming 
> that is can use I915_MMAP_TYPE_FIXED
> 
> As the original backend also did not have mmap_offset handlers for 
> stolen, this seemed like a reasonable design.
> 
> If desired, we could add a special case for the testing logic, but
> those 
> special cases have tendency to multiply.
> 
> > 
> > Also for the framebuffer handed over from BIOS to fbdev, Does that
> > need
> > mmapping and if so, how do we handle that?
> > 
> 
> I'm not sure of the usecase there. Do you know of any igt test that 
> tests this? I can investigate further if you do not.

It would be if the fbdev driver at startup inherits some image that the
BIOS has preloaded into stolen, and then a client tries to write into
it. I'm not sure that this is a real use case though, or whether, in that
case, it takes a separate path for user-space mappings.

/Thomas



> 
> > 
> > /Thomas
> > 
> > 
> > 
> > 
> > > +
> > >   void i915_ttm_bo_destroy(struct ttm_buffer_object *bo)
> > >   {
> > >  struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> > > @@ -1165,10 +1180,19 @@ int __i915_gem_ttm_object_init(struct
> > > intel_memory_region *mem,
> > >  .no_wait_gpu = false,
> > >  };
> > >  enum ttm_bo_type bo_type;
> > > +   const struct drm_i915_gem_object_ops *ops;
> > >  int ret;
> > >   
> > >  drm_gem_private_object_init(>drm, >base,
> > > size);
> > > -   i915_gem_object_init(obj, _gem_ttm_obj_ops,
> > > _class,
> > > flags);
> > > +
> > > +   if (flags & I915_BO_ALLOC_USER &&
> > > intel_region_to_ttm_type(mem) != I915_PL_STOLEN) {
> > > +   bo_type = ttm_bo_type_device;
> > > +   ops = _gem_ttm_user_obj_ops;
> > > +   } else {
> > > +   bo_type = ttm_bo_type_kernel;
> > > +   ops = _gem_ttm_kern_obj_ops;
> > > +   }
> > > +   i915_gem_object_init(obj, ops, _class, flags);
> > >   
> > >  obj->bo_offset = offset;
> > >   
> > > @@ -1178,8 +1202,6 @@ int __i915_gem_ttm_object_init(struct
> > > intel_memory_region *mem,
> > >   
> > >  INIT_RADIX_TREE(>ttm.get_io_page.radix, GFP_KERNEL
> > > |
> > > __GFP_NOWARN);
> > >  mutex_init(>ttm.get_io_page.lock);
> > > -   bo_type = (obj->flags & I915_BO_ALLOC_USER) ?
> > > ttm_bo_type_device :
> > > -   ttm_bo_type_kernel;
> > >   
> > >  obj->base.vma_node.driver_private =
> > > i915_gem_to_ttm(obj);
> > > 

Re: [Intel-gfx] [PATCH v3] drm/i915: Don't show engine information in fdinfo with GuC submission

2022-04-27 Thread Tvrtko Ursulin



On 15/04/2022 01:25, Ashutosh Dixit wrote:

At present i915 does not fetch busyness information from GuC, resulting in
incorrect busyness values in fdinfo. Because engine information is coupled
with busyness in fdinfo, skip showing client engine information in fdinfo
with GuC submission till fetching busyness is supported in the i915 GuC
submission backend.

v2 (Daniele):
   Make commit title and description more precise
   Add FIXME with brief description at code change
   s/intel_guc_submission_is_used/intel_uc_uses_guc_submission/

v3 (Daniele):
   Drop FIXME in comment

Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/5564
Fixes: 055634e4b62f ("drm/i915: Expose client engine utilisation via fdinfo")
Cc: Daniele Ceraolo Spurio 
Signed-off-by: Ashutosh Dixit 
Reviewed-by: Daniele Ceraolo Spurio 
---
  drivers/gpu/drm/i915/i915_drm_client.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drm_client.c 
b/drivers/gpu/drm/i915/i915_drm_client.c
index e539f6b23060..475a6f824cad 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -145,7 +145,11 @@ void i915_drm_client_fdinfo(struct seq_file *m, struct 
file *f)
   PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
seq_printf(m, "drm-client-id:\t%u\n", client->id);
  
-	if (GRAPHICS_VER(i915) < 8)

+   /*
+* Temporarily skip showing client engine information with GuC 
submission till
+* fetching engine busyness is implemented in the GuC submission backend
+*/
+   if (GRAPHICS_VER(i915) < 8 || 
intel_uc_uses_guc_submission(>gt0.uc))
return;
  
  	for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)


Thanks for fixing this while I was away. It was a simple miss, nothing 
sinister. In terms of mention of "garbage" numbers being reported - were 
they actually garbage or simply always zero?


Regards,

Tvrtko


Re: [Intel-gfx] [PATCH v2] drm/doc: add rfc section for small BAR uapi

2022-04-27 Thread Tvrtko Ursulin



On 20/04/2022 18:13, Matthew Auld wrote:

Add an entry for the new uapi needed for small BAR on DG2+.

v2:
   - Some spelling fixes and other small tweaks. (Akeem & Thomas)
   - Rework error capture interactions, including no longer needing
 NEEDS_CPU_ACCESS for objects marked for capture. (Thomas)
   - Add probed_cpu_visible_size. (Lionel)

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Lionel Landwerlin 
Cc: Jon Bloomfield 
Cc: Daniel Vetter 
Cc: Jordan Justen 
Cc: Kenneth Graunke 
Cc: Akeem G Abodunrin 
Cc: mesa-...@lists.freedesktop.org
---
  Documentation/gpu/rfc/i915_small_bar.h   | 190 +++
  Documentation/gpu/rfc/i915_small_bar.rst |  58 +++
  Documentation/gpu/rfc/index.rst  |   4 +
  3 files changed, 252 insertions(+)
  create mode 100644 Documentation/gpu/rfc/i915_small_bar.h
  create mode 100644 Documentation/gpu/rfc/i915_small_bar.rst

diff --git a/Documentation/gpu/rfc/i915_small_bar.h 
b/Documentation/gpu/rfc/i915_small_bar.h
new file mode 100644
index ..7bfd0cf44d35
--- /dev/null
+++ b/Documentation/gpu/rfc/i915_small_bar.h
@@ -0,0 +1,190 @@
+/**
+ * struct __drm_i915_memory_region_info - Describes one region as known to the
+ * driver.
+ *
+ * Note this is using both struct drm_i915_query_item and struct 
drm_i915_query.
+ * For this new query we are adding the new query id 
DRM_I915_QUERY_MEMORY_REGIONS
+ * at _i915_query_item.query_id.
+ */
+struct __drm_i915_memory_region_info {
+   /** @region: The class:instance pair encoding */
+   struct drm_i915_gem_memory_class_instance region;
+
+   /** @rsvd0: MBZ */
+   __u32 rsvd0;
+
+   /** @probed_size: Memory probed by the driver (-1 = unknown) */
+   __u64 probed_size;
+
+   /** @unallocated_size: Estimate of memory remaining (-1 = unknown) */
+   __u64 unallocated_size;
+
+   union {
+   /** @rsvd1: MBZ */
+   __u64 rsvd1[8];
+   struct {
+   /**
+* @probed_cpu_visible_size: Memory probed by the driver
+* that is CPU accessible. (-1 = unknown).
+*
+* This will be always be <= @probed_size, and the
+* remainder(if there is any) will not be CPU
+* accessible.
+*/
+   __u64 probed_cpu_visible_size;


Would unallocated_cpu_visible_size be useful, to follow the total 
unallocated_size?


Btw, have we ever considered whether unallocated_size should require 
CAP_SYS_ADMIN/PERFMON or something?



+   };
+   };
+};
+
+/**
+ * struct __drm_i915_gem_create_ext - Existing gem_create behaviour, with added
+ * extension support using struct i915_user_extension.
+ *
+ * Note that new buffer flags should be added here, at least for the stuff that
+ * is immutable. Previously we would have two ioctls, one to create the object
+ * with gem_create, and another to apply various parameters, however this
+ * creates some ambiguity for the params which are considered immutable. Also 
in
+ * general we're phasing out the various SET/GET ioctls.
+ */
+struct __drm_i915_gem_create_ext {
+   /**
+* @size: Requested size for the object.
+*
+* The (page-aligned) allocated size for the object will be returned.
+*
+* Note that for some devices we have might have further minimum
+* page-size restrictions(larger than 4K), like for device local-memory.
+* However in general the final size here should always reflect any
+* rounding up, if for example using the 
I915_GEM_CREATE_EXT_MEMORY_REGIONS
+* extension to place the object in device local-memory.
+*/
+   __u64 size;
+   /**
+* @handle: Returned handle for the object.
+*
+* Object handles are nonzero.
+*/
+   __u32 handle;
+   /**
+* @flags: Optional flags.
+*
+* Supported values:
+*
+* I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that
+* the object will need to be accessed via the CPU.
+*
+* Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and
+* only strictly required on platforms where only some of the device
+* memory is directly visible or mappable through the CPU, like on DG2+.
+*
+* One of the placements MUST also be I915_MEMORY_CLASS_SYSTEM, to
+* ensure we can always spill the allocation to system memory, if we
+* can't place the object in the mappable part of
+* I915_MEMORY_CLASS_DEVICE.
+*
+* Note that since the kernel only supports flat-CCS on objects that can
+* *only* be placed in I915_MEMORY_CLASS_DEVICE, we therefore don't
+* support I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS together with
+* flat-CCS.
+*
+* Without this 

Re: [Intel-gfx] [PATCH] drm/i915/pmu: Use existing uncore helper to read gpm_timestamp

2022-04-27 Thread Tvrtko Ursulin



On 27/04/2022 01:35, Umesh Nerlige Ramappa wrote:

Use intel_uncore_read64_2x32 to read upper and lower fields of the GPM
timestamp.

v2: Fix compile error

Signed-off-by: Umesh Nerlige Ramappa 
---
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c   | 17 ++---
  1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 61a6f2424e24..33e695adfd6a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1200,20 +1200,6 @@ static u32 gpm_timestamp_shift(struct intel_gt *gt)
return 3 - shift;
  }
  
-static u64 gpm_timestamp(struct intel_gt *gt)

-{
-   u32 lo, hi, old_hi, loop = 0;
-
-   hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
-   do {
-   lo = intel_uncore_read(gt->uncore, MISC_STATUS0);
-   old_hi = hi;
-   hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
-   } while (old_hi != hi && loop++ < 2);
-
-   return ((u64)hi << 32) | lo;
-}
-
  static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
  {
struct intel_gt *gt = guc_to_gt(guc);
@@ -1223,7 +1209,8 @@ static void guc_update_pm_timestamp(struct intel_guc 
*guc, ktime_t *now)
lockdep_assert_held(>timestamp.lock);
  
  	gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);

-   gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift;
+   gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
+ MISC_STATUS1) >> guc->timestamp.shift;
gt_stamp_lo = lower_32_bits(gpm_ts);
*now = ktime_get();
  


Reviewed-by: Tvrtko Ursulin 

Regards,

Tvrtko


Re: [Intel-gfx] [PATCH] drm/i915/dmc: Add MMIO range restrictions

2022-04-27 Thread kernel test robot
Hi Anusha,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on drm-tip/drm-tip next-20220426]
[cannot apply to v5.18-rc4]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/intel-lab-lkp/linux/commits/Anusha-Srivatsa/drm-i915-dmc-Add-MMIO-range-restrictions/20220427-084021
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-defconfig 
(https://download.01.org/0day-ci/archive/20220427/202204271502.butprbqw-...@intel.com/config)
compiler: gcc-11 (Debian 11.2.0-20) 11.2.0
reproduce (this is a W=1 build):
# 
https://github.com/intel-lab-lkp/linux/commit/f79241ea04e8815b3c1b0ab6b9d6136efc8646d3
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review 
Anusha-Srivatsa/drm-i915-dmc-Add-MMIO-range-restrictions/20220427-084021
git checkout f79241ea04e8815b3c1b0ab6b9d6136efc8646d3
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   drivers/gpu/drm/i915/display/intel_dmc.c: In function 'parse_dmc_fw_header':
>> drivers/gpu/drm/i915/display/intel_dmc.c:476:9: error: this 'if' clause does 
>> not guard... [-Werror=misleading-indentation]
 476 | if (!dmc_mmio_addr_sanity_check(dmc, mmioaddr, mmio_count, 
dmc_header->header_ver, dmc_id))
 | ^~
   drivers/gpu/drm/i915/display/intel_dmc.c:478:17: note: ...this statement, 
but the latter is misleadingly indented as if it were guarded by the 'if'
 478 | return 0;
 | ^~
   cc1: all warnings being treated as errors


vim +/if +476 drivers/gpu/drm/i915/display/intel_dmc.c

   406  
   407  static u32 parse_dmc_fw_header(struct intel_dmc *dmc,
   408 const struct intel_dmc_header_base 
*dmc_header,
   409 size_t rem_size, u8 dmc_id)
   410  {
   411  struct drm_i915_private *i915 = container_of(dmc, 
typeof(*i915), dmc);
   412  struct dmc_fw_info *dmc_info = >dmc_info[dmc_id];
   413  unsigned int header_len_bytes, dmc_header_size, payload_size, i;
   414  const u32 *mmioaddr, *mmiodata;
   415  u32 mmio_count, mmio_count_max, start_mmioaddr;
   416  u8 *payload;
   417  
   418  BUILD_BUG_ON(ARRAY_SIZE(dmc_info->mmioaddr) < 
DMC_V3_MAX_MMIO_COUNT ||
   419   ARRAY_SIZE(dmc_info->mmioaddr) < 
DMC_V1_MAX_MMIO_COUNT);
   420  
   421  /*
   422   * Check if we can access common fields, we will checkc again 
below
   423   * after we have read the version
   424   */
   425  if (rem_size < sizeof(struct intel_dmc_header_base))
   426  goto error_truncated;
   427  
   428  /* Cope with small differences between v1 and v3 */
   429  if (dmc_header->header_ver == 3) {
   430  const struct intel_dmc_header_v3 *v3 =
   431  (const struct intel_dmc_header_v3 *)dmc_header;
   432  
   433  if (rem_size < sizeof(struct intel_dmc_header_v3))
   434  goto error_truncated;
   435  
   436  mmioaddr = v3->mmioaddr;
   437  mmiodata = v3->mmiodata;
   438  mmio_count = v3->mmio_count;
   439  mmio_count_max = DMC_V3_MAX_MMIO_COUNT;
   440  /* header_len is in dwords */
   441  header_len_bytes = dmc_header->header_len * 4;
   442  start_mmioaddr = v3->start_mmioaddr;
   443  dmc_header_size = sizeof(*v3);
   444  } else if (dmc_header->header_ver == 1) {
   445  const struct intel_dmc_header_v1 *v1 =
   446  (const struct intel_dmc_header_v1 *)dmc_header;
   447  
   448  if (rem_size < sizeof(struct intel_dmc_header_v1))
   449  goto error_truncated;
   450  
   451  mmioaddr = v1->mmioaddr;
   452  mmiodata = v1->mmiodata;
   453  mmio_count = v1->mmio_count;
   454  mmio_count_max = DMC_V1_MAX_MMIO_COUNT;
   455  header_len_bytes = dmc_header->header_len;
   456  start_mmioaddr = DMC_V1_MMIO_START_RANGE;
   457  dmc_header_size = sizeof(*v1);
   458  } else {
   459  drm_err(>drm, "Unknown DMC fw header version: 
%u\n"

Re: [Intel-gfx] [PATCH v2 2/3] drm/i915: Add first set of DG2 PCI IDs

2022-04-27 Thread Lucas De Marchi

On Mon, Apr 25, 2022 at 02:12:50PM -0700, Matt Roper wrote:

The IDs added here are the subset reserved for 'motherboard down'
designs of DG2.  We have all the necessary support upstream to enable
these now (although they'll continue to require force_probe until the
usual requirements are met).

The remaining DG2 IDs for add-in cards will come in a future patch once
some additional required functionality has fully landed.

Bspec: 44477
Cc: Lucas De Marchi 
Cc: Daniel Vetter 
Cc: Dave Airlie 
Cc: Rodrigo Vivi 
Cc: Joonas Lahtinen 
Cc: Jani Nikula 
Cc: Tvrtko Ursulin 
Signed-off-by: Matt Roper 



Reviewed-by: Lucas De Marchi 

Lucas De Marchi


---
drivers/gpu/drm/i915/i915_pci.c  |  2 +-
drivers/gpu/drm/i915/intel_device_info.c | 21 +
include/drm/i915_pciids.h| 22 ++
3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index a3a1b4cb2942..1d44f57c2eb0 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1058,7 +1058,6 @@ static const struct intel_device_info xehpsdv_info = {
BIT(VECS0) | BIT(VECS1) | \
BIT(VCS0) | BIT(VCS2)

-__maybe_unused
static const struct intel_device_info dg2_info = {
DG2_FEATURES,
XE_LPD_FEATURES,
@@ -1154,6 +1153,7 @@ static const struct pci_device_id pciidlist[] = {
INTEL_DG1_IDS(&dg1_info),
INTEL_RPLS_IDS(&adl_s_info),
INTEL_RPLP_IDS(&adl_p_info),
+   INTEL_DG2_IDS(&dg2_info),
{0, 0, 0}
};
MODULE_DEVICE_TABLE(pci, pciidlist);
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 74c3ffb66b8d..cefa9ed784ff 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -186,6 +186,18 @@ static const u16 subplatform_rpl_ids[] = {
INTEL_RPLP_IDS(0),
};

+static const u16 subplatform_g10_ids[] = {
+   INTEL_DG2_G10_IDS(0),
+};
+
+static const u16 subplatform_g11_ids[] = {
+   INTEL_DG2_G11_IDS(0),
+};
+
+static const u16 subplatform_g12_ids[] = {
+   INTEL_DG2_G12_IDS(0),
+};
+
static bool find_devid(u16 id, const u16 *p, unsigned int num)
{
for (; num; num--, p++) {
@@ -231,6 +243,15 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915)
} else if (find_devid(devid, subplatform_rpl_ids,
  ARRAY_SIZE(subplatform_rpl_ids))) {
mask = BIT(INTEL_SUBPLATFORM_RPL);
+   } else if (find_devid(devid, subplatform_g10_ids,
+ ARRAY_SIZE(subplatform_g10_ids))) {
+   mask = BIT(INTEL_SUBPLATFORM_G10);
+   } else if (find_devid(devid, subplatform_g11_ids,
+ ARRAY_SIZE(subplatform_g11_ids))) {
+   mask = BIT(INTEL_SUBPLATFORM_G11);
+   } else if (find_devid(devid, subplatform_g12_ids,
+ ARRAY_SIZE(subplatform_g12_ids))) {
+   mask = BIT(INTEL_SUBPLATFORM_G12);
}

GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_MASK);
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index a7b5eea7ffaa..283dadfbb4db 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -692,4 +692,26 @@
INTEL_VGA_DEVICE(0xA7A8, info), \
INTEL_VGA_DEVICE(0xA7A9, info)

+/* DG2 */
+#define INTEL_DG2_G10_IDS(info) \
+   INTEL_VGA_DEVICE(0x5690, info), \
+   INTEL_VGA_DEVICE(0x5691, info), \
+   INTEL_VGA_DEVICE(0x5692, info)
+
+#define INTEL_DG2_G11_IDS(info) \
+   INTEL_VGA_DEVICE(0x5693, info), \
+   INTEL_VGA_DEVICE(0x5694, info), \
+   INTEL_VGA_DEVICE(0x5695, info), \
+   INTEL_VGA_DEVICE(0x56B0, info)
+
+#define INTEL_DG2_G12_IDS(info) \
+   INTEL_VGA_DEVICE(0x5696, info), \
+   INTEL_VGA_DEVICE(0x5697, info), \
+   INTEL_VGA_DEVICE(0x56B2, info)
+
+#define INTEL_DG2_IDS(info) \
+   INTEL_DG2_G10_IDS(info), \
+   INTEL_DG2_G11_IDS(info), \
+   INTEL_DG2_G12_IDS(info)
+
#endif /* _I915_PCIIDS_H */
--
2.35.1



  1   2   >