Re: [Intel-gfx] [PATCH 2/2] drm/i915/gem: Migrate to system at dma-buf attach time (v5)

2021-07-14 Thread Daniel Vetter
On Wed, Jul 14, 2021 at 11:01 PM Jason Ekstrand  wrote:
>
> On Tue, Jul 13, 2021 at 10:23 AM Daniel Vetter  wrote:
> >
> > On Tue, Jul 13, 2021 at 04:06:13PM +0100, Matthew Auld wrote:
> > > On Tue, 13 Jul 2021 at 15:44, Daniel Vetter  wrote:
> > > >
> > > > On Mon, Jul 12, 2021 at 06:12:34PM -0500, Jason Ekstrand wrote:
> > > > > From: Thomas Hellström 
> > > > >
> > > > > Until we support p2p dma or as a complement to that, migrate data
> > > > > to system memory at dma-buf attach time if possible.
> > > > >
> > > > > v2:
> > > > > - Rebase on dynamic exporter. Update the igt_dmabuf_import_same_driver
> > > > >   selftest to migrate if we are LMEM capable.
> > > > > v3:
> > > > > - Migrate also in the pin() callback.
> > > > > v4:
> > > > > - Migrate in attach
> > > > > v5: (jason)
> > > > > - Lock around the migration
> > > > >
> > > > > Signed-off-by: Thomas Hellström 
> > > > > Signed-off-by: Michael J. Ruhl 
> > > > > Reported-by: kernel test robot 
> > > > > Signed-off-by: Jason Ekstrand 
> > > > > Reviewed-by: Jason Ekstrand 
> > > > > ---
> > > > >  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c| 25 
> > > > > ++-
> > > > >  .../drm/i915/gem/selftests/i915_gem_dmabuf.c  |  4 ++-
> > > > >  2 files changed, 27 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
> > > > > b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > > > > index 9a655f69a0671..3163f00554476 100644
> > > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > > > > @@ -170,8 +170,31 @@ static int i915_gem_dmabuf_attach(struct dma_buf 
> > > > > *dmabuf,
> > > > > struct dma_buf_attachment *attach)
> > > > >  {
> > > > >   struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
> > > > > + struct i915_gem_ww_ctx ww;
> > > > > + int err;
> > > > > +
> > > > > + for_i915_gem_ww(&ww, err, true) {
> > > > > + err = i915_gem_object_lock(obj, &ww);
> > > > > + if (err)
> > > > > + continue;
> > > > > +
> > > > > + if (!i915_gem_object_can_migrate(obj, 
> > > > > INTEL_REGION_SMEM)) {
> > > > > + err = -EOPNOTSUPP;
> > > > > + continue;
> > > > > + }
> > > > > +
> > > > > + err = i915_gem_object_migrate(obj, &ww, 
> > > > > INTEL_REGION_SMEM);
> > > > > + if (err)
> > > > > + continue;
> > > > >
> > > > > - return i915_gem_object_pin_pages_unlocked(obj);
> > > > > + err = i915_gem_object_wait_migration(obj, 0);
> > > > > + if (err)
> > > > > + continue;
> > > > > +
> > > > > + err = i915_gem_object_pin_pages(obj);
> > > > > + }
> > > > > +
> > > > > + return err;
> > > > >  }
> > > > >
> > > > >  static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf,
> > > > > diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c 
> > > > > b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> > > > > index 3dc0f8b3cdab0..4f7e77b1c0152 100644
> > > > > --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> > > > > +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> > > > > @@ -106,7 +106,9 @@ static int igt_dmabuf_import_same_driver(void 
> > > > > *arg)
> > > > >   int err;
> > > > >
> > > > >   force_different_devices = true;
> > > > > - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
> > > > > + obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0);
> > > >
> > > > I'm wondering (and couldn't answer) whether this creates an lmem+smem
> > > > buffer, since if we create an lmem-only buffer then the migration above
> > > > should fail.
> > >
> > > It's lmem-only, but it's also a kernel internal object, so the
> > > migration path will still happily migrate it if asked. On the other
> > > hand if it's a userspace object then we always have to respect the
> > > placements.
> > >
> > > I think for now the only usecase for that is in the selftests.
> >
> > Yeah I've read the kerneldoc, it's all nicely documented but feels a bit
> > dangerous. What I proposed on irc:
> > - i915_gem_object_migrate does the placement check, i.e. as strict as
> >   can_migrate.
> > - A new __i915_gem_object_migrate is for selftest that do special stuff.
>
> I just sent out a patch which does this except we don't actually need
> the __ version because there are no self-tests that want to do a
> dangerous migrate.  We could add such a helper later if we needed.
>
> > - In the import selftest we check that lmem-only fails (because we can't
> >   pin it into smem) for a non-dynamic importer, but lmem+smem works and
> >   gets migrated.
>
> I think we maybe want multiple things here?  The test we have right
> now is useful because, by creating an internal LMEM buffer we ensure
> that the migration actually happens.  If we create LMEM+SMEM, then

[Intel-gfx] [PATCH 2/2] drm/i915/dmc: Change intel_get_stepping_info()

2021-07-14 Thread Anusha Srivatsa
Lets use RUNTIME_INFO->step since all platforms now have their
stepping info in intel_step.c. This makes intel_get_stepping_info()
a lot simpler.

Cc: Lucas De Marchi 
Signed-off-by: Anusha Srivatsa 
---
 drivers/gpu/drm/i915/display/intel_dmc.c | 50 
 1 file changed, 8 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c 
b/drivers/gpu/drm/i915/display/intel_dmc.c
index f8789d4543bf..1f6c32932331 100644
--- a/drivers/gpu/drm/i915/display/intel_dmc.c
+++ b/drivers/gpu/drm/i915/display/intel_dmc.c
@@ -247,50 +247,15 @@ bool intel_dmc_has_payload(struct drm_i915_private *i915)
return i915->dmc.dmc_info[DMC_FW_MAIN].payload;
 }
 
-static const struct stepping_info skl_stepping_info[] = {
-   {'A', '0'}, {'B', '0'}, {'C', '0'},
-   {'D', '0'}, {'E', '0'}, {'F', '0'},
-   {'G', '0'}, {'H', '0'}, {'I', '0'},
-   {'J', '0'}, {'K', '0'}
-};
-
-static const struct stepping_info bxt_stepping_info[] = {
-   {'A', '0'}, {'A', '1'}, {'A', '2'},
-   {'B', '0'}, {'B', '1'}, {'B', '2'}
-};
-
-static const struct stepping_info icl_stepping_info[] = {
-   {'A', '0'}, {'A', '1'}, {'A', '2'},
-   {'B', '0'}, {'B', '2'},
-   {'C', '0'}
-};
-
-static const struct stepping_info no_stepping_info = { '*', '*' };
-
 static const struct stepping_info *
-intel_get_stepping_info(struct drm_i915_private *dev_priv)
+intel_get_stepping_info(struct drm_i915_private *i915,
+   struct stepping_info *si)
 {
-   const struct stepping_info *si;
-   unsigned int size;
-
-   if (IS_ICELAKE(dev_priv)) {
-   size = ARRAY_SIZE(icl_stepping_info);
-   si = icl_stepping_info;
-   } else if (IS_SKYLAKE(dev_priv)) {
-   size = ARRAY_SIZE(skl_stepping_info);
-   si = skl_stepping_info;
-   } else if (IS_BROXTON(dev_priv)) {
-   size = ARRAY_SIZE(bxt_stepping_info);
-   si = bxt_stepping_info;
-   } else {
-   size = 0;
-   si = NULL;
-   }
-
-   if (INTEL_REVID(dev_priv) < size)
-   return si + INTEL_REVID(dev_priv);
+   const char *step_name = 
intel_step_name(RUNTIME_INFO(i915)->step.display_step);
 
-   return &no_stepping_info;
+   si->stepping = step_name[0];
+   si->substepping = step_name[1];
+   return si;
 }
 
 static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv)
@@ -616,7 +581,8 @@ static void parse_dmc_fw(struct drm_i915_private *dev_priv,
struct intel_package_header *package_header;
struct intel_dmc_header_base *dmc_header;
struct intel_dmc *dmc = &dev_priv->dmc;
-   const struct stepping_info *si = intel_get_stepping_info(dev_priv);
+   struct stepping_info display_info = { '*', '*'};
+   const struct stepping_info *si = intel_get_stepping_info(dev_priv, 
&display_info);
u32 readcount = 0;
u32 r, offset;
int id;
-- 
2.32.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] drm/i915/step: Add macro magic for handling steps

2021-07-14 Thread Anusha Srivatsa
With the addition of stepping info for
all platforms, lets use macros for handling them
and autogenerating code for all steps at a time.

Suggested-by: Matt Roper 
Cc: Lucas De Marchi 
Signed-off-by: Anusha Srivatsa 
---
 drivers/gpu/drm/i915/intel_step.c | 14 
 drivers/gpu/drm/i915/intel_step.h | 37 +++
 2 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_step.c 
b/drivers/gpu/drm/i915/intel_step.c
index 9fcf17708cc8..26f6b1d376e1 100644
--- a/drivers/gpu/drm/i915/intel_step.c
+++ b/drivers/gpu/drm/i915/intel_step.c
@@ -182,3 +182,17 @@ void intel_step_init(struct drm_i915_private *i915)
 
RUNTIME_INFO(i915)->step = step;
 }
+
+#define STEP_NAME_CASE(name)   \
+   case STEP_##name:   \
+   return #name;
+
+const char *intel_step_name(enum intel_step step)
+{
+   switch(step) {
+   STEP_NAME_LIST(STEP_NAME_CASE);
+
+   default :
+   return "**";
+   }
+}
diff --git a/drivers/gpu/drm/i915/intel_step.h 
b/drivers/gpu/drm/i915/intel_step.h
index 88a77159703e..f6641e2a3c77 100644
--- a/drivers/gpu/drm/i915/intel_step.h
+++ b/drivers/gpu/drm/i915/intel_step.h
@@ -15,30 +15,39 @@ struct intel_step_info {
u8 display_step;
 };
 
+#define STEP_ENUM_VAL(name)  STEP_##name,
+
+#define STEP_NAME_LIST(func)   \
+   func(A0)\
+   func(A1)\
+   func(A2)\
+   func(B0)\
+   func(B1)\
+   func(B2)\
+   func(C0)\
+   func(C1)\
+   func(D0)\
+   func(D1)\
+   func(E0)\
+   func(F0)\
+   func(G0)\
+   func(H0)\
+   func(I0)\
+   func(I1)\
+   func(J0)
+
 /*
  * Symbolic steppings that do not match the hardware. These are valid both as 
gt
  * and display steppings as symbolic names.
  */
 enum intel_step {
STEP_NONE = 0,
-   STEP_A0,
-   STEP_A2,
-   STEP_B0,
-   STEP_B1,
-   STEP_C0,
-   STEP_D0,
-   STEP_D1,
-   STEP_E0,
-   STEP_F0,
-   STEP_G0,
-   STEP_H0,
-   STEP_I0,
-   STEP_I1,
-   STEP_J0,
+   STEP_NAME_LIST(STEP_ENUM_VAL)
STEP_FUTURE,
STEP_FOREVER,
 };
 
 void intel_step_init(struct drm_i915_private *i915);
+const char *intel_step_name(enum intel_step step);
 
 #endif /* __INTEL_STEP_H__ */
-- 
2.32.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 12/13] vfio/gvt: Fix open/close when multiple device FDs are open

2021-07-14 Thread Jason Gunthorpe
The user can open multiple device FDs if it likes, however the open
function calls vfio_register_notifier() on device global state. Calling
vfio_register_notifier() twice will trigger a WARN_ON from
notifier_chain_register() and the first close will wrongly delete the
notifier and more.

Since these really want the new open/close_device() semantics just change
the function over.

Signed-off-by: Jason Gunthorpe 
---
 drivers/gpu/drm/i915/gvt/kvmgt.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 1ac98f8aba31e6..7efa386449d104 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -885,7 +885,7 @@ static int intel_vgpu_group_notifier(struct notifier_block 
*nb,
return NOTIFY_OK;
 }
 
-static int intel_vgpu_open(struct mdev_device *mdev)
+static int intel_vgpu_open_device(struct mdev_device *mdev)
 {
struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
@@ -1004,7 +1004,7 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
vgpu->handle = 0;
 }
 
-static void intel_vgpu_release(struct mdev_device *mdev)
+static void intel_vgpu_close_device(struct mdev_device *mdev)
 {
struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
 
@@ -1753,8 +1753,8 @@ static struct mdev_parent_ops intel_vgpu_ops = {
.create = intel_vgpu_create,
.remove = intel_vgpu_remove,
 
-   .open   = intel_vgpu_open,
-   .release= intel_vgpu_release,
+   .open_device= intel_vgpu_open_device,
+   .close_device   = intel_vgpu_close_device,
 
.read   = intel_vgpu_read,
.write  = intel_vgpu_write,
-- 
2.32.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 05/13] vfio/fsl: Move to the device set infrastructure

2021-07-14 Thread Jason Gunthorpe
FSL uses the internal reflck to implement the open_device() functionality,
conversion to the core code is straightforward.

The decision on which set to be part of is trivially based on the
is_fsl_mc_bus_dprc() and we use a 'struct device *' pointer as the set_id.

It isn't entirely clear what the device set lock is actually protecting,
but I think it is related to the interrupt setup.

Signed-off-by: Yishai Hadas 
Signed-off-by: Jason Gunthorpe 
---
 drivers/vfio/fsl-mc/vfio_fsl_mc.c | 152 --
 drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c|   6 +-
 drivers/vfio/fsl-mc/vfio_fsl_mc_private.h |   7 -
 3 files changed, 26 insertions(+), 139 deletions(-)

diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c 
b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index 3d2be06e1bc146..49b93de05d5d62 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -19,81 +19,10 @@
 
 static struct fsl_mc_driver vfio_fsl_mc_driver;
 
-static DEFINE_MUTEX(reflck_lock);
-
-static void vfio_fsl_mc_reflck_get(struct vfio_fsl_mc_reflck *reflck)
-{
-   kref_get(&reflck->kref);
-}
-
-static void vfio_fsl_mc_reflck_release(struct kref *kref)
-{
-   struct vfio_fsl_mc_reflck *reflck = container_of(kref,
- struct vfio_fsl_mc_reflck,
- kref);
-
-   mutex_destroy(&reflck->lock);
-   kfree(reflck);
-   mutex_unlock(&reflck_lock);
-}
-
-static void vfio_fsl_mc_reflck_put(struct vfio_fsl_mc_reflck *reflck)
-{
-   kref_put_mutex(&reflck->kref, vfio_fsl_mc_reflck_release, &reflck_lock);
-}
-
-static struct vfio_fsl_mc_reflck *vfio_fsl_mc_reflck_alloc(void)
-{
-   struct vfio_fsl_mc_reflck *reflck;
-
-   reflck = kzalloc(sizeof(*reflck), GFP_KERNEL);
-   if (!reflck)
-   return ERR_PTR(-ENOMEM);
-
-   kref_init(&reflck->kref);
-   mutex_init(&reflck->lock);
-
-   return reflck;
-}
-
-static int vfio_fsl_mc_reflck_attach(struct vfio_fsl_mc_device *vdev)
-{
-   int ret = 0;
-
-   mutex_lock(&reflck_lock);
-   if (is_fsl_mc_bus_dprc(vdev->mc_dev)) {
-   vdev->reflck = vfio_fsl_mc_reflck_alloc();
-   ret = PTR_ERR_OR_ZERO(vdev->reflck);
-   } else {
-   struct device *mc_cont_dev = vdev->mc_dev->dev.parent;
-   struct vfio_device *device;
-   struct vfio_fsl_mc_device *cont_vdev;
-
-   device = vfio_device_get_from_dev(mc_cont_dev);
-   if (!device) {
-   ret = -ENODEV;
-   goto unlock;
-   }
-
-   cont_vdev =
-   container_of(device, struct vfio_fsl_mc_device, vdev);
-   if (!cont_vdev || !cont_vdev->reflck) {
-   vfio_device_put(device);
-   ret = -ENODEV;
-   goto unlock;
-   }
-   vfio_fsl_mc_reflck_get(cont_vdev->reflck);
-   vdev->reflck = cont_vdev->reflck;
-   vfio_device_put(device);
-   }
-
-unlock:
-   mutex_unlock(&reflck_lock);
-   return ret;
-}
-
-static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
+static int vfio_fsl_mc_open_device(struct vfio_device *core_vdev)
 {
+   struct vfio_fsl_mc_device *vdev =
+   container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
struct fsl_mc_device *mc_dev = vdev->mc_dev;
int count = mc_dev->obj_desc.region_count;
int i;
@@ -136,58 +65,30 @@ static void vfio_fsl_mc_regions_cleanup(struct 
vfio_fsl_mc_device *vdev)
kfree(vdev->regions);
 }
 
-static int vfio_fsl_mc_open(struct vfio_device *core_vdev)
-{
-   struct vfio_fsl_mc_device *vdev =
-   container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
-   int ret = 0;
-
-   mutex_lock(&vdev->reflck->lock);
-   if (!vdev->refcnt) {
-   ret = vfio_fsl_mc_regions_init(vdev);
-   if (ret)
-   goto out;
-   }
-   vdev->refcnt++;
-out:
-   mutex_unlock(&vdev->reflck->lock);
 
-   return ret;
-}
-
-static void vfio_fsl_mc_release(struct vfio_device *core_vdev)
+static void vfio_fsl_mc_close_device(struct vfio_device *core_vdev)
 {
struct vfio_fsl_mc_device *vdev =
container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
+   struct fsl_mc_device *mc_dev = vdev->mc_dev;
+   struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev);
+   struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev);
int ret;
 
-   mutex_lock(&vdev->reflck->lock);
+   vfio_fsl_mc_regions_cleanup(vdev);
 
-   if (!(--vdev->refcnt)) {
-   struct fsl_mc_device *mc_dev = vdev->mc_dev;
-   struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev);
-   struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev);
-
-   vfio_fsl_mc_regions_cleanup(vdev);
+   /* reset the device before cleaning up the interrupts */
+ 

[Intel-gfx] [PATCH 10/13] vfio/mbochs: Fix close when multiple device FDs are open

2021-07-14 Thread Jason Gunthorpe
mbochs_close() iterates over global device state and frees it. Currently
this is done every time a device FD is closed, but if multiple device FDs
are open this could corrupt other still active FDs.

Change this to use close_device() so it only runs on the last close.

Signed-off-by: Jason Gunthorpe 
---
 samples/vfio-mdev/mbochs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
index 199273bebcc0e2..0137494c203aa2 100644
--- a/samples/vfio-mdev/mbochs.c
+++ b/samples/vfio-mdev/mbochs.c
@@ -1274,7 +1274,7 @@ static long mbochs_ioctl(struct vfio_device *vdev, 
unsigned int cmd,
return -ENOTTY;
 }
 
-static void mbochs_close(struct vfio_device *vdev)
+static void mbochs_close_device(struct vfio_device *vdev)
 {
struct mdev_state *mdev_state =
container_of(vdev, struct mdev_state, vdev);
@@ -1392,7 +1392,7 @@ static struct attribute_group *mdev_type_groups[] = {
 };
 
 static const struct vfio_device_ops mbochs_dev_ops = {
-   .release = mbochs_close,
+   .close_device = mbochs_close_device,
.read = mbochs_read,
.write = mbochs_write,
.ioctl = mbochs_ioctl,
-- 
2.32.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 04/13] vfio/samples: Delete useless open/close

2021-07-14 Thread Jason Gunthorpe
The core code no longer requires these ops to be defined, so delete these
empty functions and leave the op as NULL. mtty's functions only log a
pointless message, delete that entirely.

Signed-off-by: Yishai Hadas 
Signed-off-by: Jason Gunthorpe 
---
 samples/vfio-mdev/mbochs.c |  6 --
 samples/vfio-mdev/mdpy.c   | 11 ---
 samples/vfio-mdev/mtty.c   | 13 -
 3 files changed, 30 deletions(-)

diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
index cf264d0bf11053..199273bebcc0e2 100644
--- a/samples/vfio-mdev/mbochs.c
+++ b/samples/vfio-mdev/mbochs.c
@@ -1274,11 +1274,6 @@ static long mbochs_ioctl(struct vfio_device *vdev, 
unsigned int cmd,
return -ENOTTY;
 }
 
-static int mbochs_open(struct vfio_device *vdev)
-{
-   return 0;
-}
-
 static void mbochs_close(struct vfio_device *vdev)
 {
struct mdev_state *mdev_state =
@@ -1397,7 +1392,6 @@ static struct attribute_group *mdev_type_groups[] = {
 };
 
 static const struct vfio_device_ops mbochs_dev_ops = {
-   .open = mbochs_open,
.release = mbochs_close,
.read = mbochs_read,
.write = mbochs_write,
diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c
index 57334034cde6dd..8d1a80a0722aa9 100644
--- a/samples/vfio-mdev/mdpy.c
+++ b/samples/vfio-mdev/mdpy.c
@@ -614,15 +614,6 @@ static long mdpy_ioctl(struct vfio_device *vdev, unsigned 
int cmd,
return -ENOTTY;
 }
 
-static int mdpy_open(struct vfio_device *vdev)
-{
-   return 0;
-}
-
-static void mdpy_close(struct vfio_device *vdev)
-{
-}
-
 static ssize_t
 resolution_show(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -717,8 +708,6 @@ static struct attribute_group *mdev_type_groups[] = {
 };
 
 static const struct vfio_device_ops mdpy_dev_ops = {
-   .open = mdpy_open,
-   .release = mdpy_close,
.read = mdpy_read,
.write = mdpy_write,
.ioctl = mdpy_ioctl,
diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index 37cc9067e1601d..5983cdb16e3d1d 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -1207,17 +1207,6 @@ static long mtty_ioctl(struct vfio_device *vdev, 
unsigned int cmd,
return -ENOTTY;
 }
 
-static int mtty_open(struct vfio_device *vdev)
-{
-   pr_info("%s\n", __func__);
-   return 0;
-}
-
-static void mtty_close(struct vfio_device *mdev)
-{
-   pr_info("%s\n", __func__);
-}
-
 static ssize_t
 sample_mtty_dev_show(struct device *dev, struct device_attribute *attr,
 char *buf)
@@ -1325,8 +1314,6 @@ static struct attribute_group *mdev_type_groups[] = {
 
 static const struct vfio_device_ops mtty_dev_ops = {
.name = "vfio-mtty",
-   .open = mtty_open,
-   .release = mtty_close,
.read = mtty_read,
.write = mtty_write,
.ioctl = mtty_ioctl,
-- 
2.32.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 07/13] vfio/pci: Move to the device set infrastructure

2021-07-14 Thread Jason Gunthorpe
From: Yishai Hadas 

PCI wants to have the usual open/close_device() logic with the slight
twist that the open/close_device() must be done under a singelton lock
shared by all of the vfio_devices that are in the PCI "reset group".

The reset group, and thus the device set, is determined by what devices
pci_reset_bus() touches, which is either the entire bus or only the slot.

Rely on the core code to do everything reflck was doing and delete reflck
entirely.

Signed-off-by: Yishai Hadas 
Signed-off-by: Jason Gunthorpe 
---
 drivers/vfio/pci/vfio_pci.c | 156 ++--
 drivers/vfio/pci/vfio_pci_private.h |   7 --
 2 files changed, 31 insertions(+), 132 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index fab3715d60d4ba..22774e447b5f4a 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -530,53 +530,40 @@ static void vfio_pci_vf_token_user_add(struct 
vfio_pci_device *vdev, int val)
vfio_device_put(&pf_vdev->vdev);
 }
 
-static void vfio_pci_release(struct vfio_device *core_vdev)
+static void vfio_pci_close_device(struct vfio_device *core_vdev)
 {
struct vfio_pci_device *vdev =
container_of(core_vdev, struct vfio_pci_device, vdev);
 
-   mutex_lock(&vdev->reflck->lock);
-
-   if (!(--vdev->refcnt)) {
-   vfio_pci_vf_token_user_add(vdev, -1);
-   vfio_spapr_pci_eeh_release(vdev->pdev);
-   vfio_pci_disable(vdev);
+   vfio_pci_vf_token_user_add(vdev, -1);
+   vfio_spapr_pci_eeh_release(vdev->pdev);
+   vfio_pci_disable(vdev);
 
-   mutex_lock(&vdev->igate);
-   if (vdev->err_trigger) {
-   eventfd_ctx_put(vdev->err_trigger);
-   vdev->err_trigger = NULL;
-   }
-   if (vdev->req_trigger) {
-   eventfd_ctx_put(vdev->req_trigger);
-   vdev->req_trigger = NULL;
-   }
-   mutex_unlock(&vdev->igate);
+   mutex_lock(&vdev->igate);
+   if (vdev->err_trigger) {
+   eventfd_ctx_put(vdev->err_trigger);
+   vdev->err_trigger = NULL;
}
-
-   mutex_unlock(&vdev->reflck->lock);
+   if (vdev->req_trigger) {
+   eventfd_ctx_put(vdev->req_trigger);
+   vdev->req_trigger = NULL;
+   }
+   mutex_unlock(&vdev->igate);
 }
 
-static int vfio_pci_open(struct vfio_device *core_vdev)
+static int vfio_pci_open_device(struct vfio_device *core_vdev)
 {
struct vfio_pci_device *vdev =
container_of(core_vdev, struct vfio_pci_device, vdev);
int ret = 0;
 
-   mutex_lock(&vdev->reflck->lock);
-
-   if (!vdev->refcnt) {
-   ret = vfio_pci_enable(vdev);
-   if (ret)
-   goto error;
+   ret = vfio_pci_enable(vdev);
+   if (ret)
+   return ret;
 
-   vfio_spapr_pci_eeh_open(vdev->pdev);
-   vfio_pci_vf_token_user_add(vdev, 1);
-   }
-   vdev->refcnt++;
-error:
-   mutex_unlock(&vdev->reflck->lock);
-   return ret;
+   vfio_spapr_pci_eeh_open(vdev->pdev);
+   vfio_pci_vf_token_user_add(vdev, 1);
+   return 0;
 }
 
 static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
@@ -1870,8 +1857,8 @@ static int vfio_pci_match(struct vfio_device *core_vdev, 
char *buf)
 
 static const struct vfio_device_ops vfio_pci_ops = {
.name   = "vfio-pci",
-   .open   = vfio_pci_open,
-   .release= vfio_pci_release,
+   .open_device= vfio_pci_open_device,
+   .close_device   = vfio_pci_close_device,
.ioctl  = vfio_pci_ioctl,
.read   = vfio_pci_read,
.write  = vfio_pci_write,
@@ -1880,9 +1867,6 @@ static const struct vfio_device_ops vfio_pci_ops = {
.match  = vfio_pci_match,
 };
 
-static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev);
-static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck);
-
 static int vfio_pci_bus_notifier(struct notifier_block *nb,
 unsigned long action, void *data)
 {
@@ -2020,12 +2004,17 @@ static int vfio_pci_probe(struct pci_dev *pdev, const 
struct pci_device_id *id)
INIT_LIST_HEAD(&vdev->vma_list);
init_rwsem(&vdev->memory_lock);
 
-   ret = vfio_pci_reflck_attach(vdev);
+   if (pci_is_root_bus(pdev->bus))
+   ret = vfio_assign_device_set(&vdev->vdev, vdev);
+   else if (!pci_probe_reset_slot(pdev->slot))
+   ret = vfio_assign_device_set(&vdev->vdev, pdev->slot);
+   else
+   ret = vfio_assign_device_set(&vdev->vdev, pdev->bus);
if (ret)
goto out_uninit;
ret = vfio_pci_vf_init(vdev);
if (ret)
-   goto out_reflck;
+   goto out_uninit;
ret = vfio_pci_vga_init(vdev);
if (ret)
goto out_vf;
@@ -2057,8 +2046,6 @@ static int 

[Intel-gfx] [PATCH 08/13] vfio/pci: Change vfio_pci_try_bus_reset() to use the dev_set

2021-07-14 Thread Jason Gunthorpe
Keep track of all the vfio_devices that have been added to the device set
and use this list in vfio_pci_try_bus_reset() instead of trying to work
backwards from the pci_device.

The dev_set->lock directly prevents devices from joining/leaving the set,
which further implies the pci_device cannot change drivers or that the
vfio_device be freed, eliminating the need for get/put's.

Completeness of the device set can be directly measured by checking if
every PCI device in the reset group is also in the device set - which
proves that VFIO drivers are attached to everything.

This restructuring corrects a call to pci_dev_driver() without holding the
device_lock() and removes a hard wiring to &vfio_pci_driver.

Signed-off-by: Jason Gunthorpe 
---
 drivers/vfio/pci/vfio_pci.c | 110 ++--
 drivers/vfio/vfio.c |  10 
 include/linux/vfio.h|   2 +
 3 files changed, 53 insertions(+), 69 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 22774e447b5f4a..fbc20f6d2dd412 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -404,6 +404,9 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;
int i, bar;
 
+   /* For needs_reset */
+   lockdep_assert_held(&vdev->vdev.dev_set->lock);
+
/* Stop the device from further DMA */
pci_clear_master(pdev);
 
@@ -2139,34 +2142,17 @@ static struct pci_driver vfio_pci_driver = {
.err_handler= _err_handlers,
 };
 
-static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
+static int vfio_pci_check_all_devices_bound(struct pci_dev *pdev, void *data)
 {
-   struct vfio_devices *devs = data;
-   struct vfio_device *device;
-   struct vfio_pci_device *vdev;
-
-   if (devs->cur_index == devs->max_index)
-   return -ENOSPC;
+   struct vfio_device_set *dev_set = data;
+   struct vfio_device *cur;
 
-   device = vfio_device_get_from_dev(&pdev->dev);
-   if (!device)
-   return -EINVAL;
-
-   if (pci_dev_driver(pdev) != &vfio_pci_driver) {
-   vfio_device_put(device);
-   return -EBUSY;
-   }
-
-   vdev = container_of(device, struct vfio_pci_device, vdev);
-
-   /* Fault if the device is not unused */
-   if (device->open_count) {
-   vfio_device_put(device);
-   return -EBUSY;
-   }
+   lockdep_assert_held(&dev_set->lock);
 
-   devs->devices[devs->cur_index++] = vdev;
-   return 0;
+   list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
+   if (cur->dev == &pdev->dev)
+   return 0;
+   return -EBUSY;
 }
 
 static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
@@ -2210,8 +2196,7 @@ static int vfio_pci_try_zap_and_vma_lock_cb(struct 
pci_dev *pdev, void *data)
  *needs_reset (such as by lack of FLR support)
  * Then attempt to perform that bus or slot reset.  Callers are required
  * to hold vdev->dev_set->lock, protecting the bus/slot reset group from
- * concurrent opens.  A vfio_device reference is acquired for each device
- * to prevent unbinds during the reset operation.
+ * concurrent opens.
  *
  * NB: vfio-core considers a group to be viable even if some devices are
  * bound to drivers like pci-stub or pcieport.  Here we require all devices
@@ -2220,61 +2205,48 @@ static int vfio_pci_try_zap_and_vma_lock_cb(struct 
pci_dev *pdev, void *data)
  */
 static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
 {
-   struct vfio_devices devs = { .cur_index = 0 };
-   int i = 0, ret = -EINVAL;
-   bool slot = false;
-   struct vfio_pci_device *tmp;
-
-   if (!pci_probe_reset_slot(vdev->pdev->slot))
-   slot = true;
-   else if (pci_probe_reset_bus(vdev->pdev->bus))
-   return;
+   struct vfio_device_set *dev_set = vdev->vdev.dev_set;
+   struct vfio_pci_device *to_reset = NULL;
+   struct vfio_pci_device *cur;
+   int ret;
 
-   if (vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
- &i, slot) || !i)
+   if (pci_probe_reset_slot(vdev->pdev->slot) &&
+   pci_probe_reset_bus(vdev->pdev->bus))
return;
 
-   devs.max_index = i;
-   devs.devices = kcalloc(i, sizeof(struct vfio_device *), GFP_KERNEL);
-   if (!devs.devices)
-   return;
+   lockdep_assert_held(&vdev->vdev.dev_set->lock);
 
-   if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
- vfio_pci_get_unused_devs,
- &devs, slot))
-   goto put_devs;
+   /* All VFIO devices have a closed FD */
+   list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list)
+   if (cur->vdev.open_count)
+   return;
+
+   /* All devices 

[Intel-gfx] [PATCH 02/13] vfio: Introduce a vfio_uninit_group_dev() API call

2021-07-14 Thread Jason Gunthorpe
From: Max Gurtovoy 

This pairs with vfio_init_group_dev() and allows undoing any state that is
stored in the vfio_device unrelated to registration. Add appropriately
placed calls to all the drivers.

The following patch will use this to add pre-registration state for the
device set.

Signed-off-by: Max Gurtovoy 
Signed-off-by: Jason Gunthorpe 
---
 Documentation/driver-api/vfio.rst|  4 ++-
 drivers/vfio/fsl-mc/vfio_fsl_mc.c|  6 +++--
 drivers/vfio/mdev/vfio_mdev.c| 13 +++---
 drivers/vfio/pci/vfio_pci.c  |  6 +++--
 drivers/vfio/platform/vfio_platform_common.c |  7 +++--
 drivers/vfio/vfio.c  |  5 
 include/linux/vfio.h |  1 +
 samples/vfio-mdev/mbochs.c   |  2 ++
 samples/vfio-mdev/mdpy.c | 25 ++
 samples/vfio-mdev/mtty.c | 27 
 10 files changed, 64 insertions(+), 32 deletions(-)

diff --git a/Documentation/driver-api/vfio.rst 
b/Documentation/driver-api/vfio.rst
index 606eed8823ceab..c663b6f978255b 100644
--- a/Documentation/driver-api/vfio.rst
+++ b/Documentation/driver-api/vfio.rst
@@ -255,11 +255,13 @@ vfio_unregister_group_dev() respectively::
void vfio_init_group_dev(struct vfio_device *device,
struct device *dev,
const struct vfio_device_ops *ops);
+   void vfio_uninit_group_dev(struct vfio_device *device);
int vfio_register_group_dev(struct vfio_device *device);
void vfio_unregister_group_dev(struct vfio_device *device);
 
 The driver should embed the vfio_device in its own structure and call
-vfio_init_group_dev() to pre-configure it before going to registration.
+vfio_init_group_dev() to pre-configure it before going to registration
+and call vfio_uninit_group_dev() after completing the un-registration.
 vfio_register_group_dev() indicates to the core to begin tracking the
 iommu_group of the specified dev and register the dev as owned by a VFIO bus
 driver. Once vfio_register_group_dev() returns it is possible for userspace to
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c 
b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index 90cad109583b80..3d2be06e1bc146 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -627,7 +627,7 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
 
ret = vfio_fsl_mc_reflck_attach(vdev);
if (ret)
-   goto out_kfree;
+   goto out_uninit;
 
ret = vfio_fsl_mc_init_device(vdev);
if (ret)
@@ -657,7 +657,8 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
vfio_fsl_uninit_device(vdev);
 out_reflck:
vfio_fsl_mc_reflck_put(vdev->reflck);
-out_kfree:
+out_uninit:
+   vfio_uninit_group_dev(&vdev->vdev);
kfree(vdev);
 out_group_put:
vfio_iommu_group_put(group, dev);
@@ -674,6 +675,7 @@ static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev)
 
dprc_remove_devices(mc_dev, NULL, 0);
vfio_fsl_uninit_device(vdev);
+   vfio_uninit_group_dev(>vdev);
vfio_fsl_mc_reflck_put(vdev->reflck);
 
kfree(vdev);
diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c
index 39ef7489fe4719..a5c77ccb24f70a 100644
--- a/drivers/vfio/mdev/vfio_mdev.c
+++ b/drivers/vfio/mdev/vfio_mdev.c
@@ -120,12 +120,16 @@ static int vfio_mdev_probe(struct mdev_device *mdev)
 
vfio_init_group_dev(vdev, >dev, _mdev_dev_ops);
ret = vfio_register_group_dev(vdev);
-   if (ret) {
-   kfree(vdev);
-   return ret;
-   }
+   if (ret)
+   goto out_uninit;
+
dev_set_drvdata(>dev, vdev);
return 0;
+
+out_uninit:
+   vfio_uninit_group_dev(vdev);
+   kfree(vdev);
+   return ret;
 }
 
 static void vfio_mdev_remove(struct mdev_device *mdev)
@@ -133,6 +137,7 @@ static void vfio_mdev_remove(struct mdev_device *mdev)
struct vfio_device *vdev = dev_get_drvdata(>dev);
 
vfio_unregister_group_dev(vdev);
+   vfio_uninit_group_dev(vdev);
kfree(vdev);
 }
 
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 318864d5283782..fab3715d60d4ba 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -2022,7 +2022,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const 
struct pci_device_id *id)
 
ret = vfio_pci_reflck_attach(vdev);
if (ret)
-   goto out_free;
+   goto out_uninit;
ret = vfio_pci_vf_init(vdev);
if (ret)
goto out_reflck;
@@ -2059,7 +2059,8 @@ static int vfio_pci_probe(struct pci_dev *pdev, const 
struct pci_device_id *id)
vfio_pci_vf_uninit(vdev);
 out_reflck:
vfio_pci_reflck_put(vdev->reflck);
-out_free:
+out_uninit:
+   vfio_uninit_group_dev(>vdev);
kfree(vdev->pm_save);

[Intel-gfx] [PATCH 06/13] vfio/platform: Use open_device() instead of open coding a refcnt scheme

2021-07-14 Thread Jason Gunthorpe
Platform simply wants to run some code when the device is first
opened/last closed. Use the core framework and locking for this.  Aside
from removing a bit of code this narrows the locking scope from a global
lock.

Signed-off-by: Yishai Hadas 
Signed-off-by: Jason Gunthorpe 
---
 drivers/vfio/platform/vfio_platform_common.c  | 79 ---
 drivers/vfio/platform/vfio_platform_private.h |  1 -
 2 files changed, 32 insertions(+), 48 deletions(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index bdde8605178cd2..6af7ce7d619c25 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -218,65 +218,52 @@ static int vfio_platform_call_reset(struct 
vfio_platform_device *vdev,
return -EINVAL;
 }
 
-static void vfio_platform_release(struct vfio_device *core_vdev)
+static void vfio_platform_close_device(struct vfio_device *core_vdev)
 {
struct vfio_platform_device *vdev =
container_of(core_vdev, struct vfio_platform_device, vdev);
+   const char *extra_dbg = NULL;
+   int ret;
 
-   mutex_lock(_lock);
-
-   if (!(--vdev->refcnt)) {
-   const char *extra_dbg = NULL;
-   int ret;
-
-   ret = vfio_platform_call_reset(vdev, _dbg);
-   if (ret && vdev->reset_required) {
-   dev_warn(vdev->device, "reset driver is required and 
reset call failed in release (%d) %s\n",
-ret, extra_dbg ? extra_dbg : "");
-   WARN_ON(1);
-   }
-   pm_runtime_put(vdev->device);
-   vfio_platform_regions_cleanup(vdev);
-   vfio_platform_irq_cleanup(vdev);
+   ret = vfio_platform_call_reset(vdev, _dbg);
+   if (WARN_ON(ret && vdev->reset_required)) {
+   dev_warn(
+   vdev->device,
+   "reset driver is required and reset call failed in 
release (%d) %s\n",
+   ret, extra_dbg ? extra_dbg : "");
}
-
-   mutex_unlock(_lock);
+   pm_runtime_put(vdev->device);
+   vfio_platform_regions_cleanup(vdev);
+   vfio_platform_irq_cleanup(vdev);
 }
 
-static int vfio_platform_open(struct vfio_device *core_vdev)
+static int vfio_platform_open_device(struct vfio_device *core_vdev)
 {
struct vfio_platform_device *vdev =
container_of(core_vdev, struct vfio_platform_device, vdev);
+   const char *extra_dbg = NULL;
int ret;
 
-   mutex_lock(_lock);
-
-   if (!vdev->refcnt) {
-   const char *extra_dbg = NULL;
-
-   ret = vfio_platform_regions_init(vdev);
-   if (ret)
-   goto err_reg;
+   ret = vfio_platform_regions_init(vdev);
+   if (ret)
+   return ret;
 
-   ret = vfio_platform_irq_init(vdev);
-   if (ret)
-   goto err_irq;
+   ret = vfio_platform_irq_init(vdev);
+   if (ret)
+   goto err_irq;
 
-   ret = pm_runtime_get_sync(vdev->device);
-   if (ret < 0)
-   goto err_rst;
+   ret = pm_runtime_get_sync(vdev->device);
+   if (ret < 0)
+   goto err_rst;
 
-   ret = vfio_platform_call_reset(vdev, _dbg);
-   if (ret && vdev->reset_required) {
-   dev_warn(vdev->device, "reset driver is required and 
reset call failed in open (%d) %s\n",
-ret, extra_dbg ? extra_dbg : "");
-   goto err_rst;
-   }
+   ret = vfio_platform_call_reset(vdev, _dbg);
+   if (ret && vdev->reset_required) {
+   dev_warn(
+   vdev->device,
+   "reset driver is required and reset call failed in open 
(%d) %s\n",
+   ret, extra_dbg ? extra_dbg : "");
+   goto err_rst;
}
-
-   vdev->refcnt++;
-
-   mutex_unlock(_lock);
return 0;
 
 err_rst:
@@ -284,8 +271,6 @@ static int vfio_platform_open(struct vfio_device *core_vdev)
vfio_platform_irq_cleanup(vdev);
 err_irq:
vfio_platform_regions_cleanup(vdev);
-err_reg:
-   mutex_unlock(_lock);
return ret;
 }
 
@@ -616,8 +601,8 @@ static int vfio_platform_mmap(struct vfio_device 
*core_vdev, struct vm_area_stru
 
 static const struct vfio_device_ops vfio_platform_ops = {
.name   = "vfio-platform",
-   .open   = vfio_platform_open,
-   .release= vfio_platform_release,
+   .open_device= vfio_platform_open_device,
+   .close_device   = vfio_platform_close_device,
.ioctl  = vfio_platform_ioctl,
.read   = vfio_platform_read,
.write  = vfio_platform_write,
diff --git a/drivers/vfio/platform/vfio_platform_private.h 

[Intel-gfx] [PATCH 11/13] vfio/ap, ccw: Fix open/close when multiple device FDs are open

2021-07-14 Thread Jason Gunthorpe
The user can open multiple device FDs if it likes, however these open()
functions call vfio_register_notifier() on some device global
state. Calling vfio_register_notifier() twice in a row will trigger a WARN_ON
from notifier_chain_register() and the first close will wrongly delete the
notifier and more.

Since these really want the new open/close_device() semantics just change
the functions over.

Signed-off-by: Jason Gunthorpe 
---
 drivers/s390/cio/vfio_ccw_ops.c   | 8 
 drivers/s390/crypto/vfio_ap_ops.c | 8 
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index c57d2a7f091975..7f540ad0b568bc 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -159,7 +159,7 @@ static int vfio_ccw_mdev_remove(struct mdev_device *mdev)
return 0;
 }
 
-static int vfio_ccw_mdev_open(struct mdev_device *mdev)
+static int vfio_ccw_mdev_open_device(struct mdev_device *mdev)
 {
struct vfio_ccw_private *private =
dev_get_drvdata(mdev_parent_dev(mdev));
@@ -194,7 +194,7 @@ static int vfio_ccw_mdev_open(struct mdev_device *mdev)
return ret;
 }
 
-static void vfio_ccw_mdev_release(struct mdev_device *mdev)
+static void vfio_ccw_mdev_close_device(struct mdev_device *mdev)
 {
struct vfio_ccw_private *private =
dev_get_drvdata(mdev_parent_dev(mdev));
@@ -638,8 +638,8 @@ static const struct mdev_parent_ops vfio_ccw_mdev_ops = {
.supported_type_groups  = mdev_type_groups,
.create = vfio_ccw_mdev_create,
.remove = vfio_ccw_mdev_remove,
-   .open   = vfio_ccw_mdev_open,
-   .release= vfio_ccw_mdev_release,
+   .open_device= vfio_ccw_mdev_open_device,
+   .close_device   = vfio_ccw_mdev_close_device,
.read   = vfio_ccw_mdev_read,
.write  = vfio_ccw_mdev_write,
.ioctl  = vfio_ccw_mdev_ioctl,
diff --git a/drivers/s390/crypto/vfio_ap_ops.c 
b/drivers/s390/crypto/vfio_ap_ops.c
index 122c85c224695e..cee5626fe0a4ef 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1315,7 +1315,7 @@ static int vfio_ap_mdev_reset_queues(struct mdev_device 
*mdev)
return rc;
 }
 
-static int vfio_ap_mdev_open(struct mdev_device *mdev)
+static int vfio_ap_mdev_open_device(struct mdev_device *mdev)
 {
struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
unsigned long events;
@@ -1348,7 +1348,7 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
return ret;
 }
 
-static void vfio_ap_mdev_release(struct mdev_device *mdev)
+static void vfio_ap_mdev_close_device(struct mdev_device *mdev)
 {
struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 
@@ -1427,8 +1427,8 @@ static const struct mdev_parent_ops vfio_ap_matrix_ops = {
.mdev_attr_groups   = vfio_ap_mdev_attr_groups,
.create = vfio_ap_mdev_create,
.remove = vfio_ap_mdev_remove,
-   .open   = vfio_ap_mdev_open,
-   .release= vfio_ap_mdev_release,
+   .open_device= vfio_ap_mdev_open_device,
+   .close_device   = vfio_ap_mdev_close_device,
.ioctl  = vfio_ap_mdev_ioctl,
 };
 
-- 
2.32.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 00/13] Provide core infrastructure for managing open/release

2021-07-14 Thread Jason Gunthorpe
Prologue:

This is the first series of three to send the "mlx5_vfio_pci" driver that has
been discussed on the list for a while now.
 - Reorganize reflck to support splitting vfio_pci
 - Split vfio_pci into vfio_pci/vfio_pci_core and provide infrastructure
   for non-generic VFIO PCI drivers
 - The new driver mlx5_vfio_pci that is a full implementation of
   suspend/resume functionality for mlx5 devices.

A preview of all the patches can be seen here:

https://github.com/jgunthorpe/linux/commits/mlx5_vfio_pci

===

This is in support of Max's series to split vfio-pci. For that to work the
reflck concept embedded in vfio-pci needs to be sharable across all of the
new VFIO PCI drivers which motivated re-examining how this is
implemented.

Another significant issue is how the VFIO PCI core includes code like:

   if (pci_dev_driver(pdev) != _pci_driver)

Which is not scalable if there are going to be multiple different driver
types.

This series takes the approach of moving the "reflck" mechanism into the
core code as a "device set". Each vfio_device driver can specify how
vfio_devices are grouped into the set using a key and the set comes along
with a set-global mutex. The core code manages creating per-device set
memory and associating it with each vfio_device.

In turn this allows the core code to provide an open/close_device()
operation that is called only for the first/last FD, and is called under
the global device set lock.

Review of all the drivers show that they are either already open coding
the first/last semantic or are buggy and missing it. All drivers are
migrated/fixed to the new open/close_device ops and the unused per-FD
open()/release() ops are deleted.

The special behavior of PCI around the bus/slot "reset group" is recast in
terms of the device set which consolidates the reflck, eliminates two
touches of pci_dev_driver(), and allows the reset mechanism to share
across all VFIO PCI drivers. PCI is changed to acquire devices directly
from the device set instead of trying to work backwards from the struct
pci_device.

Overall a few minor bugs are squashed and quite a bit of code is removed
through consolidation.

Jason Gunthorpe (11):
  vfio/samples: Remove module get/put
  vfio: Provide better generic support for open/release vfio_device_ops
  vfio/samples: Delete useless open/close
  vfio/fsl: Move to the device set infrastructure
  vfio/platform: Use open_device() instead of open coding a refcnt
scheme
  vfio/pci: Change vfio_pci_try_bus_reset() to use the dev_set
  vfio/pci: Reorganize VFIO_DEVICE_PCI_HOT_RESET to use the device set
  vfio/mbochs: Fix close when multiple device FDs are open
  vfio/ap,ccw: Fix open/close when multiple device FDs are open
  vfio/gvt: Fix open/close when multiple device FDs are open
  vfio: Remove struct vfio_device_ops open/release

Max Gurtovoy (1):
  vfio: Introduce a vfio_uninit_group_dev() API call

Yishai Hadas (1):
  vfio/pci: Move to the device set infrastructure

 Documentation/driver-api/vfio.rst |   4 +-
 drivers/gpu/drm/i915/gvt/kvmgt.c  |   8 +-
 drivers/s390/cio/vfio_ccw_ops.c   |   8 +-
 drivers/s390/crypto/vfio_ap_ops.c |   8 +-
 drivers/vfio/fsl-mc/vfio_fsl_mc.c | 158 ++
 drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c|   6 +-
 drivers/vfio/fsl-mc/vfio_fsl_mc_private.h |   7 -
 drivers/vfio/mdev/vfio_mdev.c |  29 +-
 drivers/vfio/pci/vfio_pci.c   | 459 ++
 drivers/vfio/pci/vfio_pci_private.h   |   7 -
 drivers/vfio/platform/vfio_platform_common.c  |  86 ++--
 drivers/vfio/platform/vfio_platform_private.h |   1 -
 drivers/vfio/vfio.c   | 149 +-
 include/linux/mdev.h  |   9 +-
 include/linux/vfio.h  |  26 +-
 samples/vfio-mdev/mbochs.c|  16 +-
 samples/vfio-mdev/mdpy.c  |  40 +-
 samples/vfio-mdev/mtty.c  |  40 +-
 18 files changed, 439 insertions(+), 622 deletions(-)

-- 
2.32.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 13/13] vfio: Remove struct vfio_device_ops open/release

2021-07-14 Thread Jason Gunthorpe
Nothing uses this anymore, delete it.

Signed-off-by: Yishai Hadas 
Signed-off-by: Jason Gunthorpe 
---
 drivers/vfio/mdev/vfio_mdev.c | 22 --
 drivers/vfio/vfio.c   | 14 +-
 include/linux/mdev.h  |  7 ---
 include/linux/vfio.h  |  4 
 4 files changed, 1 insertion(+), 46 deletions(-)

diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c
index 3c384d2350b64a..9e39f23ca2f546 100644
--- a/drivers/vfio/mdev/vfio_mdev.c
+++ b/drivers/vfio/mdev/vfio_mdev.c
@@ -37,26 +37,6 @@ static void vfio_mdev_close_device(struct vfio_device 
*core_vdev)
parent->ops->close_device(mdev);
 }
 
-static int vfio_mdev_open(struct vfio_device *core_vdev)
-{
-   struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
-   struct mdev_parent *parent = mdev->type->parent;
-
-   if (unlikely(!parent->ops->open))
-   return -EINVAL;
-
-   return parent->ops->open(mdev);
-}
-
-static void vfio_mdev_release(struct vfio_device *core_vdev)
-{
-   struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
-   struct mdev_parent *parent = mdev->type->parent;
-
-   if (likely(parent->ops->release))
-   parent->ops->release(mdev);
-}
-
 static long vfio_mdev_unlocked_ioctl(struct vfio_device *core_vdev,
 unsigned int cmd, unsigned long arg)
 {
@@ -122,8 +102,6 @@ static const struct vfio_device_ops vfio_mdev_dev_ops = {
.name   = "vfio-mdev",
.open_device= vfio_mdev_open_device,
.close_device   = vfio_mdev_close_device,
-   .open   = vfio_mdev_open,
-   .release= vfio_mdev_release,
.ioctl  = vfio_mdev_unlocked_ioctl,
.read   = vfio_mdev_read,
.write  = vfio_mdev_write,
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 26d340283044e7..1d68e82137a735 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1475,19 +1475,13 @@ static int vfio_group_get_device_fd(struct vfio_group 
*group, char *buf)
}
mutex_unlock(>dev_set->lock);
 
-   if (device->ops->open) {
-   ret = device->ops->open(device);
-   if (ret)
-   goto err_close_device;
-   }
-
/*
 * We can't use anon_inode_getfd() because we need to modify
 * the f_mode flags directly to allow more than just ioctls
 */
fdno = ret = get_unused_fd_flags(O_CLOEXEC);
if (ret < 0)
-   goto err_release;
+   goto err_close_device;
 
filep = anon_inode_getfile("[vfio-device]", _device_fops,
   device, O_RDWR);
@@ -1514,9 +1508,6 @@ static int vfio_group_get_device_fd(struct vfio_group 
*group, char *buf)
 
 err_fd:
put_unused_fd(fdno);
-err_release:
-   if (device->ops->release)
-   device->ops->release(device);
 err_close_device:
mutex_lock(>dev_set->lock);
if (device->open_count == 1 && device->ops->close_device)
@@ -1664,9 +1655,6 @@ static int vfio_device_fops_release(struct inode *inode, 
struct file *filep)
 {
struct vfio_device *device = filep->private_data;
 
-   if (device->ops->release)
-   device->ops->release(device);
-
mutex_lock(>dev_set->lock);
if (!--device->open_count && device->ops->close_device)
device->ops->close_device(device);
diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index cb5b7ed1d7c30d..68427e8fadebd6 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -72,11 +72,6 @@ struct device *mtype_get_parent_dev(struct mdev_type *mtype);
  * @mdev: mdev_device device structure which is being
  *destroyed
  * Returns integer: success (0) or error (< 0)
- * @open:  Open mediated device.
- * @mdev: mediated device.
- * Returns integer: success (0) or error (< 0)
- * @release:   release mediated device
- * @mdev: mediated device.
  * @read:  Read emulation callback
  * @mdev: mediated device structure
  * @buf: read buffer
@@ -113,8 +108,6 @@ struct mdev_parent_ops {
int (*remove)(struct mdev_device *mdev);
int (*open_device)(struct mdev_device *mdev);
void(*close_device)(struct mdev_device *mdev);
-   int (*open)(struct mdev_device *mdev);
-   void(*release)(struct mdev_device *mdev);
ssize_t (*read)(struct mdev_device *mdev, char __user *buf,
size_t count, loff_t *ppos);
ssize_t (*write)(struct mdev_device *mdev, const char __user *buf,
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index f0e6a72875e471..b53a9557884ada 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ 

[Intel-gfx] [PATCH 09/13] vfio/pci: Reorganize VFIO_DEVICE_PCI_HOT_RESET to use the device set

2021-07-14 Thread Jason Gunthorpe
Like vfio_pci_try_bus_reset() this code wants to reset all of the devices
in the "reset group" which is the same membership as the device set.

Instead of trying to reconstruct the device set from the PCI list go
directly from the device set's device list to execute the reset.

The same basic structure as vfio_pci_try_bus_reset() is used. The
'vfio_devices' struct is replaced with the device set linked list and we
simply sweep it multiple times under the lock.

This eliminates a memory allocation and get/put traffic and another
improperly locked test of pci_dev_driver().

Signed-off-by: Jason Gunthorpe 
---
 drivers/vfio/pci/vfio_pci.c | 205 
 1 file changed, 88 insertions(+), 117 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index fbc20f6d2dd412..ff23168b8dc551 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -223,9 +223,11 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_device 
*vdev)
}
 }
 
+struct vfio_pci_group_info;
 static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
 static void vfio_pci_disable(struct vfio_pci_device *vdev);
-static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data);
+static int vfio_hot_reset_device_set(struct vfio_pci_device *vdev,
+struct vfio_pci_group_info *groups);
 
 /*
  * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
@@ -645,37 +647,11 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void 
*data)
return 0;
 }
 
-struct vfio_pci_group_entry {
-   struct vfio_group *group;
-   int id;
-};
-
 struct vfio_pci_group_info {
int count;
-   struct vfio_pci_group_entry *groups;
+   struct vfio_group **groups;
 };
 
-static int vfio_pci_validate_devs(struct pci_dev *pdev, void *data)
-{
-   struct vfio_pci_group_info *info = data;
-   struct iommu_group *group;
-   int id, i;
-
-   group = iommu_group_get(>dev);
-   if (!group)
-   return -EPERM;
-
-   id = iommu_group_id(group);
-
-   for (i = 0; i < info->count; i++)
-   if (info->groups[i].id == id)
-   break;
-
-   iommu_group_put(group);
-
-   return (i == info->count) ? -EINVAL : 0;
-}
-
 static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot 
*slot)
 {
for (; pdev; pdev = pdev->bus->self)
@@ -753,12 +729,6 @@ int vfio_pci_register_dev_region(struct vfio_pci_device 
*vdev,
return 0;
 }
 
-struct vfio_devices {
-   struct vfio_pci_device **devices;
-   int cur_index;
-   int max_index;
-};
-
 static long vfio_pci_ioctl(struct vfio_device *core_vdev,
   unsigned int cmd, unsigned long arg)
 {
@@ -1127,11 +1097,10 @@ static long vfio_pci_ioctl(struct vfio_device 
*core_vdev,
} else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
struct vfio_pci_hot_reset hdr;
int32_t *group_fds;
-   struct vfio_pci_group_entry *groups;
+   struct vfio_group **groups;
struct vfio_pci_group_info info;
-   struct vfio_devices devs = { .cur_index = 0 };
bool slot = false;
-   int i, group_idx, mem_idx = 0, count = 0, ret = 0;
+   int group_idx, count = 0, ret = 0;
 
minsz = offsetofend(struct vfio_pci_hot_reset, count);
 
@@ -1198,9 +1167,7 @@ static long vfio_pci_ioctl(struct vfio_device *core_vdev,
break;
}
 
-   groups[group_idx].group = group;
-   groups[group_idx].id =
-   vfio_external_user_iommu_id(group);
+   groups[group_idx] = group;
}
 
kfree(group_fds);
@@ -1212,64 +1179,11 @@ static long vfio_pci_ioctl(struct vfio_device 
*core_vdev,
info.count = hdr.count;
info.groups = groups;
 
-   /*
-* Test whether all the affected devices are contained
-* by the set of groups provided by the user.
-*/
-   ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
-   vfio_pci_validate_devs,
-   , slot);
-   if (ret)
-   goto hot_reset_release;
-
-   devs.max_index = count;
-   devs.devices = kcalloc(count, sizeof(struct vfio_device *),
-  GFP_KERNEL);
-   if (!devs.devices) {
-   ret = -ENOMEM;
-   goto hot_reset_release;
-   }
-
-   /*
-* We need to get memory_lock for each device, but devices
-* can share mmap_lock, therefore we need to zap and 

[Intel-gfx] [PATCH 01/13] vfio/samples: Remove module get/put

2021-07-14 Thread Jason Gunthorpe
The patch to move the get/put to core and the patch to convert the samples
to use vfio_device crossed in a way that this was missed. When both
patches are together the samples do not need their own get/put.

Fixes: 437e41368c01 ("vfio/mdpy: Convert to use vfio_register_group_dev()")
Fixes: 681c1615f891 ("vfio/mbochs: Convert to use vfio_register_group_dev()")
Signed-off-by: Jason Gunthorpe 
---
 samples/vfio-mdev/mbochs.c | 4 
 samples/vfio-mdev/mdpy.c   | 4 
 2 files changed, 8 deletions(-)

diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
index 6c0f229db36a1a..e81b875b4d87b4 100644
--- a/samples/vfio-mdev/mbochs.c
+++ b/samples/vfio-mdev/mbochs.c
@@ -1274,9 +1274,6 @@ static long mbochs_ioctl(struct vfio_device *vdev, 
unsigned int cmd,
 
 static int mbochs_open(struct vfio_device *vdev)
 {
-   if (!try_module_get(THIS_MODULE))
-   return -ENODEV;
-
return 0;
 }
 
@@ -1300,7 +1297,6 @@ static void mbochs_close(struct vfio_device *vdev)
mbochs_put_pages(mdev_state);
 
mutex_unlock(_state->ops_lock);
-   module_put(THIS_MODULE);
 }
 
 static ssize_t
diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c
index 393c9df6f6a010..a7d4ed28d66411 100644
--- a/samples/vfio-mdev/mdpy.c
+++ b/samples/vfio-mdev/mdpy.c
@@ -611,15 +611,11 @@ static long mdpy_ioctl(struct vfio_device *vdev, unsigned 
int cmd,
 
 static int mdpy_open(struct vfio_device *vdev)
 {
-   if (!try_module_get(THIS_MODULE))
-   return -ENODEV;
-
return 0;
 }
 
 static void mdpy_close(struct vfio_device *vdev)
 {
-   module_put(THIS_MODULE);
 }
 
 static ssize_t
-- 
2.32.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 03/13] vfio: Provide better generic support for open/release vfio_device_ops

2021-07-14 Thread Jason Gunthorpe
Currently the driver ops have an open/release pair that is called once
each time a device FD is opened or closed. Add an additional set of
open/close_device() ops which are called when the device FD is opened for
the first time and closed for the last time.

An analysis shows that all of the drivers require this semantic. Some are
open coding it as part of their reflck implementation, and some are just
buggy and miss it completely.

To retain the current semantics PCI and FSL depend on, introduce the idea
of a "device set" which is a grouping of vfio_device's that share the same
lock around opening.

The device set is established by providing a 'set_id' pointer. All
vfio_device's that provide the same pointer will be joined to the same
singleton memory and lock across the whole set. This effectively replaces
the oddly named reflck.

After conversion the set_id will be sourced from:
 - A struct device from a fsl_mc_device (fsl)
 - A struct pci_slot (pci)
 - A struct pci_bus (pci)
 - The struct vfio_device (everything)

The design ensures that the above pointers are live as long as the
vfio_device is registered, so they form reliable unique keys to group
vfio_devices into sets.

This implementation uses xarray instead of searching through the driver
core structures, which simplifies the somewhat tricky locking in this
area.

Following patches convert all the drivers.

Signed-off-by: Yishai Hadas 
Signed-off-by: Jason Gunthorpe 
---
 drivers/vfio/mdev/vfio_mdev.c |  22 ++
 drivers/vfio/vfio.c   | 144 --
 include/linux/mdev.h  |   2 +
 include/linux/vfio.h  |  19 +
 4 files changed, 165 insertions(+), 22 deletions(-)

diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c
index a5c77ccb24f70a..3c384d2350b64a 100644
--- a/drivers/vfio/mdev/vfio_mdev.c
+++ b/drivers/vfio/mdev/vfio_mdev.c
@@ -17,6 +17,26 @@
 
 #include "mdev_private.h"
 
+static int vfio_mdev_open_device(struct vfio_device *core_vdev)
+{
+   struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
+   struct mdev_parent *parent = mdev->type->parent;
+
+   if (unlikely(!parent->ops->open_device))
+   return -EINVAL;
+
+   return parent->ops->open_device(mdev);
+}
+
+static void vfio_mdev_close_device(struct vfio_device *core_vdev)
+{
+   struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
+   struct mdev_parent *parent = mdev->type->parent;
+
+   if (likely(parent->ops->close_device))
+   parent->ops->close_device(mdev);
+}
+
 static int vfio_mdev_open(struct vfio_device *core_vdev)
 {
struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
@@ -100,6 +120,8 @@ static void vfio_mdev_request(struct vfio_device 
*core_vdev, unsigned int count)
 
 static const struct vfio_device_ops vfio_mdev_dev_ops = {
.name   = "vfio-mdev",
+   .open_device= vfio_mdev_open_device,
+   .close_device   = vfio_mdev_close_device,
.open   = vfio_mdev_open,
.release= vfio_mdev_release,
.ioctl  = vfio_mdev_unlocked_ioctl,
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index cc375df0fd5dda..6908c2ae9b36f6 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -96,6 +96,74 @@ module_param_named(enable_unsafe_noiommu_mode,
 MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  
This mode provides no device isolation, no DMA translation, no host kernel 
protection, cannot be used for device assignment to virtual machines, requires 
RAWIO permissions, and will taint the kernel.  If you do not know what this is 
for, step away. (default: false)");
 #endif
 
+static DEFINE_XARRAY(vfio_device_set_xa);
+
+int vfio_assign_device_set(struct vfio_device *device, void *set_id)
+{
+   struct vfio_device_set *alloc_dev_set = NULL;
+   struct vfio_device_set *dev_set;
+
+   if (WARN_ON(!set_id))
+   return -EINVAL;
+
+   /*
+* Atomically acquire a singleton object in the xarray for this set_id
+*/
+again:
+   xa_lock(_device_set_xa);
+   if (alloc_dev_set) {
+   dev_set = __xa_cmpxchg(_device_set_xa,
+  (unsigned long)set_id, NULL,
+  alloc_dev_set, GFP_KERNEL);
+   if (xa_is_err(dev_set)) {
+   xa_unlock(_device_set_xa);
+   kfree(alloc_dev_set);
+   return xa_err(dev_set);
+   }
+   if (!dev_set)
+   dev_set = alloc_dev_set;
+   } else
+   dev_set = xa_load(_device_set_xa, (unsigned long)set_id);
+   if (dev_set) {
+   dev_set->device_count++;
+   xa_unlock(_device_set_xa);
+   device->dev_set = dev_set;
+   if (dev_set != alloc_dev_set)
+   kfree(alloc_dev_set);
+   return 

[Intel-gfx] linux-next: Signed-off-by missing for commit in the drm-intel tree

2021-07-14 Thread Stephen Rothwell
Hi all,

Commit

  db47fe727e1f ("drm/i915/step: s/_revid_tbl/_revids")

is missing a Signed-off-by from its committer.

-- 
Cheers,
Stephen Rothwell


pgpibXSRKsvDB.pgp
Description: OpenPGP digital signature
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PULL] gvt-fixes

2021-07-14 Thread Zhenyu Wang

Hi,

Here's one fix of shadow ppgtt invalidation with proper vGPU D3 state tracking.

Thanks
--
The following changes since commit 62fb9874f5da54fdb243003b386128037319b219:

  Linux 5.13 (2021-06-27 15:21:11 -0700)

are available in the Git repository at:

  https://github.com/intel/gvt-linux tags/gvt-fixes-2021-07-15

for you to fetch changes up to c90b4503ccf42d9d367e843c223df44aa550e82a:

  drm/i915/gvt: Clear d3_entered on elsp cmd submission. (2021-07-08 16:42:34 
+0800)


gvt-fixes-2021-07-15

- Fix shadow ppgtt invalidation with proper D3 state tracking (Colin)


Colin Xu (1):
  drm/i915/gvt: Clear d3_entered on elsp cmd submission.

 drivers/gpu/drm/i915/gvt/handlers.c | 15 +++
 1 file changed, 15 insertions(+)


signature.asc
Description: PGP signature
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 02/18] drm/sched: Barriers are needed for entity->last_scheduled

2021-07-14 Thread Andrey Grodzovsky


On 2021-07-13 12:45 p.m., Daniel Vetter wrote:

On Tue, Jul 13, 2021 at 6:11 PM Andrey Grodzovsky
 wrote:

On 2021-07-13 5:10 a.m., Daniel Vetter wrote:

On Tue, Jul 13, 2021 at 9:25 AM Christian König
 wrote:

Am 13.07.21 um 08:50 schrieb Daniel Vetter:

On Tue, Jul 13, 2021 at 8:35 AM Christian König
 wrote:

Am 12.07.21 um 19:53 schrieb Daniel Vetter:

It might be good enough on x86 with just READ_ONCE, but the write side
should then at least be WRITE_ONCE because x86 has total store order.

It's definitely not enough on arm.

Fix this proplery, which means
- explain the need for the barrier in both places
- point at the other side in each comment

Also pull out the !sched_list case as the first check, so that the
code flow is clearer.

While at it sprinkle some comments around because it was very
non-obvious to me what's actually going on here and why.

Note that we really need full barriers here, at first I thought
store-release and load-acquire on ->last_scheduled would be enough,
but we actually requiring ordering between that and the queue state.

v2: Put smp_rmp() in the right place and fix up comment (Andrey)

Signed-off-by: Daniel Vetter 
Cc: "Christian König" 
Cc: Steven Price 
Cc: Daniel Vetter 
Cc: Andrey Grodzovsky 
Cc: Lee Jones 
Cc: Boris Brezillon 
---
 drivers/gpu/drm/scheduler/sched_entity.c | 27 ++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
b/drivers/gpu/drm/scheduler/sched_entity.c
index f7347c284886..89e3f6eaf519 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -439,8 +439,16 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct 
drm_sched_entity *entity)
 dma_fence_set_error(_job->s_fence->finished, -ECANCELED);

 dma_fence_put(entity->last_scheduled);
+
 entity->last_scheduled = dma_fence_get(_job->s_fence->finished);

+ /*
+  * If the queue is empty we allow drm_sched_entity_select_rq() to
+  * locklessly access ->last_scheduled. This only works if we set the
+  * pointer before we dequeue and if we a write barrier here.
+  */
+ smp_wmb();
+

Again, conceptual those barriers should be part of the spsc_queue
container and not externally.

That would be extremely unusual api. Let's assume that your queue is
very dumb, and protected by a simple lock. That's about the maximum
any user could expect.

But then you still need barriers here, because linux locks (spinlock,
mutex) are defined to be one-way barriers: Stuff that's inside is
guaranteed to be done insinde, but stuff outside of the locked region
can leak in. They're load-acquire/store-release barriers. So not good
enough.

You really need to have barriers here, and they really all need to be
documented properly. And yes that's a shit-ton of work in drm/sched,
because it's full of yolo lockless stuff.

The other case you could make is that this works like a wakeup queue,
or similar. The rules there are:
- wake_up (i.e. pushing something into the queue) is a store-release barrier
- the waked up (i.e. popping an entry) is a load acquire barrier
Which is obviously needed because otherwise you don't have coherency
for the data queued up. And again not the barriers you're looking for
here.

Exactly that was the idea, yes.


Either way, we'd still need the comments, because it's still lockless
trickery, and every single one of that needs to have a comment on both
sides to explain what's going on.

Essentially replace spsc_queue with an llist underneath, and that's
the amount of barriers a data structure should provide. Anything else
is asking your datastructure to paper over bugs in your users.

This is similar to how atomic_t is by default completely unordered,
and users need to add barriers as needed, with comments.

My main problem is as always that kernel atomics work different than
userspace atomics.


I think this is all to make sure people don't just write lockless algorithms
because it's a cool idea, but are forced to think this all through.
Which seems to not have happened very consistently for drm/sched, so I
guess needs to be fixed.

Well at least initially that was all perfectly thought through. The
problem is nobody is really maintaining that stuff.


I'm definitely not going to hide all that by making the spsc_queue
stuff provide random unjustified barriers just because that would
paper over drm/sched bugs. We need to fix the actual bugs, and
preferrable all of them. I've found a few, but I wasn't involved in
drm/sched thus far, so best I can do is discover them as we go.

I don't think that those are random unjustified barriers at all and it
sounds like you didn't grasp what I said here.

See the spsc queue must have the following semantics:

1. When you pop a job all changes made before you push the job must be
visible.

This is the standard barriers that also wake-up queues have, it's just

Re: [Intel-gfx] [PATCH 07/16] drm/i915/guc/slpc: Enable slpc and add related H2G events

2021-07-14 Thread Belgaumkar, Vinay




On 7/10/2021 10:37 AM, Michal Wajdeczko wrote:



On 10.07.2021 03:20, Vinay Belgaumkar wrote:

Add methods for interacting with guc for enabling SLPC. Enable
SLPC after guc submission has been established. GuC load will


s/guc/GuC


fail if SLPC cannot be successfully initialized. Add various
helper methods to set/unset the parameters for SLPC. They can
be set using h2g calls or directly setting bits in the shared


/h2g/H2G


done.



data structure.

Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Sundaresan Sujaritha 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 221 ++
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   4 -
  drivers/gpu/drm/i915/gt/uc/intel_uc.c |  10 +
  3 files changed, 231 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 94e2f19951aa..e579408d1c19 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -18,6 +18,61 @@ static inline struct intel_guc *slpc_to_guc(struct 
intel_guc_slpc *slpc)
return container_of(slpc, struct intel_guc, slpc);
  }
  
+static inline struct intel_gt *slpc_to_gt(struct intel_guc_slpc *slpc)

+{
+   return guc_to_gt(slpc_to_guc(slpc));
+}
+
+static inline struct drm_i915_private *slpc_to_i915(struct intel_guc_slpc 
*slpc)
+{
+   return (slpc_to_gt(slpc))->i915;
+}
+
+static void slpc_mem_set_param(struct slpc_shared_data *data,
+   u32 id, u32 value)
+{
+   GEM_BUG_ON(id >= SLPC_MAX_OVERRIDE_PARAMETERS);
+   /* When the flag bit is set, corresponding value will be read
+* and applied by slpc.


fix format of multi-line comment
s/slpc/SLPC


Done.




+*/
+   data->override_params_set_bits[id >> 5] |= (1 << (id % 32));


use __set_bit instead


+   data->override_params_values[id] = value;
+}
+
+static void slpc_mem_unset_param(struct slpc_shared_data *data,
+u32 id)
+{
+   GEM_BUG_ON(id >= SLPC_MAX_OVERRIDE_PARAMETERS);
+   /* When the flag bit is unset, corresponding value will not be
+* read by slpc.
+*/
+   data->override_params_set_bits[id >> 5] &= (~(1 << (id % 32)));


same here


Done.




+   data->override_params_values[id] = 0;
+}
+
+static void slpc_mem_task_control(struct slpc_shared_data *data,
+u64 val, u32 enable_id, u32 disable_id)


hmm, u64 to pass simple tri-state flag ?


+{
+   /* Enabling a param involves setting the enable_id
+* to 1 and disable_id to 0. Setting it to default
+* will unset both enable and disable ids and let
+* slpc choose it's default values.


fix format + s/slpc/SLPC


+*/
+   if (val == SLPC_PARAM_TASK_DEFAULT) {
+   /* set default */
+   slpc_mem_unset_param(data, enable_id);
+   slpc_mem_unset_param(data, disable_id);
+   } else if (val == SLPC_PARAM_TASK_ENABLED) {
+   /* set enable */
+   slpc_mem_set_param(data, enable_id, 1);
+   slpc_mem_set_param(data, disable_id, 0);
+   } else if (val == SLPC_PARAM_TASK_DISABLED) {
+   /* set disable */
+   slpc_mem_set_param(data, disable_id, 1);
+   slpc_mem_set_param(data, enable_id, 0);
+   }


maybe instead of SLPC_PARAM_TASK_* flags (that btw were confusing me
earlier) you can define 3x small helpers:

static void slpc_mem_set_default(data, enable_id, disable_id);
static void slpc_mem_set_enabled(data, enable_id, disable_id);
static void slpc_mem_set_disabled(data, enable_id, disable_id);



Agree, done.




+}
+
  static int slpc_shared_data_init(struct intel_guc_slpc *slpc)
  {
struct intel_guc *guc = slpc_to_guc(slpc);
@@ -34,6 +89,128 @@ static int slpc_shared_data_init(struct intel_guc_slpc 
*slpc)
return err;
  }
  
+/*

+ * Send SLPC event to guc
+ *
+ */
+static int slpc_send(struct intel_guc_slpc *slpc,
+   struct slpc_event_input *input,
+   u32 in_len)
+{
+   struct intel_guc *guc = slpc_to_guc(slpc);
+   u32 *action;
+
+   action = (u32 *)input;
+   action[0] = INTEL_GUC_ACTION_SLPC_REQUEST;


why not just updating input->h2g_action_id ?


Removed this, using your suggestion below instead.




+
+   return intel_guc_send(guc, action, in_len);
+}
+
+static bool slpc_running(struct intel_guc_slpc *slpc)
+{
+   struct slpc_shared_data *data;
+   u32 slpc_global_state;
+
+   GEM_BUG_ON(!slpc->vma);
+
+   drm_clflush_virt_range(slpc->vaddr, sizeof(struct slpc_shared_data));


do you really need to flush all 8K of shared data?
it looks that you only need single u32


sure.




+   data = slpc->vaddr;
+
+   slpc_global_state = data->global_state;
+
+   return (data->global_state == SLPC_GLOBAL_STATE_RUNNING);
+}
+
+static int 

Re: [Intel-gfx] [PATCH 19/47] drm/i915/guc: Ensure request ordering via completion fences

2021-07-14 Thread Daniele Ceraolo Spurio



On 6/24/2021 12:04 AM, Matthew Brost wrote:

If two requests are on the same ring, they are explicitly ordered by the
HW. So, a submission fence is sufficient to ensure ordering when using
the new GuC submission interface. Conversely, if two requests share a
timeline and are on the same physical engine but different context this
doesn't ensure ordering on the new GuC submission interface. So, a
completion fence needs to be used to ensure ordering.

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
---
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c   |  1 -
  drivers/gpu/drm/i915/i915_request.c | 17 +
  2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 0a6ccdf32316..010e46dd6b16 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -926,7 +926,6 @@ static void guc_context_sched_disable(struct intel_context 
*ce)
 * request doesn't slip through the 'context_pending_disable' fence.
 */
if (unlikely(atomic_add_unless(>pin_count, -2, 2))) {
-   spin_unlock_irqrestore(>guc_state.lock, flags);


Why is this unlock() being dropped here?


return;
}
guc_id = prep_context_pending_disable(ce);
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 9dad3df5eaf7..d92c9f25c9f4 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -444,6 +444,7 @@ void i915_request_retire_upto(struct i915_request *rq)
  
  	do {

tmp = list_first_entry(>requests, typeof(*tmp), link);
+   GEM_BUG_ON(!i915_request_completed(tmp));


This condition in the BUG_ON is not a new requirement introduced by the 
changes below, right? just want to make sure I'm not missing anything.



} while (i915_request_retire(tmp) && tmp != rq);
  }
  
@@ -1405,6 +1406,9 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)

return err;
  }
  
+static int

+i915_request_await_request(struct i915_request *to, struct i915_request *from);
+
  int
  i915_request_await_execution(struct i915_request *rq,
 struct dma_fence *fence,
@@ -1464,12 +1468,13 @@ await_request_submit(struct i915_request *to, struct 
i915_request *from)
 * the waiter to be submitted immediately to the physical engine
 * as it may then bypass the virtual request.
 */
-   if (to->engine == READ_ONCE(from->engine))
+   if (to->engine == READ_ONCE(from->engine)) {
return i915_sw_fence_await_sw_fence_gfp(>submit,
>submit,
I915_FENCE_GFP);
-   else
+   } else {
return __i915_request_await_execution(to, from, NULL);
+   }


{ }  are not needed here. I'm guessing they're leftover from a dropped 
change.



  }
  
  static int

@@ -1493,7 +1498,8 @@ i915_request_await_request(struct i915_request *to, 
struct i915_request *from)
return ret;
}
  
-	if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))

+   if (!intel_engine_uses_guc(to->engine) &&
+   is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))
ret = await_request_submit(to, from);
else
ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
@@ -1654,6 +1660,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
prev = to_request(__i915_active_fence_set(>last_request,
  >fence));
if (prev && !__i915_request_is_complete(prev)) {
+   bool uses_guc = intel_engine_uses_guc(rq->engine);
+
/*
 * The requests are supposed to be kept in order. However,
 * we need to be wary in case the timeline->last_request
@@ -1664,7 +1672,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
   i915_seqno_passed(prev->fence.seqno,
 rq->fence.seqno));
  
-		if (is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask))

+   if ((!uses_guc && is_power_of_2(READ_ONCE(prev->engine)->mask | 
rq->engine->mask)) ||
+   (uses_guc && prev->context == rq->context))


Would it be worth adding an engine flag instead of checking which 
back-end is in use? I915_ENGINE_IS_FIFO or something. Not a blocker.


Daniele


i915_sw_fence_await_sw_fence(>submit,
 >submit,
 >submitq);




___
Intel-gfx mailing list

Re: [Intel-gfx] [PATCH 26/47] drm/i915/guc: GuC virtual engines

2021-07-14 Thread Daniele Ceraolo Spurio



On 6/24/2021 12:04 AM, Matthew Brost wrote:

Implement GuC virtual engines. Rather simple implementation, basically
just allocate an engine, setup context enter / exit function to virtual
engine specific functions, set all other variables / functions to guc
versions, and set the engine mask to that of all the siblings.

Cc: Daniele Ceraolo Spurio 
Signed-off-by: Matthew Brost 
---
  drivers/gpu/drm/i915/gem/i915_gem_context.c   |  19 +-
  drivers/gpu/drm/i915/gem/i915_gem_context.h   |   1 +
  drivers/gpu/drm/i915/gt/intel_context_types.h |  10 +
  drivers/gpu/drm/i915/gt/intel_engine.h|  45 +++-
  drivers/gpu/drm/i915/gt/intel_engine_cs.c |  14 +
  .../drm/i915/gt/intel_execlists_submission.c  | 186 +++--
  .../drm/i915/gt/intel_execlists_submission.h  |  11 -
  drivers/gpu/drm/i915/gt/selftest_execlists.c  |  20 +-
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 253 +-
  .../gpu/drm/i915/gt/uc/intel_guc_submission.h |   2 +
  10 files changed, 429 insertions(+), 132 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 5c07e6abf16a..8a9293e0ca92 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -72,7 +72,6 @@
  #include "gt/intel_context_param.h"
  #include "gt/intel_engine_heartbeat.h"
  #include "gt/intel_engine_user.h"
-#include "gt/intel_execlists_submission.h" /* virtual_engine */
  #include "gt/intel_gpu_commands.h"
  #include "gt/intel_ring.h"
  
@@ -1568,9 +1567,6 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data)

if (!HAS_EXECLISTS(i915))
return -ENODEV;
  
-	if (intel_uc_uses_guc_submission(>gt.uc))

-   return -ENODEV; /* not implement yet */
-
if (get_user(idx, >engine_index))
return -EFAULT;
  
@@ -1627,7 +1623,7 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data)

}
}
  
-	ce = intel_execlists_create_virtual(siblings, n);

+   ce = intel_engine_create_virtual(siblings, n);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out_siblings;
@@ -1723,13 +1719,9 @@ set_engines__bond(struct i915_user_extension __user 
*base, void *data)
 * A non-virtual engine has no siblings to choose between; and
 * a submit fence will always be directed to the one engine.
 */
-   if (intel_engine_is_virtual(virtual)) {
-   err = intel_virtual_engine_attach_bond(virtual,
-  master,
-  bond);
-   if (err)
-   return err;
-   }
+   err = intel_engine_attach_bond(virtual, master, bond);
+   if (err)
+   return err;
}
  
  	return 0;

@@ -2116,8 +2108,7 @@ static int clone_engines(struct i915_gem_context *dst,
 * the virtual engine instead.
 */
if (intel_engine_is_virtual(engine))
-   clone->engines[n] =
-   intel_execlists_clone_virtual(engine);
+   clone->engines[n] = intel_engine_clone_virtual(engine);
else
clone->engines[n] = intel_context_create(engine);
if (IS_ERR_OR_NULL(clone->engines[n])) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index b5c908f3f4f2..ba772762f7b9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -10,6 +10,7 @@
  #include "i915_gem_context_types.h"
  
  #include "gt/intel_context.h"

+#include "gt/intel_engine.h"
  
  #include "i915_drv.h"

  #include "i915_gem.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index e7af6a2368f8..6945963a31ba 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -47,6 +47,16 @@ struct intel_context_ops {
  
  	void (*reset)(struct intel_context *ce);

void (*destroy)(struct kref *kref);
+
+   /* virtual engine/context interface */
+   struct intel_context *(*create_virtual)(struct intel_engine_cs **engine,
+   unsigned int count);
+   struct intel_context *(*clone_virtual)(struct intel_engine_cs *engine);
+   struct intel_engine_cs *(*get_sibling)(struct intel_engine_cs *engine,
+  unsigned int sibling);
+   int (*attach_bond)(struct intel_engine_cs *engine,
+  const struct intel_engine_cs *master,
+  const struct intel_engine_cs 

Re: [Intel-gfx] ✗ Fi.CI.IGT: failure for Minor revid/stepping and workaround cleanup (rev5)

2021-07-14 Thread Matt Roper
On Wed, Jul 14, 2021 at 08:48:12AM +, Patchwork wrote:
> == Series Details ==
> 
> Series: Minor revid/stepping and workaround cleanup (rev5)
> URL   : https://patchwork.freedesktop.org/series/92299/
> State : failure
> 
> == Summary ==
> 
> CI Bug Log - changes from CI_DRM_10343_full -> Patchwork_20590_full
> 
> 
> Summary
> ---
> 
>   **FAILURE**
> 
>   Serious unknown changes coming with Patchwork_20590_full absolutely need to 
> be
>   verified manually.
>   
>   If you think the reported changes have nothing to do with the changes
>   introduced in Patchwork_20590_full, please notify your bug team to allow 
> them
>   to document this new failure mode, which will reduce false positives in CI.
> 
>   
> 
> Possible new issues
> ---
> 
>   Here are the unknown changes that may have been introduced in 
> Patchwork_20590_full:
> 
> ### IGT changes ###
> 
>  Possible regressions 
> 
>   * igt@dumb_buffer@map-invalid-size:
> - shard-snb:  NOTRUN -> [DMESG-WARN][1]
>[1]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-snb7/igt@dumb_buf...@map-invalid-size.html

Appears to be another form of
https://gitlab.freedesktop.org/drm/intel/-/issues/3746

> 
>   * igt@kms_frontbuffer_tracking@psr-1p-offscren-pri-shrfb-draw-mmap-gtt:
> - shard-skl:  [PASS][2] -> [FAIL][3]
>[2]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-skl4/igt@kms_frontbuffer_track...@psr-1p-offscren-pri-shrfb-draw-mmap-gtt.html
>[3]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-skl8/igt@kms_frontbuffer_track...@psr-1p-offscren-pri-shrfb-draw-mmap-gtt.html
> 

Seems similar to https://gitlab.freedesktop.org/drm/intel/-/issues/2546
that we see occasionally for the FBC subtests, although this one is a
PSR subtest.  Doesn't appear to be related to the revid/stepping
conversion though.

>   
>  Warnings 
> 
>   * igt@dumb_buffer@map-invalid-size:
> - shard-apl:  [DMESG-WARN][4] ([i915#1982]) -> [DMESG-WARN][5]
>[4]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-apl6/igt@dumb_buf...@map-invalid-size.html
>[5]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-apl1/igt@dumb_buf...@map-invalid-size.html
> 

https://gitlab.freedesktop.org/drm/intel/-/issues/3746 again


After chatting with Rodrigo, we agreed that the patches are needed on
both drm-intel-next (to support the display DMC matching) and
drm-intel-gt-next (to handle workarounds properly).  So the patches were
applied to topic branch 'topic/revid_steppings' which was then merged to
both Intel branches to make sure the new stepping macros are available
and used consistently in both branches.


Matt

>   
>  Suppressed 
> 
>   The following results come from untrusted machines, tests, or statuses.
>   They do not affect the overall result.
> 
>   * igt@kms_ccs@pipe-a-bad-rotation-90-yf_tiled_ccs:
> - {shard-rkl}:[FAIL][6] ([i915#3678]) -> [SKIP][7]
>[6]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-rkl-5/igt@kms_ccs@pipe-a-bad-rotation-90-yf_tiled_ccs.html
>[7]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-rkl-6/igt@kms_ccs@pipe-a-bad-rotation-90-yf_tiled_ccs.html
> 
>   * igt@perf@polling:
> - {shard-rkl}:NOTRUN -> [FAIL][8]
>[8]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-rkl-5/igt@p...@polling.html
> 
>   * igt@sysfs_preempt_timeout@timeout@rcs0:
> - {shard-rkl}:[PASS][9] -> [FAIL][10] +3 similar issues
>[9]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-rkl-6/igt@sysfs_preempt_timeout@time...@rcs0.html
>[10]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-rkl-1/igt@sysfs_preempt_timeout@time...@rcs0.html
> 
>   
> Known issues
> 
> 
>   Here are the changes found in Patchwork_20590_full that come from known 
> issues:
> 
> ### IGT changes ###
> 
>  Issues hit 
> 
>   * igt@gem_ctx_persistence@legacy-engines-mixed:
> - shard-snb:  NOTRUN -> [SKIP][11] ([fdo#109271] / [i915#1099]) 
> +5 similar issues
>[11]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-snb2/igt@gem_ctx_persiste...@legacy-engines-mixed.html
> 
>   * igt@gem_eio@in-flight-contexts-1us:
> - shard-tglb: [PASS][12] -> [TIMEOUT][13] ([i915#3063])
>[12]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-tglb6/igt@gem_...@in-flight-contexts-1us.html
>[13]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-tglb6/igt@gem_...@in-flight-contexts-1us.html
> 
>   * igt@gem_exec_fair@basic-deadline:
> - shard-skl:  NOTRUN -> [FAIL][14] ([i915#2846])
>[14]: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-skl7/igt@gem_exec_f...@basic-deadline.html
> 
>   * igt@gem_exec_fair@basic-none-share@rcs0:
> - 

Re: [Intel-gfx] [PATCH 31/47] drm/i915/guc: Reset implementation for new GuC interface

2021-07-14 Thread Matthew Brost
On Mon, Jul 12, 2021 at 12:58:45PM -0700, John Harrison wrote:
> On 6/24/2021 00:05, Matthew Brost wrote:
> > Reset implementation for new GuC interface. This is the legacy reset
> > implementation which is called when the i915 owns the engine hang check.
> > Future patches will offload the engine hang check to GuC but we will
> > continue to maintain this legacy path as a fallback and this code path
> > is also required if the GuC dies.
> > 
> > With the new GuC interface it is not possible to reset individual
> > engines - it is only possible to reset the GPU entirely. This patch
> > forces an entire chip reset if any engine hangs.
> There seems to be quite a lot more code being changed in the patch than is
> described above. Sure, it's all in order to support resets but there is a
> lot happening to request/context management, support for GuC submission
> enable/disable, etc. It feels like this patch really should be split into a
> couple of prep patches followed by the actual reset support. Plus see couple
> of minor comments below.
> 

Yea, this is probably the most churned on patch we have as getting
resets to fully work isn't easy. I'll fix the below comments but I don't
know if it's worth splitting. Everything in the patch is required to get
resets to work and I think it is better to have in a single patch so
'git blame' can give you the whole picture.

Matt 

> > Cc: John Harrison 
> > Signed-off-by: Matthew Brost 
> > ---
> >   drivers/gpu/drm/i915/gt/intel_context.c   |   3 +
> >   drivers/gpu/drm/i915/gt/intel_context_types.h |   7 +
> >   drivers/gpu/drm/i915/gt/intel_engine_types.h  |   6 +
> >   .../drm/i915/gt/intel_execlists_submission.c  |  40 ++
> >   drivers/gpu/drm/i915/gt/intel_gt_pm.c |   6 +-
> >   drivers/gpu/drm/i915/gt/intel_reset.c |  18 +-
> >   .../gpu/drm/i915/gt/intel_ring_submission.c   |  22 +
> >   drivers/gpu/drm/i915/gt/mock_engine.c |  31 +
> >   drivers/gpu/drm/i915/gt/uc/intel_guc.c|  13 -
> >   drivers/gpu/drm/i915/gt/uc/intel_guc.h|   8 +-
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 581 ++
> >   drivers/gpu/drm/i915/gt/uc/intel_uc.c |  39 +-
> >   drivers/gpu/drm/i915/gt/uc/intel_uc.h |   3 +
> >   drivers/gpu/drm/i915/i915_request.c   |  41 +-
> >   drivers/gpu/drm/i915/i915_request.h   |   2 +
> >   15 files changed, 649 insertions(+), 171 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
> > b/drivers/gpu/drm/i915/gt/intel_context.c
> > index b24a1b7a3f88..2f01437056a8 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> > @@ -392,6 +392,9 @@ intel_context_init(struct intel_context *ce, struct 
> > intel_engine_cs *engine)
> > spin_lock_init(>guc_state.lock);
> > INIT_LIST_HEAD(>guc_state.fences);
> > +   spin_lock_init(>guc_active.lock);
> > +   INIT_LIST_HEAD(>guc_active.requests);
> > +
> > ce->guc_id = GUC_INVALID_LRC_ID;
> > INIT_LIST_HEAD(>guc_id_link);
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
> > b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > index 6945963a31ba..b63c8cf7823b 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > @@ -165,6 +165,13 @@ struct intel_context {
> > struct list_head fences;
> > } guc_state;
> > +   struct {
> > +   /** lock: protects everything in guc_active */
> > +   spinlock_t lock;
> > +   /** requests: active requests on this context */
> > +   struct list_head requests;
> > +   } guc_active;
> > +
> > /* GuC scheduling state that does not require a lock. */
> > atomic_t guc_sched_state_no_lock;
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
> > b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > index e7cb6a06db9d..f9d264c008e8 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > @@ -426,6 +426,12 @@ struct intel_engine_cs {
> > void(*release)(struct intel_engine_cs *engine);
> > +   /*
> > +* Add / remove request from engine active tracking
> > +*/
> > +   void(*add_active_request)(struct i915_request *rq);
> > +   void(*remove_active_request)(struct i915_request *rq);
> > +
> > struct intel_engine_execlists execlists;
> > /*
> > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 
> > b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > index c10ea6080752..c301a2d088b1 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > @@ -3118,6 +3118,42 @@ static void execlists_park(struct intel_engine_cs 
> > *engine)
> > cancel_timer(>execlists.preempt);
> >   }
> > +static void add_to_engine(struct i915_request *rq)
> > +{
> > +   

Re: [Intel-gfx] [PATCH 47/47] drm/i915/guc: Unblock GuC submission on Gen11+

2021-07-14 Thread Matthew Brost
On Thu, Jun 24, 2021 at 12:05:16AM -0700, Matthew Brost wrote:
> From: Daniele Ceraolo Spurio 
> 
> Unblock GuC submission on Gen11+ platforms.
> 
> Signed-off-by: Michal Wajdeczko 
> Signed-off-by: Daniele Ceraolo Spurio 
> Signed-off-by: Matthew Brost 

Updating debug message per feedback, with that:
Reviewed-by: Matthew Brost 

> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc.h|  1 +
>  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c |  8 
>  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h |  3 +--
>  drivers/gpu/drm/i915/gt/uc/intel_uc.c | 14 +-
>  4 files changed, 19 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> index fae01dc8e1b9..77981788204f 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> @@ -54,6 +54,7 @@ struct intel_guc {
>   struct ida guc_ids;
>   struct list_head guc_id_list;
>  
> + bool submission_supported;
>   bool submission_selected;
>  
>   struct i915_vma *ads_vma;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index a427336ce916..405339202280 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -2042,6 +2042,13 @@ void intel_guc_submission_disable(struct intel_guc 
> *guc)
>   /* Note: By the time we're here, GuC may have already been reset */
>  }
>  
> +static bool __guc_submission_supported(struct intel_guc *guc)
> +{
> + /* GuC submission is unavailable for pre-Gen11 */
> + return intel_guc_is_supported(guc) &&
> +INTEL_GEN(guc_to_gt(guc)->i915) >= 11;
> +}
> +
>  static bool __guc_submission_selected(struct intel_guc *guc)
>  {
>   struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
> @@ -2054,6 +2061,7 @@ static bool __guc_submission_selected(struct intel_guc 
> *guc)
>  
>  void intel_guc_submission_init_early(struct intel_guc *guc)
>  {
> + guc->submission_supported = __guc_submission_supported(guc);
>   guc->submission_selected = __guc_submission_selected(guc);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
> index a2a3fad72be1..be767eb6ff71 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
> @@ -37,8 +37,7 @@ int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
>  
>  static inline bool intel_guc_submission_is_supported(struct intel_guc *guc)
>  {
> - /* XXX: GuC submission is unavailable for now */
> - return false;
> + return guc->submission_supported;
>  }
>  
>  static inline bool intel_guc_submission_is_wanted(struct intel_guc *guc)
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index 7a69c3c027e9..61be0aa81492 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -34,8 +34,15 @@ static void uc_expand_default_options(struct intel_uc *uc)
>   return;
>   }
>  
> - /* Default: enable HuC authentication only */
> - i915->params.enable_guc = ENABLE_GUC_LOAD_HUC;
> + /* Intermediate platforms are HuC authentication only */
> + if (IS_DG1(i915) || IS_ALDERLAKE_S(i915)) {
> + drm_dbg(>drm, "Disabling GuC only due to old platform\n");
> + i915->params.enable_guc = ENABLE_GUC_LOAD_HUC;
> + return;
> + }
> +
> + /* Default: enable HuC authentication and GuC submission */
> + i915->params.enable_guc = ENABLE_GUC_LOAD_HUC | ENABLE_GUC_SUBMISSION;
>  }
>  
>  /* Reset GuC providing us with fresh state for both GuC and HuC.
> @@ -313,9 +320,6 @@ static int __uc_init(struct intel_uc *uc)
>   if (i915_inject_probe_failure(uc_to_gt(uc)->i915))
>   return -ENOMEM;
>  
> - /* XXX: GuC submission is unavailable for now */
> - GEM_BUG_ON(intel_uc_uses_guc_submission(uc));
> -
>   ret = intel_guc_init(guc);
>   if (ret)
>   return ret;
> -- 
> 2.28.0
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 42/47] drm/i915/guc: Fix for error capture after full GPU reset with GuC

2021-07-14 Thread Matthew Brost
On Thu, Jun 24, 2021 at 12:05:11AM -0700, Matthew Brost wrote:
> From: John Harrison 
> 
> In the case of a full GPU reset (e.g. because GuC has died or because
> GuC's hang detection has been disabled), the driver can't rely on GuC
> reporting the guilty context. Instead, the driver needs to scan all
> active contexts and find one that is currently executing, as per the
> execlist mode behaviour. In GuC mode, this scan is different to
> execlist mode as the active request list is handled very differently.
> 
> Similarly, the request state dump in debugfs needs to be handled
> differently when in GuC submission mode.
> 
> Also refactored some of the request scanning code to avoid duplication
> across the multiple code paths that are now replicating it.
> 
> Signed-off-by: John Harrison 
> Signed-off-by: Matthew Brost 

Reviewed-by: Matthew Brost 

> ---
>  drivers/gpu/drm/i915/gt/intel_engine.h|   3 +
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c | 139 --
>  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |   8 +
>  drivers/gpu/drm/i915/gt/intel_reset.c |   2 +-
>  drivers/gpu/drm/i915/gt/uc/intel_guc.h|   2 +
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  67 +
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.h |   3 +
>  drivers/gpu/drm/i915/i915_request.c   |  41 ++
>  drivers/gpu/drm/i915/i915_request.h   |  11 ++
>  9 files changed, 229 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h 
> b/drivers/gpu/drm/i915/gt/intel_engine.h
> index 6ea5643a3aaa..9ba131175564 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -240,6 +240,9 @@ __printf(3, 4)
>  void intel_engine_dump(struct intel_engine_cs *engine,
>  struct drm_printer *m,
>  const char *header, ...);
> +void intel_engine_dump_active_requests(struct list_head *requests,
> +struct i915_request *hung_rq,
> +struct drm_printer *m);
>  
>  ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine,
>  ktime_t *now);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 1d243b83b023..bbea7c9a367d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -1624,6 +1624,97 @@ static void print_properties(struct intel_engine_cs 
> *engine,
>  read_ul(>defaults, p->offset));
>  }
>  
> +static void engine_dump_request(struct i915_request *rq, struct drm_printer 
> *m, const char *msg)
> +{
> + struct intel_timeline *tl = get_timeline(rq);
> +
> + i915_request_show(m, rq, msg, 0);
> +
> + drm_printf(m, "\t\tring->start:  0x%08x\n",
> +i915_ggtt_offset(rq->ring->vma));
> + drm_printf(m, "\t\tring->head:   0x%08x\n",
> +rq->ring->head);
> + drm_printf(m, "\t\tring->tail:   0x%08x\n",
> +rq->ring->tail);
> + drm_printf(m, "\t\tring->emit:   0x%08x\n",
> +rq->ring->emit);
> + drm_printf(m, "\t\tring->space:  0x%08x\n",
> +rq->ring->space);
> +
> + if (tl) {
> + drm_printf(m, "\t\tring->hwsp:   0x%08x\n",
> +tl->hwsp_offset);
> + intel_timeline_put(tl);
> + }
> +
> + print_request_ring(m, rq);
> +
> + if (rq->context->lrc_reg_state) {
> + drm_printf(m, "Logical Ring Context:\n");
> + hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
> + }
> +}
> +
> +void intel_engine_dump_active_requests(struct list_head *requests,
> +struct i915_request *hung_rq,
> +struct drm_printer *m)
> +{
> + struct i915_request *rq;
> + const char *msg;
> + enum i915_request_state state;
> +
> + list_for_each_entry(rq, requests, sched.link) {
> + if (rq == hung_rq)
> + continue;
> +
> + state = i915_test_request_state(rq);
> + if (state < I915_REQUEST_QUEUED)
> + continue;
> +
> + if (state == I915_REQUEST_ACTIVE)
> + msg = "\t\tactive on engine";
> + else
> + msg = "\t\tactive in queue";
> +
> + engine_dump_request(rq, m, msg);
> + }
> +}
> +
> +static void engine_dump_active_requests(struct intel_engine_cs *engine, 
> struct drm_printer *m)
> +{
> + struct i915_request *hung_rq = NULL;
> + struct intel_context *ce;
> + bool guc;
> +
> + /*
> +  * No need for an engine->irq_seqno_barrier() before the seqno reads.
> +  * The GPU is still running so requests are still executing and any
> +  * hardware reads will be out of date by the time they are reported.
> +  * But the 

Re: [Intel-gfx] [PATCH 35/47] drm/i915/guc: Handle context reset notification

2021-07-14 Thread Matthew Brost
On Mon, Jul 12, 2021 at 03:58:12PM -0700, John Harrison wrote:
> On 6/24/2021 00:05, Matthew Brost wrote:
> > GuC will issue a reset on detecting an engine hang and will notify
> > the driver via a G2H message. The driver will service the notification
> > by resetting the guilty context to a simple state or banning it
> > completely.
> > 
> > Cc: Matthew Brost 
> > Cc: John Harrison 
> > Signed-off-by: Matthew Brost 
> > ---
> >   drivers/gpu/drm/i915/gt/uc/intel_guc.h|  2 ++
> >   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  3 ++
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 35 +++
> >   drivers/gpu/drm/i915/i915_trace.h | 10 ++
> >   4 files changed, 50 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > index 85ef6767f13b..e94b0ef733da 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > @@ -262,6 +262,8 @@ int intel_guc_deregister_done_process_msg(struct 
> > intel_guc *guc,
> >   const u32 *msg, u32 len);
> >   int intel_guc_sched_done_process_msg(struct intel_guc *guc,
> >  const u32 *msg, u32 len);
> > +int intel_guc_context_reset_process_msg(struct intel_guc *guc,
> > +   const u32 *msg, u32 len);
> >   void intel_guc_submission_reset_prepare(struct intel_guc *guc);
> >   void intel_guc_submission_reset(struct intel_guc *guc, bool stalled);
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > index 4ed074df88e5..a2020373b8e8 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > @@ -945,6 +945,9 @@ static int ct_process_request(struct intel_guc_ct *ct, 
> > struct ct_incoming_msg *r
> > case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
> > ret = intel_guc_sched_done_process_msg(guc, payload, len);
> > break;
> > +   case INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
> > +   ret = intel_guc_context_reset_process_msg(guc, payload, len);
> > +   break;
> > default:
> > ret = -EOPNOTSUPP;
> > break;
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index 16b61fe71b07..9845c5bd9832 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -2192,6 +2192,41 @@ int intel_guc_sched_done_process_msg(struct 
> > intel_guc *guc,
> > return 0;
> >   }
> > +static void guc_context_replay(struct intel_context *ce)
> > +{
> > +   struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
> > +
> > +   __guc_reset_context(ce, true);
> > +   tasklet_hi_schedule(_engine->tasklet);
> > +}
> > +
> > +static void guc_handle_context_reset(struct intel_guc *guc,
> > +struct intel_context *ce)
> > +{
> > +   trace_intel_context_reset(ce);
> > +   guc_context_replay(ce);
> > +}
> > +
> > +int intel_guc_context_reset_process_msg(struct intel_guc *guc,
> > +   const u32 *msg, u32 len)
> > +{
> > +   struct intel_context *ce;
> > +   int desc_idx = msg[0];
> Should do this dereference after checking the length? Or is it guaranteed
> that the length cannot be zero?
> 

I think for safety, it should be moved.

Matt

> John.
> 
> > +
> > +   if (unlikely(len != 1)) {
> > +   drm_dbg(_to_gt(guc)->i915->drm, "Invalid length %u", len);
> > +   return -EPROTO;
> > +   }
> > +
> > +   ce = g2h_context_lookup(guc, desc_idx);
> > +   if (unlikely(!ce))
> > +   return -EPROTO;
> > +
> > +   guc_handle_context_reset(guc, ce);
> > +
> > +   return 0;
> > +}
> > +
> >   void intel_guc_log_submission_info(struct intel_guc *guc,
> >struct drm_printer *p)
> >   {
> > diff --git a/drivers/gpu/drm/i915/i915_trace.h 
> > b/drivers/gpu/drm/i915/i915_trace.h
> > index 97c2e83984ed..c095c4d39456 100644
> > --- a/drivers/gpu/drm/i915/i915_trace.h
> > +++ b/drivers/gpu/drm/i915/i915_trace.h
> > @@ -929,6 +929,11 @@ DECLARE_EVENT_CLASS(intel_context,
> >   __entry->guc_sched_state_no_lock)
> >   );
> > +DEFINE_EVENT(intel_context, intel_context_reset,
> > +TP_PROTO(struct intel_context *ce),
> > +TP_ARGS(ce)
> > +);
> > +
> >   DEFINE_EVENT(intel_context, intel_context_register,
> >  TP_PROTO(struct intel_context *ce),
> >  TP_ARGS(ce)
> > @@ -1026,6 +1031,11 @@ trace_i915_request_out(struct i915_request *rq)
> >   {
> >   }
> > +static inline void
> > +trace_intel_context_reset(struct intel_context *ce)
> > +{
> > +}
> > +
> >   static inline void
> >   trace_intel_context_register(struct intel_context *ce)
> >   {
> 

Re: [Intel-gfx] [PATCH 21/47] drm/i915/guc: Ensure G2H response has space in buffer

2021-07-14 Thread John Harrison

On 7/14/2021 17:06, Matthew Brost wrote:

On Tue, Jul 13, 2021 at 11:36:05AM -0700, John Harrison wrote:

On 6/24/2021 00:04, Matthew Brost wrote:

Ensure G2H response has space in the buffer before sending H2G CTB as
the GuC can't handle any backpressure on the G2H interface.

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
---
   drivers/gpu/drm/i915/gt/uc/intel_guc.h| 13 +++-
   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 76 +++
   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  4 +-
   drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  4 +
   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 13 ++--
   5 files changed, 87 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index b43ec56986b5..24e7a924134e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -95,11 +95,17 @@ inline int intel_guc_send(struct intel_guc *guc, const u32 
*action, u32 len)
   }
   #define INTEL_GUC_SEND_NBBIT(31)
+#define INTEL_GUC_SEND_G2H_DW_SHIFT0
+#define INTEL_GUC_SEND_G2H_DW_MASK (0xff << INTEL_GUC_SEND_G2H_DW_SHIFT)
+#define MAKE_SEND_FLAGS(len) \
+   ({GEM_BUG_ON(!FIELD_FIT(INTEL_GUC_SEND_G2H_DW_MASK, len)); \
+   (FIELD_PREP(INTEL_GUC_SEND_G2H_DW_MASK, len) | INTEL_GUC_SEND_NB);})
   static
-inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 len)
+inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 len,
+u32 g2h_len_dw)
   {
return intel_guc_ct_send(>ct, action, len, NULL, 0,
-INTEL_GUC_SEND_NB);
+MAKE_SEND_FLAGS(g2h_len_dw));
   }
   static inline int
@@ -113,6 +119,7 @@ intel_guc_send_and_receive(struct intel_guc *guc, const u32 
*action, u32 len,
   static inline int intel_guc_send_busy_loop(struct intel_guc* guc,
   const u32 *action,
   u32 len,
+  u32 g2h_len_dw,
   bool loop)
   {
int err;
@@ -121,7 +128,7 @@ static inline int intel_guc_send_busy_loop(struct 
intel_guc* guc,
might_sleep_if(loop && (!in_atomic() && !irqs_disabled()));
   retry:
-   err = intel_guc_send_nb(guc, action, len);
+   err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
if (unlikely(err == -EBUSY && loop)) {
if (likely(!in_atomic() && !irqs_disabled()))
cond_resched();
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 7491f041859e..a60970e85635 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -73,6 +73,7 @@ static inline struct drm_device *ct_to_drm(struct 
intel_guc_ct *ct)
   #define CTB_DESC_SIZEALIGN(sizeof(struct 
guc_ct_buffer_desc), SZ_2K)
   #define CTB_H2G_BUFFER_SIZE  (SZ_4K)
   #define CTB_G2H_BUFFER_SIZE  (4 * CTB_H2G_BUFFER_SIZE)
+#define G2H_ROOM_BUFFER_SIZE   (PAGE_SIZE)

Any particular reason why PAGE_SIZE instead of SZ_4K? I'm not seeing
anything in the code that is actually related to page sizes. Seems like
'(CTB_G2H_BUFFER_SIZE / 4)' would be a more correct way to express it.
Unless I'm missing something about how it's used?


Yes, CTB_G2H_BUFFER_SIZE / 4 is better.

Matt

Okay. With that changed:

Reviewed-by: John Harrison 





John.



   struct ct_request {
struct list_head link;
@@ -129,23 +130,27 @@ static void guc_ct_buffer_desc_init(struct 
guc_ct_buffer_desc *desc)
   static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb)
   {
+   u32 space;
+
ctb->broken = false;
ctb->tail = 0;
ctb->head = 0;
-   ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size);
+   space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size) - ctb->resv_space;
+   atomic_set(>space, space);
guc_ct_buffer_desc_init(ctb->desc);
   }
   static void guc_ct_buffer_init(struct intel_guc_ct_buffer *ctb,
   struct guc_ct_buffer_desc *desc,
-  u32 *cmds, u32 size_in_bytes)
+  u32 *cmds, u32 size_in_bytes, u32 resv_space)
   {
GEM_BUG_ON(size_in_bytes % 4);
ctb->desc = desc;
ctb->cmds = cmds;
ctb->size = size_in_bytes / 4;
+   ctb->resv_space = resv_space / 4;
guc_ct_buffer_reset(ctb);
   }
@@ -226,6 +231,7 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
struct guc_ct_buffer_desc *desc;
u32 blob_size;
u32 cmds_size;
+   u32 resv_space;
void *blob;
u32 *cmds;
int err;
@@ -250,19 +256,23 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
desc = blob;
cmds = blob + 2 * CTB_DESC_SIZE;
cmds_size = 

Re: [Intel-gfx] [PATCH 20/47] drm/i915/guc: Disable semaphores when using GuC scheduling

2021-07-14 Thread Matthew Brost
On Fri, Jul 09, 2021 at 04:53:37PM -0700, John Harrison wrote:
> On 6/24/2021 00:04, Matthew Brost wrote:
> > Semaphores are an optimization and not required for basic GuC submission
> > to work properly. Disable until we have time to do the implementation to
> > enable semaphores and tune them for performance. Also long direction is
> > just to delete semaphores from the i915 so another reason to not enable
> > these for GuC submission.
> > 
> > v2: Reword commit message
> > 
> > Cc: John Harrison 
> > Signed-off-by: Matthew Brost 
> I think the commit description does not really match the patch content. The
> description is valid but the 'disable' is done by simply not setting the
> enable flag (done in the execlist back end and presumably not done in the
> GuC back end). However, what the patch is actually doing seems to be fixing
> bugs with the 'are semaphores enabled' mechanism. I.e. correcting pieces of
> code that used semaphores without checking if they are enabled. And
> presumably this would be broken if someone tried to disable semaphores in
> execlist mode for any reason?
> 
> So I think keeping the existing comment text is fine but something should be
> added to explain the actual changes.
> 

Yes, the commit message is wrong. This is more or less a bug fix to the
existing code. Will update.


Matt

> John.
> 
> 
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_context.c | 6 --
> >   1 file changed, 4 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
> > b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > index 7720b8c22c81..5c07e6abf16a 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > @@ -230,7 +230,8 @@ static void intel_context_set_gem(struct intel_context 
> > *ce,
> > ce->timeline = intel_timeline_get(ctx->timeline);
> > if (ctx->sched.priority >= I915_PRIORITY_NORMAL &&
> > -   intel_engine_has_timeslices(ce->engine))
> > +   intel_engine_has_timeslices(ce->engine) &&
> > +   intel_engine_has_semaphores(ce->engine))
> > __set_bit(CONTEXT_USE_SEMAPHORES, >flags);
> > intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us);
> > @@ -1938,7 +1939,8 @@ static int __apply_priority(struct intel_context *ce, 
> > void *arg)
> > if (!intel_engine_has_timeslices(ce->engine))
> > return 0;
> > -   if (ctx->sched.priority >= I915_PRIORITY_NORMAL)
> > +   if (ctx->sched.priority >= I915_PRIORITY_NORMAL &&
> > +   intel_engine_has_semaphores(ce->engine))
> > intel_context_set_use_semaphores(ce);
> > else
> > intel_context_clear_use_semaphores(ce);
> 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 21/47] drm/i915/guc: Ensure G2H response has space in buffer

2021-07-14 Thread Matthew Brost
On Tue, Jul 13, 2021 at 11:36:05AM -0700, John Harrison wrote:
> On 6/24/2021 00:04, Matthew Brost wrote:
> > Ensure G2H response has space in the buffer before sending H2G CTB as
> > the GuC can't handle any backpressure on the G2H interface.
> > 
> > Signed-off-by: John Harrison 
> > Signed-off-by: Matthew Brost 
> > ---
> >   drivers/gpu/drm/i915/gt/uc/intel_guc.h| 13 +++-
> >   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 76 +++
> >   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  4 +-
> >   drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  4 +
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 13 ++--
> >   5 files changed, 87 insertions(+), 23 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > index b43ec56986b5..24e7a924134e 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > @@ -95,11 +95,17 @@ inline int intel_guc_send(struct intel_guc *guc, const 
> > u32 *action, u32 len)
> >   }
> >   #define INTEL_GUC_SEND_NB BIT(31)
> > +#define INTEL_GUC_SEND_G2H_DW_SHIFT0
> > +#define INTEL_GUC_SEND_G2H_DW_MASK (0xff << INTEL_GUC_SEND_G2H_DW_SHIFT)
> > +#define MAKE_SEND_FLAGS(len) \
> > +   ({GEM_BUG_ON(!FIELD_FIT(INTEL_GUC_SEND_G2H_DW_MASK, len)); \
> > +   (FIELD_PREP(INTEL_GUC_SEND_G2H_DW_MASK, len) | INTEL_GUC_SEND_NB);})
> >   static
> > -inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 
> > len)
> > +inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 
> > len,
> > +u32 g2h_len_dw)
> >   {
> > return intel_guc_ct_send(>ct, action, len, NULL, 0,
> > -INTEL_GUC_SEND_NB);
> > +MAKE_SEND_FLAGS(g2h_len_dw));
> >   }
> >   static inline int
> > @@ -113,6 +119,7 @@ intel_guc_send_and_receive(struct intel_guc *guc, const 
> > u32 *action, u32 len,
> >   static inline int intel_guc_send_busy_loop(struct intel_guc* guc,
> >const u32 *action,
> >u32 len,
> > +  u32 g2h_len_dw,
> >bool loop)
> >   {
> > int err;
> > @@ -121,7 +128,7 @@ static inline int intel_guc_send_busy_loop(struct 
> > intel_guc* guc,
> > might_sleep_if(loop && (!in_atomic() && !irqs_disabled()));
> >   retry:
> > -   err = intel_guc_send_nb(guc, action, len);
> > +   err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
> > if (unlikely(err == -EBUSY && loop)) {
> > if (likely(!in_atomic() && !irqs_disabled()))
> > cond_resched();
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > index 7491f041859e..a60970e85635 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > @@ -73,6 +73,7 @@ static inline struct drm_device *ct_to_drm(struct 
> > intel_guc_ct *ct)
> >   #define CTB_DESC_SIZE ALIGN(sizeof(struct 
> > guc_ct_buffer_desc), SZ_2K)
> >   #define CTB_H2G_BUFFER_SIZE   (SZ_4K)
> >   #define CTB_G2H_BUFFER_SIZE   (4 * CTB_H2G_BUFFER_SIZE)
> > +#define G2H_ROOM_BUFFER_SIZE   (PAGE_SIZE)
> Any particular reason why PAGE_SIZE instead of SZ_4K? I'm not seeing
> anything in the code that is actually related to page sizes. Seems like
> '(CTB_G2H_BUFFER_SIZE / 4)' would be a more correct way to express it.
> Unless I'm missing something about how it's used?
> 

Yes, CTB_G2H_BUFFER_SIZE / 4 is better.

Matt

> John.
> 
> 
> >   struct ct_request {
> > struct list_head link;
> > @@ -129,23 +130,27 @@ static void guc_ct_buffer_desc_init(struct 
> > guc_ct_buffer_desc *desc)
> >   static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb)
> >   {
> > +   u32 space;
> > +
> > ctb->broken = false;
> > ctb->tail = 0;
> > ctb->head = 0;
> > -   ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size);
> > +   space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size) - ctb->resv_space;
> > +   atomic_set(>space, space);
> > guc_ct_buffer_desc_init(ctb->desc);
> >   }
> >   static void guc_ct_buffer_init(struct intel_guc_ct_buffer *ctb,
> >struct guc_ct_buffer_desc *desc,
> > -  u32 *cmds, u32 size_in_bytes)
> > +  u32 *cmds, u32 size_in_bytes, u32 resv_space)
> >   {
> > GEM_BUG_ON(size_in_bytes % 4);
> > ctb->desc = desc;
> > ctb->cmds = cmds;
> > ctb->size = size_in_bytes / 4;
> > +   ctb->resv_space = resv_space / 4;
> > guc_ct_buffer_reset(ctb);
> >   }
> > @@ -226,6 +231,7 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
> > struct guc_ct_buffer_desc *desc;
> > u32 blob_size;
> > u32 cmds_size;
> > +   u32 resv_space;
> > void *blob;
> > u32 *cmds;
> > int err;
> > @@ 

Re: [Intel-gfx] [PATCH 23/47] drm/i915/guc: Update GuC debugfs to support new GuC

2021-07-14 Thread Matthew Brost
On Tue, Jul 13, 2021 at 10:51:35AM +0200, Michal Wajdeczko wrote:
> 
> 
> On 24.06.2021 09:04, Matthew Brost wrote:
> > Update GuC debugfs to support the new GuC structures.
> > 
> > Signed-off-by: John Harrison 
> > Signed-off-by: Matthew Brost 
> > ---
> >  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 22 
> >  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  3 ++
> >  .../gpu/drm/i915/gt/uc/intel_guc_debugfs.c| 23 +++-
> >  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 52 +++
> >  .../gpu/drm/i915/gt/uc/intel_guc_submission.h |  4 ++
> >  drivers/gpu/drm/i915/i915_debugfs.c   |  1 +
> >  6 files changed, 104 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > index e0f92e28350c..4ed074df88e5 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > @@ -1135,3 +1135,25 @@ void intel_guc_ct_event_handler(struct intel_guc_ct 
> > *ct)
> >  
> > ct_try_receive_message(ct);
> >  }
> > +
> > +void intel_guc_log_ct_info(struct intel_guc_ct *ct,
> 
> this is not "guc log" function, it is "guc ct" one, so:
> 
>   void intel_guc_ct_print_info(struct intel_guc_ct *ct,
> 

Sure.

> > +  struct drm_printer *p)
> > +{
> > +   if (!ct->enabled) {
> > +   drm_puts(p, "CT disabled\n");
> 
> nit: maybe
> 
>   drm_printf(p, "CT %s\n", enableddisabled(false));
> 

Sure.

> > +   return;
> > +   }
> > +
> > +   drm_printf(p, "H2G Space: %u\n",
> > +  atomic_read(>ctbs.send.space) * 4);
> 
> don't you want to print size ?
> or GGTT offset ?
>

I don't think so.
 
> > +   drm_printf(p, "Head: %u\n",
> > +  ct->ctbs.send.desc->head);
> > +   drm_printf(p, "Tail: %u\n",
> > +  ct->ctbs.send.desc->tail);
> > +   drm_printf(p, "G2H Space: %u\n",
> > +  atomic_read(>ctbs.recv.space) * 4);
> > +   drm_printf(p, "Head: %u\n",
> > +  ct->ctbs.recv.desc->head);
> > +   drm_printf(p, "Tail: %u\n",
> > +  ct->ctbs.recv.desc->tail);
> 
> hmm, what about adding helper:
> 
>   static void dump_ctb(struct intel_guc_ct_buffer *ctb, *p)
>   {
>   drm_printf(p, "Size: %u\n", ctb->size);
>   drm_printf(p, "Space: %u\n", atomic_read(>space) * 4);
>   drm_printf(p, "Head: %u\n", ctb->desc->head);
>   drm_printf(p, "Tail: %u\n", ctb->desc->tail);
>   }
> 
> and then:
> 
>   drm_printf(p, "H2G:\n");
>   dump_ctb(>ctbs.send, p);
>   drm_printf(p, "G2H:\n");
>   dump_ctb(>ctbs.recv, p);
> 
> or
> 
>   dump_ctb(>ctbs.send, "H2G", p);
>   dump_ctb(>ctbs.recv, "G2H", p);
>   
>

Seems unnecessary.

> > +}
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> > index ab1b79ab960b..f62eb06b32fc 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> > @@ -16,6 +16,7 @@
> >  
> >  struct i915_vma;
> >  struct intel_guc;
> > +struct drm_printer;
> >  
> >  /**
> >   * DOC: Command Transport (CT).
> > @@ -106,4 +107,6 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const 
> > u32 *action, u32 len,
> >   u32 *response_buf, u32 response_buf_size, u32 flags);
> >  void intel_guc_ct_event_handler(struct intel_guc_ct *ct);
> >  
> > +void intel_guc_log_ct_info(struct intel_guc_ct *ct, struct drm_printer *p);
> > +
> >  #endif /* _INTEL_GUC_CT_H_ */
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
> > index fe7cb7b29a1e..62b9ce0fafaa 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
> > @@ -9,6 +9,8 @@
> >  #include "intel_guc.h"
> >  #include "intel_guc_debugfs.h"
> >  #include "intel_guc_log_debugfs.h"
> > +#include "gt/uc/intel_guc_ct.h"
> > +#include "gt/uc/intel_guc_submission.h"
> >  
> >  static int guc_info_show(struct seq_file *m, void *data)
> >  {
> > @@ -22,16 +24,35 @@ static int guc_info_show(struct seq_file *m, void *data)
> > drm_puts(, "\n");
> > intel_guc_log_info(>log, );
> >  
> > -   /* Add more as required ... */
> > +   if (!intel_guc_submission_is_used(guc))
> > +   return 0;
> > +
> > +   intel_guc_log_ct_info(>ct, );
> > +   intel_guc_log_submission_info(guc, );
> >  
> > return 0;
> >  }
> >  DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_info);
> >  
> > +static int guc_registered_contexts_show(struct seq_file *m, void *data)
> > +{
> > +   struct intel_guc *guc = m->private;
> > +   struct drm_printer p = drm_seq_file_printer(m);
> > +
> > +   if (!intel_guc_submission_is_used(guc))
> > +   return -ENODEV;
> > +
> > +   intel_guc_log_context_info(guc, );
> > +
> > +   return 0;
> > +}
> > +DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts);
> > +
> >  void intel_guc_debugfs_register(struct intel_guc *guc, struct 

[Intel-gfx] [PATCH] drm/i915/display: Fix shared dpll mismatch for bigjoiner slave

2021-07-14 Thread Manasi Navare
Currently when we do the HW state readout, we don't set the shared dpll to NULL
for the bigjoiner slave which should not have a DPLL assigned. So it has
some garbage while the HW state readout is NULL. So explicitly reset
the shared dpll for bigjoiner slave pipe.

Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/3465
Cc: Ville Syrjälä 
Cc: Ankit Nautiyal 
Tested-By: Swati Sharma 
Signed-off-by: Manasi Navare 
---
 drivers/gpu/drm/i915/display/intel_display.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index 65ddb6ca16e6..c274bfb8e549 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -9006,6 +9006,10 @@ verify_crtc_state(struct intel_crtc *crtc,
if (!new_crtc_state->hw.active)
return;
 
+   if (new_crtc_state->bigjoiner_slave)
+   /* No PLLs set for slave */
+   pipe_config->shared_dpll = NULL;
+
intel_pipe_config_sanity_check(dev_priv, pipe_config);
 
if (!intel_pipe_config_compare(new_crtc_state,
-- 
2.19.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915/gem: Migrate to system at dma-buf attach time (v5)

2021-07-14 Thread Jason Ekstrand
On Tue, Jul 13, 2021 at 10:23 AM Daniel Vetter  wrote:
>
> On Tue, Jul 13, 2021 at 04:06:13PM +0100, Matthew Auld wrote:
> > On Tue, 13 Jul 2021 at 15:44, Daniel Vetter  wrote:
> > >
> > > On Mon, Jul 12, 2021 at 06:12:34PM -0500, Jason Ekstrand wrote:
> > > > From: Thomas Hellström 
> > > >
> > > > Until we support p2p dma or as a complement to that, migrate data
> > > > to system memory at dma-buf attach time if possible.
> > > >
> > > > v2:
> > > > - Rebase on dynamic exporter. Update the igt_dmabuf_import_same_driver
> > > >   selftest to migrate if we are LMEM capable.
> > > > v3:
> > > > - Migrate also in the pin() callback.
> > > > v4:
> > > > - Migrate in attach
> > > > v5: (jason)
> > > > - Lock around the migration
> > > >
> > > > Signed-off-by: Thomas Hellström 
> > > > Signed-off-by: Michael J. Ruhl 
> > > > Reported-by: kernel test robot 
> > > > Signed-off-by: Jason Ekstrand 
> > > > Reviewed-by: Jason Ekstrand 
> > > > ---
> > > >  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c| 25 ++-
> > > >  .../drm/i915/gem/selftests/i915_gem_dmabuf.c  |  4 ++-
> > > >  2 files changed, 27 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
> > > > b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > > > index 9a655f69a0671..3163f00554476 100644
> > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > > > @@ -170,8 +170,31 @@ static int i915_gem_dmabuf_attach(struct dma_buf 
> > > > *dmabuf,
> > > > struct dma_buf_attachment *attach)
> > > >  {
> > > >   struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
> > > > + struct i915_gem_ww_ctx ww;
> > > > + int err;
> > > > +
> > > > + for_i915_gem_ww(, err, true) {
> > > > + err = i915_gem_object_lock(obj, );
> > > > + if (err)
> > > > + continue;
> > > > +
> > > > + if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) 
> > > > {
> > > > + err = -EOPNOTSUPP;
> > > > + continue;
> > > > + }
> > > > +
> > > > + err = i915_gem_object_migrate(obj, , 
> > > > INTEL_REGION_SMEM);
> > > > + if (err)
> > > > + continue;
> > > >
> > > > - return i915_gem_object_pin_pages_unlocked(obj);
> > > > + err = i915_gem_object_wait_migration(obj, 0);
> > > > + if (err)
> > > > + continue;
> > > > +
> > > > + err = i915_gem_object_pin_pages(obj);
> > > > + }
> > > > +
> > > > + return err;
> > > >  }
> > > >
> > > >  static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf,
> > > > diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c 
> > > > b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> > > > index 3dc0f8b3cdab0..4f7e77b1c0152 100644
> > > > --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> > > > +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> > > > @@ -106,7 +106,9 @@ static int igt_dmabuf_import_same_driver(void *arg)
> > > >   int err;
> > > >
> > > >   force_different_devices = true;
> > > > - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
> > > > + obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0);
> > >
> > > I'm wondering (and couldn't answer) whether this creates an lmem+smem
> > > buffer, since if we create an lmem-only buffer then the migration above
> > > should fail.
> >
> > It's lmem-only, but it's also a kernel internal object, so the
> > migration path will still happily migrate it if asked. On the other
> > hand if it's a userspace object then we always have to respect the
> > placements.
> >
> > I think for now the only usecase for that is in the selftests.
>
> Yeah I've read the kerneldoc, it's all nicely documented but feels a bit
> dangerous. What I proposed on irc:
> - i915_gem_object_migrate does the placement check, i.e. as strict as
>   can_migrate.
> - A new __i915_gem_object_migrate is for selftest that do special stuff.

I just sent out a patch which does this except we don't actually need
the __ version because there are no self-tests that want to do a
dangerous migrate.  We could add such a helper later if we needed.

> - In the import selftest we check that lmem-only fails (because we can't
>   pin it into smem) for a non-dynamic importer, but lmem+smem works and
>   gets migrated.

I think we maybe want multiple things here?  The test we have right
now is useful because, by creating an internal LMEM buffer we ensure
that the migration actually happens.  If we create LMEM+SMEM, then
it's possible it'll start off in SMEM and the migration would be a
no-op.  Not sure how likely that is in reality in a self-test
environment, though.

--Jason

> - Once we have dynamic dma-buf for p2p pci, then we'll have another
>   selftest which checks that things work for lmem only if and only if 

[Intel-gfx] [PATCH] drm/i915: Check object_can_migrate from object_migrate

2021-07-14 Thread Jason Ekstrand
We don't roll them together entirely because there are still a couple
cases where we want a separate can_migrate check.  For instance, the
display code checks that you can migrate a buffer to LMEM before it
accepts it in fb_create.  The dma-buf import code also uses it to do an
early check and return a different error code if someone tries to attach
a LMEM-only dma-buf to another driver.

However, no one actually wants to call object_migrate when can_migrate
has failed.  The stated intention is for self-tests but none of those
actually take advantage of this unsafe migration.

Signed-off-by: Jason Ekstrand 
Cc: Daniel Vetter 
Cc: Matthew Auld 
---
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c|  8 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 13 ++---
 .../gpu/drm/i915/gem/selftests/i915_gem_migrate.c | 15 ---
 3 files changed, 5 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 3163f00554476..5d438b95826b9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -173,16 +173,14 @@ static int i915_gem_dmabuf_attach(struct dma_buf *dmabuf,
struct i915_gem_ww_ctx ww;
int err;
 
+   if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM))
+   return -EOPNOTSUPP;
+
for_i915_gem_ww(, err, true) {
err = i915_gem_object_lock(obj, );
if (err)
continue;
 
-   if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) {
-   err = -EOPNOTSUPP;
-   continue;
-   }
-
err = i915_gem_object_migrate(obj, , INTEL_REGION_SMEM);
if (err)
continue;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 9da7b288b7ede..f2244ae09a613 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -584,12 +584,6 @@ bool i915_gem_object_can_migrate(struct 
drm_i915_gem_object *obj,
  * completed yet, and to accomplish that, i915_gem_object_wait_migration()
  * must be called.
  *
- * This function is a bit more permissive than i915_gem_object_can_migrate()
- * to allow for migrating objects where the caller knows exactly what is
- * happening. For example within selftests. More specifically this
- * function allows migrating I915_BO_ALLOC_USER objects to regions
- * that are not in the list of allowable regions.
- *
  * Note: the @ww parameter is not used yet, but included to make sure
  * callers put some effort into obtaining a valid ww ctx if one is
  * available.
@@ -616,11 +610,8 @@ int i915_gem_object_migrate(struct drm_i915_gem_object 
*obj,
if (obj->mm.region == mr)
return 0;
 
-   if (!i915_gem_object_evictable(obj))
-   return -EBUSY;
-
-   if (!obj->ops->migrate)
-   return -EOPNOTSUPP;
+   if (!i915_gem_object_can_migrate(obj, id))
+   return -EINVAL;
 
return obj->ops->migrate(obj, mr);
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
index 0b7144d2991ca..28a700f08b49a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
@@ -61,11 +61,6 @@ static int igt_create_migrate(struct intel_gt *gt, enum 
intel_region_id src,
if (err)
continue;
 
-   if (!i915_gem_object_can_migrate(obj, dst)) {
-   err = -EINVAL;
-   continue;
-   }
-
err = i915_gem_object_migrate(obj, , dst);
if (err)
continue;
@@ -114,11 +109,6 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx 
*ww,
return err;
 
if (i915_gem_object_is_lmem(obj)) {
-   if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) {
-   pr_err("object can't migrate to smem.\n");
-   return -EINVAL;
-   }
-
err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM);
if (err) {
pr_err("Object failed migration to smem\n");
@@ -137,11 +127,6 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx 
*ww,
}
 
} else {
-   if (!i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM)) {
-   pr_err("object can't migrate to lmem.\n");
-   return -EINVAL;
-   }
-
err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM);
if (err) {
pr_err("Object failed migration to lmem\n");
-- 
2.31.1


[Intel-gfx] [PATCH v2] drm/i915: Fix wm params for ccs

2021-07-14 Thread Juha-Pekka Heikkila
skl_compute_wm_params() didn't take into account ccs modifiers
on graphics ver >= 12

v2 (Imre): Don't set rc_surface flag for mc ccs.

Signed-off-by: Juha-Pekka Heikkila 
---
 drivers/gpu/drm/i915/intel_pm.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 0cbb79452fcf..5a63920c3913 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5249,11 +5249,10 @@ skl_compute_wm_params(const struct intel_crtc_state 
*crtc_state,
 
wp->y_tiled = modifier == I915_FORMAT_MOD_Y_TILED ||
  modifier == I915_FORMAT_MOD_Yf_TILED ||
- modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
- modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
+ is_ccs_modifier(modifier);
wp->x_tiled = modifier == I915_FORMAT_MOD_X_TILED;
-   wp->rc_surface = modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
-modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
+   wp->rc_surface = is_ccs_modifier(modifier) &&
+modifier != I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS;
wp->is_planar = intel_format_info_is_yuv_semiplanar(format, modifier);
 
wp->width = width;
-- 
2.28.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 3/5] drm/i915: Remove allow_alloc from i915_gem_object_get_sg*

2021-07-14 Thread Jason Ekstrand
This reverts the rest of 0edbb9ba1bfe ("drm/i915: Move cmd parser
pinning to execbuffer").  Now that the only user of i915_gem_object_get_sg
without allow_alloc has been removed, we can drop the parameter.  This
portion of the revert was broken into its own patch to aid review.

Signed-off-by: Jason Ekstrand 
Cc: Maarten Lankhorst 
Reviewed-by: Jon Bloomfield 
Acked-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.h | 10 +-
 drivers/gpu/drm/i915/gem/i915_gem_pages.c  | 20 
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c|  2 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c   |  2 +-
 4 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 8be4fadeee487..f3ede43282dc6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -342,22 +342,22 @@ struct scatterlist *
 __i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
 struct i915_gem_object_page_iter *iter,
 unsigned int n,
-unsigned int *offset, bool allow_alloc, bool dma);
+unsigned int *offset, bool dma);
 
 static inline struct scatterlist *
 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
   unsigned int n,
-  unsigned int *offset, bool allow_alloc)
+  unsigned int *offset)
 {
-   return __i915_gem_object_get_sg(obj, >mm.get_page, n, offset, 
allow_alloc, false);
+   return __i915_gem_object_get_sg(obj, >mm.get_page, n, offset, 
false);
 }
 
 static inline struct scatterlist *
 i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj,
   unsigned int n,
-  unsigned int *offset, bool allow_alloc)
+  unsigned int *offset)
 {
-   return __i915_gem_object_get_sg(obj, >mm.get_dma_page, n, offset, 
allow_alloc, true);
+   return __i915_gem_object_get_sg(obj, >mm.get_dma_page, n, offset, 
true);
 }
 
 struct page *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c 
b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 0c9d28423d459..8eb1c3a6fc9cd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -494,7 +494,7 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
 struct i915_gem_object_page_iter *iter,
 unsigned int n,
 unsigned int *offset,
-bool allow_alloc, bool dma)
+bool dma)
 {
struct scatterlist *sg;
unsigned int idx, count;
@@ -516,9 +516,6 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
if (n < READ_ONCE(iter->sg_idx))
goto lookup;
 
-   if (!allow_alloc)
-   goto manual_lookup;
-
mutex_lock(>lock);
 
/* We prefer to reuse the last sg so that repeated lookup of this
@@ -568,16 +565,7 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
if (unlikely(n < idx)) /* insertion completed by another thread */
goto lookup;
 
-   goto manual_walk;
-
-manual_lookup:
-   idx = 0;
-   sg = obj->mm.pages->sgl;
-   count = __sg_page_count(sg);
-
-manual_walk:
-   /*
-* In case we failed to insert the entry into the radixtree, we need
+   /* In case we failed to insert the entry into the radixtree, we need
 * to look beyond the current sg.
 */
while (idx + count <= n) {
@@ -624,7 +612,7 @@ i915_gem_object_get_page(struct drm_i915_gem_object *obj, 
unsigned int n)
 
GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
 
-   sg = i915_gem_object_get_sg(obj, n, , true);
+   sg = i915_gem_object_get_sg(obj, n, );
return nth_page(sg_page(sg), offset);
 }
 
@@ -650,7 +638,7 @@ i915_gem_object_get_dma_address_len(struct 
drm_i915_gem_object *obj,
struct scatterlist *sg;
unsigned int offset;
 
-   sg = i915_gem_object_get_sg_dma(obj, n, , true);
+   sg = i915_gem_object_get_sg_dma(obj, n, );
 
if (len)
*len = sg_dma_len(sg) - (offset << PAGE_SHIFT);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 6589411396d3f..f253b11e9e367 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -589,7 +589,7 @@ static unsigned long i915_ttm_io_mem_pfn(struct 
ttm_buffer_object *bo,
 
GEM_WARN_ON(bo->ttm);
 
-   sg = __i915_gem_object_get_sg(obj, >ttm.get_io_page, page_offset, 
, true, true);
+   sg = __i915_gem_object_get_sg(obj, >ttm.get_io_page, page_offset, 
, true);
 
return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 

[Intel-gfx] [PATCH 4/5] drm/i915: Drop error handling from dma_fence_work

2021-07-14 Thread Jason Ekstrand
Asynchronous command parsing was the only thing which ever returned a
non-zero error.  With that gone, we can drop the error handling from
dma_fence_work.

Signed-off-by: Jason Ekstrand 
Reviewed-by: Jon Bloomfield 
Acked-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 4 +---
 drivers/gpu/drm/i915/i915_sw_fence_work.c   | 5 +
 drivers/gpu/drm/i915/i915_sw_fence_work.h   | 2 +-
 drivers/gpu/drm/i915/i915_vma.c | 3 +--
 4 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c 
b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index daf9284ef1f54..f0435c6feb68b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -24,13 +24,11 @@ static void __do_clflush(struct drm_i915_gem_object *obj)
i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
 }
 
-static int clflush_work(struct dma_fence_work *base)
+static void clflush_work(struct dma_fence_work *base)
 {
struct clflush *clflush = container_of(base, typeof(*clflush), base);
 
__do_clflush(clflush->obj);
-
-   return 0;
 }
 
 static void clflush_release(struct dma_fence_work *base)
diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.c 
b/drivers/gpu/drm/i915/i915_sw_fence_work.c
index a3a81bb8f2c36..5b33ef23d54c9 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence_work.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence_work.c
@@ -16,11 +16,8 @@ static void fence_complete(struct dma_fence_work *f)
 static void fence_work(struct work_struct *work)
 {
struct dma_fence_work *f = container_of(work, typeof(*f), work);
-   int err;
 
-   err = f->ops->work(f);
-   if (err)
-   dma_fence_set_error(>dma, err);
+   f->ops->work(f);
 
fence_complete(f);
dma_fence_put(>dma);
diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.h 
b/drivers/gpu/drm/i915/i915_sw_fence_work.h
index 2c409f11c5c59..d56806918d131 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence_work.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence_work.h
@@ -17,7 +17,7 @@ struct dma_fence_work;
 
 struct dma_fence_work_ops {
const char *name;
-   int (*work)(struct dma_fence_work *f);
+   void (*work)(struct dma_fence_work *f);
void (*release)(struct dma_fence_work *f);
 };
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 0f227f28b2802..5b9dce0f443b0 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -300,14 +300,13 @@ struct i915_vma_work {
unsigned int flags;
 };
 
-static int __vma_bind(struct dma_fence_work *work)
+static void __vma_bind(struct dma_fence_work *work)
 {
struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
struct i915_vma *vma = vw->vma;
 
vma->ops->bind_vma(vw->vm, >stash,
   vma, vw->cache_level, vw->flags);
-   return 0;
 }
 
 static void __vma_release(struct dma_fence_work *work)
-- 
2.31.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 5/5] Revert "drm/i915: Skip over MI_NOOP when parsing"

2021-07-14 Thread Jason Ekstrand
This reverts a6c5e2aea704 ("drm/i915: Skip over MI_NOOP when parsing").
It complicates the batch parsing code a bit and increases indentation
for no reason other than fast-skipping a command that userspace uses
only rarely.  Sure, there may be IGT tests that fill batches with NOOPs
but that's not a case we should optimize for in the kernel.  We should
optimize for code clarity instead.

Signed-off-by: Jason Ekstrand 
Reviewed-by: Jon Bloomfield 
Acked-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 67 +-
 1 file changed, 34 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 00ec618d01590..322f4d5955a4f 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1470,42 +1470,43 @@ int intel_engine_cmd_parser(struct intel_engine_cs 
*engine,
 * space. Parsing should be faster in some cases this way.
 */
batch_end = cmd + batch_length / sizeof(*batch_end);
-   while (*cmd != MI_BATCH_BUFFER_END) {
-   u32 length = 1;
-
-   if (*cmd != MI_NOOP) { /* MI_NOOP == 0 */
-   desc = find_cmd(engine, *cmd, desc, _desc);
-   if (!desc) {
-   DRM_DEBUG("CMD: Unrecognized command: 
0x%08X\n", *cmd);
-   ret = -EINVAL;
-   break;
-   }
+   do {
+   u32 length;
 
-   if (desc->flags & CMD_DESC_FIXED)
-   length = desc->length.fixed;
-   else
-   length = (*cmd & desc->length.mask) + 
LENGTH_BIAS;
+   if (*cmd == MI_BATCH_BUFFER_END)
+   break;
 
-   if ((batch_end - cmd) < length) {
-   DRM_DEBUG("CMD: Command length exceeds batch 
length: 0x%08X length=%u batchlen=%td\n",
- *cmd,
- length,
- batch_end - cmd);
-   ret = -EINVAL;
-   break;
-   }
+   desc = find_cmd(engine, *cmd, desc, _desc);
+   if (!desc) {
+   DRM_DEBUG("CMD: Unrecognized command: 0x%08X\n", *cmd);
+   ret = -EINVAL;
+   break;
+   }
 
-   if (!check_cmd(engine, desc, cmd, length)) {
-   ret = -EACCES;
-   break;
-   }
+   if (desc->flags & CMD_DESC_FIXED)
+   length = desc->length.fixed;
+   else
+   length = (*cmd & desc->length.mask) + LENGTH_BIAS;
 
-   if (cmd_desc_is(desc, MI_BATCH_BUFFER_START)) {
-   ret = check_bbstart(cmd, offset, length, 
batch_length,
-   batch_addr, shadow_addr,
-   jump_whitelist);
-   break;
-   }
+   if ((batch_end - cmd) < length) {
+   DRM_DEBUG("CMD: Command length exceeds batch length: 
0x%08X length=%u batchlen=%td\n",
+ *cmd,
+ length,
+ batch_end - cmd);
+   ret = -EINVAL;
+   break;
+   }
+
+   if (!check_cmd(engine, desc, cmd, length)) {
+   ret = -EACCES;
+   break;
+   }
+
+   if (cmd_desc_is(desc, MI_BATCH_BUFFER_START)) {
+   ret = check_bbstart(cmd, offset, length, batch_length,
+   batch_addr, shadow_addr,
+   jump_whitelist);
+   break;
}
 
if (!IS_ERR_OR_NULL(jump_whitelist))
@@ -1518,7 +1519,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs 
*engine,
ret = -EINVAL;
break;
}
-   }
+   } while (1);
 
if (trampoline) {
/*
-- 
2.31.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/5] Revert "drm/i915: Propagate errors on awaiting already signaled fences"

2021-07-14 Thread Jason Ekstrand
This reverts commit 9e31c1fe45d555a948ff66f1f0e3fe1f83ca63f7.  Ever
since that commit, we've been having issues where a hang in one client
can propagate to another.  In particular, a hang in an app can propagate
to the X server which causes the whole desktop to lock up.

Error propagation along fences sounds like a good idea, but as your bug
shows, it has surprising consequences, since propagating errors across security
boundaries is not a good thing.

What we do have is track the hangs on the ctx, and report information to
userspace using RESET_STATS. That's how arb_robustness works. Also, if my
understanding is still correct, the EIO from execbuf is when your context
is banned (because not recoverable or too many hangs). And in all these
cases it's up to userspace to figure out what is all impacted and should
be reported to the application, that's not on the kernel to guess and
automatically propagate.

What's more, we're also building more features on top of ctx error
reporting with RESET_STATS ioctl: Encrypted buffers use the same, and the
userspace fence wait also relies on that mechanism. So it is the path
going forward for reporting gpu hangs and resets to userspace.

So all together that's why I think we should just bury this idea again as
not quite the direction we want to go to, hence why I think the revert is
the right option here.

For backporters: Please note that you _must_ have a backport of
https://lore.kernel.org/dri-devel/20210602164149.391653-2-ja...@jlekstrand.net/
for otherwise backporting just this patch opens up a security bug.

v2: Augment commit message. Also restore Jason's sob that I
accidentally lost.

v3: Add a note for backporters

Signed-off-by: Jason Ekstrand 
Reported-by: Marcin Slusarz 
Cc:  # v5.6+
Cc: Jason Ekstrand 
Cc: Marcin Slusarz 
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3080
Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled 
fences")
Acked-by: Daniel Vetter 
Reviewed-by: Jon Bloomfield 
---
 drivers/gpu/drm/i915/i915_request.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 86b4c9f2613d5..09ebea9a0090a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1399,10 +1399,8 @@ i915_request_await_execution(struct i915_request *rq,
 
do {
fence = *child++;
-   if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, >flags)) {
-   i915_sw_fence_set_error_once(>submit, fence->error);
+   if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, >flags))
continue;
-   }
 
if (fence->context == rq->fence.context)
continue;
@@ -1499,10 +1497,8 @@ i915_request_await_dma_fence(struct i915_request *rq, 
struct dma_fence *fence)
 
do {
fence = *child++;
-   if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, >flags)) {
-   i915_sw_fence_set_error_once(>submit, fence->error);
+   if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, >flags))
continue;
-   }
 
/*
 * Requests on the same timeline are explicitly ordered, along
-- 
2.31.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/5] drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser"

2021-07-14 Thread Jason Ekstrand
This reverts 686c7c35abc2 ("drm/i915/gem: Asynchronous cmdparser").  The
justification for this commit in the git history was a vague comment
about getting it out from under the struct_mutex.  While this may
improve perf for some workloads on Gen7 platforms where we rely on the
command parser for features such as indirect rendering, no numbers were
provided to prove such an improvement.  It claims to have closed two
gitlab/bugzilla issues but with no explanation whatsoever as to why or
what bug it's fixing.

Meanwhile, by moving command parsing off to an async callback, it leaves
us with a problem of what to do on error.  When things were synchronous,
EXECBUFFER2 would fail with an error code if parsing failed.  When
moving it to async, we needed another way to handle that error and the
solution employed was to set an error on the dma_fence and then trust
that said error gets propagated to the client eventually.  Moving back
to synchronous will help us untangle the fence error propagation mess.

This also reverts most of 0edbb9ba1bfe ("drm/i915: Move cmd parser
pinning to execbuffer") which is a refactor of some of our allocation
paths for asynchronous parsing.  Now that everything is synchronous, we
don't need it.

v2 (Daniel Vetter):
 - Add stable Cc and Fixes tag

Signed-off-by: Jason Ekstrand 
Cc:  # v5.6+
Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled 
fences")
Cc: Maarten Lankhorst 
Reviewed-by: Jon Bloomfield 
Acked-by: Daniel Vetter 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 227 +-
 .../i915/gem/selftests/i915_gem_execbuffer.c  |   4 +
 drivers/gpu/drm/i915/i915_cmd_parser.c| 132 +-
 drivers/gpu/drm/i915/i915_drv.h   |   7 +-
 4 files changed, 91 insertions(+), 279 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 5ea8b4e23e428..1ed7475de454d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -25,10 +25,8 @@
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
-#include "i915_sw_fence_work.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
-#include "i915_memcpy.h"
 
 struct eb_vma {
struct i915_vma *vma;
@@ -1471,6 +1469,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
int err;
struct intel_engine_cs *engine = eb->engine;
 
+   /* If we need to copy for the cmdparser, we will stall anyway */
+   if (eb_use_cmdparser(eb))
+   return ERR_PTR(-EWOULDBLOCK);
+
if (!reloc_can_use_engine(engine)) {
engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
if (!engine)
@@ -2385,217 +2387,6 @@ shadow_batch_pin(struct i915_execbuffer *eb,
return vma;
 }
 
-struct eb_parse_work {
-   struct dma_fence_work base;
-   struct intel_engine_cs *engine;
-   struct i915_vma *batch;
-   struct i915_vma *shadow;
-   struct i915_vma *trampoline;
-   unsigned long batch_offset;
-   unsigned long batch_length;
-   unsigned long *jump_whitelist;
-   const void *batch_map;
-   void *shadow_map;
-};
-
-static int __eb_parse(struct dma_fence_work *work)
-{
-   struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
-   int ret;
-   bool cookie;
-
-   cookie = dma_fence_begin_signalling();
-   ret = intel_engine_cmd_parser(pw->engine,
- pw->batch,
- pw->batch_offset,
- pw->batch_length,
- pw->shadow,
- pw->jump_whitelist,
- pw->shadow_map,
- pw->batch_map);
-   dma_fence_end_signalling(cookie);
-
-   return ret;
-}
-
-static void __eb_parse_release(struct dma_fence_work *work)
-{
-   struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
-
-   if (!IS_ERR_OR_NULL(pw->jump_whitelist))
-   kfree(pw->jump_whitelist);
-
-   if (pw->batch_map)
-   i915_gem_object_unpin_map(pw->batch->obj);
-   else
-   i915_gem_object_unpin_pages(pw->batch->obj);
-
-   i915_gem_object_unpin_map(pw->shadow->obj);
-
-   if (pw->trampoline)
-   i915_active_release(>trampoline->active);
-   i915_active_release(>shadow->active);
-   i915_active_release(>batch->active);
-}
-
-static const struct dma_fence_work_ops eb_parse_ops = {
-   .name = "eb_parse",
-   .work = __eb_parse,
-   .release = __eb_parse_release,
-};
-
-static inline int
-__parser_mark_active(struct i915_vma *vma,
-struct intel_timeline *tl,
-struct dma_fence *fence)
-{
-   struct 

[Intel-gfx] [PATCH 0/5] drm/i915: Get rid of fence error propagation (v4)

2021-07-14 Thread Jason Ekstrand
Fence error propagation is sketchy at best.  Instead of explicitly handling
fences which might have errors set in the code which is aware of errors, we
just kick them down the line and hope that userspace knows what to do when
a wait eventually fails.  This is sketchy at best because most userspace
isn't prepared to handle errors in those places.  To make things worse, it
allows errors to propagate across processes in unpredictable ways.  This is
causing hangs in one client to kill X11.

Unfortunately, there's no quick path from here to there thanks to the fact
that we're now running the command parser asynchronously and relying on
fence errors for when it fails.  This series first gets rid of asynchronous
command parsing and then cleans up from there.  There was never any real
use-case for asynchronous parsing and the platforms that rely heavily on
the command parser are old enough (Gen7) that, when we changed the way the
command parser works, it wasn't really a change anyone was asking for
anyway.

I think we probably want this whole mess back-ported.  I'm happy to take
suggestions on the strategy there because the history there is a bit
annoying and I'm not 100% sure where the Linux release cuts land.  In any
case, I'm happy to make a version of this series per-release if needed for
Greg to back-port.

v2 (Daniel Vetter):
 - Re-order to put the reverts first
 - Add ACKs from Daniel
 - Add better CC and Fixes tags

v3 (Jason Ekstrand):
 - Rebase on drm-tip

v4 (Jason Ekstrand):
 - Rebase on drm-tip

Test-with: 20210714173141.1381686-1-ja...@jlekstrand.net

Jason Ekstrand (5):
  drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser"
  Revert "drm/i915: Propagate errors on awaiting already signaled
fences"
  drm/i915: Remove allow_alloc from i915_gem_object_get_sg*
  drm/i915: Drop error handling from dma_fence_work
  Revert "drm/i915: Skip over MI_NOOP when parsing"

 drivers/gpu/drm/i915/gem/i915_gem_clflush.c   |   4 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 227 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  10 +-
 drivers/gpu/drm/i915/gem/i915_gem_pages.c |  20 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |   2 +-
 .../i915/gem/selftests/i915_gem_execbuffer.c  |   4 +
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |   2 +-
 drivers/gpu/drm/i915/i915_cmd_parser.c| 199 ---
 drivers/gpu/drm/i915/i915_drv.h   |   7 +-
 drivers/gpu/drm/i915/i915_request.c   |   8 +-
 drivers/gpu/drm/i915/i915_sw_fence_work.c |   5 +-
 drivers/gpu/drm/i915/i915_sw_fence_work.h |   2 +-
 drivers/gpu/drm/i915/i915_vma.c   |   3 +-
 13 files changed, 142 insertions(+), 351 deletions(-)

-- 
2.31.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Add TTM offset argument to mmap.

2021-07-14 Thread Jason Ekstrand
On Wed, Jul 14, 2021 at 12:05 PM Maarten Lankhorst
 wrote:
>
> Op 14-07-2021 om 18:17 schreef Jason Ekstrand:
> > On Wed, Jul 14, 2021 at 5:03 AM Maarten Lankhorst
> >  wrote:
> >> The FIXED mapping is only used for ttm, and tells userspace that the
> >> mapping type is pre-defined. This disables the other type of mmap
> >> offsets when discrete memory is used, so fix the selftests as well.
> >>
> >> Document the struct as well, so it shows up in docbook.
> >>
> >> Cc: Jason Ekstrand 
> >> Reviewed-by: Daniel Vetter 
> >> Signed-off-by: Maarten Lankhorst 
> >> ---
> >>  drivers/gpu/drm/i915/gem/i915_gem_mman.c  | 17 ++-
> >>  .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
> >>  .../drm/i915/gem/selftests/i915_gem_mman.c| 27 ++-
> >>  include/uapi/drm/i915_drm.h   | 46 ++-
> >>  4 files changed, 77 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
> >> b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> >> index a90f796e85c0..31c4021bb6be 100644
> >> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> >> @@ -679,10 +679,16 @@ __assign_mmap_offset(struct drm_i915_gem_object *obj,
> >> return -ENODEV;
> >>
> >> if (obj->ops->mmap_offset)  {
> >> +   if (mmap_type != I915_MMAP_TYPE_FIXED)
> >> +   return -ENODEV;
> >> +
> >> *offset = obj->ops->mmap_offset(obj);
> >> return 0;
> >> }
> >>
> >> +   if (mmap_type == I915_MMAP_TYPE_FIXED)
> >> +   return -ENODEV;
> >> +
> >> if (mmap_type != I915_MMAP_TYPE_GTT &&
> >> !i915_gem_object_has_struct_page(obj) &&
> >> !i915_gem_object_has_iomem(obj))
> >> @@ -727,7 +733,9 @@ i915_gem_dumb_mmap_offset(struct drm_file *file,
> >>  {
> >> enum i915_mmap_type mmap_type;
> >>
> >> -   if (boot_cpu_has(X86_FEATURE_PAT))
> >> +   if (HAS_LMEM(to_i915(dev)))
> >> +   mmap_type = I915_MMAP_TYPE_FIXED;
> >> +   else if (boot_cpu_has(X86_FEATURE_PAT))
> >> mmap_type = I915_MMAP_TYPE_WC;
> >> else if (!i915_ggtt_has_aperture(_i915(dev)->ggtt))
> >> return -ENODEV;
> >> @@ -798,6 +806,10 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, 
> >> void *data,
> >> type = I915_MMAP_TYPE_UC;
> >> break;
> >>
> >> +   case I915_MMAP_OFFSET_FIXED:
> >> +   type = I915_MMAP_TYPE_FIXED;
> >> +   break;
> >> +
> >> default:
> >> return -EINVAL;
> >> }
> >> @@ -968,6 +980,9 @@ int i915_gem_mmap(struct file *filp, struct 
> >> vm_area_struct *vma)
> >> vma->vm_ops = _ops_cpu;
> >> break;
> >>
> >> +   case I915_MMAP_TYPE_FIXED:
> >> +   GEM_WARN_ON(1);
> >> +   /* fall-through */
> >> case I915_MMAP_TYPE_WB:
> >> vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
> >> vma->vm_ops = _ops_cpu;
> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
> >> b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> >> index ef3de2ae9723..afbadfc5516b 100644
> >> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> >> @@ -105,6 +105,7 @@ enum i915_mmap_type {
> >> I915_MMAP_TYPE_WC,
> >> I915_MMAP_TYPE_WB,
> >> I915_MMAP_TYPE_UC,
> >> +   I915_MMAP_TYPE_FIXED,
> >>  };
> >>
> >>  struct i915_mmap_offset {
> >> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
> >> b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> >> index 1da8bd675e54..52789c8ad337 100644
> >> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> >> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> >> @@ -573,6 +573,14 @@ static int make_obj_busy(struct drm_i915_gem_object 
> >> *obj)
> >> return 0;
> >>  }
> >>
> >> +static enum i915_mmap_type default_mapping(struct drm_i915_private *i915)
> >> +{
> >> +   if (HAS_LMEM(i915))
> >> +   return I915_MMAP_TYPE_FIXED;
> >> +
> >> +   return I915_MMAP_TYPE_GTT;
> >> +}
> >> +
> >>  static bool assert_mmap_offset(struct drm_i915_private *i915,
> >>unsigned long size,
> >>int expected)
> >> @@ -585,7 +593,7 @@ static bool assert_mmap_offset(struct drm_i915_private 
> >> *i915,
> >> if (IS_ERR(obj))
> >> return expected && expected == PTR_ERR(obj);
> >>
> >> -   ret = __assign_mmap_offset(obj, I915_MMAP_TYPE_GTT, , NULL);
> >> +   ret = __assign_mmap_offset(obj, default_mapping(i915), , 
> >> NULL);
> >> i915_gem_object_put(obj);
> >>
> >> return ret == expected;
> >> @@ -689,7 +697,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
> >> goto out;
> >>  

Re: [Intel-gfx] [PATCH v5 17/17] drm/amd/display: Add handling for new "Broadcast RGB" property

2021-07-14 Thread Werner Sembach

Am 30.06.21 um 17:10 schrieb Werner Sembach:

This commit implements the "Broadcast RGB" drm property for the AMD GPU
driver.

Signed-off-by: Werner Sembach 
---
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  | 14 +++---
  .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c|  4 
  2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 02a5809d4993..80d5a11fb0c5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5247,7 +5247,8 @@ get_aspect_ratio(const struct drm_display_mode *mode_in)
  }
  
  static enum dc_color_space

-get_output_color_space(const struct dc_crtc_timing *dc_crtc_timing)
+get_output_color_space(const struct dc_crtc_timing *dc_crtc_timing,
+  enum drm_mode_color_range preferred_color_range)
  {
enum dc_color_space color_space = COLOR_SPACE_SRGB;
  
@@ -5278,7 +5279,10 @@ get_output_color_space(const struct dc_crtc_timing *dc_crtc_timing)

}
break;
case PIXEL_ENCODING_RGB:
-   color_space = COLOR_SPACE_SRGB;
+   if (preferred_color_range == 
DRM_MODE_COLOR_RANGE_LIMITED_16_235)
+   color_space = COLOR_SPACE_SRGB_LIMITED;
+   else
+   color_space = COLOR_SPACE_SRGB;
break;


After some testing I found out, that what I did here, was useless.

amdgpu actually never sets the quantization_range in the 
hdmi_avi_infoframe, and from that I guess any quantization range besides 
the default one is not implemented in multiple places


Until limited RGB is properly implemented in amdgpu there kind of is no 
purpose of generalizing the Broadcast RGB switch.


  
  	default:

@@ -5424,7 +5428,10 @@ static void fill_stream_properties_from_drm_display_mode(
  
  	timing_out->aspect_ratio = get_aspect_ratio(mode_in);
  
-	stream->output_color_space = get_output_color_space(timing_out);

+   stream->output_color_space = get_output_color_space(timing_out,
+   connector_state ?
+   
connector_state->preferred_color_range :
+   
DRM_MODE_COLOR_RANGE_UNSET);
  
  	stream->out_transfer_func->type = TF_TYPE_PREDEFINED;

stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
@@ -7775,6 +7782,7 @@ void amdgpu_dm_connector_init_helper(struct 
amdgpu_display_manager *dm,
drm_connector_attach_active_bpc_property(>base, 8, 
16);

drm_connector_attach_preferred_color_format_property(>base);

drm_connector_attach_active_color_format_property(>base);
+   
drm_connector_attach_preferred_color_range_property(>base);

drm_connector_attach_active_color_range_property(>base);
}
  
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c

index 2563788ba95a..80e1389fd0ec 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -421,6 +421,10 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr 
*mgr,
if (connector->active_color_format_property)

drm_connector_attach_active_color_format_property(>base);
  
+	connector->preferred_color_range_property = master->base.preferred_color_range_property;

+   if (connector->preferred_color_range_property)
+   
drm_connector_attach_preferred_color_range_property(>base);
+
connector->active_color_range_property = 
master->base.active_color_range_property;
if (connector->active_color_range_property)

drm_connector_attach_active_color_range_property(>base);

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 03/17] drm/uAPI: Add "active bpc" as feedback channel for "max bpc" drm property

2021-07-14 Thread Werner Sembach

Am 01.07.21 um 13:30 schrieb Werner Sembach:

Am 01.07.21 um 09:42 schrieb Pekka Paalanen:

On Wed, 30 Jun 2021 11:42:10 +0200
Werner Sembach  wrote:


Am 30.06.21 um 10:21 schrieb Pekka Paalanen:

On Tue, 29 Jun 2021 13:02:05 +0200
Werner Sembach  wrote:
  

Am 28.06.21 um 19:03 schrieb Werner Sembach:

Am 18.06.21 um 11:11 schrieb Werner Sembach:

Add a new general drm property "active bpc" which can be used by graphic
drivers to report the applied bit depth per pixel back to userspace.

While "max bpc" can be used to change the color depth, there was no way to
check which one actually got used. While in theory the driver chooses the
best/highest color depth within the max bpc setting a user might not be
fully aware what his hardware is or isn't capable off. This is meant as a
quick way to double check the setup.

In the future, automatic color calibration for screens might also depend on
this information being available.

Signed-off-by: Werner Sembach 
---
   drivers/gpu/drm/drm_connector.c | 51 +
   include/drm/drm_connector.h |  8 ++
   2 files changed, 59 insertions(+)

diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index da39e7ff6965..943f6b61053b 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -1197,6 +1197,14 @@ static const struct drm_prop_enum_list dp_colorspaces[] 
= {
*   drm_connector_attach_max_bpc_property() to create and attach the
*   property to the connector during initialization.
*
+ * active bpc:
+ * This read-only range property tells userspace the pixel color bit depth
+ * actually used by the hardware display engine on "the cable" on a
+ * connector. The chosen value depends on hardware capabilities, both
+ * display engine and connected monitor, and the "max bpc" property.
+ * Drivers shall use drm_connector_attach_active_bpc_property() to install
+ * this property.
+ *

Regarding "on the cable" and dithering: As far as I can tell, what the 
dithering option does, is setting a hardware
register here:

- 
https://elixir.bootlin.com/linux/v5.13/source/drivers/gpu/drm/i915/display/intel_display.c#L4534

- 
https://elixir.bootlin.com/linux/v5.13/source/drivers/gpu/drm/i915/display/intel_display.c#L4571

So dithering seems to be calculated by fixed purpose hardware/firmware outside 
of the driver?

The Intel driver does not seem to set a target bpc/bpp for this hardware so I 
guess it defaults to 6 or 8 bpc?

Never mind it does. This switch-case does affect the dithering output:
https://elixir.bootlin.com/linux/v5.13/source/drivers/gpu/drm/i915/display/intel_display.c#L4537

Hi,

I obviously do not know the intel driver or hardware at all, but
to me that just looks like translating from bits per pixel to bits per
channel in RGB mapping?

No, if i understand the documentation correctly: Writing bit depth here
with dithering enabled sets the dithering target bpc.
  

As found in this documentation p.548:
https://01.org/sites/default/files/documentation/intel-gfx-prm-osrc-lkf-vol02c-commandreference-registers-part2.pdf

So max bpc and active bpc are affecting/affected by the bpc after dithering.

By definition, if the cable carries N bpc, then dithering does not
change that. The cable still carries N bpc, but due to spatial or
temporal dithering, the *observed* color resolution may or may not be
higher than the cable bpc.

Yes, and max bpc and active bpc tell the cable bpc is not the
*observed* bpc.

Of course, if the cable bpc is 8, and dithering targets 6 bpc, then 2
LSB on the cable are always zero, right?

I would assume that in this case only 6 bpc are actually sent? Isn't the
whole point of dithering that you can't send, for example, 8 bpc?

Maybe one would want to do that if the monitor has a 6 bit panel and it
simply ignored the 2 LSB, and the cable cannot go down to 6 bpc.

Is there dithering actually doing this? aka is my assumption above wrong?

AMD code that confused me before, is hinting that you might be right:
https://elixir.bootlin.com/linux/v5.13/source/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c#L826

there is a set_clamp depth and a separate DCP_SPATIAL_DITHER_DEPTH_30BPP


So, what does "max bpc" mean right now?

It seems like dither on/off is insufficient information, one would also
need to control the dithering target bpc. I suppose the driver has a
policy on how it chooses the target bpc, but what is that policy? Is
the dither target bpc the cable bpc or the sink bpc?

Needless to say, I'm quite confused.

... We need someone who knows what dithering on intel and amd gpu
actually means.

But I don't want this to become a blocker for this patchset, because if
there is no dithering, which seems to be the norm, the active bpc
property is already really useful as it is. So add a note to the docs
that the value might be invalid when dithering is active for now?

Hi,

not necessarily invalid. It all depends on how 

Re: [Intel-gfx] [PATCH v4 12/17] drm/uAPI: Add "preferred color format" drm property as setting for userspace

2021-07-14 Thread Werner Sembach

Am 06.07.21 um 09:09 schrieb Pekka Paalanen:

On Mon, 5 Jul 2021 17:49:42 +0200
Werner Sembach  wrote:


Am 01.07.21 um 15:24 schrieb Pekka Paalanen:

On Thu, 1 Jul 2021 14:50:13 +0200
Werner Sembach  wrote:
  

Am 01.07.21 um 10:07 schrieb Pekka Paalanen:
  

On Wed, 30 Jun 2021 11:20:18 +0200
Werner Sembach  wrote:


Am 30.06.21 um 10:41 schrieb Pekka Paalanen:


On Tue, 29 Jun 2021 13:39:18 +0200
Werner Sembach  wrote:
  

Am 29.06.21 um 13:17 schrieb Pekka Paalanen:

On Tue, 29 Jun 2021 08:12:54 +
Simon Ser  wrote:
 

On Tuesday, June 22nd, 2021 at 09:15, Pekka Paalanen  
wrote:
 

yes, I think this makes sense, even if it is a property that one can't
tell for sure what it does before hand.

Using a pair of properties, preference and active, to ask for something
and then check what actually worked is good for reducing the
combinatorial explosion caused by needing to "atomic TEST_ONLY commit"
test different KMS configurations. Userspace has a better chance of
finding a configuration that is possible.

OTOH, this has the problem than in UI one cannot tell the user in
advance which options are truly possible. Given that KMS properties are
rarely completely independent, and in this case known to depend on
several other KMS properties, I think it is good enough to know after
the fact.

If a driver does not use what userspace prefers, there is no way to
understand why, or what else to change to make it happen. That problem
exists anyway, because TEST_ONLY commits do not give useful feedback
but only a yes/no.

By submitting incremental atomic reqs with TEST_ONLY (i.e. only changing one
property at a time), user-space can discover which property makes the atomic
commit fail.

That works if the properties are independent of each other. Color
range, color format, bpc and more may all be interconnected,
allowing only certain combinations to work.

If all these properties have "auto" setting too, then it would be
possible to probe each property individually, but that still does not
tell which combinations are valid.

If you probe towards a certain configuration by setting the properties
one by one, then depending on the order you pick the properties, you
may come to a different conclusion on which property breaks the
configuration.

My mind crossed another point that must be considered: When plugin in
a Monitor a list of possible Resolutions+Framerate combinations is
created for xrandr and other userspace (I guess by atomic checks? but
I don't know).

Hi,

I would not think so, but I hope to be corrected if I'm wrong.

My belief is that the driver collects a list of modes from EDID, some
standard modes, and maybe some other hardcoded modes, and then
validates each entry against all the known limitations like vertical
and horizontal frequency limits, discarding modes that do not fit.

Not all limitations are known during that phase, which is why KMS
property "link-status" exists. When userspace actually programs a mode
(not a TEST_ONLY commit), the link training may fail. The kernel prunes
the mode from the list and sets the link status property to signal
failure, and sends a hotplug uevent. Userspace needs to re-check the
mode list and try again.

That is a generic escape hatch for when TEST_ONLY commit succeeds, but
in reality the hardware cannot do it, you just cannot know until you
actually try for real. It causes end user visible flicker if it happens
on an already running connector, but since it usually happens when
turning a connector on to begin with, there is no flicker to be seen,
just a small delay in finding a mode that works.
  

During this drm
properties are already considered, which is no problem atm because as
far as i can tell there is currently no drm property that would make
a certain Resolutions+Framerate combination unreachable that would be
possible with everything on default.

I would not expect KMS properties to be considered at all. It would
reject modes that are actually possible if the some KMS properties were
changed. So at least going forward, current KMS property values cannot
factor in.

At least the debugfs variable "force_yuv420_output" did change the
available modes here:
https://elixir.bootlin.com/linux/v5.13/source/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c#L5165
before my patch
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=68eb3ae3c63708f823aeeb63bb15197c727bd9bf

Hi,

debugfs is not proper UAPI, so we can just ignore it. Display servers
cannot be expected to poke in debugfs. Debugfs is not even supposed to
exist in production systems, but I'm sure people use it for hacking
stuff. But that's all it is for: developer testing and hacking.

e.g. Ubuntu has it active by default, but only read (and writable) by root.

Hi,

that's normal, yes. Root can do damage anyway, and it's useful for
debugging. KMS clients OTOH often do not run as root.
  


Forcing a color format via a DRM property in this 

[Intel-gfx] [PATCH v3 5/5] i915: map gvt pr_debug categories to bits in parameters/debug_gvt

2021-07-14 Thread Jim Cromie
The gvt component of this driver has ~120 pr_debugs, in 9 "classes".
Following the interface model of drm.debug, add a parameter to map
bits to these classes.

If CONFIG_DRM_USE_DYNAMIC_DEBUG=y (and CONFIG_DYNAMIC_DEBUG_CORE), add
-DDYNAMIC_DEBUG_MODULE into Makefile.  TBD: maybe add a separate
CONFIG_I915_USE_DYNAMIC_DEBUG to more fully optionalize this.

In i915_params.c, add callback to map bits to queries.

TBD: the callback code should probably be moved to lib/dynamic_debug,
and given a declarative interface, with implied bit-numbering,
something like:

MOD_PARM_BITMAP_DESC(__gvt_debug,
"gvt: cmd: ",  "command processing"
"gvt: core: ", "core help",
"gvt: dpy: ",  "display help",
"gvt: el: ",   "help",
"gvt: irq: ",  "help",
"gvt: mm: ",   "help",
"gvt: mmio: ", "help",
"gvt: render: ", "help",
"gvt: sched: " "help");

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/i915/gvt/Makefile  |  4 ++
 drivers/gpu/drm/i915/i915_params.c | 76 ++
 2 files changed, 80 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/Makefile 
b/drivers/gpu/drm/i915/gvt/Makefile
index ea8324abc784..846ba73b8de6 100644
--- a/drivers/gpu/drm/i915/gvt/Makefile
+++ b/drivers/gpu/drm/i915/gvt/Makefile
@@ -7,3 +7,7 @@ GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o 
trace_points.o firmware.o \
 
 ccflags-y  += -I $(srctree)/$(src) -I 
$(srctree)/$(src)/$(GVT_DIR)/
 i915-y += $(addprefix $(GVT_DIR)/, 
$(GVT_SOURCE))
+
+#ifdef CONFIG_DRM_USE_DYNAMIC_DEBUG
+ccflags-y  += -DDYNAMIC_DEBUG_MODULE
+#endif
diff --git a/drivers/gpu/drm/i915/i915_params.c 
b/drivers/gpu/drm/i915/i915_params.c
index e07f4cfea63a..e0d13aff5274 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -265,3 +265,79 @@ void i915_params_free(struct i915_params *params)
I915_PARAMS_FOR_EACH(FREE);
 #undef FREE
 }
+
+/* POC for callback -> dynamic_debug_exec_queries */
+unsigned long __gvt_debug;
+EXPORT_SYMBOL(__gvt_debug);
+
+static char *format_prefix_classes[] = {
+   "gvt: cmd: ",
+   "gvt: core: ",
+   "gvt: dpy: ",
+   "gvt: el: ",
+   "gvt: irq: ",
+   "gvt: mm: ",
+   "gvt: mmio: ",
+   "gvt: render: ",
+   "gvt: sched: "
+};
+#define NUM_CLASSESARRAY_SIZE(format_prefix_classes)
+#define OUR_QUERY_SIZE 128 /* we need about 20 */
+
+#include 
+
+static int param_set_dyndbg(const char *instr, const struct kernel_param *kp)
+{
+   unsigned int val;
+   unsigned long changes, result;
+   int rc, chgct = 0, totct = 0, bitpos;
+   char query[OUR_QUERY_SIZE];
+
+   rc = kstrtouint(instr, 0, &val);
+   if (rc) {
+   pr_err("set_dyndbg: failed\n");
+   return -EINVAL;
+   }
+   result = val;
+   pr_info("set_dyndbg: result:0x%lx from %s\n", result, instr);
+
+   changes = result ^ __gvt_debug;
+
+   for_each_set_bit(bitpos, &changes, NUM_CLASSES) {
+
+   sprintf(query, "format '^%s' %cp", 
format_prefix_classes[bitpos],
+   test_bit(bitpos, &result) ? '+' : '-');
+
+   chgct = dynamic_debug_exec_queries(query, "i915");
+
+   pr_info("%d changes on: %s\n", chgct, query);
+   totct += chgct;
+   }
+   pr_info("total changes: %d\n", totct);
+   __gvt_debug = result;
+   return 0;
+}
+static int param_get_dyndbg(char *buffer, const struct kernel_param *kp)
+{
+   return scnprintf(buffer, PAGE_SIZE, "%u\n",
+*((unsigned int *)kp->arg));
+}
+static const struct kernel_param_ops param_ops_dyndbg = {
+   .set = param_set_dyndbg,
+   .get = param_get_dyndbg,
+};
+
+#define info_ln(hexi, prefix) "\n\t0x" __stringify(hexi) "\t" prefix
+
+MODULE_PARM_DESC(debug_gvt, " gvt debug categories:"
+info_ln(1, "gvt: cmd:")
+info_ln(2, "gvt: core:")
+info_ln(4, "gvt: dpy:")
+info_ln(8, "gvt: el:")
+info_ln(10, "gvt: irq:")
+info_ln(20, "gvt: mm:")
+info_ln(40, "gvt: mmio:")
+info_ln(80, "gvt: render:")
+info_ln(100, "gvt: sched:"));
+
+module_param_cb(debug_gvt, &param_ops_dyndbg, &__gvt_debug, 0644);
-- 
2.31.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 4/5] drm/print: move conditional deref into macro defn

2021-07-14 Thread Jim Cromie
commit 7911902129a8 ("drm/print: Handle potentially NULL drm_devices
in drm_dbg_*") added a maybe(deref) to 6 macro invocations of
drm_dev_dbg().

Commit 01ff672190bd("drm: RFC add choice to use dynamic debug in
drm-debug") then renamed that fn to _drm_dev_dbg(), and redefined
drm_dev_dbg() as a macro.

That new macro can do the maybe(deref), so the ~9 callers dont have to.

no functional changes. small word-count reduction.

Signed-off-by: Jim Cromie 
---
 include/drm/drm_print.h | 27 +--
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 499fa0b35200..573b513e7836 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -326,8 +326,8 @@ enum drm_debug_category {
  */
 #define __drm_dbg(cls, fmt, ...)   \
___drm_dbg(cls, fmt, ##__VA_ARGS__)
-#define drm_dev_dbg(dev, cls, fmt, ...)\
-   _drm_dev_dbg(dev, cls, fmt, ##__VA_ARGS__)
+#define drm_dev_dbg(drm, cls, fmt, ...)\
+   _drm_dev_dbg((drm) ? (drm)->dev : NULL, cls, fmt, ##__VA_ARGS__)
 
 #define cDRM_UT_CORE   DRM_UT_CORE
 #define cDRM_UT_DRIVER DRM_UT_DRIVER
@@ -488,25 +488,25 @@ void _drm_dev_dbg(const struct device *dev, enum 
drm_debug_category category,
 
 
 #define drm_dbg_core(drm, fmt, ...)\
-   drm_dev_dbg((drm) ? (drm)->dev : NULL, cDRM_UT_CORE, fmt, ##__VA_ARGS__)
+   drm_dev_dbg(drm, cDRM_UT_CORE, fmt, ##__VA_ARGS__)
 #define drm_dbg(drm, fmt, ...) \
-   drm_dev_dbg((drm) ? (drm)->dev : NULL, cDRM_UT_DRIVER, fmt, 
##__VA_ARGS__)
+   drm_dev_dbg(drm, cDRM_UT_DRIVER, fmt, ##__VA_ARGS__)
 #define drm_dbg_kms(drm, fmt, ...) \
-   drm_dev_dbg((drm) ? (drm)->dev : NULL, cDRM_UT_KMS, fmt, ##__VA_ARGS__)
+   drm_dev_dbg(drm, cDRM_UT_KMS, fmt, ##__VA_ARGS__)
 #define drm_dbg_prime(drm, fmt, ...)   \
-   drm_dev_dbg((drm) ? (drm)->dev : NULL, cDRM_UT_PRIME, fmt, 
##__VA_ARGS__)
+   drm_dev_dbg(drm, cDRM_UT_PRIME, fmt, ##__VA_ARGS__)
 #define drm_dbg_atomic(drm, fmt, ...)  \
-   drm_dev_dbg((drm) ? (drm)->dev : NULL, cDRM_UT_ATOMIC, fmt, 
##__VA_ARGS__)
+   drm_dev_dbg(drm, cDRM_UT_ATOMIC, fmt, ##__VA_ARGS__)
 #define drm_dbg_vbl(drm, fmt, ...) \
-   drm_dev_dbg((drm) ? (drm)->dev : NULL, cDRM_UT_VBL, fmt, ##__VA_ARGS__)
+   drm_dev_dbg(drm, cDRM_UT_VBL, fmt, ##__VA_ARGS__)
 #define drm_dbg_state(drm, fmt, ...)   \
-   drm_dev_dbg((drm) ? (drm)->dev : NULL, cDRM_UT_STATE, fmt, 
##__VA_ARGS__)
+   drm_dev_dbg(drm, cDRM_UT_STATE, fmt, ##__VA_ARGS__)
 #define drm_dbg_lease(drm, fmt, ...)   \
-   drm_dev_dbg((drm) ? (drm)->dev : NULL, cDRM_UT_LEASE, fmt, 
##__VA_ARGS__)
+   drm_dev_dbg(drm, cDRM_UT_LEASE, fmt, ##__VA_ARGS__)
 #define drm_dbg_dp(drm, fmt, ...)  \
-   drm_dev_dbg((drm) ? (drm)->dev : NULL, cDRM_UT_DP, fmt, ##__VA_ARGS__)
+   drm_dev_dbg(drm, cDRM_UT_DP, fmt, ##__VA_ARGS__)
 #define drm_dbg_drmres(drm, fmt, ...)  \
-   drm_dev_dbg((drm) ? (drm)->dev : NULL, cDRM_UT_DRMRES, fmt, 
##__VA_ARGS__)
+   drm_dev_dbg(drm, cDRM_UT_DRMRES, fmt, ##__VA_ARGS__)
 
 
 /*
@@ -578,8 +578,7 @@ void __drm_err(const char *format, ...);
const struct drm_device *drm_ = (drm);  \
\
	if (drm_debug_enabled(DRM_UT) && __ratelimit(&rs_))	\
-		drm_dev_dbg((drm_) ? (drm_)->dev : NULL,	\
-			    cDRM_UT, fmt, ##__VA_ARGS__);	\
+		drm_dev_dbg(drm_, cDRM_UT, fmt, ##__VA_ARGS__);	\
 })
 
 #define drm_dbg_kms_ratelimited(drm, fmt, ...) \
-- 
2.31.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 3/5] drm/print: RFC add choice to use dynamic debug in drm-debug

2021-07-14 Thread Jim Cromie
drm's debug system uses distinct categories of debug messages, encoded
in an enum (DRM_UT_), which are mapped to bits in drm.debug.
drm_debug_enabled() does a lot of unlikely bit-mask checks on
drm.debug; we can use dynamic debug instead, and get all that
static_key/jump_label goodness.

Dynamic debug has no concept of category, but we can map the DRM_UT_*
to a set of distinct prefixes; "drm:core:", "drm:kms:" etc, and
prepend them to the given formats.

Then we can use:
  `echo module drm format ^drm:core: +p > control`

to enable every such "prefixed" pr_debug with one query.  This new
prefix changes pr_debug's output, so is user visible, but it seems
unlikely to cause trouble for log watchers; they're not relying on the
absence of class prefix strings.

This conversion yields ~2100 new callsites on my i7/i915 laptop:

  dyndbg: 195 debug prints in module drm_kms_helper
  dyndbg: 298 debug prints in module drm
  dyndbg: 1630 debug prints in module i915

CONFIG_DRM_USE_DYNAMIC_DEBUG enables this, and is available if
CONFIG_DYNAMIC_DEBUG or CONFIG_DYNAMIC_DEBUG_CORE is chosen, and if
CONFIG_JUMP_LABEL is enabled; this because its required to get the
promised optimizations.

The indirection/switchover is layered into the macro scheme:

0. A new callback on drm.debug which calls dynamic_debug_exec_queries
   to map those bits to specific query/commands
   dynamic_debug_exec_queries("format ^drm:kms: +p", "drm*");
   here for POC, this should be in dynamic_debug.c
   with a MODULE_PARAM_DEBUG_BITMAP(__drm_debug, { "prefix-1", "desc-1" }+)

1. A "converted" or "classy" DRM_UT_* map

   based on:   DRM_UT_* ( symbol => bit-mask )
   named it:  cDRM_UT_* ( symbol => format-class-prefix-string )

   So cDRM_UT_* is either:
   legacy: cDRM_UT_* <-- DRM_UT_*   ( !CONFIG_DRM_USE_DYNAMIC_DEBUG )
   enabled:
#define cDRM_UT_KMS"drm:kms: "
#define cDRM_UT_PRIME  "drm:prime: "
#define cDRM_UT_ATOMIC "drm:atomic: "

   DRM_UT_* are unchanged, since theyre used in drm_debug_enabled()
   and elsewhere.

2. drm_dev_dbg & drm_debug are renamed (prefixed with '_')

   old names are now macros, calling either:
 legacy:  -> to renamed fn
 enabled: -> dev_dbg & pr_debug, with cDRM-prefix # format.

   these names are used in a fat layer of macros (3) which supply the
   category; those macros are used throughout drm code, yielding the
   ~2100 new prdbgs reported above.

3. names in (2) are invoked by DRM_DEBUG_, drm_dbg_.

   all these macros get "converted" to use cDRM_UT_*
   to get right token type for both !/!! DRM_USE_DYNAMIC_DEBUG

4. simplification of __DRM_DEFINE_DBG_RATELIMITED macro

   remove DRM_UT_ ## KMS as extra indirection
   pass both DRM_UT & cDRM_UT, for drm_debug_enabled & drm_dev_dbg

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/Kconfig |  13 +
 drivers/gpu/drm/drm_print.c |  75 --
 include/drm/drm_print.h | 102 ++--
 3 files changed, 158 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 7ff89690a976..e4524ccba040 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -57,6 +57,19 @@ config DRM_DEBUG_MM
 
  If in doubt, say "N".
 
+config DRM_USE_DYNAMIC_DEBUG
+   bool "use dynamic debug to implement drm.debug"
+   default n
+   depends on DRM
+   depends on DYNAMIC_DEBUG || DYNAMIC_DEBUG_CORE
+   depends on JUMP_LABEL
+   help
+ The drm debug category facility does a lot of unlikely bit-field
+ tests at runtime; while cheap individually, the cost accumulates.
+ This option uses dynamic debug facility (if configured and
+ using jump_label) to avoid those runtime checks, patching
+ the kernel when those debugs are desired.
+
 config DRM_DEBUG_SELFTEST
tristate "kselftests for DRM"
depends on DRM
diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index 111b932cf2a9..e2acdfc7088b 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -52,8 +52,75 @@ MODULE_PARM_DESC(debug, "Enable debug output, where each bit 
enables a debug cat
 "\t\tBit 5 (0x20)  will enable VBL messages (vblank code)\n"
 "\t\tBit 7 (0x80)  will enable LEASE messages (leasing code)\n"
 "\t\tBit 8 (0x100) will enable DP messages (displayport code)");
+
+#ifndef CONFIG_DRM_USE_DYNAMIC_DEBUG
 module_param_named(debug, __drm_debug, int, 0600);
 
+#else
+static char *format_class_prefixes[] = {
+   cDRM_UT_CORE,
+   cDRM_UT_DRIVER,
+   cDRM_UT_KMS,
+   cDRM_UT_PRIME,
+   cDRM_UT_ATOMIC,
+   cDRM_UT_VBL,
+   cDRM_UT_STATE,
+   cDRM_UT_LEASE,
+   cDRM_UT_DP,
+   cDRM_UT_DRMRES
+};
+
+#define OUR_QUERY_SIZE 64 /* > strlen "format '^%s' %cp" + longest prefix */
+
+static int param_set_dyndbg(const char *instr, const struct kernel_param *kp)
+{
+   unsigned int val;
+   unsigned long changes, result;
+   int rc, 

[Intel-gfx] [PATCH v3 2/5] drm_print.h: rewrap __DRM_DEFINE_DBG_RATELIMITED macro

2021-07-14 Thread Jim Cromie
whitespace only, to minimize the diff of a later commit.
no functional changes

Signed-off-by: Jim Cromie 
---
 include/drm/drm_print.h | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 15a089a87c22..ff5ac0e88321 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -524,19 +524,23 @@ void __drm_err(const char *format, ...);
 #define DRM_DEBUG_DP(fmt, ...) \
__drm_dbg(DRM_UT_DP, fmt, ## __VA_ARGS__)
 
-#define __DRM_DEFINE_DBG_RATELIMITED(category, drm, fmt, ...)  
\
-({ 
\
-   static DEFINE_RATELIMIT_STATE(rs_, DEFAULT_RATELIMIT_INTERVAL, 
DEFAULT_RATELIMIT_BURST);\
-   const struct drm_device *drm_ = (drm);  
\
-   
\
-   if (drm_debug_enabled(DRM_UT_ ## category) && __ratelimit(_))
\
-   drm_dev_printk(drm_ ? drm_->dev : NULL, KERN_DEBUG, fmt, ## 
__VA_ARGS__);   \
+#define __DRM_DEFINE_DBG_RATELIMITED(category, drm, fmt, ...)  \
+({ \
+   static DEFINE_RATELIMIT_STATE(rs_,  \
+ DEFAULT_RATELIMIT_INTERVAL,   \
+ DEFAULT_RATELIMIT_BURST); \
+   const struct drm_device *drm_ = (drm);  \
+   \
+   if (drm_debug_enabled(DRM_UT_ ## category) && __ratelimit(_))\
+   drm_dev_printk(drm_ ? drm_->dev : NULL, \
+  KERN_DEBUG, fmt, ## __VA_ARGS__);\
 })
 
 #define drm_dbg_kms_ratelimited(drm, fmt, ...) \
__DRM_DEFINE_DBG_RATELIMITED(KMS, drm, fmt, ## __VA_ARGS__)
 
-#define DRM_DEBUG_KMS_RATELIMITED(fmt, ...) drm_dbg_kms_ratelimited(NULL, fmt, 
## __VA_ARGS__)
+#define DRM_DEBUG_KMS_RATELIMITED(fmt, ...) \
+   drm_dbg_kms_ratelimited(NULL, fmt, ## __VA_ARGS__)
 
 /*
  * struct drm_device based WARNs
-- 
2.31.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 1/5] drm/print: fixup spelling in a comment

2021-07-14 Thread Jim Cromie
s/prink/printk/ - no functional changes

Signed-off-by: Jim Cromie 
---
 include/drm/drm_print.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 9b66be54dd16..15a089a87c22 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -327,7 +327,7 @@ static inline bool drm_debug_enabled(enum 
drm_debug_category category)
 /*
  * struct device based logging
  *
- * Prefer drm_device based logging over device or prink based logging.
+ * Prefer drm_device based logging over device or printk based logging.
  */
 
 __printf(3, 4)
-- 
2.31.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 0/5] drm: use dyndbg in drm_print

2021-07-14 Thread Jim Cromie
hi dri-devel,

I'm pretty new in this particular playground.
I'm using this to send, is it too spammy ? too --to ?
   git send-email --dry-run \
   --to-cmd='scripts/get_maintainer.pl --no-rolestats v3-000*.patch' \
   --to=jba...@akamai.com v3-000*.patch

drm_debug_enabled() is called a lot (by drm_dev_dbg) to do unlikely
bit-tests to selectively enable debug printing; this is a good job for
dynamic-debug, IFF it is built with JUMP_LABEL.
 
This patchset enables the use of dynamic-debug to avoid those
drm_debug_enabled() overheads, if CONFIG_DRM_USE_DYNAMIC_DEBUG=y.

v3: fixes missed SOB, && on BOL, commit-log tweaks
reordered patches, 1 is comment, 2 is whitespace.
dropped RFC, to see what happens.
v2: https://lore.kernel.org/lkml/20210711055003.528167-1-jim.cro...@gmail.com/
v1: https://lore.kernel.org/lkml/20201204035318.332419-1-jim.cro...@gmail.com/

Doing so creates many new pr_debug callsites,
otherwise i915 has ~120 prdbgs, and drm has just 1;

  bash-5.1# modprobe i915
  dyndbg:   8 debug prints in module video
  dyndbg: 305 debug prints in module drm
  dyndbg: 207 debug prints in module drm_kms_helper
  dyndbg:   2 debug prints in module ttm
  dyndbg: 1720 debug prints in module i915

On amdgpu, enabling it adds ~3200 prdbgs, currently at 56 bytes each.
So CONFIG_DRM_USE_DYNAMIC_DEBUG=y affects resource requirements.

I'm running this patchset bare-metal on an i915 laptop & an amdgpu
desktop (both as loadable modules).  I booted the amdgpu box with:

BOOT_IMAGE=(hd2,gpt2)/vmlinuz-5.13.0-dd7-13692-g8def25788f56 \
 root=UUID=mumble ro \
 rootflags=subvol=root00 rhgb \
 dynamic_debug.verbose=3 main.dyndbg=+p \
 amdgpu.debug=1 amdgpu.test=1 \
 "amdgpu.dyndbg=format ^[ +p"

That last line enables ~1700 prdbg callsites with a format like '[DML'
etc at boot, and amdgpu.test=1 triggers 3 minutes of tests, causing
~76k prdbgs in 409 seconds, before I turned them off with:

  echo module amdgpu -p > /proc/dynamic_debug/control

This is on top of master @ v5.14-rc1
Should I rebase onto something else ?

Jim Cromie (5):
  drm/print: fixup spelling in a comment
  drm_print.h: rewrap __DRM_DEFINE_DBG_RATELIMITED macro
  drm/print: RFC add choice to use dynamic debug in drm-debug
  drm/print: move conditional deref into macro defn
  i915: map gvt pr_debug categories to bits in parameters/debug_gvt

Note: 3/5, 5/5 have bits that are here for POC, but belong in
dynamic_debug.c.


 drivers/gpu/drm/Kconfig|  13 
 drivers/gpu/drm/drm_print.c|  75 +-
 drivers/gpu/drm/i915/gvt/Makefile  |   4 +
 drivers/gpu/drm/i915/i915_params.c |  76 +++
 include/drm/drm_print.h| 117 -
 5 files changed, 247 insertions(+), 38 deletions(-)

-- 
2.31.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Add TTM offset argument to mmap.

2021-07-14 Thread Maarten Lankhorst
Op 14-07-2021 om 18:17 schreef Jason Ekstrand:
> On Wed, Jul 14, 2021 at 5:03 AM Maarten Lankhorst
>  wrote:
>> The FIXED mapping is only used for ttm, and tells userspace that the
>> mapping type is pre-defined. This disables the other type of mmap
>> offsets when discrete memory is used, so fix the selftests as well.
>>
>> Document the struct as well, so it shows up in docbook.
>>
>> Cc: Jason Ekstrand 
>> Reviewed-by: Daniel Vetter 
>> Signed-off-by: Maarten Lankhorst 
>> ---
>>  drivers/gpu/drm/i915/gem/i915_gem_mman.c  | 17 ++-
>>  .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
>>  .../drm/i915/gem/selftests/i915_gem_mman.c| 27 ++-
>>  include/uapi/drm/i915_drm.h   | 46 ++-
>>  4 files changed, 77 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
>> b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
>> index a90f796e85c0..31c4021bb6be 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
>> @@ -679,10 +679,16 @@ __assign_mmap_offset(struct drm_i915_gem_object *obj,
>> return -ENODEV;
>>
>> if (obj->ops->mmap_offset)  {
>> +   if (mmap_type != I915_MMAP_TYPE_FIXED)
>> +   return -ENODEV;
>> +
>> *offset = obj->ops->mmap_offset(obj);
>> return 0;
>> }
>>
>> +   if (mmap_type == I915_MMAP_TYPE_FIXED)
>> +   return -ENODEV;
>> +
>> if (mmap_type != I915_MMAP_TYPE_GTT &&
>> !i915_gem_object_has_struct_page(obj) &&
>> !i915_gem_object_has_iomem(obj))
>> @@ -727,7 +733,9 @@ i915_gem_dumb_mmap_offset(struct drm_file *file,
>>  {
>> enum i915_mmap_type mmap_type;
>>
>> -   if (boot_cpu_has(X86_FEATURE_PAT))
>> +   if (HAS_LMEM(to_i915(dev)))
>> +   mmap_type = I915_MMAP_TYPE_FIXED;
>> +   else if (boot_cpu_has(X86_FEATURE_PAT))
>> mmap_type = I915_MMAP_TYPE_WC;
>> else if (!i915_ggtt_has_aperture(_i915(dev)->ggtt))
>> return -ENODEV;
>> @@ -798,6 +806,10 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void 
>> *data,
>> type = I915_MMAP_TYPE_UC;
>> break;
>>
>> +   case I915_MMAP_OFFSET_FIXED:
>> +   type = I915_MMAP_TYPE_FIXED;
>> +   break;
>> +
>> default:
>> return -EINVAL;
>> }
>> @@ -968,6 +980,9 @@ int i915_gem_mmap(struct file *filp, struct 
>> vm_area_struct *vma)
>> vma->vm_ops = _ops_cpu;
>> break;
>>
>> +   case I915_MMAP_TYPE_FIXED:
>> +   GEM_WARN_ON(1);
>> +   /* fall-through */
>> case I915_MMAP_TYPE_WB:
>> vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
>> vma->vm_ops = _ops_cpu;
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
>> b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> index ef3de2ae9723..afbadfc5516b 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> @@ -105,6 +105,7 @@ enum i915_mmap_type {
>> I915_MMAP_TYPE_WC,
>> I915_MMAP_TYPE_WB,
>> I915_MMAP_TYPE_UC,
>> +   I915_MMAP_TYPE_FIXED,
>>  };
>>
>>  struct i915_mmap_offset {
>> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
>> b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
>> index 1da8bd675e54..52789c8ad337 100644
>> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
>> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
>> @@ -573,6 +573,14 @@ static int make_obj_busy(struct drm_i915_gem_object 
>> *obj)
>> return 0;
>>  }
>>
>> +static enum i915_mmap_type default_mapping(struct drm_i915_private *i915)
>> +{
>> +   if (HAS_LMEM(i915))
>> +   return I915_MMAP_TYPE_FIXED;
>> +
>> +   return I915_MMAP_TYPE_GTT;
>> +}
>> +
>>  static bool assert_mmap_offset(struct drm_i915_private *i915,
>>unsigned long size,
>>int expected)
>> @@ -585,7 +593,7 @@ static bool assert_mmap_offset(struct drm_i915_private 
>> *i915,
>> if (IS_ERR(obj))
>> return expected && expected == PTR_ERR(obj);
>>
>> -   ret = __assign_mmap_offset(obj, I915_MMAP_TYPE_GTT, , NULL);
>> +   ret = __assign_mmap_offset(obj, default_mapping(i915), , 
>> NULL);
>> i915_gem_object_put(obj);
>>
>> return ret == expected;
>> @@ -689,7 +697,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
>> goto out;
>> }
>>
>> -   err = __assign_mmap_offset(obj, I915_MMAP_TYPE_GTT, , NULL);
>> +   err = __assign_mmap_offset(obj, default_mapping(i915), , 
>> NULL);
>> if (err) {
>> pr_err("Unable to insert object into reclaimed hole\n");
>> goto err_obj;
>> @@ -831,8 

Re: [Intel-gfx] [PATCH v4 3/4] drm/shmem-helpers: Allocate wc pages on x86

2021-07-14 Thread Christian König

Am 14.07.21 um 14:48 schrieb Daniel Vetter:

On Wed, Jul 14, 2021 at 01:54:50PM +0200, Christian König wrote:

Am 13.07.21 um 22:51 schrieb Daniel Vetter:

intel-gfx-ci realized that something is not quite coherent anymore on
some platforms for our i915+vgem tests, when I tried to switch vgem
over to shmem helpers.

After lots of head-scratching I realized that I've removed calls to
drm_clflush. And we need those. To make this a bit cleaner use the
same page allocation tooling as ttm, which does internally clflush
(and more, as needed on any platform instead of just the intel x86
cpus i915 can be combined with).

Unfortunately this doesn't exist on arm, or as a generic feature. For
that I think only the dma-api can get at wc memory reliably, so maybe
we'd need some kind of GFP_WC flag to do this properly.

The problem is that this stuff is extremely architecture specific. So GFP_WC
and GFP_UNCACHED are really what we should aim for in the long term.

And as far as I know we have at least the following possibilities how it is
implemented:

* A fixed amount of registers which tells the CPU the caching behavior for a
memory region, e.g. MTRR.
* Some bits of the memory pointers used, e.g. you see the same memory at
different locations with different caching attributes.
* Some bits in the CPUs page table.
* Some bits in a separate page table.

On top of that there is the PCIe specification which defines non-cache
snooping access as an extension.

Yeah dma-buf is extremely ill-defined even on x86 if you combine these
all. We just play a game of whack-a-mole with the cacheline dirt until
it's gone.

That's the other piece here, how do you even make sure that the page is
properly flushed and ready for wc access:
- easy case is x86 with clflush available pretty much everywhere (since
   10+ years at least)
- next are cpus which have some cache flush instructions, but it's highly
   cpu model specific
- next up is the same, but you absolutely have to make sure there's no
   other mapping around anymore or the coherency fabric just dies
- and I'm pretty sure there's worse stuff where you defacto can only
   allocate wc memory that's set aside at boot-up and that's all you ever
   get.


Well long story short you don't make sure that the page is flushed at all.

What you do is to allocate the page as WC in the first place, if you 
fail to do this you can't use it.


The whole idea TTM try to sell until a while ago that you can actually 
change that on the fly only works on x86 and even there only very very 
limited.


Cheers,
Christian.



Cheers, Daniel


Mixing that with the CPU caching behavior gets you some really nice ways to
break a driver. In general x86 seems to be rather graceful, but arm and
PowerPC are easily pissed if you mess that up.


Signed-off-by: Daniel Vetter 
Cc: Christian König 
Cc: "Thomas Hellström" 
Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Thomas Zimmermann 
Cc: David Airlie 
Cc: Daniel Vetter 

Acked-by: Christian könig 

Regards,
Christian.


---
   drivers/gpu/drm/drm_gem_shmem_helper.c | 14 ++
   1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c 
b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 296ab1b7c07f..657d2490aaa5 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -10,6 +10,10 @@
   #include 
   #include 
+#ifdef CONFIG_X86
+#include 
+#endif
+
   #include 
   #include 
   #include 
@@ -162,6 +166,11 @@ static int drm_gem_shmem_get_pages_locked(struct 
drm_gem_shmem_object *shmem)
return PTR_ERR(pages);
}
+#ifdef CONFIG_X86
+   if (shmem->map_wc)
+   set_pages_array_wc(pages, obj->size >> PAGE_SHIFT);
+#endif
+
shmem->pages = pages;
return 0;
@@ -203,6 +212,11 @@ static void drm_gem_shmem_put_pages_locked(struct 
drm_gem_shmem_object *shmem)
if (--shmem->pages_use_count > 0)
return;
+#ifdef CONFIG_X86
+   if (shmem->map_wc)
+   set_pages_array_wb(shmem->pages, obj->size >> PAGE_SHIFT);
+#endif
+
drm_gem_put_pages(obj, shmem->pages,
  shmem->pages_mark_dirty_on_put,
  shmem->pages_mark_accessed_on_put);


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Add TTM offset argument to mmap.

2021-07-14 Thread Jason Ekstrand
On Wed, Jul 14, 2021 at 5:03 AM Maarten Lankhorst
 wrote:
>
> The FIXED mapping is only used for ttm, and tells userspace that the
> mapping type is pre-defined. This disables the other type of mmap
> offsets when discrete memory is used, so fix the selftests as well.
>
> Document the struct as well, so it shows up in docbook.
>
> Cc: Jason Ekstrand 
> Reviewed-by: Daniel Vetter 
> Signed-off-by: Maarten Lankhorst 
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_mman.c  | 17 ++-
>  .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
>  .../drm/i915/gem/selftests/i915_gem_mman.c| 27 ++-
>  include/uapi/drm/i915_drm.h   | 46 ++-
>  4 files changed, 77 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
> b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> index a90f796e85c0..31c4021bb6be 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> @@ -679,10 +679,16 @@ __assign_mmap_offset(struct drm_i915_gem_object *obj,
> return -ENODEV;
>
> if (obj->ops->mmap_offset)  {
> +   if (mmap_type != I915_MMAP_TYPE_FIXED)
> +   return -ENODEV;
> +
> *offset = obj->ops->mmap_offset(obj);
> return 0;
> }
>
> +   if (mmap_type == I915_MMAP_TYPE_FIXED)
> +   return -ENODEV;
> +
> if (mmap_type != I915_MMAP_TYPE_GTT &&
> !i915_gem_object_has_struct_page(obj) &&
> !i915_gem_object_has_iomem(obj))
> @@ -727,7 +733,9 @@ i915_gem_dumb_mmap_offset(struct drm_file *file,
>  {
> enum i915_mmap_type mmap_type;
>
> -   if (boot_cpu_has(X86_FEATURE_PAT))
> +   if (HAS_LMEM(to_i915(dev)))
> +   mmap_type = I915_MMAP_TYPE_FIXED;
> +   else if (boot_cpu_has(X86_FEATURE_PAT))
> mmap_type = I915_MMAP_TYPE_WC;
> else if (!i915_ggtt_has_aperture(_i915(dev)->ggtt))
> return -ENODEV;
> @@ -798,6 +806,10 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void 
> *data,
> type = I915_MMAP_TYPE_UC;
> break;
>
> +   case I915_MMAP_OFFSET_FIXED:
> +   type = I915_MMAP_TYPE_FIXED;
> +   break;
> +
> default:
> return -EINVAL;
> }
> @@ -968,6 +980,9 @@ int i915_gem_mmap(struct file *filp, struct 
> vm_area_struct *vma)
> vma->vm_ops = _ops_cpu;
> break;
>
> +   case I915_MMAP_TYPE_FIXED:
> +   GEM_WARN_ON(1);
> +   /* fall-through */
> case I915_MMAP_TYPE_WB:
> vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
> vma->vm_ops = _ops_cpu;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
> b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index ef3de2ae9723..afbadfc5516b 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -105,6 +105,7 @@ enum i915_mmap_type {
> I915_MMAP_TYPE_WC,
> I915_MMAP_TYPE_WB,
> I915_MMAP_TYPE_UC,
> +   I915_MMAP_TYPE_FIXED,
>  };
>
>  struct i915_mmap_offset {
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
> b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> index 1da8bd675e54..52789c8ad337 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> @@ -573,6 +573,14 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
> return 0;
>  }
>
> +static enum i915_mmap_type default_mapping(struct drm_i915_private *i915)
> +{
> +   if (HAS_LMEM(i915))
> +   return I915_MMAP_TYPE_FIXED;
> +
> +   return I915_MMAP_TYPE_GTT;
> +}
> +
>  static bool assert_mmap_offset(struct drm_i915_private *i915,
>unsigned long size,
>int expected)
> @@ -585,7 +593,7 @@ static bool assert_mmap_offset(struct drm_i915_private 
> *i915,
> if (IS_ERR(obj))
> return expected && expected == PTR_ERR(obj);
>
> -   ret = __assign_mmap_offset(obj, I915_MMAP_TYPE_GTT, , NULL);
> +   ret = __assign_mmap_offset(obj, default_mapping(i915), , NULL);
> i915_gem_object_put(obj);
>
> return ret == expected;
> @@ -689,7 +697,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
> goto out;
> }
>
> -   err = __assign_mmap_offset(obj, I915_MMAP_TYPE_GTT, , NULL);
> +   err = __assign_mmap_offset(obj, default_mapping(i915), , NULL);
> if (err) {
> pr_err("Unable to insert object into reclaimed hole\n");
> goto err_obj;
> @@ -831,8 +839,14 @@ static int wc_check(struct drm_i915_gem_object *obj)
>
>  static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type 
> type)
>  {
> +   struct 

Re: [Intel-gfx] [PATCH v4 3/4] drm/shmem-helpers: Allocate wc pages on x86

2021-07-14 Thread Daniel Vetter
On Wed, Jul 14, 2021 at 02:58:26PM +0200, Christian König wrote:
> Am 14.07.21 um 14:48 schrieb Daniel Vetter:
> > On Wed, Jul 14, 2021 at 01:54:50PM +0200, Christian König wrote:
> > > Am 13.07.21 um 22:51 schrieb Daniel Vetter:
> > > > intel-gfx-ci realized that something is not quite coherent anymore on
> > > > some platforms for our i915+vgem tests, when I tried to switch vgem
> > > > over to shmem helpers.
> > > > 
> > > > After lots of head-scratching I realized that I've removed calls to
> > > > drm_clflush. And we need those. To make this a bit cleaner use the
> > > > same page allocation tooling as ttm, which does internally clflush
> > > > (and more, as needed on any platform instead of just the intel x86
> > > > cpus i915 can be combined with).
> > > > 
> > > > Unfortunately this doesn't exist on arm, or as a generic feature. For
> > > > that I think only the dma-api can get at wc memory reliably, so maybe
> > > > we'd need some kind of GFP_WC flag to do this properly.
> > > The problem is that this stuff is extremely architecture specific. So 
> > > GFP_WC
> > > and GFP_UNCACHED are really what we should aim for in the long term.
> > > 
> > > And as far as I know we have at least the following possibilities how it 
> > > is
> > > implemented:
> > > 
> > > * A fixed amount of registers which tells the CPU the caching behavior 
> > > for a
> > > memory region, e.g. MTRR.
> > > * Some bits of the memory pointers used, e.g. you see the same memory at
> > > different locations with different caching attributes.
> > > * Some bits in the CPUs page table.
> > > * Some bits in a separate page table.
> > > 
> > > On top of that there is the PCIe specification which defines non-cache
> > > snooping access as an extension.
> > Yeah dma-buf is extremely ill-defined even on x86 if you combine these
> > all. We just play a game of whack-a-mole with the cacheline dirt until
> > it's gone.
> > 
> > That's the other piece here, how do you even make sure that the page is
> > properly flushed and ready for wc access:
> > - easy case is x86 with clflush available pretty much everywhere (since
> >10+ years at least)
> > - next are cpus which have some cache flush instructions, but it's highly
> >cpu model specific
> > - next up is the same, but you absolutely have to make sure there's no
> >other mapping around anymore or the coherency fabric just dies
> > - and I'm pretty sure there's worse stuff where you defacto can only
> >allocate wc memory that's set aside at boot-up and that's all you ever
> >get.
> 
> Well long story short you don't make sure that the page is flushed at all.
> 
> What you do is to allocate the page as WC in the first place, if you fail to
> do this you can't use it.

Oh sure, but even when you allocate as wc you need to make sure the page
you have is actually wc coherent from the start. I'm chasing some fun
trying to convert vgem over to shmem helpers right now (i.e. this patch
series), and if you don't start out with flushed pages some of the vgem +
i915 igts just fail on the less coherent igpu platforms we have.

And if you look into what set_pages_wc actually does, then you spot the
clflush somewhere deep down (aside from all the other things it does).

On some ARM platforms that's just not possible, and you have to do a
carveout that you never even map as wb (so needs to be excluded from the
kernel map too and treated as highmem). There's some really bonkers stuff
here.

> The whole idea TTM try to sell until a while ago that you can actually
> change that on the fly only works on x86 and even there only very very
> limited.

Yeah that's clear, this is why we're locking down the i915 gem uapi a lot
for dgpu. All the tricks are out the window.
-Daniel


> 
> Cheers,
> Christian.
> 
> > 
> > Cheers, Daniel
> > 
> > > Mixing that with the CPU caching behavior gets you some really nice ways 
> > > to
> > > break a driver. In general x86 seems to be rather graceful, but arm and
> > > PowerPC are easily pissed if you mess that up.
> > > 
> > > > Signed-off-by: Daniel Vetter 
> > > > Cc: Christian König 
> > > > Cc: "Thomas Hellström" 
> > > > Cc: Maarten Lankhorst 
> > > > Cc: Maxime Ripard 
> > > > Cc: Thomas Zimmermann 
> > > > Cc: David Airlie 
> > > > Cc: Daniel Vetter 
> > > Acked-by: Christian könig 
> > > 
> > > Regards,
> > > Christian.
> > > 
> > > > ---
> > > >drivers/gpu/drm/drm_gem_shmem_helper.c | 14 ++
> > > >1 file changed, 14 insertions(+)
> > > > 
> > > > diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c 
> > > > b/drivers/gpu/drm/drm_gem_shmem_helper.c
> > > > index 296ab1b7c07f..657d2490aaa5 100644
> > > > --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> > > > +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> > > > @@ -10,6 +10,10 @@
> > > >#include 
> > > >#include 
> > > > +#ifdef CONFIG_X86
> > > > +#include 
> > > > +#endif
> > > > +
> > > >#include 
> > > >#include 
> > > >#include 
> > > > @@ -162,6 +166,11 @@ 

Re: [Intel-gfx] ✗ Fi.CI.IGT: failure for drm/i915: Fix wm params for ccs

2021-07-14 Thread Vudum, Lakshminarayana
Re-reported.

From: Juha-Pekka Heikkilä 
Sent: Wednesday, July 14, 2021 12:58 AM
To: intel-gfx@lists.freedesktop.org; Vudum, Lakshminarayana 

Subject: Re: ✗ Fi.CI.IGT: failure for drm/i915: Fix wm params for ccs

Hi Lakshmi,

Here would be again one false positive result.

/Juha-Pekka

On Wed, Jul 14, 2021 at 7:38 AM Patchwork 
mailto:patchw...@emeril.freedesktop.org>> 
wrote:
Patch Details
Series:

drm/i915: Fix wm params for ccs

URL:

https://patchwork.freedesktop.org/series/92491/

State:

failure

Details:

https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/index.html

CI Bug Log - changes from CI_DRM_10342_full -> Patchwork_20589_full
Summary

FAILURE

Serious unknown changes coming with Patchwork_20589_full absolutely need to be
verified manually.

If you think the reported changes have nothing to do with the changes
introduced in Patchwork_20589_full, please notify your bug team to allow them
to document this new failure mode, which will reduce false positives in CI.

Possible new issues

Here are the unknown changes that may have been introduced in 
Patchwork_20589_full:

IGT changes
Possible regressions

  *   igt@dumb_buffer@map-invalid-size:

 *   shard-apl: NOTRUN -> 
DMESG-WARN

Suppressed

The following results come from untrusted machines, tests, or statuses.
They do not affect the overall result.

  *   igt@kms_dither@fb-8bpc-vs-panel-6bpc:

 *   {shard-rkl}: NOTRUN -> 
SKIP

  *   igt@runner@aborted:

 *   {shard-rkl}: 
(FAIL,
 
FAIL,
 
FAIL,
 
FAIL,
 
FAIL)
 ([i915#3002] / [i915#3728]) -> 
(FAIL,
 
FAIL,
 
FAIL,
 
FAIL)
 ([i915#3002])

Known issues

Here are the changes found in Patchwork_20589_full that come from known issues:

IGT changes
Issues hit

  *   igt@gem_create@create-massive:

 *   shard-snb: NOTRUN -> 
DMESG-WARN
 ([i915#3002])

  *   igt@gem_ctx_persistence@legacy-engines-cleanup:

 *   shard-snb: NOTRUN -> 
SKIP
 ([fdo#109271] / [i915#1099]) +3 similar issues

  *   igt@gem_ctx_persistence@legacy-engines-hang@blt:

 *   shard-skl: NOTRUN -> 
SKIP
 ([fdo#109271]) +119 similar issues

  *   igt@gem_exec_fair@basic-deadline:

 *   shard-apl: NOTRUN -> 
FAIL
 ([i915#2846])

  *   igt@gem_exec_fair@basic-pace-share@rcs0:

 *   shard-tglb: 
PASS
 -> 
FAIL
 ([i915#2842])

  *   igt@gem_exec_fair@basic-throttle@rcs0:

 *   shard-glk: 
PASS
 -> 
FAIL
 ([i915#2842])
 *   shard-iclb: 
PASS
 -> 
FAIL
 ([i915#2849])

  *   igt@gem_exec_reloc@basic-wide-active@rcs0:

 *   shard-snb: NOTRUN -> 
FAIL
 ([i915#3633]) +2 similar issues

  *   igt@gem_exec_reloc@basic-wide-active@vcs1:

 *   shard-iclb: NOTRUN -> 

[Intel-gfx] ✓ Fi.CI.IGT: success for drm/i915: Fix wm params for ccs

2021-07-14 Thread Patchwork
== Series Details ==

Series: drm/i915: Fix wm params for ccs
URL   : https://patchwork.freedesktop.org/series/92491/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_10342_full -> Patchwork_20589_full


Summary
---

  **SUCCESS**

  No regressions found.

  

Possible new issues
---

  Here are the unknown changes that may have been introduced in 
Patchwork_20589_full:

### IGT changes ###

 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@kms_dither@fb-8bpc-vs-panel-6bpc:
- {shard-rkl}:NOTRUN -> [SKIP][1]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/shard-rkl-5/igt@kms_dit...@fb-8bpc-vs-panel-6bpc.html

  * igt@runner@aborted:
- {shard-rkl}:([FAIL][2], [FAIL][3], [FAIL][4], [FAIL][5], 
[FAIL][6]) ([i915#3002] / [i915#3728]) -> ([FAIL][7], [FAIL][8], [FAIL][9], 
[FAIL][10]) ([i915#3002])
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10342/shard-rkl-5/igt@run...@aborted.html
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10342/shard-rkl-6/igt@run...@aborted.html
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10342/shard-rkl-2/igt@run...@aborted.html
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10342/shard-rkl-5/igt@run...@aborted.html
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10342/shard-rkl-5/igt@run...@aborted.html
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/shard-rkl-2/igt@run...@aborted.html
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/shard-rkl-1/igt@run...@aborted.html
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/shard-rkl-5/igt@run...@aborted.html
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/shard-rkl-1/igt@run...@aborted.html

  
Known issues


  Here are the changes found in Patchwork_20589_full that come from known 
issues:

### IGT changes ###

 Issues hit 

  * igt@gem_create@create-massive:
- shard-snb:  NOTRUN -> [DMESG-WARN][11] ([i915#3002])
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/shard-snb7/igt@gem_cre...@create-massive.html

  * igt@gem_ctx_persistence@legacy-engines-cleanup:
- shard-snb:  NOTRUN -> [SKIP][12] ([fdo#109271] / [i915#1099]) +3 
similar issues
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/shard-snb2/igt@gem_ctx_persiste...@legacy-engines-cleanup.html

  * igt@gem_ctx_persistence@legacy-engines-hang@blt:
- shard-skl:  NOTRUN -> [SKIP][13] ([fdo#109271]) +119 similar 
issues
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/shard-skl6/igt@gem_ctx_persistence@legacy-engines-h...@blt.html

  * igt@gem_exec_fair@basic-deadline:
- shard-apl:  NOTRUN -> [FAIL][14] ([i915#2846])
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/shard-apl2/igt@gem_exec_f...@basic-deadline.html

  * igt@gem_exec_fair@basic-pace-share@rcs0:
- shard-tglb: [PASS][15] -> [FAIL][16] ([i915#2842])
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10342/shard-tglb7/igt@gem_exec_fair@basic-pace-sh...@rcs0.html
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/shard-tglb6/igt@gem_exec_fair@basic-pace-sh...@rcs0.html

  * igt@gem_exec_fair@basic-throttle@rcs0:
- shard-glk:  [PASS][17] -> [FAIL][18] ([i915#2842])
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10342/shard-glk5/igt@gem_exec_fair@basic-throt...@rcs0.html
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/shard-glk6/igt@gem_exec_fair@basic-throt...@rcs0.html
- shard-iclb: [PASS][19] -> [FAIL][20] ([i915#2849])
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10342/shard-iclb5/igt@gem_exec_fair@basic-throt...@rcs0.html
   [20]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/shard-iclb6/igt@gem_exec_fair@basic-throt...@rcs0.html

  * igt@gem_exec_reloc@basic-wide-active@rcs0:
- shard-snb:  NOTRUN -> [FAIL][21] ([i915#3633]) +2 similar issues
   [21]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/shard-snb6/igt@gem_exec_reloc@basic-wide-act...@rcs0.html

  * igt@gem_exec_reloc@basic-wide-active@vcs1:
- shard-iclb: NOTRUN -> [FAIL][22] ([i915#3633])
   [22]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/shard-iclb4/igt@gem_exec_reloc@basic-wide-act...@vcs1.html

  * igt@gem_exec_suspend@basic-s3-devices:
- shard-iclb: [PASS][23] -> [DMESG-WARN][24] ([i915#1226])
   [23]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10342/shard-iclb6/igt@gem_exec_susp...@basic-s3-devices.html
   [24]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/shard-iclb6/igt@gem_exec_susp...@basic-s3-devices.html

  * igt@gem_fenced_exec_thrash@2-spare-fences:
- shard-snb:  

Re: [Intel-gfx] [PATCH v4 3/4] drm/shmem-helpers: Allocate wc pages on x86

2021-07-14 Thread Christian König

Am 13.07.21 um 22:51 schrieb Daniel Vetter:

intel-gfx-ci realized that something is not quite coherent anymore on
some platforms for our i915+vgem tests, when I tried to switch vgem
over to shmem helpers.

After lots of head-scratching I realized that I've removed calls to
drm_clflush. And we need those. To make this a bit cleaner use the
same page allocation tooling as ttm, which does internally clflush
(and more, as needed on any platform instead of just the intel x86
cpus i915 can be combined with).

Unfortunately this doesn't exist on arm, or as a generic feature. For
that I think only the dma-api can get at wc memory reliably, so maybe
we'd need some kind of GFP_WC flag to do this properly.


The problem is that this stuff is extremely architecture specific. So 
GFP_WC and GFP_UNCACHED are really what we should aim for in the long term.


And as far as I know we have at least the following possibilities how it 
is implemented:


* A fixed amount of registers which tells the CPU the caching behavior 
for a memory region, e.g. MTRR.
* Some bits of the memory pointers used, e.g. you see the same memory at 
different locations with different caching attributes.

* Some bits in the CPUs page table.
* Some bits in a separate page table.

On top of that there is the PCIe specification which defines non-cache 
snooping access as an extension.


Mixing that with the CPU caching behavior gets you some really nice ways 
to break a driver. In general x86 seems to be rather graceful, but arm 
and PowerPC are easily pissed if you mess that up.



Signed-off-by: Daniel Vetter 
Cc: Christian König 
Cc: "Thomas Hellström" 
Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Thomas Zimmermann 
Cc: David Airlie 
Cc: Daniel Vetter 


Acked-by: Christian könig 

Regards,
Christian.


---
  drivers/gpu/drm/drm_gem_shmem_helper.c | 14 ++
  1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c 
b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 296ab1b7c07f..657d2490aaa5 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -10,6 +10,10 @@
  #include 
  #include 
  
+#ifdef CONFIG_X86

+#include 
+#endif
+
  #include 
  #include 
  #include 
@@ -162,6 +166,11 @@ static int drm_gem_shmem_get_pages_locked(struct 
drm_gem_shmem_object *shmem)
return PTR_ERR(pages);
}
  
+#ifdef CONFIG_X86

+   if (shmem->map_wc)
+   set_pages_array_wc(pages, obj->size >> PAGE_SHIFT);
+#endif
+
shmem->pages = pages;
  
  	return 0;

@@ -203,6 +212,11 @@ static void drm_gem_shmem_put_pages_locked(struct 
drm_gem_shmem_object *shmem)
if (--shmem->pages_use_count > 0)
return;
  
+#ifdef CONFIG_X86

+   if (shmem->map_wc)
+   set_pages_array_wb(shmem->pages, obj->size >> PAGE_SHIFT);
+#endif
+
drm_gem_put_pages(obj, shmem->pages,
  shmem->pages_mark_dirty_on_put,
  shmem->pages_mark_accessed_on_put);


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✓ Fi.CI.IGT: success for Begin enabling Xe_HP SDV and DG2 platforms (rev5)

2021-07-14 Thread Patchwork
== Series Details ==

Series: Begin enabling Xe_HP SDV and DG2 platforms (rev5)
URL   : https://patchwork.freedesktop.org/series/92135/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_10344_full -> Patchwork_20594_full


Summary
---

  **SUCCESS**

  No regressions found.

  

Possible new issues
---

  Here are the unknown changes that may have been introduced in 
Patchwork_20594_full:

### IGT changes ###

 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@gem_mmap_gtt@basic-wc:
- {shard-rkl}:[PASS][1] -> [FAIL][2]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10344/shard-rkl-1/igt@gem_mmap_...@basic-wc.html
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20594/shard-rkl-2/igt@gem_mmap_...@basic-wc.html

  * igt@kms_ccs@pipe-a-bad-rotation-90-yf_tiled_ccs:
- {shard-rkl}:[FAIL][3] ([i915#3678]) -> [SKIP][4]
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10344/shard-rkl-2/igt@kms_ccs@pipe-a-bad-rotation-90-yf_tiled_ccs.html
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20594/shard-rkl-6/igt@kms_ccs@pipe-a-bad-rotation-90-yf_tiled_ccs.html

  * igt@kms_dither@fb-8bpc-vs-panel-8bpc:
- {shard-rkl}:NOTRUN -> [SKIP][5]
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20594/shard-rkl-1/igt@kms_dit...@fb-8bpc-vs-panel-8bpc.html

  * igt@runner@aborted:
- {shard-rkl}:([FAIL][6], [FAIL][7], [FAIL][8], [FAIL][9]) 
([i915#3002]) -> ([FAIL][10], [FAIL][11], [FAIL][12], [FAIL][13]) ([i915#2029] 
/ [i915#3002])
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10344/shard-rkl-5/igt@run...@aborted.html
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10344/shard-rkl-2/igt@run...@aborted.html
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10344/shard-rkl-6/igt@run...@aborted.html
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10344/shard-rkl-2/igt@run...@aborted.html
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20594/shard-rkl-1/igt@run...@aborted.html
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20594/shard-rkl-6/igt@run...@aborted.html
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20594/shard-rkl-6/igt@run...@aborted.html
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20594/shard-rkl-5/igt@run...@aborted.html

  
Known issues


  Here are the changes found in Patchwork_20594_full that come from known 
issues:

### IGT changes ###

 Issues hit 

  * igt@gem_ctx_persistence@idempotent:
- shard-snb:  NOTRUN -> [SKIP][14] ([fdo#109271] / [i915#1099]) +4 
similar issues
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20594/shard-snb6/igt@gem_ctx_persiste...@idempotent.html

  * igt@gem_exec_fair@basic-flow@rcs0:
- shard-tglb: [PASS][15] -> [FAIL][16] ([i915#2842]) +2 similar 
issues
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10344/shard-tglb5/igt@gem_exec_fair@basic-f...@rcs0.html
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20594/shard-tglb3/igt@gem_exec_fair@basic-f...@rcs0.html

  * igt@gem_exec_fair@basic-pace@vcs1:
- shard-iclb: NOTRUN -> [FAIL][17] ([i915#2842])
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20594/shard-iclb2/igt@gem_exec_fair@basic-p...@vcs1.html

  * igt@gem_exec_reloc@basic-write-read-noreloc:
- shard-tglb: [PASS][18] -> [DMESG-WARN][19] ([i915#2868])
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10344/shard-tglb2/igt@gem_exec_re...@basic-write-read-noreloc.html
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20594/shard-tglb5/igt@gem_exec_re...@basic-write-read-noreloc.html

  * igt@gem_exec_whisper@basic-queues-forked-all:
- shard-glk:  [PASS][20] -> [DMESG-WARN][21] ([i915#118] / 
[i915#95]) +2 similar issues
   [20]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10344/shard-glk8/igt@gem_exec_whis...@basic-queues-forked-all.html
   [21]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20594/shard-glk3/igt@gem_exec_whis...@basic-queues-forked-all.html

  * igt@gem_mmap_gtt@cpuset-big-copy-xy:
- shard-iclb: [PASS][22] -> [FAIL][23] ([i915#307])
   [22]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10344/shard-iclb4/igt@gem_mmap_...@cpuset-big-copy-xy.html
   [23]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20594/shard-iclb8/igt@gem_mmap_...@cpuset-big-copy-xy.html

  * igt@gem_pread@exhaustion:
- shard-tglb: NOTRUN -> [WARN][24] ([i915#2658])
   [24]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20594/shard-tglb7/igt@gem_pr...@exhaustion.html
- shard-iclb: NOTRUN -> [WARN][25] ([i915#2658])
   [25]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20594/shard-iclb6/igt@gem_pr...@exhaustion.html

Re: [Intel-gfx] [PATCH] drm/i915/debugfs: DISPLAY_VER 13 lpsp capability

2021-07-14 Thread Gupta, Anshuman



> -Original Message-
> From: Manna, Animesh 
> Sent: Wednesday, July 14, 2021 6:30 PM
> To: Gupta, Anshuman ; intel-
> g...@lists.freedesktop.org
> Subject: RE: [PATCH] drm/i915/debugfs: DISPLAY_VER 13 lpsp capability
> 
> 
> 
> > -Original Message-
> > From: Gupta, Anshuman 
> > Sent: Tuesday, July 13, 2021 1:28 PM
> > To: intel-gfx@lists.freedesktop.org
> > Cc: Gupta, Anshuman ; Manna, Animesh
> > 
> > Subject: [PATCH] drm/i915/debugfs: DISPLAY_VER 13 lpsp capability
> >
> > Extend i915_lpsp_capability debugfs to DG2,ADLP and future platforms.
> >
> > v2: commit log modification.
> >
> > Cc: Animesh Manna 
> > Signed-off-by: Anshuman Gupta 
> 
> Changes look good to me.
> 
> Reviewed-by: Animesh Manna 
Thanks for review pushed to drm-intel-next.
> 
> Regards,
> Animesh
> > ---
> >  drivers/gpu/drm/i915/display/intel_display_debugfs.c | 5 +
> >  1 file changed, 5 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
> > b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
> > index d5af5708c9da..65832c4d962f 100644
> > --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
> > +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
> > @@ -2256,6 +2256,11 @@ static int i915_lpsp_capability_show(struct
> > seq_file *m, void *data)
> > if (connector->status != connector_status_connected)
> > return -ENODEV;
> >
> > +   if (DISPLAY_VER(i915) >= 13) {
> > +   LPSP_CAPABLE(encoder->port <= PORT_B);
> > +   return 0;
> > +   }
> > +
> > switch (DISPLAY_VER(i915)) {
> > case 12:
> > /*
> > --
> > 2.26.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 3/4] iommu/vt-d: Disable superpage for Skylake igfx

2021-07-14 Thread Ville Syrjala
From: Ville Syrjälä 

Skylake has known issues with VT-d superpage. Namely frame buffer
compression (FBC) can't be safely used when superpage is enabled.
Currently we're disabling FBC entirely when VT-d is active, but
I think just disabling superpage would be better since FBC can
save some power.

TODO: would be nice to use the macros from include/drm/i915_pciids.h,
  but can't do that with DECLARE_PCI_FIXUP_HEADER()

Cc: David Woodhouse 
Cc: Lu Baolu 
Cc: io...@lists.linux-foundation.org
Signed-off-by: Ville Syrjälä 
---
 drivers/iommu/intel/iommu.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index ef717908647d..ea9c69dc13f5 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5681,6 +5681,33 @@ static void quirk_skip_igfx_superpage(struct pci_dev 
*dev)
iommu_skip_igfx_superpage = 1;
 }
 
+/* Skylake igfx has issues with superpage */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1906, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1913, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x190E, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1915, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1902, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x190A, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x190B, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1917, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1916, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1921, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x191E, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1912, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x191A, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x191B, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x191D, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1923, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1926, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1927, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x192A, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x192B, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x192D, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1932, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x193A, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x193B, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x193D, 
quirk_skip_igfx_superpage);
+
 /* Broxton igfx has issues with superpage */
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0A84, 
quirk_skip_igfx_superpage);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1A84, 
quirk_skip_igfx_superpage);
-- 
2.31.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 4/4] drm/i915/fbc: Allow FBC + VT-d on SKL/BXT

2021-07-14 Thread Ville Syrjala
From: Ville Syrjälä 

With the iommu driver disabling VT-d superpage it should be
safe to use FBC on SKL/BXT with VT-d otherwise enabled.

Cc: David Woodhouse 
Cc: Lu Baolu 
Cc: io...@lists.linux-foundation.org
Signed-off-by: Ville Syrjälä 
---
 drivers/gpu/drm/i915/display/intel_fbc.c | 16 
 1 file changed, 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c 
b/drivers/gpu/drm/i915/display/intel_fbc.c
index 82effb64a3b9..de44f93a33d0 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -1448,19 +1448,6 @@ static int intel_sanitize_fbc_option(struct 
drm_i915_private *dev_priv)
return 0;
 }
 
-static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv)
-{
-   /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */
-   if (intel_vtd_active() &&
-   (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) {
-   drm_info(_priv->drm,
-"Disabling framebuffer compression (FBC) to prevent 
screen flicker with VT-d enabled\n");
-   return true;
-   }
-
-   return false;
-}
-
 /**
  * intel_fbc_init - Initialize FBC
  * @dev_priv: the i915 device
@@ -1478,9 +1465,6 @@ void intel_fbc_init(struct drm_i915_private *dev_priv)
if (!drm_mm_initialized(_priv->mm.stolen))
mkwrite_device_info(dev_priv)->display.has_fbc = false;
 
-   if (need_fbc_vtd_wa(dev_priv))
-   mkwrite_device_info(dev_priv)->display.has_fbc = false;
-
dev_priv->params.enable_fbc = intel_sanitize_fbc_option(dev_priv);
drm_dbg_kms(_priv->drm, "Sanitized enable_fbc value: %d\n",
dev_priv->params.enable_fbc);
-- 
2.31.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 2/4] iommu/vt-d: Disable superpage for Broxton igfx

2021-07-14 Thread Ville Syrjala
From: Ville Syrjälä 

Broxton has known issues with VT-d superpage. Namely frame buffer
compression (FBC) can't be safely used when superpage is enabled.
Currently we're disabling FBC entirely when VT-d is active, but
I think just disabling superpage would be better since FBC can
save some power.

TODO: would be nice to use the macros from include/drm/i915_pciids.h,
  but can't do that with DECLARE_PCI_FIXUP_HEADER()

Cc: David Woodhouse 
Cc: Lu Baolu 
Cc: io...@lists.linux-foundation.org
Signed-off-by: Ville Syrjälä 
---
 drivers/iommu/intel/iommu.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 08ba412053e3..ef717908647d 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5681,6 +5681,13 @@ static void quirk_skip_igfx_superpage(struct pci_dev 
*dev)
iommu_skip_igfx_superpage = 1;
 }
 
+/* Broxton igfx has issues with superpage */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0A84, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1A84, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1A85, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5A84, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5A85, 
quirk_skip_igfx_superpage);
+
 /* Geminilake igfx appears to have issues with superpage */
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3184, 
quirk_skip_igfx_superpage);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3185, 
quirk_skip_igfx_superpage);
-- 
2.31.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 1/4] iommu/vt-d: Disable superpage for Geminilake igfx

2021-07-14 Thread Ville Syrjala
From: Ville Syrjälä 

While running "gem_exec_big --r single" from igt-gpu-tools on
Geminilake as soon as a 2M mapping is made I tend to get a DMAR
write fault. Strangely the faulting address is always a 4K page
and usually very far away from the 2M page that got mapped.
But if no 2M mappings get used I can't reproduce the fault.

I also tried to dump the PTE for the faulting address but it actually
looks correct to me (ie. definitely seems to have the write bit set):
 DMAR: DRHD: handling fault status reg 2
 DMAR: [DMA Write] Request device [00:02.0] PASID  fault addr 
7fa8a78000 [fault reason 05] PTE Write access is not set
 DMAR: fault 7fa8a78000 (level=1) PTE = 149efc003

So not really sure what's going on and this might just be full on duct
tape, but it seems to work here. The machine has now survived a whole day
running that test whereas with superpage enabled it fails in less than
a minute usually.

Credit to Lu Baolu for the mechanism to disable superpage just
for the igfx iommu.

TODO: would be nice to use the macros from include/drm/i915_pciids.h,
  but can't do that with DECLARE_PCI_FIXUP_HEADER()

Cc: David Woodhouse 
Cc: Lu Baolu 
Cc: io...@lists.linux-foundation.org
Signed-off-by: Ville Syrjälä 
---
 drivers/iommu/intel/iommu.c | 34 +-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index b04bfb0d9409..08ba412053e3 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -365,6 +365,7 @@ static int intel_iommu_strict;
 static int intel_iommu_superpage = 1;
 static int iommu_identity_mapping;
 static int iommu_skip_te_disable;
+static int iommu_skip_igfx_superpage;
 
 #define IDENTMAP_GFX   2
 #define IDENTMAP_AZALIA4
@@ -674,6 +675,27 @@ static bool domain_update_iommu_snooping(struct 
intel_iommu *skip)
return ret;
 }
 
+static bool domain_use_super_page(struct dmar_domain *domain)
+{
+   struct dmar_drhd_unit *drhd;
+   struct intel_iommu *iommu;
+   bool ret = true;
+
+   if (!intel_iommu_superpage)
+   return false;
+
+   rcu_read_lock();
+   for_each_active_iommu(iommu, drhd) {
+   if (drhd->gfx_dedicated && iommu_skip_igfx_superpage) {
+   ret = false;
+   break;
+   }
+   }
+   rcu_read_unlock();
+
+   return ret;
+}
+
 static int domain_update_iommu_superpage(struct dmar_domain *domain,
 struct intel_iommu *skip)
 {
@@ -681,7 +703,7 @@ static int domain_update_iommu_superpage(struct dmar_domain 
*domain,
struct intel_iommu *iommu;
int mask = 0x3;
 
-   if (!intel_iommu_superpage)
+   if (!domain_use_super_page(domain))
return 0;
 
/* set iommu_superpage to the smallest common denominator */
@@ -5653,6 +5675,16 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, 
quirk_iommu_igfx);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
 
+static void quirk_skip_igfx_superpage(struct pci_dev *dev)
+{
+   pci_info(dev, "Disabling IOMMU superpage for graphics on this 
chipset\n");
+   iommu_skip_igfx_superpage = 1;
+}
+
+/* Geminilake igfx appears to have issues with superpage */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3184, 
quirk_skip_igfx_superpage);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3185, 
quirk_skip_igfx_superpage);
+
 static void quirk_iommu_rwbf(struct pci_dev *dev)
 {
if (risky_device(dev))
-- 
2.31.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 0/4] iommu/vt-d: Disable igfx iommu superpage on bxt/skl/glk

2021-07-14 Thread Ville Syrjala
From: Ville Syrjälä 

I ran into some kind of fail with VT-d superpage on Geminilake igfx,
so without any better ideas let's just disable it.

Additionally Skylake/Broxton igfx have known issues with VT-d
superpage as well, so let's disable it there as well. This should
let us re-enable frame buffer compression (FBC) for some extra
power savings when the display is on.

v2: disable superpage only for the igfx iommu

Cc: David Woodhouse 
Cc: Lu Baolu 
Cc: io...@lists.linux-foundation.org

Ville Syrjälä (4):
  iommu/vt-d: Disable superpage for Geminilake igfx
  iommu/vt-d: Disable superpage for Broxton igfx
  iommu/vt-d: Disable superpage for Skylake igfx
  drm/i915/fbc: Allow FBC + VT-d on SKL/BXT

 drivers/gpu/drm/i915/display/intel_fbc.c | 16 --
 drivers/iommu/intel/iommu.c  | 68 +++-
 2 files changed, 67 insertions(+), 17 deletions(-)

-- 
2.31.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/debugfs: DISPLAY_VER 13 lpsp capability

2021-07-14 Thread Manna, Animesh



> -Original Message-
> From: Gupta, Anshuman 
> Sent: Tuesday, July 13, 2021 1:28 PM
> To: intel-gfx@lists.freedesktop.org
> Cc: Gupta, Anshuman ; Manna, Animesh
> 
> Subject: [PATCH] drm/i915/debugfs: DISPLAY_VER 13 lpsp capability
> 
> Extend i915_lpsp_capability debugfs to DG2,ADLP and future platforms.
> 
> v2: commit log modification.
> 
> Cc: Animesh Manna 
> Signed-off-by: Anshuman Gupta 

Changes look good to me.

Reviewed-by: Animesh Manna 

Regards,
Animesh 
> ---
>  drivers/gpu/drm/i915/display/intel_display_debugfs.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
> b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
> index d5af5708c9da..65832c4d962f 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
> +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
> @@ -2256,6 +2256,11 @@ static int i915_lpsp_capability_show(struct seq_file
> *m, void *data)
>   if (connector->status != connector_status_connected)
>   return -ENODEV;
> 
> + if (DISPLAY_VER(i915) >= 13) {
> + LPSP_CAPABLE(encoder->port <= PORT_B);
> + return 0;
> + }
> +
>   switch (DISPLAY_VER(i915)) {
>   case 12:
>   /*
> --
> 2.26.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits

2021-07-14 Thread Daniel Vetter
On Wed, Jul 14, 2021 at 02:42:53PM +0300, Ville Syrjälä wrote:
> On Wed, Jul 14, 2021 at 01:16:57PM +0200, Daniel Vetter wrote:
> > On Tue, Jul 13, 2021 at 09:46:30PM +0300, Ville Syrjälä wrote:
> > > On Tue, Jul 13, 2021 at 07:24:23PM +0100, Matthew Auld wrote:
> > > > On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
> > > >  wrote:
> > > > >
> > > > > On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > > > > > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > > > > >  wrote:
> > > > > > >
> > > > > > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > > > > > + /**
> > > > > > > > +  * @cache_coherent:
> > > > > > > > +  *
> > > > > > > > +  * Track whether the pages are coherent with the GPU if 
> > > > > > > > reading or
> > > > > > > > +  * writing through the CPU cache.
> > > > > > > > +  *
> > > > > > > > +  * This largely depends on the @cache_level, for example 
> > > > > > > > if the object
> > > > > > > > +  * is marked as I915_CACHE_LLC, then GPU access is 
> > > > > > > > coherent for both
> > > > > > > > +  * reads and writes through the CPU cache.
> > > > > > > > +  *
> > > > > > > > +  * Note that on platforms with shared-LLC 
> > > > > > > > support(HAS_LLC) reads through
> > > > > > > > +  * the CPU cache are always coherent, regardless of the 
> > > > > > > > @cache_level. On
> > > > > > > > +  * snooping based platforms this is not the case, unless 
> > > > > > > > the full
> > > > > > > > +  * I915_CACHE_LLC or similar setting is used.
> > > > > > > > +  *
> > > > > > > > +  * As a result of this we need to track coherency 
> > > > > > > > separately for reads
> > > > > > > > +  * and writes, in order to avoid superfluous flushing on 
> > > > > > > > shared-LLC
> > > > > > > > +  * platforms, for reads.
> > > > > > > > +  *
> > > > > > > > +  * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > > > > > +  *
> > > > > > > > +  * When reading through the CPU cache, the GPU is still 
> > > > > > > > coherent. Note
> > > > > > > > +  * that no data has actually been modified here, so it 
> > > > > > > > might seem
> > > > > > > > +  * strange that we care about this.
> > > > > > > > +  *
> > > > > > > > +  * As an example, if some object is mapped on the CPU 
> > > > > > > > with write-back
> > > > > > > > +  * caching, and we read some page, then the cache likely 
> > > > > > > > now contains
> > > > > > > > +  * the data from that read. At this point the cache and 
> > > > > > > > main memory
> > > > > > > > +  * match up, so all good. But next the GPU needs to write 
> > > > > > > > some data to
> > > > > > > > +  * that same page. Now if the @cache_level is 
> > > > > > > > I915_CACHE_NONE and the
> > > > > > > > +  * the platform doesn't have the shared-LLC, then the GPU 
> > > > > > > > will
> > > > > > > > +  * effectively skip invalidating the cache(or however 
> > > > > > > > that works
> > > > > > > > +  * internally) when writing the new value.  This is 
> > > > > > > > really bad since the
> > > > > > > > +  * GPU has just written some new data to main memory, but 
> > > > > > > > the CPU cache
> > > > > > > > +  * is still valid and now contains stale data. As a 
> > > > > > > > result the next time
> > > > > > > > +  * we do a cached read with the CPU, we are rewarded with 
> > > > > > > > stale data.
> > > > > > > > +  * Likewise if the cache is later flushed, we might be 
> > > > > > > > rewarded with
> > > > > > > > +  * overwriting main memory with stale data.
> > > > > > > > +  *
> > > > > > > > +  * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > > > > > +  *
> > > > > > > > +  * When writing through the CPU cache, the GPU is still 
> > > > > > > > coherent. Note
> > > > > > > > +  * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > > > > > +  *
> > > > > > > > +  * This is never set when I915_CACHE_NONE is used for 
> > > > > > > > @cache_level,
> > > > > > > > +  * where instead we have to manually flush the caches 
> > > > > > > > after writing
> > > > > > > > +  * through the CPU cache. For other cache levels this 
> > > > > > > > should be set and
> > > > > > > > +  * the object is therefore considered coherent for both 
> > > > > > > > reads and writes
> > > > > > > > +  * through the CPU cache.
> > > > > > >
> > > > > > > I don't remember why we have this read vs. write split and this 
> > > > > > > new
> > > > > > > documentation doesn't seem to really explain it either.
> > > > > >
> > > > > > Hmm, I attempted to explain that earlier:
> > > > > >
> > > > > > * Note that on platforms with shared-LLC support(HAS_LLC) reads 
> > > > > > through
> > > > > > * the CPU cache are always coherent, regardless of the 
> > > > > > @cache_level. On
> > > > > > * snooping based platforms this is not the case, unless the full
> > > > > > * I915_CACHE_LLC or 

Re: [Intel-gfx] [PATCH v4 3/4] drm/shmem-helpers: Allocate wc pages on x86

2021-07-14 Thread Daniel Vetter
On Wed, Jul 14, 2021 at 01:54:50PM +0200, Christian König wrote:
> Am 13.07.21 um 22:51 schrieb Daniel Vetter:
> > intel-gfx-ci realized that something is not quite coherent anymore on
> > some platforms for our i915+vgem tests, when I tried to switch vgem
> > over to shmem helpers.
> > 
> > After lots of head-scratching I realized that I've removed calls to
> > drm_clflush. And we need those. To make this a bit cleaner use the
> > same page allocation tooling as ttm, which does internally clflush
> > (and more, as needed on any platform instead of just the intel x86
> > cpus i915 can be combined with).
> > 
> > Unfortunately this doesn't exist on arm, or as a generic feature. For
> > that I think only the dma-api can get at wc memory reliably, so maybe
> > we'd need some kind of GFP_WC flag to do this properly.
> 
> The problem is that this stuff is extremely architecture specific. So GFP_WC
> and GFP_UNCACHED are really what we should aim for in the long term.
> 
> And as far as I know we have at least the following possibilities how it is
> implemented:
> 
> * A fixed amount of registers which tells the CPU the caching behavior for a
> memory region, e.g. MTRR.
> * Some bits of the memory pointers used, e.g. you see the same memory at
> different locations with different caching attributes.
> * Some bits in the CPUs page table.
> * Some bits in a separate page table.
> 
> On top of that there is the PCIe specification which defines non-cache
> snooping access as an extension.

Yeah dma-buf is extremely ill-defined even on x86 if you combine these
all. We just play a game of whack-a-mole with the cacheline dirt until
it's gone.

That's the other piece here, how do you even make sure that the page is
properly flushed and ready for wc access:
- easy case is x86 with clflush available pretty much everywhere (since
  10+ years at least)
- next are cpus which have some cache flush instructions, but it's highly
  cpu model specific
- next up is the same, but you absolutely have to make sure there's no
  other mapping around anymore or the coherency fabric just dies
- and I'm pretty sure there's worse stuff where you defacto can only
  allocate wc memory that's set aside at boot-up and that's all you ever
  get.

Cheers, Daniel

> Mixing that with the CPU caching behavior gets you some really nice ways to
> break a driver. In general x86 seems to be rather graceful, but arm and
> PowerPC are easily pissed if you mess that up.
> 
> > Signed-off-by: Daniel Vetter 
> > Cc: Christian König 
> > Cc: "Thomas Hellström" 
> > Cc: Maarten Lankhorst 
> > Cc: Maxime Ripard 
> > Cc: Thomas Zimmermann 
> > Cc: David Airlie 
> > Cc: Daniel Vetter 
> 
> Acked-by: Christian König 
> 
> Regards,
> Christian.
> 
> > ---
> >   drivers/gpu/drm/drm_gem_shmem_helper.c | 14 ++
> >   1 file changed, 14 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c 
> > b/drivers/gpu/drm/drm_gem_shmem_helper.c
> > index 296ab1b7c07f..657d2490aaa5 100644
> > --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> > +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> > @@ -10,6 +10,10 @@
> >   #include 
> >   #include 
> > +#ifdef CONFIG_X86
> > +#include 
> > +#endif
> > +
> >   #include 
> >   #include 
> >   #include 
> > @@ -162,6 +166,11 @@ static int drm_gem_shmem_get_pages_locked(struct 
> > drm_gem_shmem_object *shmem)
> > return PTR_ERR(pages);
> > }
> > +#ifdef CONFIG_X86
> > +   if (shmem->map_wc)
> > +   set_pages_array_wc(pages, obj->size >> PAGE_SHIFT);
> > +#endif
> > +
> > shmem->pages = pages;
> > return 0;
> > @@ -203,6 +212,11 @@ static void drm_gem_shmem_put_pages_locked(struct 
> > drm_gem_shmem_object *shmem)
> > if (--shmem->pages_use_count > 0)
> > return;
> > +#ifdef CONFIG_X86
> > +   if (shmem->map_wc)
> > +   set_pages_array_wb(shmem->pages, obj->size >> PAGE_SHIFT);
> > +#endif
> > +
> > drm_gem_put_pages(obj, shmem->pages,
> >   shmem->pages_mark_dirty_on_put,
> >   shmem->pages_mark_accessed_on_put);
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/vgem: use shmem helpers

2021-07-14 Thread Daniel Vetter
Aside from deleting lots of code the real motivation here is to switch
the mmap over to VM_PFNMAP, to be more consistent with what real gpu
drivers do. They're all VM_PFNMAP, which means get_user_pages doesn't
work, and even if you try and there's a struct page behind that,
touching it and mucking around with its refcount can upset drivers
real bad.

v2: Review from Thomas:
- sort #include
- drop more dead code that I didn't spot somehow

v3: select DRM_GEM_SHMEM_HELPER to make it build (intel-gfx-ci)

v4: I got tricked by 0cf2ef46c6c0 ("drm/shmem-helper: Use cached
mappings by default"), and we need WC in vgem because vgem doesn't
have explicit begin/end cpu access ioctls.

Also add a comment why exactly vgem has to use wc.

v5: Don't set obj->base.funcs, it will default to drm_gem_shmem_funcs
(Thomas)

v6: vgem also needs an MMU for remapping

v7: I absolutely butchered the rebases over the vgem mmap change and
revert and broke the patch. Actually go back to v6 from before the
vgem mmap changes.

Cc: Thomas Zimmermann 
Acked-by: Thomas Zimmermann 
Cc: John Stultz 
Cc: Sumit Semwal 
Cc: "Christian König" 
Signed-off-by: Daniel Vetter 
Cc: Melissa Wen 
Cc: Chris Wilson 
---
 drivers/gpu/drm/Kconfig |   5 +-
 drivers/gpu/drm/vgem/vgem_drv.c | 342 ++--
 2 files changed, 16 insertions(+), 331 deletions(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 314eefa39892..28f7d2006e8b 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -272,7 +272,8 @@ source "drivers/gpu/drm/kmb/Kconfig"
 
 config DRM_VGEM
tristate "Virtual GEM provider"
-   depends on DRM
+   depends on DRM && MMU
+   select DRM_GEM_SHMEM_HELPER
help
  Choose this option to get a virtual graphics memory manager,
  as used by Mesa's software renderer for enhanced performance.
@@ -280,7 +281,7 @@ config DRM_VGEM
 
 config DRM_VKMS
tristate "Virtual KMS (EXPERIMENTAL)"
-   depends on DRM
+   depends on DRM && MMU
select DRM_KMS_HELPER
select DRM_GEM_SHMEM_HELPER
select CRC32
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index bf38a7e319d1..a87eafa89e9f 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -38,6 +38,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -50,87 +51,11 @@
 #define DRIVER_MAJOR   1
 #define DRIVER_MINOR   0
 
-static const struct drm_gem_object_funcs vgem_gem_object_funcs;
-
 static struct vgem_device {
struct drm_device drm;
struct platform_device *platform;
 } *vgem_device;
 
-static void vgem_gem_free_object(struct drm_gem_object *obj)
-{
-   struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj);
-
-   kvfree(vgem_obj->pages);
-   mutex_destroy(&vgem_obj->pages_lock);
-
-   if (obj->import_attach)
-   drm_prime_gem_destroy(obj, vgem_obj->table);
-
-   drm_gem_object_release(obj);
-   kfree(vgem_obj);
-}
-
-static vm_fault_t vgem_gem_fault(struct vm_fault *vmf)
-{
-   struct vm_area_struct *vma = vmf->vma;
-   struct drm_vgem_gem_object *obj = vma->vm_private_data;
-   /* We don't use vmf->pgoff since that has the fake offset */
-   unsigned long vaddr = vmf->address;
-   vm_fault_t ret = VM_FAULT_SIGBUS;
-   loff_t num_pages;
-   pgoff_t page_offset;
-   page_offset = (vaddr - vma->vm_start) >> PAGE_SHIFT;
-
-   num_pages = DIV_ROUND_UP(obj->base.size, PAGE_SIZE);
-
-   if (page_offset >= num_pages)
-   return VM_FAULT_SIGBUS;
-
-   mutex_lock(&obj->pages_lock);
-   if (obj->pages) {
-   get_page(obj->pages[page_offset]);
-   vmf->page = obj->pages[page_offset];
-   ret = 0;
-   }
-   mutex_unlock(&obj->pages_lock);
-   if (ret) {
-   struct page *page;
-
-   page = shmem_read_mapping_page(
-   file_inode(obj->base.filp)->i_mapping,
-   page_offset);
-   if (!IS_ERR(page)) {
-   vmf->page = page;
-   ret = 0;
-   } else switch (PTR_ERR(page)) {
-   case -ENOSPC:
-   case -ENOMEM:
-   ret = VM_FAULT_OOM;
-   break;
-   case -EBUSY:
-   ret = VM_FAULT_RETRY;
-   break;
-   case -EFAULT:
-   case -EINVAL:
-   ret = VM_FAULT_SIGBUS;
-   break;
-   default:
-   WARN_ON(PTR_ERR(page));
-   ret = VM_FAULT_SIGBUS;
-   break;
-   }
-
-   }
-   return ret;
-}
-
-static const struct 

Re: [Intel-gfx] [PATCH] drm/i915: Fix wm params for ccs

2021-07-14 Thread Imre Deak
On Tue, Jul 13, 2021 at 09:44:21PM +0300, Juha-Pekka Heikkila wrote:
> skl_compute_plane_wm_params() didn't take into account ccs
> modifiers on graphics ver >= 12
> 
> Signed-off-by: Juha-Pekka Heikkila 
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 6 ++
>  1 file changed, 2 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 0cbb79452fcf..540a7ecbf004 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5249,11 +5249,9 @@ skl_compute_wm_params(const struct intel_crtc_state 
> *crtc_state,
>  
>   wp->y_tiled = modifier == I915_FORMAT_MOD_Y_TILED ||
> modifier == I915_FORMAT_MOD_Yf_TILED ||
> -   modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
> -   modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
> +   is_ccs_modifier(modifier);
>   wp->x_tiled = modifier == I915_FORMAT_MOD_X_TILED;
> - wp->rc_surface = modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
> -  modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
> + wp->rc_surface = is_ccs_modifier(modifier);

This sets rc_surface for MC modifiers as well. Doesn't matter atm,
because it's only used for a GEN 9 workaround, but better to keep it
consistent.

>   wp->is_planar = intel_format_info_is_yuv_semiplanar(format, modifier);
>  
>   wp->width = width;
> -- 
> 2.28.0
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 resent] drm/i915: Add TTM offset argument to mmap.

2021-07-14 Thread Maarten Lankhorst
The FIXED mapping is only used for ttm, and tells userspace that the
mapping type is pre-defined. This disables the other type of mmap
offsets when discrete memory is used, so fix the selftests as well.

Document the struct as well, so it shows up in docbook.

Cc: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
Signed-off-by: Maarten Lankhorst 
---
Resent, forgot to cc dri-devel

 drivers/gpu/drm/i915/gem/i915_gem_mman.c  | 17 ++-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
 .../drm/i915/gem/selftests/i915_gem_mman.c| 27 ++-
 include/uapi/drm/i915_drm.h   | 46 ++-
 4 files changed, 77 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index a90f796e85c0..31c4021bb6be 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -679,10 +679,16 @@ __assign_mmap_offset(struct drm_i915_gem_object *obj,
return -ENODEV;
 
if (obj->ops->mmap_offset)  {
+   if (mmap_type != I915_MMAP_TYPE_FIXED)
+   return -ENODEV;
+
*offset = obj->ops->mmap_offset(obj);
return 0;
}
 
+   if (mmap_type == I915_MMAP_TYPE_FIXED)
+   return -ENODEV;
+
if (mmap_type != I915_MMAP_TYPE_GTT &&
!i915_gem_object_has_struct_page(obj) &&
!i915_gem_object_has_iomem(obj))
@@ -727,7 +733,9 @@ i915_gem_dumb_mmap_offset(struct drm_file *file,
 {
enum i915_mmap_type mmap_type;
 
-   if (boot_cpu_has(X86_FEATURE_PAT))
+   if (HAS_LMEM(to_i915(dev)))
+   mmap_type = I915_MMAP_TYPE_FIXED;
+   else if (boot_cpu_has(X86_FEATURE_PAT))
mmap_type = I915_MMAP_TYPE_WC;
	else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt))
return -ENODEV;
@@ -798,6 +806,10 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void 
*data,
type = I915_MMAP_TYPE_UC;
break;
 
+   case I915_MMAP_OFFSET_FIXED:
+   type = I915_MMAP_TYPE_FIXED;
+   break;
+
default:
return -EINVAL;
}
@@ -968,6 +980,9 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct 
*vma)
		vma->vm_ops = &vm_ops_cpu;
break;
 
+   case I915_MMAP_TYPE_FIXED:
+   GEM_WARN_ON(1);
+   /* fall-through */
case I915_MMAP_TYPE_WB:
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
		vma->vm_ops = &vm_ops_cpu;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index ef3de2ae9723..afbadfc5516b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -105,6 +105,7 @@ enum i915_mmap_type {
I915_MMAP_TYPE_WC,
I915_MMAP_TYPE_WB,
I915_MMAP_TYPE_UC,
+   I915_MMAP_TYPE_FIXED,
 };
 
 struct i915_mmap_offset {
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 1da8bd675e54..52789c8ad337 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -573,6 +573,14 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
return 0;
 }
 
+static enum i915_mmap_type default_mapping(struct drm_i915_private *i915)
+{
+   if (HAS_LMEM(i915))
+   return I915_MMAP_TYPE_FIXED;
+
+   return I915_MMAP_TYPE_GTT;
+}
+
 static bool assert_mmap_offset(struct drm_i915_private *i915,
   unsigned long size,
   int expected)
@@ -585,7 +593,7 @@ static bool assert_mmap_offset(struct drm_i915_private 
*i915,
if (IS_ERR(obj))
return expected && expected == PTR_ERR(obj);
 
-   ret = __assign_mmap_offset(obj, I915_MMAP_TYPE_GTT, &offset, NULL);
+   ret = __assign_mmap_offset(obj, default_mapping(i915), &offset, NULL);
i915_gem_object_put(obj);
 
return ret == expected;
@@ -689,7 +697,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
goto out;
}
 
-   err = __assign_mmap_offset(obj, I915_MMAP_TYPE_GTT, &offset, NULL);
+   err = __assign_mmap_offset(obj, default_mapping(i915), &offset, NULL);
if (err) {
pr_err("Unable to insert object into reclaimed hole\n");
goto err_obj;
@@ -831,8 +839,14 @@ static int wc_check(struct drm_i915_gem_object *obj)
 
 static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type)
 {
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
bool no_map;
 
+   if (HAS_LMEM(i915))
+   return type == I915_MMAP_TYPE_FIXED;
+   else if (type == I915_MMAP_TYPE_FIXED)
+   return false;
+
if (type == I915_MMAP_TYPE_GTT &&
  

Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits

2021-07-14 Thread Ville Syrjälä
On Wed, Jul 14, 2021 at 02:42:53PM +0300, Ville Syrjälä wrote:
> On Wed, Jul 14, 2021 at 01:16:57PM +0200, Daniel Vetter wrote:
> > On Tue, Jul 13, 2021 at 09:46:30PM +0300, Ville Syrjälä wrote:
> > > On Tue, Jul 13, 2021 at 07:24:23PM +0100, Matthew Auld wrote:
> > > > On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
> > > >  wrote:
> > > > >
> > > > > On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > > > > > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > > > > >  wrote:
> > > > > > >
> > > > > > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > > > > > + /**
> > > > > > > > +  * @cache_coherent:
> > > > > > > > +  *
> > > > > > > > +  * Track whether the pages are coherent with the GPU if 
> > > > > > > > reading or
> > > > > > > > +  * writing through the CPU cache.
> > > > > > > > +  *
> > > > > > > > +  * This largely depends on the @cache_level, for example 
> > > > > > > > if the object
> > > > > > > > +  * is marked as I915_CACHE_LLC, then GPU access is 
> > > > > > > > coherent for both
> > > > > > > > +  * reads and writes through the CPU cache.
> > > > > > > > +  *
> > > > > > > > +  * Note that on platforms with shared-LLC 
> > > > > > > > support(HAS_LLC) reads through
> > > > > > > > +  * the CPU cache are always coherent, regardless of the 
> > > > > > > > @cache_level. On
> > > > > > > > +  * snooping based platforms this is not the case, unless 
> > > > > > > > the full
> > > > > > > > +  * I915_CACHE_LLC or similar setting is used.
> > > > > > > > +  *
> > > > > > > > +  * As a result of this we need to track coherency 
> > > > > > > > separately for reads
> > > > > > > > +  * and writes, in order to avoid superfluous flushing on 
> > > > > > > > shared-LLC
> > > > > > > > +  * platforms, for reads.
> > > > > > > > +  *
> > > > > > > > +  * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > > > > > +  *
> > > > > > > > +  * When reading through the CPU cache, the GPU is still 
> > > > > > > > coherent. Note
> > > > > > > > +  * that no data has actually been modified here, so it 
> > > > > > > > might seem
> > > > > > > > +  * strange that we care about this.
> > > > > > > > +  *
> > > > > > > > +  * As an example, if some object is mapped on the CPU 
> > > > > > > > with write-back
> > > > > > > > +  * caching, and we read some page, then the cache likely 
> > > > > > > > now contains
> > > > > > > > +  * the data from that read. At this point the cache and 
> > > > > > > > main memory
> > > > > > > > +  * match up, so all good. But next the GPU needs to write 
> > > > > > > > some data to
> > > > > > > > +  * that same page. Now if the @cache_level is 
> > > > > > > > I915_CACHE_NONE and the
> > > > > > > > +  * the platform doesn't have the shared-LLC, then the GPU 
> > > > > > > > will
> > > > > > > > +  * effectively skip invalidating the cache(or however 
> > > > > > > > that works
> > > > > > > > +  * internally) when writing the new value.  This is 
> > > > > > > > really bad since the
> > > > > > > > +  * GPU has just written some new data to main memory, but 
> > > > > > > > the CPU cache
> > > > > > > > +  * is still valid and now contains stale data. As a 
> > > > > > > > result the next time
> > > > > > > > +  * we do a cached read with the CPU, we are rewarded with 
> > > > > > > > stale data.
> > > > > > > > +  * Likewise if the cache is later flushed, we might be 
> > > > > > > > rewarded with
> > > > > > > > +  * overwriting main memory with stale data.
> > > > > > > > +  *
> > > > > > > > +  * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > > > > > +  *
> > > > > > > > +  * When writing through the CPU cache, the GPU is still 
> > > > > > > > coherent. Note
> > > > > > > > +  * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > > > > > +  *
> > > > > > > > +  * This is never set when I915_CACHE_NONE is used for 
> > > > > > > > @cache_level,
> > > > > > > > +  * where instead we have to manually flush the caches 
> > > > > > > > after writing
> > > > > > > > +  * through the CPU cache. For other cache levels this 
> > > > > > > > should be set and
> > > > > > > > +  * the object is therefore considered coherent for both 
> > > > > > > > reads and writes
> > > > > > > > +  * through the CPU cache.
> > > > > > >
> > > > > > > I don't remember why we have this read vs. write split and this 
> > > > > > > new
> > > > > > > documentation doesn't seem to really explain it either.
> > > > > >
> > > > > > Hmm, I attempted to explain that earlier:
> > > > > >
> > > > > > * Note that on platforms with shared-LLC support(HAS_LLC) reads 
> > > > > > through
> > > > > > * the CPU cache are always coherent, regardless of the 
> > > > > > @cache_level. On
> > > > > > * snooping based platforms this is not the case, unless the full
> > > > > > * I915_CACHE_LLC or 

Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits

2021-07-14 Thread Ville Syrjälä
On Wed, Jul 14, 2021 at 01:16:57PM +0200, Daniel Vetter wrote:
> On Tue, Jul 13, 2021 at 09:46:30PM +0300, Ville Syrjälä wrote:
> > On Tue, Jul 13, 2021 at 07:24:23PM +0100, Matthew Auld wrote:
> > > On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
> > >  wrote:
> > > >
> > > > On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > > > > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > > > >  wrote:
> > > > > >
> > > > > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > > > > + /**
> > > > > > > +  * @cache_coherent:
> > > > > > > +  *
> > > > > > > +  * Track whether the pages are coherent with the GPU if 
> > > > > > > reading or
> > > > > > > +  * writing through the CPU cache.
> > > > > > > +  *
> > > > > > > +  * This largely depends on the @cache_level, for example if 
> > > > > > > the object
> > > > > > > +  * is marked as I915_CACHE_LLC, then GPU access is coherent 
> > > > > > > for both
> > > > > > > +  * reads and writes through the CPU cache.
> > > > > > > +  *
> > > > > > > +  * Note that on platforms with shared-LLC support(HAS_LLC) 
> > > > > > > reads through
> > > > > > > +  * the CPU cache are always coherent, regardless of the 
> > > > > > > @cache_level. On
> > > > > > > +  * snooping based platforms this is not the case, unless 
> > > > > > > the full
> > > > > > > +  * I915_CACHE_LLC or similar setting is used.
> > > > > > > +  *
> > > > > > > +  * As a result of this we need to track coherency 
> > > > > > > separately for reads
> > > > > > > +  * and writes, in order to avoid superfluous flushing on 
> > > > > > > shared-LLC
> > > > > > > +  * platforms, for reads.
> > > > > > > +  *
> > > > > > > +  * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > > > > +  *
> > > > > > > +  * When reading through the CPU cache, the GPU is still 
> > > > > > > coherent. Note
> > > > > > > +  * that no data has actually been modified here, so it 
> > > > > > > might seem
> > > > > > > +  * strange that we care about this.
> > > > > > > +  *
> > > > > > > +  * As an example, if some object is mapped on the CPU with 
> > > > > > > write-back
> > > > > > > +  * caching, and we read some page, then the cache likely 
> > > > > > > now contains
> > > > > > > +  * the data from that read. At this point the cache and 
> > > > > > > main memory
> > > > > > > +  * match up, so all good. But next the GPU needs to write 
> > > > > > > some data to
> > > > > > > +  * that same page. Now if the @cache_level is 
> > > > > > > I915_CACHE_NONE and the
> > > > > > > +  * the platform doesn't have the shared-LLC, then the GPU 
> > > > > > > will
> > > > > > > +  * effectively skip invalidating the cache(or however that 
> > > > > > > works
> > > > > > > +  * internally) when writing the new value.  This is really 
> > > > > > > bad since the
> > > > > > > +  * GPU has just written some new data to main memory, but 
> > > > > > > the CPU cache
> > > > > > > +  * is still valid and now contains stale data. As a result 
> > > > > > > the next time
> > > > > > > +  * we do a cached read with the CPU, we are rewarded with 
> > > > > > > stale data.
> > > > > > > +  * Likewise if the cache is later flushed, we might be 
> > > > > > > rewarded with
> > > > > > > +  * overwriting main memory with stale data.
> > > > > > > +  *
> > > > > > > +  * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > > > > +  *
> > > > > > > +  * When writing through the CPU cache, the GPU is still 
> > > > > > > coherent. Note
> > > > > > > +  * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > > > > +  *
> > > > > > > +  * This is never set when I915_CACHE_NONE is used for 
> > > > > > > @cache_level,
> > > > > > > +  * where instead we have to manually flush the caches after 
> > > > > > > writing
> > > > > > > +  * through the CPU cache. For other cache levels this 
> > > > > > > should be set and
> > > > > > > +  * the object is therefore considered coherent for both 
> > > > > > > reads and writes
> > > > > > > +  * through the CPU cache.
> > > > > >
> > > > > > I don't remember why we have this read vs. write split and this new
> > > > > > documentation doesn't seem to really explain it either.
> > > > >
> > > > > Hmm, I attempted to explain that earlier:
> > > > >
> > > > > * Note that on platforms with shared-LLC support(HAS_LLC) reads 
> > > > > through
> > > > > * the CPU cache are always coherent, regardless of the @cache_level. 
> > > > > On
> > > > > * snooping based platforms this is not the case, unless the full
> > > > > * I915_CACHE_LLC or similar setting is used.
> > > > > *
> > > > > * As a result of this we need to track coherency separately for reads
> > > > > * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > > * platforms, for reads.
> > > > >
> > > > > So AFAIK it's just because 

Re: [Intel-gfx] [PATCH 5/5] drm/i915/ehl: unconditionally flush the pages on acquire

2021-07-14 Thread Daniel Vetter
On Tue, Jul 13, 2021 at 11:45:54AM +0100, Matthew Auld wrote:
> EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
> possible for userspace to bypass the GTT caching bits set by the kernel,
> as per the given object cache_level. This is troublesome since the heavy
> flush we apply when first acquiring the pages is skipped if the kernel
> thinks the object is coherent with the GPU. As a result it might be
> possible to bypass the cache and read the contents of the page directly,
> which could be stale data. If it's just a case of userspace shooting
> themselves in the foot then so be it, but since i915 takes the stance of
> always zeroing memory before handing it to userspace, we need to prevent
> this.
> 
> v2: this time actually set cache_dirty in put_pages()
> v3: move to get_pages() which looks simpler
> 
> BSpec: 34007
> References: 046091758b50 ("Revert "drm/i915/ehl: Update MOCS table for EHL"")
> Signed-off-by: Matthew Auld 
> Cc: Tejas Upadhyay 
> Cc: Francisco Jerez 
> Cc: Lucas De Marchi 
> Cc: Jon Bloomfield 
> Cc: Chris Wilson 
> Cc: Matt Roper 
> Cc: Daniel Vetter 

Reviewed-by: Daniel Vetter 

I was pondering whether we can have a solid testcase for this, but:
- igt lacks the visibility, since we can't check easily whether stuff
  leaks.
- selftests don't have rendercopy, where we could select the nasty
  mocs entry

So it's a bit awkward. Is there something, or is this pure hw workaround
stuff on theoretical grounds?
-Daniel
> ---
>  .../gpu/drm/i915/gem/i915_gem_object_types.h   |  6 ++
>  drivers/gpu/drm/i915/gem/i915_gem_shmem.c  | 18 ++
>  2 files changed, 24 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
> b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index da2194290436..7089d1b222c5 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -522,6 +522,12 @@ struct drm_i915_gem_object {
>* I915_BO_CACHE_COHERENT_FOR_WRITE, i.e that the GPU will be coherent
>* for both reads and writes though the CPU cache. So pretty much this
>* should only be needed for I915_CACHE_NONE objects.
> +  *
> +  * Update: Some bonkers hardware decided to add the 'Bypass LLC' MOCS
> +  * entry, which defeats our @cache_coherent tracking, since userspace
> +  * can freely bypass the CPU cache when touching the pages with the GPU,
> +  * where the kernel is completely unaware. On such platform we need
> +  * apply the sledgehammer-on-acquire regardless of the @cache_coherent.
>*/
>   unsigned int cache_dirty:1;
>  
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
> b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> index 6a04cce188fc..11f072193f3b 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> @@ -182,6 +182,24 @@ static int shmem_get_pages(struct drm_i915_gem_object 
> *obj)
>   if (i915_gem_object_needs_bit17_swizzle(obj))
>   i915_gem_object_do_bit_17_swizzle(obj, st);
>  
> + /*
> +  * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
> +  * possible for userspace to bypass the GTT caching bits set by the
> +  * kernel, as per the given object cache_level. This is troublesome
> +  * since the heavy flush we apply when first gathering the pages is
> +  * skipped if the kernel thinks the object is coherent with the GPU. As
> +  * a result it might be possible to bypass the cache and read the
> +  * contents of the page directly, which could be stale data. If it's
> +  * just a case of userspace shooting themselves in the foot then so be
> +  * it, but since i915 takes the stance of always zeroing memory before
> +  * handing it to userspace, we need to prevent this.
> +  *
> +  * By setting cache_dirty here we make the clflush in set_pages
> +  * unconditional on such platforms.
> +  */
> + if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER)
> + obj->cache_dirty = true;
> +
>   __i915_gem_object_set_pages(obj, st, sg_page_sizes);
>  
>   return 0;
> -- 
> 2.26.3
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits

2021-07-14 Thread Daniel Vetter
On Tue, Jul 13, 2021 at 09:46:30PM +0300, Ville Syrjälä wrote:
> On Tue, Jul 13, 2021 at 07:24:23PM +0100, Matthew Auld wrote:
> > On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
> >  wrote:
> > >
> > > On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > > > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > > >  wrote:
> > > > >
> > > > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > > > + /**
> > > > > > +  * @cache_coherent:
> > > > > > +  *
> > > > > > +  * Track whether the pages are coherent with the GPU if 
> > > > > > reading or
> > > > > > +  * writing through the CPU cache.
> > > > > > +  *
> > > > > > +  * This largely depends on the @cache_level, for example if 
> > > > > > the object
> > > > > > +  * is marked as I915_CACHE_LLC, then GPU access is coherent 
> > > > > > for both
> > > > > > +  * reads and writes through the CPU cache.
> > > > > > +  *
> > > > > > +  * Note that on platforms with shared-LLC support(HAS_LLC) 
> > > > > > reads through
> > > > > > +  * the CPU cache are always coherent, regardless of the 
> > > > > > @cache_level. On
> > > > > > +  * snooping based platforms this is not the case, unless the 
> > > > > > full
> > > > > > +  * I915_CACHE_LLC or similar setting is used.
> > > > > > +  *
> > > > > > +  * As a result of this we need to track coherency separately 
> > > > > > for reads
> > > > > > +  * and writes, in order to avoid superfluous flushing on 
> > > > > > shared-LLC
> > > > > > +  * platforms, for reads.
> > > > > > +  *
> > > > > > +  * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > > > +  *
> > > > > > +  * When reading through the CPU cache, the GPU is still 
> > > > > > coherent. Note
> > > > > > +  * that no data has actually been modified here, so it might 
> > > > > > seem
> > > > > > +  * strange that we care about this.
> > > > > > +  *
> > > > > > +  * As an example, if some object is mapped on the CPU with 
> > > > > > write-back
> > > > > > +  * caching, and we read some page, then the cache likely now 
> > > > > > contains
> > > > > > +  * the data from that read. At this point the cache and main 
> > > > > > memory
> > > > > > +  * match up, so all good. But next the GPU needs to write 
> > > > > > some data to
> > > > > > +  * that same page. Now if the @cache_level is I915_CACHE_NONE 
> > > > > > and the
> > > > > > +  * the platform doesn't have the shared-LLC, then the GPU will
> > > > > > +  * effectively skip invalidating the cache(or however that 
> > > > > > works
> > > > > > +  * internally) when writing the new value.  This is really 
> > > > > > bad since the
> > > > > > +  * GPU has just written some new data to main memory, but the 
> > > > > > CPU cache
> > > > > > +  * is still valid and now contains stale data. As a result 
> > > > > > the next time
> > > > > > +  * we do a cached read with the CPU, we are rewarded with 
> > > > > > stale data.
> > > > > > +  * Likewise if the cache is later flushed, we might be 
> > > > > > rewarded with
> > > > > > +  * overwriting main memory with stale data.
> > > > > > +  *
> > > > > > +  * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > > > +  *
> > > > > > +  * When writing through the CPU cache, the GPU is still 
> > > > > > coherent. Note
> > > > > > +  * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > > > +  *
> > > > > > +  * This is never set when I915_CACHE_NONE is used for 
> > > > > > @cache_level,
> > > > > > +  * where instead we have to manually flush the caches after 
> > > > > > writing
> > > > > > +  * through the CPU cache. For other cache levels this should 
> > > > > > be set and
> > > > > > +  * the object is therefore considered coherent for both reads 
> > > > > > and writes
> > > > > > +  * through the CPU cache.
> > > > >
> > > > > I don't remember why we have this read vs. write split and this new
> > > > > documentation doesn't seem to really explain it either.
> > > >
> > > > Hmm, I attempted to explain that earlier:
> > > >
> > > > * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > > * the CPU cache are always coherent, regardless of the @cache_level. On
> > > > * snooping based platforms this is not the case, unless the full
> > > > * I915_CACHE_LLC or similar setting is used.
> > > > *
> > > > * As a result of this we need to track coherency separately for reads
> > > > * and writes, in order to avoid superfluous flushing on shared-LLC
> > > > * platforms, for reads.
> > > >
> > > > So AFAIK it's just because shared-LLC can be coherent for reads, while
> > > > also not being coherent for writes(CACHE_NONE),
> > >
> > > CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
> > > never heard of any mechanism that would make it only partially coherent.
> > 
> > What do you mean by "comes to 

Re: [Intel-gfx] [PATCH v15 06/12] swiotlb: Use is_swiotlb_force_bounce for swiotlb data bouncing

2021-07-14 Thread Konrad Rzeszutek Wilk
..snip..
> > > I think the main question I have is how would you like to see patches for
> > > 5.15? i.e. as patches on top of devel/for-linus-5.14 or something else?
> > 
> > Yes that would be perfect. If there are any dependencies on the rc1, I
> > can rebase it on top of that.
> 
> Yes, please, rebasing would be very helpful. The broader rework of
> 'io_tlb_default_mem' is going to conflict quite badly otherwise.

There is a devel/for-linus-5.15 (based on v5.14-rc1) now.

Thank you!
> 
> Cheers,
> 
> Will
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Add TTM offset argument to mmap.

2021-07-14 Thread Maarten Lankhorst
The FIXED mapping is only used for ttm, and tells userspace that the
mapping type is pre-defined. This disables the other type of mmap
offsets when discrete memory is used, so fix the selftests as well.

Document the struct as well, so it shows up in docbook.

Cc: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
Signed-off-by: Maarten Lankhorst 
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  | 17 ++-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
 .../drm/i915/gem/selftests/i915_gem_mman.c| 27 ++-
 include/uapi/drm/i915_drm.h   | 46 ++-
 4 files changed, 77 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index a90f796e85c0..31c4021bb6be 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -679,10 +679,16 @@ __assign_mmap_offset(struct drm_i915_gem_object *obj,
return -ENODEV;
 
if (obj->ops->mmap_offset)  {
+   if (mmap_type != I915_MMAP_TYPE_FIXED)
+   return -ENODEV;
+
*offset = obj->ops->mmap_offset(obj);
return 0;
}
 
+   if (mmap_type == I915_MMAP_TYPE_FIXED)
+   return -ENODEV;
+
if (mmap_type != I915_MMAP_TYPE_GTT &&
!i915_gem_object_has_struct_page(obj) &&
!i915_gem_object_has_iomem(obj))
@@ -727,7 +733,9 @@ i915_gem_dumb_mmap_offset(struct drm_file *file,
 {
enum i915_mmap_type mmap_type;
 
-   if (boot_cpu_has(X86_FEATURE_PAT))
+   if (HAS_LMEM(to_i915(dev)))
+   mmap_type = I915_MMAP_TYPE_FIXED;
+   else if (boot_cpu_has(X86_FEATURE_PAT))
mmap_type = I915_MMAP_TYPE_WC;
	else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt))
return -ENODEV;
@@ -798,6 +806,10 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void 
*data,
type = I915_MMAP_TYPE_UC;
break;
 
+   case I915_MMAP_OFFSET_FIXED:
+   type = I915_MMAP_TYPE_FIXED;
+   break;
+
default:
return -EINVAL;
}
@@ -968,6 +980,9 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct 
*vma)
		vma->vm_ops = &vm_ops_cpu;
break;
 
+   case I915_MMAP_TYPE_FIXED:
+   GEM_WARN_ON(1);
+   /* fall-through */
case I915_MMAP_TYPE_WB:
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
		vma->vm_ops = &vm_ops_cpu;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index ef3de2ae9723..afbadfc5516b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -105,6 +105,7 @@ enum i915_mmap_type {
I915_MMAP_TYPE_WC,
I915_MMAP_TYPE_WB,
I915_MMAP_TYPE_UC,
+   I915_MMAP_TYPE_FIXED,
 };
 
 struct i915_mmap_offset {
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 1da8bd675e54..52789c8ad337 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -573,6 +573,14 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
return 0;
 }
 
+static enum i915_mmap_type default_mapping(struct drm_i915_private *i915)
+{
+   if (HAS_LMEM(i915))
+   return I915_MMAP_TYPE_FIXED;
+
+   return I915_MMAP_TYPE_GTT;
+}
+
 static bool assert_mmap_offset(struct drm_i915_private *i915,
   unsigned long size,
   int expected)
@@ -585,7 +593,7 @@ static bool assert_mmap_offset(struct drm_i915_private 
*i915,
if (IS_ERR(obj))
return expected && expected == PTR_ERR(obj);
 
-   ret = __assign_mmap_offset(obj, I915_MMAP_TYPE_GTT, &offset, NULL);
+   ret = __assign_mmap_offset(obj, default_mapping(i915), &offset, NULL);
i915_gem_object_put(obj);
 
return ret == expected;
@@ -689,7 +697,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
goto out;
}
 
-   err = __assign_mmap_offset(obj, I915_MMAP_TYPE_GTT, &offset, NULL);
+   err = __assign_mmap_offset(obj, default_mapping(i915), &offset, NULL);
if (err) {
pr_err("Unable to insert object into reclaimed hole\n");
goto err_obj;
@@ -831,8 +839,14 @@ static int wc_check(struct drm_i915_gem_object *obj)
 
 static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type)
 {
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
bool no_map;
 
+   if (HAS_LMEM(i915))
+   return type == I915_MMAP_TYPE_FIXED;
+   else if (type == I915_MMAP_TYPE_FIXED)
+   return false;
+
if (type == I915_MMAP_TYPE_GTT &&

Re: [Intel-gfx] [v2] drm/i915/display/dsc: Force dsc BPP

2021-07-14 Thread Sharma, Swati2
With both review comments by Jani N addressed,
Reviewed-by: Swati Sharma 

Thanks and Regards,
Swati

-Original Message-
From: Intel-gfx  On Behalf Of Vandita 
Kulkarni
Sent: Thursday, July 8, 2021 7:31 PM
To: intel-gfx@lists.freedesktop.org
Cc: Nikula, Jani 
Subject: [Intel-gfx] [v2] drm/i915/display/dsc: Force dsc BPP

Set DSC BPP to the value forced through
debugfs. It can go from bpc to bpp-1.

v2: Use default dsc bpp when we are just
doing force_dsc_en, use default dsc bpp
for invalid force_dsc_bpp values. (Jani)

Signed-off-by: Vandita Kulkarni 
---
 drivers/gpu/drm/i915/display/intel_dp.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c 
b/drivers/gpu/drm/i915/display/intel_dp.c
index 5b52beaddada..c386ef8eb200 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1274,6 +1274,23 @@ static int intel_dp_dsc_compute_config(struct intel_dp 
*intel_dp,
   
pipe_config->pipe_bpp);
pipe_config->dsc.slice_count = dsc_dp_slice_count;
}
+
+   /* As of today we support DSC for only RGB */
+   if (intel_dp->force_dsc_bpp) {
+   if (intel_dp->force_dsc_bpp >= 8 &&
+   intel_dp->force_dsc_bpp < pipe_bpp) {
+   drm_dbg_kms(_priv->drm,
+   "DSC BPP forced to %d",
+   intel_dp->force_dsc_bpp);
+   pipe_config->dsc.compressed_bpp =
+   intel_dp->force_dsc_bpp;
+   } else {
+   drm_dbg_kms(_priv->drm,
+   "Invalid DSC BPP %d",
+   intel_dp->force_dsc_bpp);
+   }
+   }
+
/*
 * VDSC engine operates at 1 Pixel per clock, so if peak pixel rate
 * is greater than the maximum Cdclock and if slice count is even
-- 
2.32.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [igt-dev] [PATCH i-g-t] tests/kms_addfb_basic: pass the actual fd to gem_has_lmem

2021-07-14 Thread Ramalingam C
On 2021-07-07 at 11:52:38 +0100, Matthew Auld wrote:
> Currently we pass the devid as if it were the fd, which doesn't work.
> 
> Signed-off-by: Matthew Auld 
LGTM.

Reviewed-by: Ramalingam C 

> Cc: Mohammed Khajapasha 
> Cc: Latvala Petri 
> Cc: Michael J. Ruhl 
> ---
>  tests/kms_addfb_basic.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/tests/kms_addfb_basic.c b/tests/kms_addfb_basic.c
> index 91fb6ac9..eff1d9b2 100644
> --- a/tests/kms_addfb_basic.c
> +++ b/tests/kms_addfb_basic.c
> @@ -150,13 +150,11 @@ static void invalid_tests(int fd)
>   igt_describe("Check if addfb2 with a system memory gem object "
>"fails correctly if device requires local memory 
> framebuffers");
>   igt_subtest("invalid-smem-bo-on-discrete") {
> - int devid;
>   uint32_t handle, stride;
>   uint64_t size;
>  
>   igt_require_intel(fd);
> - devid = intel_get_drm_devid(fd);
> - igt_require(gem_has_lmem(devid));
> + igt_require(gem_has_lmem(fd));
>   igt_calc_fb_size(fd, f.width, f.height,
>   DRM_FORMAT_XRGB, 0, , );
>   handle = gem_create_in_memory_regions(fd, size, REGION_SMEM);
> -- 
> 2.26.3
> 
> ___
> igt-dev mailing list
> igt-...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/igt-dev
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✗ Fi.CI.IGT: failure for drm/i915/ehl: Resolve insufficient header credits in MIPI DSI

2021-07-14 Thread Patchwork
== Series Details ==

Series: drm/i915/ehl: Resolve insufficient header credits in MIPI DSI
URL   : https://patchwork.freedesktop.org/series/92498/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_10343_full -> Patchwork_20591_full


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_20591_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_20591_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
---

  Here are the unknown changes that may have been introduced in 
Patchwork_20591_full:

### IGT changes ###

 Possible regressions 

  * igt@dumb_buffer@map-invalid-size:
- shard-snb:  NOTRUN -> [DMESG-WARN][1]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20591/shard-snb2/igt@dumb_buf...@map-invalid-size.html

  * igt@gem_exec_schedule@u-independent@vcs1:
- shard-kbl:  [PASS][2] -> [FAIL][3]
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-kbl7/igt@gem_exec_schedule@u-independ...@vcs1.html
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20591/shard-kbl6/igt@gem_exec_schedule@u-independ...@vcs1.html

  
 Warnings 

  * igt@dumb_buffer@map-invalid-size:
- shard-apl:  [DMESG-WARN][4] ([i915#1982]) -> [DMESG-WARN][5]
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-apl6/igt@dumb_buf...@map-invalid-size.html
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20591/shard-apl6/igt@dumb_buf...@map-invalid-size.html

  
 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@prime_vgem@sync@bcs0:
- {shard-rkl}:[PASS][6] -> [INCOMPLETE][7]
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-rkl-6/igt@prime_vgem@s...@bcs0.html
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20591/shard-rkl-1/igt@prime_vgem@s...@bcs0.html

  * igt@runner@aborted:
- {shard-rkl}:([FAIL][8], [FAIL][9], [FAIL][10], [FAIL][11]) 
([i915#2029] / [i915#3002]) -> ([FAIL][12], [FAIL][13], [FAIL][14], [FAIL][15], 
[FAIL][16], [FAIL][17]) ([i915#2029] / [i915#3002] / [i915#3728])
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-rkl-2/igt@run...@aborted.html
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-rkl-1/igt@run...@aborted.html
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-rkl-1/igt@run...@aborted.html
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-rkl-5/igt@run...@aborted.html
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20591/shard-rkl-5/igt@run...@aborted.html
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20591/shard-rkl-2/igt@run...@aborted.html
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20591/shard-rkl-1/igt@run...@aborted.html
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20591/shard-rkl-1/igt@run...@aborted.html
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20591/shard-rkl-6/igt@run...@aborted.html
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20591/shard-rkl-6/igt@run...@aborted.html

  
Known issues


  Here are the changes found in Patchwork_20591_full that come from known 
issues:

### IGT changes ###

 Issues hit 

  * igt@gem_ctx_persistence@legacy-engines-mixed:
- shard-snb:  NOTRUN -> [SKIP][18] ([fdo#109271] / [i915#1099]) +5 
similar issues
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20591/shard-snb2/igt@gem_ctx_persiste...@legacy-engines-mixed.html

  * igt@gem_ctx_persistence@many-contexts:
- shard-tglb: [PASS][19] -> [FAIL][20] ([i915#2410])
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-tglb7/igt@gem_ctx_persiste...@many-contexts.html
   [20]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20591/shard-tglb1/igt@gem_ctx_persiste...@many-contexts.html

  * igt@gem_exec_fair@basic-deadline:
- shard-skl:  NOTRUN -> [FAIL][21] ([i915#2846])
   [21]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20591/shard-skl6/igt@gem_exec_f...@basic-deadline.html

  * igt@gem_exec_fair@basic-none-rrul@rcs0:
- shard-glk:  [PASS][22] -> [FAIL][23] ([i915#2842])
   [22]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-glk5/igt@gem_exec_fair@basic-none-r...@rcs0.html
   [23]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20591/shard-glk7/igt@gem_exec_fair@basic-none-r...@rcs0.html

  * igt@gem_exec_fair@basic-none-share@rcs0:
- shard-iclb: [PASS][24] -> [FAIL][25] ([i915#2842])
   [24]: 

Re: [Intel-gfx] [PATCH v4 09/18] drm/v3d: Use scheduler dependency handling

2021-07-14 Thread Melissa Wen
On 07/12, Daniel Vetter wrote:
> With the prep work out of the way this isn't tricky anymore.
> 
> Aside: The chaining of the various jobs is a bit awkward, with the
> possibility of failure in bad places. I think with the
> drm_sched_job_init/arm split and maybe preloading the
> job->dependencies xarray this should be fixable.
> 
> Cc: Melissa Wen 
> Signed-off-by: Daniel Vetter 
> Cc: Cc: Emma Anholt 
> ---
>  drivers/gpu/drm/v3d/v3d_drv.h   |  5 -
>  drivers/gpu/drm/v3d/v3d_gem.c   | 25 -
>  drivers/gpu/drm/v3d/v3d_sched.c | 29 +
>  3 files changed, 9 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
> index 1d870261eaac..f80f4ff1f7aa 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.h
> +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> @@ -192,11 +192,6 @@ struct v3d_job {
>   struct drm_gem_object **bo;
>   u32 bo_count;
>  
> - /* Array of struct dma_fence * to block on before submitting this job.
> -  */
> - struct xarray deps;
> - unsigned long last_dep;
> -
>   /* v3d fence to be signaled by IRQ handler when the job is complete. */
>   struct dma_fence *irq_fence;
>  
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index 5eccd3658938..42b07ffbea5e 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -257,8 +257,8 @@ v3d_lock_bo_reservations(struct v3d_job *job,
>   return ret;
>  
>   for (i = 0; i < job->bo_count; i++) {
> - ret = drm_gem_fence_array_add_implicit(>deps,
> -job->bo[i], true);
> + ret = drm_sched_job_await_implicit(>base,
> +job->bo[i], true);
>   if (ret) {
>   drm_gem_unlock_reservations(job->bo, job->bo_count,
>   acquire_ctx);
> @@ -354,8 +354,6 @@ static void
>  v3d_job_free(struct kref *ref)
>  {
>   struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
> - unsigned long index;
> - struct dma_fence *fence;
>   int i;
>  
>   for (i = 0; i < job->bo_count; i++) {
> @@ -364,11 +362,6 @@ v3d_job_free(struct kref *ref)
>   }
>   kvfree(job->bo);
>  
> - xa_for_each(>deps, index, fence) {
> - dma_fence_put(fence);
> - }
> - xa_destroy(>deps);
> -
>   dma_fence_put(job->irq_fence);
>   dma_fence_put(job->done_fence);
>  
> @@ -452,7 +445,6 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file 
> *file_priv,
>   if (ret < 0)
>   return ret;
>  
> - xa_init_flags(>deps, XA_FLAGS_ALLOC);
>   ret = drm_sched_job_init(>base, _priv->sched_entity[queue],
>v3d_priv);
>   if (ret)
> @@ -462,7 +454,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file 
> *file_priv,
>   if (ret == -EINVAL)
>   goto fail_job;
>  
> - ret = drm_gem_fence_array_add(>deps, in_fence);
> + ret = drm_sched_job_await_fence(>base, in_fence);
>   if (ret)
>   goto fail_job;
>  
> @@ -472,7 +464,6 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file 
> *file_priv,
>  fail_job:
>   drm_sched_job_cleanup(>base);
>  fail:
> - xa_destroy(>deps);
>   pm_runtime_put_autosuspend(v3d->drm.dev);
>   return ret;
>  }
> @@ -619,8 +610,8 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
>   if (bin) {
>   v3d_push_job(>base);
>  
> - ret = drm_gem_fence_array_add(>base.deps,
> -   
> dma_fence_get(bin->base.done_fence));
> + ret = drm_sched_job_await_fence(>base.base,
> + 
> dma_fence_get(bin->base.done_fence));
>   if (ret)
>   goto fail_unreserve;
>   }
> @@ -630,7 +621,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
>   if (clean_job) {
>   struct dma_fence *render_fence =
>   dma_fence_get(render->base.done_fence);
> - ret = drm_gem_fence_array_add(_job->deps, render_fence);
> + ret = drm_sched_job_await_fence(_job->base, render_fence);
>   if (ret)
>   goto fail_unreserve;
>   v3d_push_job(clean_job);
> @@ -820,8 +811,8 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
>   mutex_lock(>sched_lock);
>   v3d_push_job(>base);
>  
> - ret = drm_gem_fence_array_add(_job->deps,
> -   dma_fence_get(job->base.done_fence));
> + ret = drm_sched_job_await_fence(_job->base,
> + dma_fence_get(job->base.done_fence));
>   if (ret)
>   goto fail_unreserve;
>  
> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
> index 

Re: [Intel-gfx] [PATCH v4 08/18] drm/v3d: Move drm_sched_job_init to v3d_job_init

2021-07-14 Thread Melissa Wen
On 07/12, Daniel Vetter wrote:
> Prep work for using the scheduler dependency handling. We need to call
> drm_sched_job_init earlier so we can use the new drm_sched_job_await*
> functions for dependency handling here.
> 
> v2: Slightly better commit message and rebase to include the
> drm_sched_job_arm() call (Emma).
> 
> v3: Cleanup jobs under construction correctly (Emma)
> 
> Cc: Melissa Wen 
> Signed-off-by: Daniel Vetter 
> Cc: Emma Anholt 
> ---
>  drivers/gpu/drm/v3d/v3d_drv.h   |  1 +
>  drivers/gpu/drm/v3d/v3d_gem.c   | 88 ++---
>  drivers/gpu/drm/v3d/v3d_sched.c | 15 +++---
>  3 files changed, 44 insertions(+), 60 deletions(-)
> 
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
> index 8a390738d65b..1d870261eaac 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.h
> +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> @@ -332,6 +332,7 @@ int v3d_submit_csd_ioctl(struct drm_device *dev, void 
> *data,
>struct drm_file *file_priv);
>  int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> +void v3d_job_cleanup(struct v3d_job *job);
>  void v3d_job_put(struct v3d_job *job);
>  void v3d_reset(struct v3d_dev *v3d);
>  void v3d_invalidate_caches(struct v3d_dev *v3d);
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index 69ac20e11b09..5eccd3658938 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -392,6 +392,12 @@ v3d_render_job_free(struct kref *ref)
>   v3d_job_free(ref);
>  }
>  
> +void v3d_job_cleanup(struct v3d_job *job)
> +{
> + drm_sched_job_cleanup(>base);
> + v3d_job_put(job);
> +}
> +
>  void v3d_job_put(struct v3d_job *job)
>  {
>   kref_put(>refcount, job->free);
> @@ -433,9 +439,10 @@ v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
>  static int
>  v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
>struct v3d_job *job, void (*free)(struct kref *ref),
> -  u32 in_sync)
> +  u32 in_sync, enum v3d_queue queue)
>  {
>   struct dma_fence *in_fence = NULL;
> + struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
>   int ret;
>  
>   job->v3d = v3d;
> @@ -446,35 +453,33 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file 
> *file_priv,
>   return ret;
>  
>   xa_init_flags(>deps, XA_FLAGS_ALLOC);
> + ret = drm_sched_job_init(>base, _priv->sched_entity[queue],
> +  v3d_priv);
> + if (ret)
> + goto fail;
>  
>   ret = drm_syncobj_find_fence(file_priv, in_sync, 0, 0, _fence);
>   if (ret == -EINVAL)
> - goto fail;
> + goto fail_job;
>  
>   ret = drm_gem_fence_array_add(>deps, in_fence);
>   if (ret)
> - goto fail;
> + goto fail_job;
>  
>   kref_init(>refcount);
>  
>   return 0;
> +fail_job:
> + drm_sched_job_cleanup(>base);
>  fail:
>   xa_destroy(>deps);
>   pm_runtime_put_autosuspend(v3d->drm.dev);
>   return ret;
>  }
>  
> -static int
> -v3d_push_job(struct v3d_file_priv *v3d_priv,
> -  struct v3d_job *job, enum v3d_queue queue)
> +static void
> +v3d_push_job(struct v3d_job *job)
>  {
> - int ret;
> -
> - ret = drm_sched_job_init(>base, _priv->sched_entity[queue],
> -  v3d_priv);
> - if (ret)
> - return ret;
> -
>   drm_sched_job_arm(>base);
>  
>   job->done_fence = dma_fence_get(>base.s_fence->finished);
> @@ -483,8 +488,6 @@ v3d_push_job(struct v3d_file_priv *v3d_priv,
>   kref_get(>refcount);
>  
>   drm_sched_entity_push_job(>base);
> -
> - return 0;
>  }
>  
>  static void
> @@ -530,7 +533,6 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
>   struct drm_file *file_priv)
>  {
>   struct v3d_dev *v3d = to_v3d_dev(dev);
> - struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
>   struct drm_v3d_submit_cl *args = data;
>   struct v3d_bin_job *bin = NULL;
>   struct v3d_render_job *render;
> @@ -556,7 +558,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
>   INIT_LIST_HEAD(>unref_list);
>  
>   ret = v3d_job_init(v3d, file_priv, >base,
> -v3d_render_job_free, args->in_sync_rcl);
> +v3d_render_job_free, args->in_sync_rcl, V3D_RENDER);
>   if (ret) {
>   kfree(render);
>   return ret;
> @@ -570,7 +572,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
>   }
>  
>   ret = v3d_job_init(v3d, file_priv, >base,
> -v3d_job_free, args->in_sync_bcl);
> +v3d_job_free, args->in_sync_bcl, V3D_BIN);
>   if (ret) {
>   v3d_job_put(>base);
>   kfree(bin);
> @@ -592,7 +594,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void 

[Intel-gfx] ✗ Fi.CI.BUILD: failure for drm/i915: Tweaked Wa_14010685332 for all PCHs (rev5)

2021-07-14 Thread Patchwork
== Series Details ==

Series: drm/i915: Tweaked Wa_14010685332 for all PCHs (rev5)
URL   : https://patchwork.freedesktop.org/series/88435/
State : failure

== Summary ==

Applying: drm/i915: Tweaked Wa_14010685332 for all PCHs
error: corrupt patch at line 6
error: could not build fake ancestor
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0001 drm/i915: Tweaked Wa_14010685332 for all PCHs
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✗ Fi.CI.IGT: failure for Minor revid/stepping and workaround cleanup (rev5)

2021-07-14 Thread Patchwork
== Series Details ==

Series: Minor revid/stepping and workaround cleanup (rev5)
URL   : https://patchwork.freedesktop.org/series/92299/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_10343_full -> Patchwork_20590_full


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_20590_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_20590_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
---

  Here are the unknown changes that may have been introduced in 
Patchwork_20590_full:

### IGT changes ###

 Possible regressions 

  * igt@dumb_buffer@map-invalid-size:
- shard-snb:  NOTRUN -> [DMESG-WARN][1]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-snb7/igt@dumb_buf...@map-invalid-size.html

  * igt@kms_frontbuffer_tracking@psr-1p-offscren-pri-shrfb-draw-mmap-gtt:
- shard-skl:  [PASS][2] -> [FAIL][3]
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-skl4/igt@kms_frontbuffer_track...@psr-1p-offscren-pri-shrfb-draw-mmap-gtt.html
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-skl8/igt@kms_frontbuffer_track...@psr-1p-offscren-pri-shrfb-draw-mmap-gtt.html

  
 Warnings 

  * igt@dumb_buffer@map-invalid-size:
- shard-apl:  [DMESG-WARN][4] ([i915#1982]) -> [DMESG-WARN][5]
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-apl6/igt@dumb_buf...@map-invalid-size.html
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-apl1/igt@dumb_buf...@map-invalid-size.html

  
 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@kms_ccs@pipe-a-bad-rotation-90-yf_tiled_ccs:
- {shard-rkl}:[FAIL][6] ([i915#3678]) -> [SKIP][7]
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-rkl-5/igt@kms_ccs@pipe-a-bad-rotation-90-yf_tiled_ccs.html
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-rkl-6/igt@kms_ccs@pipe-a-bad-rotation-90-yf_tiled_ccs.html

  * igt@perf@polling:
- {shard-rkl}:NOTRUN -> [FAIL][8]
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-rkl-5/igt@p...@polling.html

  * igt@sysfs_preempt_timeout@timeout@rcs0:
- {shard-rkl}:[PASS][9] -> [FAIL][10] +3 similar issues
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-rkl-6/igt@sysfs_preempt_timeout@time...@rcs0.html
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-rkl-1/igt@sysfs_preempt_timeout@time...@rcs0.html

  
Known issues


  Here are the changes found in Patchwork_20590_full that come from known 
issues:

### IGT changes ###

 Issues hit 

  * igt@gem_ctx_persistence@legacy-engines-mixed:
- shard-snb:  NOTRUN -> [SKIP][11] ([fdo#109271] / [i915#1099]) +5 
similar issues
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-snb2/igt@gem_ctx_persiste...@legacy-engines-mixed.html

  * igt@gem_eio@in-flight-contexts-1us:
- shard-tglb: [PASS][12] -> [TIMEOUT][13] ([i915#3063])
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-tglb6/igt@gem_...@in-flight-contexts-1us.html
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-tglb6/igt@gem_...@in-flight-contexts-1us.html

  * igt@gem_exec_fair@basic-deadline:
- shard-skl:  NOTRUN -> [FAIL][14] ([i915#2846])
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-skl7/igt@gem_exec_f...@basic-deadline.html

  * igt@gem_exec_fair@basic-none-share@rcs0:
- shard-iclb: [PASS][15] -> [FAIL][16] ([i915#2842])
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-iclb2/igt@gem_exec_fair@basic-none-sh...@rcs0.html
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-iclb7/igt@gem_exec_fair@basic-none-sh...@rcs0.html

  * igt@gem_exec_fair@basic-none@vcs1:
- shard-iclb: NOTRUN -> [FAIL][17] ([i915#2842])
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-iclb4/igt@gem_exec_fair@basic-n...@vcs1.html

  * igt@gem_exec_fair@basic-none@vecs0:
- shard-apl:  NOTRUN -> [FAIL][18] ([i915#2842] / [i915#3468])
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20590/shard-apl6/igt@gem_exec_fair@basic-n...@vecs0.html

  * igt@gem_exec_fair@basic-pace-solo@rcs0:
- shard-glk:  [PASS][19] -> [FAIL][20] ([i915#2842]) +3 similar 
issues
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10343/shard-glk1/igt@gem_exec_fair@basic-pace-s...@rcs0.html
   [20]: 

Re: [Intel-gfx] ✗ Fi.CI.IGT: failure for drm/i915: Fix wm params for ccs

2021-07-14 Thread Juha-Pekka Heikkilä
Hi Lakshmi,

Here would be again one false positive result.

/Juha-Pekka

On Wed, Jul 14, 2021 at 7:38 AM Patchwork 
wrote:

> *Patch Details*
> *Series:* drm/i915: Fix wm params for ccs
> *URL:* https://patchwork.freedesktop.org/series/92491/
> *State:* failure
> *Details:*
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20589/index.html CI
> Bug Log - changes from CI_DRM_10342_full -> Patchwork_20589_full Summary
>
> *FAILURE*
>
> Serious unknown changes coming with Patchwork_20589_full absolutely need
> to be
> verified manually.
>
> If you think the reported changes have nothing to do with the changes
> introduced in Patchwork_20589_full, please notify your bug team to allow
> them
> to document this new failure mode, which will reduce false positives in CI.
> Possible new issues
>
> Here are the unknown changes that may have been introduced in
> Patchwork_20589_full:
> IGT changes Possible regressions
>
>- igt@dumb_buffer@map-invalid-size:
>   - shard-apl: NOTRUN -> DMESG-WARN
>   
> 
>
> Suppressed
>
> The following results come from untrusted machines, tests, or statuses.
> They do not affect the overall result.
>
>-
>
>igt@kms_dither@fb-8bpc-vs-panel-6bpc:
>- {shard-rkl}: NOTRUN -> SKIP
>   
> 
>-
>
>igt@runner@aborted:
>- {shard-rkl}: (FAIL
>   
> ,
>   FAIL
>   
> ,
>   FAIL
>   
> ,
>   FAIL
>   
> ,
>   FAIL
>   
> )
>   ([i915#3002] / [i915#3728]) -> (FAIL
>   
> ,
>   FAIL
>   
> ,
>   FAIL
>   
> ,
>   FAIL
>   
> )
>   ([i915#3002])
>
> Known issues
>
> Here are the changes found in Patchwork_20589_full that come from known
> issues:
> IGT changes Issues hit
>
>-
>
>igt@gem_create@create-massive:
>- shard-snb: NOTRUN -> DMESG-WARN
>   
> 
>   ([i915#3002])
>-
>
>igt@gem_ctx_persistence@legacy-engines-cleanup:
>- shard-snb: NOTRUN -> SKIP
>   
> 
>   ([fdo#109271] / [i915#1099]) +3 similar issues
>-
>
>igt@gem_ctx_persistence@legacy-engines-hang@blt:
>- shard-skl: NOTRUN -> SKIP
>   
> 
>   ([fdo#109271]) +119 similar issues
>-
>
>igt@gem_exec_fair@basic-deadline:
>- shard-apl: NOTRUN -> FAIL
>   
> 
>   ([i915#2846])
>-
>
>igt@gem_exec_fair@basic-pace-share@rcs0:
>- shard-tglb: PASS
>   
> 
>   -> FAIL
>   
> 
>   ([i915#2842])
>-
>
>igt@gem_exec_fair@basic-throttle@rcs0:
>-
>
>   shard-glk: PASS
>   
> 
>   -> FAIL
>   
> 
>   ([i915#2842])
>   -
>
>   shard-iclb: PASS
>   
> 
>   -> FAIL
>   
> 
>   ([i915#2849])
>   -
>
>igt@gem_exec_reloc@basic-wide-active@rcs0:
>- shard-snb: NOTRUN -> FAIL
>   
> 

Re: [Intel-gfx] [REBASED v2] drm/i915: Tweaked Wa_14010685332 for all PCHs

2021-07-14 Thread Gupta, Anshuman
Thanks David for providing Ack on it.
Shall I use your " Tested-by: David E. Box " tag for 
this patch ?
Br,
Anshuman Gupta.

From: Box, David E  
Sent: Wednesday, July 14, 2021 2:02 AM
To: Gupta, Anshuman ; intel-gfx@lists.freedesktop.org
Cc: Roper, Matthew D ; Vivi, Rodrigo 
; Deak, Imre 
Subject: Re: [REBASED v2] drm/i915: Tweaked Wa_14010685332 for all PCHs

Tested and confirmed working on TGL-H Dell platforms.

David Box
Linux Power Management 
IAGS/SSE

From: Gupta, Anshuman 
Sent: Monday, July 12, 2021 12:09 AM
To: mailto:intel-gfx@lists.freedesktop.org 

Cc: Box, David E ; Gupta, Anshuman 
; Roper, Matthew D 
; Vivi, Rodrigo 
; Deak, Imre 
Subject: [REBASED v2] drm/i915: Tweaked Wa_14010685332 for all PCHs 
 
dispcnlunit1_cp_xosc_clkreq clock observed to be active on TGL-H platform
despite Wa_14010685332 original sequence, thus blocks entry to deeper s0ix 
state.

The Tweaked Wa_14010685332 sequence fixes this issue, therefore use tweaked
Wa_14010685332 sequence for every PCH since PCH_CNP.

v2:
- removed RKL from comment and simplified condition. [Rodrigo]

Fixes: b896898c7369 ("drm/i915: Tweaked Wa_14010685332 for PCHs used on gen11 
platforms")
Cc: Matt Roper 
Cc: Rodrigo Vivi 
Cc: Imre Deak 
Signed-off-by: Anshuman Gupta 
Reviewed-by: Rodrigo Vivi 
---
 .../drm/i915/display/intel_display_power.c    | 16 +++---
 drivers/gpu/drm/i915/i915_irq.c   | 21 ---
 2 files changed, 8 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c 
b/drivers/gpu/drm/i915/display/intel_display_power.c
index 285380079aab..28a363119560 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -6388,13 +6388,13 @@ void intel_display_power_suspend_late(struct 
drm_i915_private *i915)
 if (DISPLAY_VER(i915) >= 11 || IS_GEMINILAKE(i915) ||
 IS_BROXTON(i915)) {
 bxt_enable_dc9(i915);
-   /* Tweaked Wa_14010685332:icp,jsp,mcc */
-   if (INTEL_PCH_TYPE(i915) >= PCH_ICP && INTEL_PCH_TYPE(i915) <= 
PCH_MCC)
-   intel_de_rmw(i915, SOUTH_CHICKEN1,
-    SBCLK_RUN_REFCLK_DIS, 
SBCLK_RUN_REFCLK_DIS);
 } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) {
 hsw_enable_pc8(i915);
 }
+
+   /* Tweaked Wa_14010685332:cnp,icp,jsp,mcc,tgp,adp */
+   if (INTEL_PCH_TYPE(i915) >= PCH_CNP && INTEL_PCH_TYPE(i915) < PCH_DG1)
+   intel_de_rmw(i915, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, 
SBCLK_RUN_REFCLK_DIS);
 }
 
 void intel_display_power_resume_early(struct drm_i915_private *i915)
@@ -6403,13 +6403,13 @@ void intel_display_power_resume_early(struct 
drm_i915_private *i915)
 IS_BROXTON(i915)) {
 gen9_sanitize_dc_state(i915);
 bxt_disable_dc9(i915);
-   /* Tweaked Wa_14010685332:icp,jsp,mcc */
-   if (INTEL_PCH_TYPE(i915) >= PCH_ICP && INTEL_PCH_TYPE(i915) <= 
PCH_MCC)
-   intel_de_rmw(i915, SOUTH_CHICKEN1, 
SBCLK_RUN_REFCLK_DIS, 0);
-
 } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) {
 hsw_disable_pc8(i915);
 }
+
+   /* Tweaked Wa_14010685332:cnp,icp,jsp,mcc,tgp,adp */
+   if (INTEL_PCH_TYPE(i915) >= PCH_CNP && INTEL_PCH_TYPE(i915) < PCH_DG1)
+   intel_de_rmw(i915, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, 0);
 }
 
 void intel_display_power_suspend(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 1d4c683c9de9..99c75a9d7ffa 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3064,24 +3064,6 @@ static void valleyview_irq_reset(struct drm_i915_private 
*dev_priv)
 spin_unlock_irq(_priv->irq_lock);
 }
 
-static void cnp_display_clock_wa(struct drm_i915_private *dev_priv)
-{
-   struct intel_uncore *uncore = _priv->uncore;
-
-   /*
-    * Wa_14010685332:cnp/cmp,tgp,adp
-    * TODO: Clarify which platforms this applies to
-    * TODO: Figure out if this workaround can be applied in the s0ix 
suspend/resume handlers as
-    * on earlier platforms and whether the workaround is also needed for 
runtime suspend/resume
-    */
-   if (INTEL_PCH_TYPE(dev_priv) == PCH_CNP ||
-   (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP && INTEL_PCH_TYPE(dev_priv) < 
PCH_DG1)) {
-   intel_uncore_rmw(uncore, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS,
-