Re: [Intel-gfx] [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback

2022-07-20 Thread Cornelia Huck
On Tue, Jul 19 2022, Jason Gunthorpe  wrote:

> On Thu, Jul 07, 2022 at 03:37:16PM -0600, Alex Williamson wrote:
>> On Mon,  4 Jul 2022 21:59:03 -0300
>> Jason Gunthorpe  wrote:
>> > diff --git a/drivers/s390/cio/vfio_ccw_ops.c 
>> > b/drivers/s390/cio/vfio_ccw_ops.c
>> > index b49e2e9db2dc6f..09e0ce7b72324c 100644
>> > --- a/drivers/s390/cio/vfio_ccw_ops.c
>> > +++ b/drivers/s390/cio/vfio_ccw_ops.c
>> > @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private 
>> > *private)
>> >return ret;
>> >  }
>> >  
>> > -static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
>> > -unsigned long action,
>> > -void *data)
>> > +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 
>> > length)
>> >  {
>> >struct vfio_ccw_private *private =
>> > -  container_of(nb, struct vfio_ccw_private, nb);
>> > -
>> > -  /*
>> > -   * Vendor drivers MUST unpin pages in response to an
>> > -   * invalidation.
>> > -   */
>> > -  if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
>> > -  struct vfio_iommu_type1_dma_unmap *unmap = data;
>> > -
>> > -  if (!cp_iova_pinned(>cp, unmap->iova))
>> > -  return NOTIFY_OK;
>> > +  container_of(vdev, struct vfio_ccw_private, vdev);
>> >  
>> > -  if (vfio_ccw_mdev_reset(private))
>> > -  return NOTIFY_BAD;
>> > +  /* Drivers MUST unpin pages in response to an invalidation. */
>> > +  if (!cp_iova_pinned(>cp, iova))
>> > +  return;
>> >  
>> > -  cp_free(>cp);
>> > -  return NOTIFY_OK;
>> > -  }
>> > +  if (vfio_ccw_mdev_reset(private))
>> > +  return;
>> >  
>> > -  return NOTIFY_DONE;
>> > +  cp_free(>cp);
>> >  }
>> 
>> 
>> The cp_free() call is gone here with [1], so I think this function now
>> just ends with:
>> 
>>  ...
>>  vfio_ccw_mdev_reset(private);
>> }
>> 
>> There are also minor contextual differences elsewhere from that series,
>> so a quick respin to record the changes on list would be appreciated.
>> 
>> However the above kind of highlights that NOTIFY_BAD that silently gets
>> dropped here.  I realize we weren't testing the return value of the
>> notifier call chain and really we didn't intend that notifiers could
>> return a failure here, but does this warrant some logging or suggest
>> future work to allow a device to go offline here?  Thanks.
>
> It looks like no.
>
> If the FSM trapped in a bad state here, such as
> VFIO_CCW_STATE_NOT_OPER, then it means it should have already unpinned
> the pages and this is considered a success for this purpose

A rather pathological case would be a subchannel that cannot be
quiesced and does not end up being non-operational; in theory, the
hardware could still try to access the buffers we provided for I/O. I'd
say that is extremely unlikely, we might log it, but really cannot do
anything else.

>
> The return code here exists only to return to userspace so it can
> detect during a VFIO_DEVICE_RESET that the device has crashed
> irrecoverably.

Does it imply only that ("it's dead, Jim"), or can it also imply a
runaway device? Not that userspace can do much in any case.

>
> Thus just continuing to silently ignore it seems like the best thing.
>
> Jason



Re: [Intel-gfx] [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback

2022-07-07 Thread Alex Williamson
On Mon,  4 Jul 2022 21:59:03 -0300
Jason Gunthorpe  wrote:
> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> index b49e2e9db2dc6f..09e0ce7b72324c 100644
> --- a/drivers/s390/cio/vfio_ccw_ops.c
> +++ b/drivers/s390/cio/vfio_ccw_ops.c
> @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private 
> *private)
>   return ret;
>  }
>  
> -static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
> -   unsigned long action,
> -   void *data)
> +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 
> length)
>  {
>   struct vfio_ccw_private *private =
> - container_of(nb, struct vfio_ccw_private, nb);
> -
> - /*
> -  * Vendor drivers MUST unpin pages in response to an
> -  * invalidation.
> -  */
> - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> - struct vfio_iommu_type1_dma_unmap *unmap = data;
> -
> - if (!cp_iova_pinned(>cp, unmap->iova))
> - return NOTIFY_OK;
> + container_of(vdev, struct vfio_ccw_private, vdev);
>  
> - if (vfio_ccw_mdev_reset(private))
> - return NOTIFY_BAD;
> + /* Drivers MUST unpin pages in response to an invalidation. */
> + if (!cp_iova_pinned(>cp, iova))
> + return;
>  
> - cp_free(>cp);
> - return NOTIFY_OK;
> - }
> + if (vfio_ccw_mdev_reset(private))
> + return;
>  
> - return NOTIFY_DONE;
> + cp_free(>cp);
>  }


The cp_free() call is gone here with [1], so I think this function now
just ends with:

...
vfio_ccw_mdev_reset(private);
}

There are also minor contextual differences elsewhere from that series,
so a quick respin to record the changes on list would be appreciated.

However the above kind of highlights that NOTIFY_BAD that silently gets
dropped here.  I realize we weren't testing the return value of the
notifier call chain and really we didn't intend that notifiers could
return a failure here, but does this warrant some logging or suggest
future work to allow a device to go offline here?  Thanks.

Alex

[1]https://lore.kernel.org/all/20220707135737.720765-1-far...@linux.ibm.com/



Re: [Intel-gfx] [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback

2022-07-05 Thread Zhenyu Wang
On 2022.07.04 21:59:03 -0300, Jason Gunthorpe wrote:
> Instead of having drivers register the notifier with explicit code just
> have them provide a dma_unmap callback op in their driver ops and rely on
> the core code to wire it up.
> 
> Suggested-by: Christoph Hellwig 
> Reviewed-by: Christoph Hellwig 
> Reviewed-by: Kevin Tian 
> Reviewed-by: Tony Krowiak 
> Reviewed-by: Eric Farman 
> Signed-off-by: Jason Gunthorpe 
> ---
>  drivers/gpu/drm/i915/gvt/gvt.h|   1 -
>  drivers/gpu/drm/i915/gvt/kvmgt.c  |  75 ---

gvt change looks fine to me.

Reviewed-by: Zhenyu Wang 

>  drivers/s390/cio/vfio_ccw_ops.c   |  41 ++--
>  drivers/s390/cio/vfio_ccw_private.h   |   2 -
>  drivers/s390/crypto/vfio_ap_ops.c |  53 ++-
>  drivers/s390/crypto/vfio_ap_private.h |   3 -
>  drivers/vfio/vfio.c   | 129 +-
>  drivers/vfio/vfio.h   |   3 +
>  include/linux/vfio.h  |  21 +
>  9 files changed, 88 insertions(+), 240 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
> index aee1a45da74bcb..705689e6401197 100644
> --- a/drivers/gpu/drm/i915/gvt/gvt.h
> +++ b/drivers/gpu/drm/i915/gvt/gvt.h
> @@ -226,7 +226,6 @@ struct intel_vgpu {
>   unsigned long nr_cache_entries;
>   struct mutex cache_lock;
>  
> - struct notifier_block iommu_notifier;
>   atomic_t released;
>  
>   struct kvm_page_track_notifier_node track_node;
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c 
> b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index e2f6c56ab3420c..ecd5bb37b63a2a 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -729,34 +729,25 @@ int intel_gvt_set_edid(struct intel_vgpu *vgpu, int 
> port_num)
>   return ret;
>  }
>  
> -static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
> -  unsigned long action, void *data)
> +static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova,
> +  u64 length)
>  {
> - struct intel_vgpu *vgpu =
> - container_of(nb, struct intel_vgpu, iommu_notifier);
> -
> - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> - struct vfio_iommu_type1_dma_unmap *unmap = data;
> - struct gvt_dma *entry;
> - unsigned long iov_pfn, end_iov_pfn;
> + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
> + struct gvt_dma *entry;
> + u64 iov_pfn = iova >> PAGE_SHIFT;
> + u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE;
>  
> - iov_pfn = unmap->iova >> PAGE_SHIFT;
> - end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;
> + mutex_lock(>cache_lock);
> + for (; iov_pfn < end_iov_pfn; iov_pfn++) {
> + entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
> + if (!entry)
> + continue;
>  
> - mutex_lock(>cache_lock);
> - for (; iov_pfn < end_iov_pfn; iov_pfn++) {
> - entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
> - if (!entry)
> - continue;
> -
> - gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
> -entry->size);
> - __gvt_cache_remove_entry(vgpu, entry);
> - }
> - mutex_unlock(>cache_lock);
> + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
> +entry->size);
> + __gvt_cache_remove_entry(vgpu, entry);
>   }
> -
> - return NOTIFY_OK;
> + mutex_unlock(>cache_lock);
>  }
>  
>  static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
> @@ -783,36 +774,20 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
>  static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
>  {
>   struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
> - unsigned long events;
> - int ret;
> -
> - vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
>  
> - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> - ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, ,
> -  >iommu_notifier);
> - if (ret != 0) {
> - gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
> - ret);
> - goto out;
> - }
> -
> - ret = -EEXIST;
>   if (vgpu->attached)
> - goto undo_iommu;
> + return -EEXIST;
>  
> - ret = -ESRCH;
>   if (!vgpu->vfio_device.kvm ||
>   vgpu->vfio_device.kvm->mm != current->mm) {
>   gvt_vgpu_err("KVM is required to use Intel vGPU\n");
> - goto undo_iommu;
> + return -ESRCH;
>   }
>  
>   kvm_get_kvm(vgpu->vfio_device.kvm);
>  
> - ret = -EEXIST;
>   if (__kvmgt_vgpu_exist(vgpu))
> - goto undo_iommu;
> + return