Re: [PATCH v10 04/11] vfio/pci: Add VFIO_REGION_TYPE_NESTED region type

2020-11-13 Thread Auger Eric
Hi Zenghui,
On 9/24/20 10:23 AM, Zenghui Yu wrote:
> Hi Eric,
> 
> On 2020/3/21 0:19, Eric Auger wrote:
>> Add a new specific DMA_FAULT region aiming to exposed nested mode
>> translation faults.
>>
>> The region has a ring buffer that contains the actual fault
>> records plus a header allowing to handle it (tail/head indices,
>> max capacity, entry size). At the moment the region is dimensionned
>> for 512 fault records.
>>
>> Signed-off-by: Eric Auger 
> 
> [...]
> 
>> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
>> index 379a02c36e37..586b89debed5 100644
>> --- a/drivers/vfio/pci/vfio_pci.c
>> +++ b/drivers/vfio/pci/vfio_pci.c
>> @@ -260,6 +260,69 @@ int vfio_pci_set_power_state(struct
>> vfio_pci_device *vdev, pci_power_t state)
>>   return ret;
>>   }
>>   +static void vfio_pci_dma_fault_release(struct vfio_pci_device *vdev,
>> +   struct vfio_pci_region *region)
>> +{
>> +}
>> +
>> +static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device
>> *vdev,
>> + struct vfio_pci_region *region,
>> + struct vfio_info_cap *caps)
>> +{
>> +    struct vfio_region_info_cap_fault cap = {
>> +    .header.id = VFIO_REGION_INFO_CAP_DMA_FAULT,
>> +    .header.version = 1,
>> +    .version = 1,
>> +    };
>> +    return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
>> +}
>> +
>> +static const struct vfio_pci_regops vfio_pci_dma_fault_regops = {
>> +    .rw    = vfio_pci_dma_fault_rw,
>> +    .release    = vfio_pci_dma_fault_release,
>> +    .add_capability = vfio_pci_dma_fault_add_capability,
>> +};
>> +
>> +#define DMA_FAULT_RING_LENGTH 512
>> +
>> +static int vfio_pci_init_dma_fault_region(struct vfio_pci_device *vdev)
>> +{
>> +    struct vfio_region_dma_fault *header;
>> +    size_t size;
>> +    int ret;
>> +
>> +    mutex_init(&vdev->fault_queue_lock);
>> +
>> +    /*
>> + * We provision 1 page for the header and space for
>> + * DMA_FAULT_RING_LENGTH fault records in the ring buffer.
>> + */
>> +    size = ALIGN(sizeof(struct iommu_fault) *
>> + DMA_FAULT_RING_LENGTH, PAGE_SIZE) + PAGE_SIZE;
>> +
>> +    vdev->fault_pages = kzalloc(size, GFP_KERNEL);
>> +    if (!vdev->fault_pages)
>> +    return -ENOMEM;
>> +
>> +    ret = vfio_pci_register_dev_region(vdev,
>> +    VFIO_REGION_TYPE_NESTED,
>> +    VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT,
>> +    &vfio_pci_dma_fault_regops, size,
>> +    VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE,
>> +    vdev->fault_pages);
>> +    if (ret)
>> +    goto out;
>> +
>> +    header = (struct vfio_region_dma_fault *)vdev->fault_pages;
>> +    header->entry_size = sizeof(struct iommu_fault);
>> +    header->nb_entries = DMA_FAULT_RING_LENGTH;
>> +    header->offset = sizeof(struct vfio_region_dma_fault);
>> +    return 0;
>> +out:
>> +    kfree(vdev->fault_pages);
>> +    return ret;
>> +}
>> +
>>   static int vfio_pci_enable(struct vfio_pci_device *vdev)
>>   {
>>   struct pci_dev *pdev = vdev->pdev;
>> @@ -358,6 +421,10 @@ static int vfio_pci_enable(struct vfio_pci_device
>> *vdev)
>>   }
>>   }
>>   +    ret = vfio_pci_init_dma_fault_region(vdev);
>> +    if (ret)
>> +    goto disable_exit;
>> +
>>   vfio_pci_probe_mmaps(vdev);
>>     return 0;
>> @@ -1383,6 +1450,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
>>     vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
>>   kfree(vdev->region);
>> +    kfree(vdev->fault_pages);
>>   mutex_destroy(&vdev->ioeventfds_lock);
>>     if (!disable_idle_d3)
>> diff --git a/drivers/vfio/pci/vfio_pci_private.h
>> b/drivers/vfio/pci/vfio_pci_private.h
>> index 8a2c7607d513..a392f50e3a99 100644
>> --- a/drivers/vfio/pci/vfio_pci_private.h
>> +++ b/drivers/vfio/pci/vfio_pci_private.h
>> @@ -119,6 +119,8 @@ struct vfio_pci_device {
>>   int    ioeventfds_nr;
>>   struct eventfd_ctx    *err_trigger;
>>   struct eventfd_ctx    *req_trigger;
>> +    u8    *fault_pages;
> 
> What's the reason to use 'u8'? It doesn't match the type of header, nor
> the type of ring buffer.
actually it matches
u8  *pci_config_map;
u8  *vconfig;

fault_pages is the va of the ring buffer. In the header, offset is the
offset of the ring wrt start of the region.

> 
>> +    struct mutex    fault_queue_lock;
>>   struct list_head    dummy_resources_list;
>>   struct mutex    ioeventfds_lock;
>>   struct list_head    ioeventfds_list;
>> @@ -150,6 +152,14 @@ extern ssize_t vfio_pci_vga_rw(struct
>> vfio_pci_device *vdev, char __user *buf,
>>   extern long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t
>> offset,
>>  uint64_t data, int count, int fd);
>>   +struct vfio_pci_fault_abi {
>> +    u32 entry_size;
>> +};
> 
> This is not used by this patch (and the whole series).
removed
> 
>> +
>> +e

Re: [PATCH v10 04/11] vfio/pci: Add VFIO_REGION_TYPE_NESTED region type

2020-09-24 Thread Zenghui Yu

Hi Eric,

On 2020/3/21 0:19, Eric Auger wrote:

Add a new specific DMA_FAULT region aiming to exposed nested mode
translation faults.

The region has a ring buffer that contains the actual fault
records plus a header allowing to handle it (tail/head indices,
max capacity, entry size). At the moment the region is dimensionned
for 512 fault records.

Signed-off-by: Eric Auger 


[...]


diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 379a02c36e37..586b89debed5 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -260,6 +260,69 @@ int vfio_pci_set_power_state(struct vfio_pci_device *vdev, 
pci_power_t state)
return ret;
  }
  
+static void vfio_pci_dma_fault_release(struct vfio_pci_device *vdev,

+  struct vfio_pci_region *region)
+{
+}
+
+static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
+struct vfio_pci_region *region,
+struct vfio_info_cap *caps)
+{
+   struct vfio_region_info_cap_fault cap = {
+   .header.id = VFIO_REGION_INFO_CAP_DMA_FAULT,
+   .header.version = 1,
+   .version = 1,
+   };
+   return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
+}
+
+static const struct vfio_pci_regops vfio_pci_dma_fault_regops = {
+   .rw = vfio_pci_dma_fault_rw,
+   .release= vfio_pci_dma_fault_release,
+   .add_capability = vfio_pci_dma_fault_add_capability,
+};
+
+#define DMA_FAULT_RING_LENGTH 512
+
+static int vfio_pci_init_dma_fault_region(struct vfio_pci_device *vdev)
+{
+   struct vfio_region_dma_fault *header;
+   size_t size;
+   int ret;
+
+   mutex_init(&vdev->fault_queue_lock);
+
+   /*
+* We provision 1 page for the header and space for
+* DMA_FAULT_RING_LENGTH fault records in the ring buffer.
+*/
+   size = ALIGN(sizeof(struct iommu_fault) *
+DMA_FAULT_RING_LENGTH, PAGE_SIZE) + PAGE_SIZE;
+
+   vdev->fault_pages = kzalloc(size, GFP_KERNEL);
+   if (!vdev->fault_pages)
+   return -ENOMEM;
+
+   ret = vfio_pci_register_dev_region(vdev,
+   VFIO_REGION_TYPE_NESTED,
+   VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT,
+   &vfio_pci_dma_fault_regops, size,
+   VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE,
+   vdev->fault_pages);
+   if (ret)
+   goto out;
+
+   header = (struct vfio_region_dma_fault *)vdev->fault_pages;
+   header->entry_size = sizeof(struct iommu_fault);
+   header->nb_entries = DMA_FAULT_RING_LENGTH;
+   header->offset = sizeof(struct vfio_region_dma_fault);
+   return 0;
+out:
+   kfree(vdev->fault_pages);
+   return ret;
+}
+
  static int vfio_pci_enable(struct vfio_pci_device *vdev)
  {
struct pci_dev *pdev = vdev->pdev;
@@ -358,6 +421,10 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
}
}
  
+	ret = vfio_pci_init_dma_fault_region(vdev);

+   if (ret)
+   goto disable_exit;
+
vfio_pci_probe_mmaps(vdev);
  
  	return 0;

@@ -1383,6 +1450,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
  
  	vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);

kfree(vdev->region);
+   kfree(vdev->fault_pages);
mutex_destroy(&vdev->ioeventfds_lock);
  
  	if (!disable_idle_d3)

diff --git a/drivers/vfio/pci/vfio_pci_private.h 
b/drivers/vfio/pci/vfio_pci_private.h
index 8a2c7607d513..a392f50e3a99 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -119,6 +119,8 @@ struct vfio_pci_device {
int ioeventfds_nr;
struct eventfd_ctx  *err_trigger;
struct eventfd_ctx  *req_trigger;
+   u8  *fault_pages;


What's the reason to use 'u8'? It doesn't match the type of header, nor
the type of ring buffer.


+   struct mutexfault_queue_lock;
struct list_headdummy_resources_list;
struct mutexioeventfds_lock;
struct list_headioeventfds_list;
@@ -150,6 +152,14 @@ extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device 
*vdev, char __user *buf,
  extern long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
   uint64_t data, int count, int fd);
  
+struct vfio_pci_fault_abi {

+   u32 entry_size;
+};


This is not used by this patch (and the whole series).


+
+extern size_t vfio_pci_dma_fault_rw(struct vfio_pci_device *vdev,
+   char __user *buf, size_t count,
+   loff_t *ppos, bool iswrite);
+
  extern int vfio_pci_init_perm_bits(void);
  extern void vfio_pci_uninit_perm_bits(void);
  
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio

RE: [PATCH v10 04/11] vfio/pci: Add VFIO_REGION_TYPE_NESTED region type

2020-04-05 Thread Liu, Yi L
Hi Eric,
> From: Auger Eric 
> Sent: Wednesday, April 1, 2020 9:31 PM
> To: Liu, Yi L ; eric.auger@gmail.com; 
> iommu@lists.linux-
> Subject: Re: [PATCH v10 04/11] vfio/pci: Add VFIO_REGION_TYPE_NESTED region
> type
> 
> Hi Yi,
> 
> On 4/1/20 3:18 PM, Liu, Yi L wrote:
> > Hi Eric,
> >
> > Just curious about your plan on this patch, I just heard my colleague
> > would like to reference the functions from this patch in his dsa driver 
> > work.
> 
> Well I intend to respin until somebody tells me it is completely vain or dead 
> follows.
> Joking aside, feel free to embed it in any series it would be beneficial to, 
> just please
> cc me in case code diverges.

got it. Please also cc me in future version. :-)

Regards,
Yi Liu

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v10 04/11] vfio/pci: Add VFIO_REGION_TYPE_NESTED region type

2020-04-01 Thread Auger Eric
Hi Yi,

On 4/1/20 3:18 PM, Liu, Yi L wrote:
> Hi Eric,
> 
> Just curious about your plan on this patch, I just heard my colleague would 
> like
> to reference the functions from this patch in his dsa driver work.

Well I intend to respin until somebody tells me it is completely vain or
dead follows. Joking aside, feel free to embed it in any series it would
be beneficial to, just please cc me in case code diverges.

Thanks

Eric
> 
> Regards,
> Yi Liu
> 
>> From: Eric Auger 
>> Sent: Saturday, March 21, 2020 12:19 AM
>> To: eric.auger@gmail.com; eric.au...@redhat.com; iommu@lists.linux-
>> foundation.org; linux-ker...@vger.kernel.org; k...@vger.kernel.org;
>> kvm...@lists.cs.columbia.edu; j...@8bytes.org; alex.william...@redhat.com;
>> jacob.jun@linux.intel.com; Liu, Yi L ; jean-
>> philippe.bruc...@arm.com; will.dea...@arm.com; robin.mur...@arm.com
>> Cc: marc.zyng...@arm.com; peter.mayd...@linaro.org; zhangfei@gmail.com
>> Subject: [PATCH v10 04/11] vfio/pci: Add VFIO_REGION_TYPE_NESTED region type
>>
>> Add a new specific DMA_FAULT region aiming to exposed nested mode
>> translation faults.
>>
>> The region has a ring buffer that contains the actual fault
>> records plus a header allowing to handle it (tail/head indices,
>> max capacity, entry size). At the moment the region is dimensionned
>> for 512 fault records.
>>
>> Signed-off-by: Eric Auger 
>>
>> ---
>>
>> v8 -> v9:
>> - Use a single region instead of a prod/cons region
>>
>> v4 -> v5
>> - check cons is not null in vfio_pci_check_cons_fault
>>
>> v3 -> v4:
>> - use 2 separate regions, respectively in read and write modes
>> - add the version capability
>> ---
>>  drivers/vfio/pci/vfio_pci.c | 68 +
>>  drivers/vfio/pci/vfio_pci_private.h | 10 +
>>  drivers/vfio/pci/vfio_pci_rdwr.c| 45 +++
>>  include/uapi/linux/vfio.h   | 35 +++
>>  4 files changed, 158 insertions(+)
>>
>> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
>> index 379a02c36e37..586b89debed5 100644
>> --- a/drivers/vfio/pci/vfio_pci.c
>> +++ b/drivers/vfio/pci/vfio_pci.c
>> @@ -260,6 +260,69 @@ int vfio_pci_set_power_state(struct vfio_pci_device 
>> *vdev,
>> pci_power_t state)
>>  return ret;
>>  }
>>
>> +static void vfio_pci_dma_fault_release(struct vfio_pci_device *vdev,
>> +   struct vfio_pci_region *region)
>> +{
>> +}
>> +
>> +static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
>> + struct vfio_pci_region *region,
>> + struct vfio_info_cap *caps)
>> +{
>> +struct vfio_region_info_cap_fault cap = {
>> +.header.id = VFIO_REGION_INFO_CAP_DMA_FAULT,
>> +.header.version = 1,
>> +.version = 1,
>> +};
>> +return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
>> +}
>> +
>> +static const struct vfio_pci_regops vfio_pci_dma_fault_regops = {
>> +.rw = vfio_pci_dma_fault_rw,
>> +.release= vfio_pci_dma_fault_release,
>> +.add_capability = vfio_pci_dma_fault_add_capability,
>> +};
>> +
>> +#define DMA_FAULT_RING_LENGTH 512
>> +
>> +static int vfio_pci_init_dma_fault_region(struct vfio_pci_device *vdev)
>> +{
>> +struct vfio_region_dma_fault *header;
>> +size_t size;
>> +int ret;
>> +
>> +mutex_init(&vdev->fault_queue_lock);
>> +
>> +/*
>> + * We provision 1 page for the header and space for
>> + * DMA_FAULT_RING_LENGTH fault records in the ring buffer.
>> + */
>> +size = ALIGN(sizeof(struct iommu_fault) *
>> + DMA_FAULT_RING_LENGTH, PAGE_SIZE) + PAGE_SIZE;
>> +
>> +vdev->fault_pages = kzalloc(size, GFP_KERNEL);
>> +if (!vdev->fault_pages)
>> +return -ENOMEM;
>> +
>> +ret = vfio_pci_register_dev_region(vdev,
>> +VFIO_REGION_TYPE_NESTED,
>> +VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT,
>> +&vfio_pci_dma_fault_regops, size,
>> +VFIO_REGION_INFO_FLAG_READ |
>> VFIO_REGION_INFO_FLAG_WRITE,
>> +vdev->fault_pages);
>> +if (ret)
>> +goto out;
>> +
>> +header = (struct vfio_region_dma_fault *)vdev->fault_pages;
>> +header->entry

RE: [PATCH v10 04/11] vfio/pci: Add VFIO_REGION_TYPE_NESTED region type

2020-04-01 Thread Liu, Yi L
Hi Eric,

Just curious about your plan on this patch, I just heard my colleague would like
to reference the functions from this patch in his dsa driver work.

Regards,
Yi Liu

> From: Eric Auger 
> Sent: Saturday, March 21, 2020 12:19 AM
> To: eric.auger@gmail.com; eric.au...@redhat.com; iommu@lists.linux-
> foundation.org; linux-ker...@vger.kernel.org; k...@vger.kernel.org;
> kvm...@lists.cs.columbia.edu; j...@8bytes.org; alex.william...@redhat.com;
> jacob.jun@linux.intel.com; Liu, Yi L ; jean-
> philippe.bruc...@arm.com; will.dea...@arm.com; robin.mur...@arm.com
> Cc: marc.zyng...@arm.com; peter.mayd...@linaro.org; zhangfei....@gmail.com
> Subject: [PATCH v10 04/11] vfio/pci: Add VFIO_REGION_TYPE_NESTED region type
> 
> Add a new specific DMA_FAULT region aiming to exposed nested mode
> translation faults.
> 
> The region has a ring buffer that contains the actual fault
> records plus a header allowing to handle it (tail/head indices,
> max capacity, entry size). At the moment the region is dimensionned
> for 512 fault records.
> 
> Signed-off-by: Eric Auger 
> 
> ---
> 
> v8 -> v9:
> - Use a single region instead of a prod/cons region
> 
> v4 -> v5
> - check cons is not null in vfio_pci_check_cons_fault
> 
> v3 -> v4:
> - use 2 separate regions, respectively in read and write modes
> - add the version capability
> ---
>  drivers/vfio/pci/vfio_pci.c | 68 +
>  drivers/vfio/pci/vfio_pci_private.h | 10 +
>  drivers/vfio/pci/vfio_pci_rdwr.c| 45 +++
>  include/uapi/linux/vfio.h   | 35 +++
>  4 files changed, 158 insertions(+)
> 
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index 379a02c36e37..586b89debed5 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -260,6 +260,69 @@ int vfio_pci_set_power_state(struct vfio_pci_device 
> *vdev,
> pci_power_t state)
>   return ret;
>  }
> 
> +static void vfio_pci_dma_fault_release(struct vfio_pci_device *vdev,
> +struct vfio_pci_region *region)
> +{
> +}
> +
> +static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
> +  struct vfio_pci_region *region,
> +  struct vfio_info_cap *caps)
> +{
> + struct vfio_region_info_cap_fault cap = {
> + .header.id = VFIO_REGION_INFO_CAP_DMA_FAULT,
> + .header.version = 1,
> + .version = 1,
> + };
> + return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
> +}
> +
> +static const struct vfio_pci_regops vfio_pci_dma_fault_regops = {
> + .rw = vfio_pci_dma_fault_rw,
> + .release= vfio_pci_dma_fault_release,
> + .add_capability = vfio_pci_dma_fault_add_capability,
> +};
> +
> +#define DMA_FAULT_RING_LENGTH 512
> +
> +static int vfio_pci_init_dma_fault_region(struct vfio_pci_device *vdev)
> +{
> + struct vfio_region_dma_fault *header;
> + size_t size;
> + int ret;
> +
> + mutex_init(&vdev->fault_queue_lock);
> +
> + /*
> +  * We provision 1 page for the header and space for
> +  * DMA_FAULT_RING_LENGTH fault records in the ring buffer.
> +  */
> + size = ALIGN(sizeof(struct iommu_fault) *
> +  DMA_FAULT_RING_LENGTH, PAGE_SIZE) + PAGE_SIZE;
> +
> + vdev->fault_pages = kzalloc(size, GFP_KERNEL);
> + if (!vdev->fault_pages)
> + return -ENOMEM;
> +
> + ret = vfio_pci_register_dev_region(vdev,
> + VFIO_REGION_TYPE_NESTED,
> + VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT,
> + &vfio_pci_dma_fault_regops, size,
> + VFIO_REGION_INFO_FLAG_READ |
> VFIO_REGION_INFO_FLAG_WRITE,
> + vdev->fault_pages);
> + if (ret)
> + goto out;
> +
> + header = (struct vfio_region_dma_fault *)vdev->fault_pages;
> + header->entry_size = sizeof(struct iommu_fault);
> + header->nb_entries = DMA_FAULT_RING_LENGTH;
> + header->offset = sizeof(struct vfio_region_dma_fault);
> + return 0;
> +out:
> + kfree(vdev->fault_pages);
> + return ret;
> +}
> +
>  static int vfio_pci_enable(struct vfio_pci_device *vdev)
>  {
>   struct pci_dev *pdev = vdev->pdev;
> @@ -358,6 +421,10 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
>   }
>   }
> 
> + ret = vfio_pci_init_dma_fault_region(vdev);
> + if (ret)
> + goto disable_exit;
> +
>   vfio_pci_probe_mmaps(vdev);
> 
>

[PATCH v10 04/11] vfio/pci: Add VFIO_REGION_TYPE_NESTED region type

2020-03-20 Thread Eric Auger
Add a new specific DMA_FAULT region aiming to exposed nested mode
translation faults.

The region has a ring buffer that contains the actual fault
records plus a header allowing to handle it (tail/head indices,
max capacity, entry size). At the moment the region is dimensionned
for 512 fault records.

Signed-off-by: Eric Auger 

---

v8 -> v9:
- Use a single region instead of a prod/cons region

v4 -> v5
- check cons is not null in vfio_pci_check_cons_fault

v3 -> v4:
- use 2 separate regions, respectively in read and write modes
- add the version capability
---
 drivers/vfio/pci/vfio_pci.c | 68 +
 drivers/vfio/pci/vfio_pci_private.h | 10 +
 drivers/vfio/pci/vfio_pci_rdwr.c| 45 +++
 include/uapi/linux/vfio.h   | 35 +++
 4 files changed, 158 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 379a02c36e37..586b89debed5 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -260,6 +260,69 @@ int vfio_pci_set_power_state(struct vfio_pci_device *vdev, 
pci_power_t state)
return ret;
 }
 
+static void vfio_pci_dma_fault_release(struct vfio_pci_device *vdev,
+  struct vfio_pci_region *region)
+{
+}
+
+static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
+struct vfio_pci_region *region,
+struct vfio_info_cap *caps)
+{
+   struct vfio_region_info_cap_fault cap = {
+   .header.id = VFIO_REGION_INFO_CAP_DMA_FAULT,
+   .header.version = 1,
+   .version = 1,
+   };
+   return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
+}
+
+static const struct vfio_pci_regops vfio_pci_dma_fault_regops = {
+   .rw = vfio_pci_dma_fault_rw,
+   .release= vfio_pci_dma_fault_release,
+   .add_capability = vfio_pci_dma_fault_add_capability,
+};
+
+#define DMA_FAULT_RING_LENGTH 512
+
+static int vfio_pci_init_dma_fault_region(struct vfio_pci_device *vdev)
+{
+   struct vfio_region_dma_fault *header;
+   size_t size;
+   int ret;
+
+   mutex_init(&vdev->fault_queue_lock);
+
+   /*
+* We provision 1 page for the header and space for
+* DMA_FAULT_RING_LENGTH fault records in the ring buffer.
+*/
+   size = ALIGN(sizeof(struct iommu_fault) *
+DMA_FAULT_RING_LENGTH, PAGE_SIZE) + PAGE_SIZE;
+
+   vdev->fault_pages = kzalloc(size, GFP_KERNEL);
+   if (!vdev->fault_pages)
+   return -ENOMEM;
+
+   ret = vfio_pci_register_dev_region(vdev,
+   VFIO_REGION_TYPE_NESTED,
+   VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT,
+   &vfio_pci_dma_fault_regops, size,
+   VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE,
+   vdev->fault_pages);
+   if (ret)
+   goto out;
+
+   header = (struct vfio_region_dma_fault *)vdev->fault_pages;
+   header->entry_size = sizeof(struct iommu_fault);
+   header->nb_entries = DMA_FAULT_RING_LENGTH;
+   header->offset = sizeof(struct vfio_region_dma_fault);
+   return 0;
+out:
+   kfree(vdev->fault_pages);
+   return ret;
+}
+
 static int vfio_pci_enable(struct vfio_pci_device *vdev)
 {
struct pci_dev *pdev = vdev->pdev;
@@ -358,6 +421,10 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
}
}
 
+   ret = vfio_pci_init_dma_fault_region(vdev);
+   if (ret)
+   goto disable_exit;
+
vfio_pci_probe_mmaps(vdev);
 
return 0;
@@ -1383,6 +1450,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
 
vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
kfree(vdev->region);
+   kfree(vdev->fault_pages);
mutex_destroy(&vdev->ioeventfds_lock);
 
if (!disable_idle_d3)
diff --git a/drivers/vfio/pci/vfio_pci_private.h 
b/drivers/vfio/pci/vfio_pci_private.h
index 8a2c7607d513..a392f50e3a99 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -119,6 +119,8 @@ struct vfio_pci_device {
int ioeventfds_nr;
struct eventfd_ctx  *err_trigger;
struct eventfd_ctx  *req_trigger;
+   u8  *fault_pages;
+   struct mutexfault_queue_lock;
struct list_headdummy_resources_list;
struct mutexioeventfds_lock;
struct list_headioeventfds_list;
@@ -150,6 +152,14 @@ extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device 
*vdev, char __user *buf,
 extern long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
   uint64_t data, int count, int fd);
 
+struct vfio_pci_fault_abi {
+   u32 entry_size;
+};
+
+extern size_t vfio_pci_dma_fault_rw(struct vfio_pci_d