Re: [PATCH RFC v1 03/11] iommu/virtio: Handle incoming page faults

2021-10-11 Thread Vivek Gautam
Hi Jean,


On Tue, Sep 21, 2021 at 9:33 PM Jean-Philippe Brucker
 wrote:
>
> On Fri, Apr 23, 2021 at 03:21:39PM +0530, Vivek Gautam wrote:
> > Redirect incoming page faults to the registered fault handler,
> > which can take fault information such as the PASID, page request
> > group ID, address, and PASID flags.
> >
> > Signed-off-by: Vivek Gautam 
> > ---
> >  drivers/iommu/virtio-iommu.c  | 80 ++-
> >  include/uapi/linux/virtio_iommu.h |  1 +
> >  2 files changed, 80 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
> > index c970f386f031..fd237cad1ce5 100644
> > --- a/drivers/iommu/virtio-iommu.c
> > +++ b/drivers/iommu/virtio-iommu.c
> > @@ -37,6 +37,13 @@
> >  /* Some architectures need an Address Space ID for each page table */
> >  DEFINE_XARRAY_ALLOC1(viommu_asid_xa);
> >
> > +struct viommu_dev_pri_work {
> > + struct work_struct  work;
> > + struct viommu_dev   *dev;
> > + struct virtio_iommu_fault   *vfault;
> > + u32 endpoint;
> > +};
> > +
> >  struct viommu_dev {
> >   struct iommu_device iommu;
> >   struct device   *dev;
> > @@ -49,6 +56,8 @@ struct viommu_dev {
> >   struct list_headrequests;
> >   void*evts;
> >   struct list_headendpoints;
> > + struct workqueue_struct *pri_wq;
> > + struct viommu_dev_pri_work  *pri_work;
>
> IOPF already has a workqueue, so the driver doesn't need one.
> iommu_report_device_fault() should be fast enough to be called from the
> event handler.

Sure, will call iommu_report_device_fault() directly from
viommu_fault_handler().

>
> >
> >   /* Device configuration */
> >   struct iommu_domain_geometrygeometry;
> > @@ -666,6 +675,58 @@ static int viommu_probe_endpoint(struct viommu_dev 
> > *viommu, struct device *dev)
> >   return ret;
> >  }
> >
> > +static void viommu_handle_ppr(struct work_struct *work)
> > +{
> > + struct viommu_dev_pri_work *pwork =
> > + container_of(work, struct 
> > viommu_dev_pri_work, work);
> > + struct viommu_dev *viommu = pwork->dev;
> > + struct virtio_iommu_fault *vfault = pwork->vfault;
> > + struct viommu_endpoint *vdev;
> > + struct viommu_ep_entry *ep;
> > + struct iommu_fault_event fault_evt = {
> > + .fault.type = IOMMU_FAULT_PAGE_REQ,
> > + };
> > + struct iommu_fault_page_request *prq = _evt.fault.prm;
> > +
> > + u32 flags   = le32_to_cpu(vfault->flags);
> > + u32 prq_flags   = le32_to_cpu(vfault->pr_evt_flags);
> > + u32 endpoint= pwork->endpoint;
> > +
> > + memset(prq, 0, sizeof(struct iommu_fault_page_request));
>
> The fault_evt struct is already initialized

Right, I will remove this line.

>
> > + prq->addr = le64_to_cpu(vfault->address);
> > +
> > + if (prq_flags & VIRTIO_IOMMU_FAULT_PRQ_F_LAST_PAGE)
> > + prq->flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> > + if (prq_flags & VIRTIO_IOMMU_FAULT_PRQ_F_PASID_VALID) {
> > + prq->flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> > + prq->pasid = le32_to_cpu(vfault->pasid);
> > + prq->grpid = le32_to_cpu(vfault->grpid);
> > + }
> > +
> > + if (flags & VIRTIO_IOMMU_FAULT_F_READ)
> > + prq->perm |= IOMMU_FAULT_PERM_READ;
> > + if (flags & VIRTIO_IOMMU_FAULT_F_WRITE)
> > + prq->perm |= IOMMU_FAULT_PERM_WRITE;
> > + if (flags & VIRTIO_IOMMU_FAULT_F_EXEC)
> > + prq->perm |= IOMMU_FAULT_PERM_EXEC;
> > + if (flags & VIRTIO_IOMMU_FAULT_F_PRIV)
> > + prq->perm |= IOMMU_FAULT_PERM_PRIV;
> > +
> > + list_for_each_entry(ep, >endpoints, list) {
> > + if (ep->eid == endpoint) {
> > + vdev = ep->vdev;

I have a question here, though:
Is the endpoint ID unique across all the endpoints of a 'viommu_dev', or
only within a 'viommu_domain'?
If it is unique only per 'viommu_domain', then the above list is also
incorrect.
As you pointed to in the patch [1] -
[PATCH RFC v1 02/11] iommu/virtio: Maintain a list of endpoints served
by viommu_dev
I am planning to add endpoint ID into a static global xarray in
viommu_probe_

[PATCH RFC v1 11/11] iommu/virtio: Add support to send page response

2021-04-23 Thread Vivek Gautam
Add the page_response iommu ops callback to send a page response to
the device that generated an I/O page fault.

Signed-off-by: Vivek Gautam 
---
 drivers/iommu/virtio-iommu.c | 47 
 1 file changed, 47 insertions(+)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 08f1294baeab..6d62d9eae452 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -1778,6 +1778,52 @@ static int viommu_of_xlate(struct device *dev, struct 
of_phandle_args *args)
return iommu_fwspec_add_ids(dev, args->args, 1);
 }
 
+static int viommu_page_response(struct device *dev,
+   struct iommu_fault_event *evt,
+   struct iommu_page_response *resp)
+{
+   struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+   struct viommu_domain *vdomain = to_viommu_domain(domain);
+   struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+   struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
+   struct viommu_dev *viommu = vdev->viommu;
+   bool pasid_valid = resp->flags & IOMMU_PAGE_RESP_PASID_VALID;
+   struct virtio_iommu_req_page_resp req = {
+   .head.type  = VIRTIO_IOMMU_T_PAGE_RESP,
+   .domain = cpu_to_le32(vdomain->id),
+   .endpoint   = cpu_to_le32(fwspec->ids[0]),
+   };
+
+   if (vdev->pri_supported) {
+   bool needs_pasid = (evt->fault.prm.flags &
+   IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID);
+
+   req.pasid_valid = needs_pasid && pasid_valid;
+   req.flags   = cpu_to_le32((needs_pasid && pasid_valid) ?
+  VIRTIO_IOMMU_PAGE_RESP_PASID_VALID : 0);
+   req.pasid   = cpu_to_le32(resp->pasid);
+   req.grpid   = cpu_to_le32(resp->grpid);
+
+   switch (resp->code) {
+   case IOMMU_PAGE_RESP_FAILURE:
+   req.resp_code = 
cpu_to_le16(VIRTIO_IOMMU_PAGE_RESP_FAILURE);
+   break;
+   case IOMMU_PAGE_RESP_INVALID:
+   req.resp_code = 
cpu_to_le16(VIRTIO_IOMMU_PAGE_RESP_INVALID);
+   break;
+   case IOMMU_PAGE_RESP_SUCCESS:
+   req.resp_code = 
cpu_to_le16(VIRTIO_IOMMU_PAGE_RESP_SUCCESS);
+   break;
+   default:
+   return -EINVAL;
+   }
+   } else {
+   return -ENODEV;
+   }
+
+   return viommu_send_req_sync(viommu, , sizeof(req));
+}
+
 static u32 viommu_sva_get_pasid(struct iommu_sva *handle)
 {
struct viommu_sva_bond *bond = sva_to_viommu_bond(handle);
@@ -2155,6 +2201,7 @@ static struct iommu_ops viommu_ops = {
.sva_bind   = viommu_sva_bind,
.sva_unbind = viommu_sva_unbind,
.sva_get_pasid  = viommu_sva_get_pasid,
+   .page_response  = viommu_page_response,
 };
 
 static int viommu_init_vqs(struct viommu_dev *viommu)
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RFC v1 10/11] uapi/virtio-iommu: Add a new request type to send page response

2021-04-23 Thread Vivek Gautam
Once the page faults are handled, the response has to be sent to the
virtio-iommu backend, from where it can be forwarded to the host to
prepare the response to the I/O page fault generated by the device.
Add a new virtqueue request type to handle this.

Signed-off-by: Vivek Gautam 
---
 include/uapi/linux/virtio_iommu.h | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/include/uapi/linux/virtio_iommu.h 
b/include/uapi/linux/virtio_iommu.h
index c12d9b6a7243..1b174b98663a 100644
--- a/include/uapi/linux/virtio_iommu.h
+++ b/include/uapi/linux/virtio_iommu.h
@@ -48,6 +48,7 @@ struct virtio_iommu_config {
 #define VIRTIO_IOMMU_T_PROBE   0x05
 #define VIRTIO_IOMMU_T_ATTACH_TABLE0x06
 #define VIRTIO_IOMMU_T_INVALIDATE  0x07
+#define VIRTIO_IOMMU_T_PAGE_RESP   0x08
 
 /* Status types */
 #define VIRTIO_IOMMU_S_OK  0x00
@@ -70,6 +71,23 @@ struct virtio_iommu_req_tail {
__u8reserved[3];
 };
 
+struct virtio_iommu_req_page_resp {
+   struct virtio_iommu_req_headhead;
+   __le32  domain;
+   __le32  endpoint;
+#define VIRTIO_IOMMU_PAGE_RESP_PASID_VALID (1 << 0)
+   __le32  flags;
+   __le32  pasid;
+   __le32  grpid;
+#define VIRTIO_IOMMU_PAGE_RESP_SUCCESS (0x0)
+#define VIRTIO_IOMMU_PAGE_RESP_INVALID (0x1)
+#define VIRTIO_IOMMU_PAGE_RESP_FAILURE (0x2)
+   __le16  resp_code;
+   __u8pasid_valid;
+   __u8reserved[9];
+   struct virtio_iommu_req_tailtail;
+};
+
 struct virtio_iommu_req_attach {
struct virtio_iommu_req_headhead;
__le32  domain;
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RFC v1 09/11] iommu/virtio: Implement sva bind/unbind calls

2021-04-23 Thread Vivek Gautam
The SVA bind and unbind implementations allow preparing a translation
context with CPU page tables that can be programmed into the host IOMMU
hardware, to realize shared address space utilization between the CPU
and virtualized devices using virtio-iommu.

Signed-off-by: Vivek Gautam 
---
 drivers/iommu/virtio-iommu.c  | 199 +-
 include/uapi/linux/virtio_iommu.h |   2 +
 2 files changed, 199 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 250c137a211b..08f1294baeab 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -14,6 +14,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -28,6 +31,7 @@
 #include 
 #include "iommu-pasid-table.h"
 #include "iommu-sva-lib.h"
+#include "io-pgtable-arm.h"
 
 #define MSI_IOVA_BASE  0x800
 #define MSI_IOVA_LENGTH0x10
@@ -41,6 +45,7 @@ DEFINE_XARRAY_ALLOC1(viommu_asid_xa);
 
 static DEFINE_MUTEX(sva_lock);
 static DEFINE_MUTEX(iopf_lock);
+static DEFINE_MUTEX(viommu_asid_lock);
 
 struct viommu_dev_pri_work {
struct work_struct  work;
@@ -88,10 +93,22 @@ struct viommu_mapping {
 struct viommu_mm {
int pasid;
u64 archid;
+   struct viommu_sva_bond  *bond;
struct io_pgtable_ops   *ops;
struct viommu_domain*domain;
 };
 
+struct viommu_sva_bond {
+   struct iommu_svasva;
+   struct mm_struct*mm;
+   struct iommu_psdtable_mmu_notifier  *viommu_mn;
+   struct list_headlist;
+   refcount_t  refs;
+};
+
+#define sva_to_viommu_bond(handle) \
+   container_of(handle, struct viommu_sva_bond, sva)
+
 struct viommu_domain {
struct iommu_domain domain;
struct viommu_dev   *viommu;
@@ -136,6 +153,7 @@ struct viommu_endpoint {
boolpri_supported;
boolsva_enabled;
booliopf_enabled;
+   struct list_headbonds;
 };
 
 struct viommu_ep_entry {
@@ -1423,14 +1441,15 @@ static int viommu_attach_pasid_table(struct 
viommu_endpoint *vdev,
 
pst_cfg->iommu_dev = viommu->dev->parent;
 
+   mutex_lock(_asid_lock);
/* Prepare PASID tables info to allocate a new table */
ret = viommu_prepare_pst(vdev, pst_cfg, fmt);
if (ret)
-   return ret;
+   goto err_out_unlock;
 
ret = iommu_psdtable_alloc(tbl, pst_cfg);
if (ret)
-   return ret;
+   goto err_out_unlock;
 
pst_cfg->iommu_dev = viommu->dev->parent;
pst_cfg->fmt = PASID_TABLE_ARM_SMMU_V3;
@@ -1452,6 +1471,7 @@ static int viommu_attach_pasid_table(struct 
viommu_endpoint *vdev,
if (ret)
goto err_free_ops;
}
+   mutex_unlock(_asid_lock);
} else {
/* TODO: otherwise, check for compatibility with vdev. */
return -ENOSYS;
@@ -1467,6 +1487,8 @@ static int viommu_attach_pasid_table(struct 
viommu_endpoint *vdev,
 err_free_psdtable:
iommu_psdtable_free(tbl, >cfg);
 
+err_out_unlock:
+   mutex_unlock(_asid_lock);
return ret;
 }
 
@@ -1706,6 +1728,7 @@ static struct iommu_device *viommu_probe_device(struct 
device *dev)
vdev->dev = dev;
vdev->viommu = viommu;
INIT_LIST_HEAD(>resv_regions);
+   INIT_LIST_HEAD(>bonds);
dev_iommu_priv_set(dev, vdev);
 
if (viommu->probe_size) {
@@ -1755,6 +1778,175 @@ static int viommu_of_xlate(struct device *dev, struct 
of_phandle_args *args)
return iommu_fwspec_add_ids(dev, args->args, 1);
 }
 
+static u32 viommu_sva_get_pasid(struct iommu_sva *handle)
+{
+   struct viommu_sva_bond *bond = sva_to_viommu_bond(handle);
+
+   return bond->mm->pasid;
+}
+
+static void viommu_mmu_notifier_free(struct mmu_notifier *mn)
+{
+   kfree(mn_to_pstiommu(mn));
+}
+
+static struct mmu_notifier_ops viommu_mmu_notifier_ops = {
+   .free_notifier  = viommu_mmu_notifier_free,
+};
+
+/* Allocate or get existing MMU notifier for this {domain, mm} pair */
+static struct iommu_psdtable_mmu_notifier *
+viommu_mmu_notifier_get(struct viommu_domain *vdomain, struct mm_struct *mm,
+   u32 asid_bits)
+{
+   int ret;
+   struct iommu_psdtable_mmu_notifier *viommu_mn;
+   struct iommu_pasid_table *tbl = vdomain->pasid_tbl;
+
+   list_for_each_entry(viommu_mn, >mmu_notifiers, list) {
+   

[PATCH RFC v1 08/11] iommu/arm-smmu-v3: Implement shared context alloc and free ops

2021-04-23 Thread Vivek Gautam
Implement alloc_shared_cd and free_shared_cd in cd-lib, and
start using them in the arm-smmu-v3-sva implementation.

Signed-off-by: Vivek Gautam 
---
 .../arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c  | 71 
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   | 83 ---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   |  1 -
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   | 14 
 4 files changed, 73 insertions(+), 96 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
index 537b7c784d40..b87829796596 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
@@ -285,16 +285,14 @@ static bool arm_smmu_free_asid(struct xarray *xa, void 
*cookie_cd)
  * descriptor is using it, try to replace it.
  */
 static struct arm_smmu_ctx_desc *
-arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
+arm_smmu_share_asid(struct iommu_pasid_table *tbl, struct mm_struct *mm,
+   struct xarray *xa, u16 asid, u32 asid_bits)
 {
int ret;
u32 new_asid;
struct arm_smmu_ctx_desc *cd;
-   struct arm_smmu_device *smmu;
-   struct arm_smmu_domain *smmu_domain;
-   struct iommu_pasid_table *tbl;
 
-   cd = xa_load(_smmu_asid_xa, asid);
+   cd = xa_load(xa, asid);
if (!cd)
return NULL;
 
@@ -306,12 +304,8 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
return cd;
}
 
-   smmu_domain = container_of(cd, struct arm_smmu_domain, s1_cfg.cd);
-   smmu = smmu_domain->smmu;
-   tbl = smmu_domain->tbl;
-
-   ret = xa_alloc(_smmu_asid_xa, _asid, cd,
-  XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
+   ret = xa_alloc(xa, _asid, cd, XA_LIMIT(1, (1 << asid_bits) - 1),
+  GFP_KERNEL);
if (ret)
return ERR_PTR(-ENOSPC);
/*
@@ -325,48 +319,52 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
 * be some overlap between use of both ASIDs, until we invalidate the
 * TLB.
 */
-   ret = iommu_psdtable_write(tbl, >cfg, 0, cd);
+   ret = arm_smmu_write_ctx_desc(>cfg, 0, cd);
if (ret)
return ERR_PTR(-ENOSYS);
 
/* Invalidate TLB entries previously associated with that context */
-   iommu_psdtable_flush_tlb(tbl, smmu_domain, asid);
+   iommu_psdtable_flush_tlb(tbl, tbl->cookie, asid);
 
-   xa_erase(_smmu_asid_xa, asid);
+   xa_erase(xa, asid);
return NULL;
 }
 
-struct arm_smmu_ctx_desc *
-arm_smmu_alloc_shared_cd(struct iommu_pasid_table *tbl, struct mm_struct *mm)
+static struct iommu_psdtable_mmu_notifier *
+arm_smmu_alloc_shared_cd(struct iommu_pasid_table *tbl, struct mm_struct *mm,
+struct xarray *xa, u32 asid_bits)
 {
u16 asid;
int err = 0;
u64 tcr, par, reg;
struct arm_smmu_ctx_desc *cd;
struct arm_smmu_ctx_desc *ret = NULL;
+   struct iommu_psdtable_mmu_notifier *pst_mn;
 
asid = arm64_mm_context_get(mm);
if (!asid)
return ERR_PTR(-ESRCH);
 
+   pst_mn = kzalloc(sizeof(*pst_mn), GFP_KERNEL);
+   if (!pst_mn) {
+   err = -ENOMEM;
+   goto out_put_context;
+   }
+
cd = kzalloc(sizeof(*cd), GFP_KERNEL);
if (!cd) {
err = -ENOMEM;
-   goto out_put_context;
+   goto out_free_mn;
}
 
refcount_set(>refs, 1);
 
-   mutex_lock(_smmu_asid_lock);
-   ret = arm_smmu_share_asid(mm, asid);
+   ret = arm_smmu_share_asid(tbl, mm, xa, asid, asid_bits);
if (ret) {
-   mutex_unlock(_smmu_asid_lock);
goto out_free_cd;
}
 
-   err = xa_insert(_smmu_asid_xa, asid, cd, GFP_KERNEL);
-   mutex_unlock(_smmu_asid_lock);
-
+   err = xa_insert(xa, asid, cd, GFP_KERNEL);
if (err)
goto out_free_asid;
 
@@ -406,21 +404,26 @@ arm_smmu_alloc_shared_cd(struct iommu_pasid_table *tbl, 
struct mm_struct *mm)
cd->asid = asid;
cd->mm = mm;
 
-   return cd;
+   pst_mn->vendor.cd = cd;
+   return pst_mn;
 
 out_free_asid:
-   iommu_psdtable_free_asid(tbl, _smmu_asid_xa, cd);
+   arm_smmu_free_asid(xa, cd);
 out_free_cd:
kfree(cd);
+out_free_mn:
+   kfree(pst_mn);
 out_put_context:
arm64_mm_context_put(mm);
return err < 0 ? ERR_PTR(err) : ret;
 }
 
-void arm_smmu_free_shared_cd(struct iommu_pasid_table *tbl,
-struct arm_smmu_ctx_desc *cd)
+static void arm_smmu_free_shared_cd(struct iommu_pasid_table *tbl,
+   struct xarray *xa, void *cookie)
 {
-   if (iommu_psdtable_free_asid(tbl, _smmu_asid_xa, cd)) {
+   struct arm_smmu_ctx_desc *cd = cookie;
+
+  

[PATCH RFC v1 07/11] iommu/arm-smmu-v3: Move shared context descriptor code to cd-lib

2021-04-23 Thread Vivek Gautam
In preparation for adding pasid table ops that allocate and free shared
descriptors, pull the alloc_shared_cd and free_shared_cd helpers out of
arm-smmu-v3 and place them in the arm-smmu-v3-cd-lib library driver.

Signed-off-by: Vivek Gautam 
---
 .../arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c  | 149 ++
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   | 147 -
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  12 ++
 drivers/iommu/iommu-pasid-table.h |   1 +
 4 files changed, 162 insertions(+), 147 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
index ea94f57ad261..537b7c784d40 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
@@ -6,9 +6,11 @@
  */
 
 #include 
+#include 
 
 #include "arm-smmu-v3.h"
 #include "../../iommu-pasid-table.h"
+#include "../../io-pgtable-arm.h"
 
 static int arm_smmu_alloc_cd_leaf_table(struct device *dev,
struct arm_smmu_l1_ctx_desc *l1_desc)
@@ -278,6 +280,153 @@ static bool arm_smmu_free_asid(struct xarray *xa, void 
*cookie_cd)
return free;
 }
 
+/*
+ * Check if the CPU ASID is available on the SMMU side. If a private context
+ * descriptor is using it, try to replace it.
+ */
+static struct arm_smmu_ctx_desc *
+arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
+{
+   int ret;
+   u32 new_asid;
+   struct arm_smmu_ctx_desc *cd;
+   struct arm_smmu_device *smmu;
+   struct arm_smmu_domain *smmu_domain;
+   struct iommu_pasid_table *tbl;
+
+   cd = xa_load(_smmu_asid_xa, asid);
+   if (!cd)
+   return NULL;
+
+   if (cd->mm) {
+   if (WARN_ON(cd->mm != mm))
+   return ERR_PTR(-EINVAL);
+   /* All devices bound to this mm use the same cd struct. */
+   refcount_inc(>refs);
+   return cd;
+   }
+
+   smmu_domain = container_of(cd, struct arm_smmu_domain, s1_cfg.cd);
+   smmu = smmu_domain->smmu;
+   tbl = smmu_domain->tbl;
+
+   ret = xa_alloc(_smmu_asid_xa, _asid, cd,
+  XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
+   if (ret)
+   return ERR_PTR(-ENOSPC);
+   /*
+* Race with unmap: TLB invalidations will start targeting the new ASID,
+* which isn't assigned yet. We'll do an invalidate-all on the old ASID
+* later, so it doesn't matter.
+*/
+   cd->asid = new_asid;
+   /*
+* Update ASID and invalidate CD in all associated masters. There will
+* be some overlap between use of both ASIDs, until we invalidate the
+* TLB.
+*/
+   ret = iommu_psdtable_write(tbl, >cfg, 0, cd);
+   if (ret)
+   return ERR_PTR(-ENOSYS);
+
+   /* Invalidate TLB entries previously associated with that context */
+   iommu_psdtable_flush_tlb(tbl, smmu_domain, asid);
+
+   xa_erase(_smmu_asid_xa, asid);
+   return NULL;
+}
+
+struct arm_smmu_ctx_desc *
+arm_smmu_alloc_shared_cd(struct iommu_pasid_table *tbl, struct mm_struct *mm)
+{
+   u16 asid;
+   int err = 0;
+   u64 tcr, par, reg;
+   struct arm_smmu_ctx_desc *cd;
+   struct arm_smmu_ctx_desc *ret = NULL;
+
+   asid = arm64_mm_context_get(mm);
+   if (!asid)
+   return ERR_PTR(-ESRCH);
+
+   cd = kzalloc(sizeof(*cd), GFP_KERNEL);
+   if (!cd) {
+   err = -ENOMEM;
+   goto out_put_context;
+   }
+
+   refcount_set(>refs, 1);
+
+   mutex_lock(_smmu_asid_lock);
+   ret = arm_smmu_share_asid(mm, asid);
+   if (ret) {
+   mutex_unlock(_smmu_asid_lock);
+   goto out_free_cd;
+   }
+
+   err = xa_insert(_smmu_asid_xa, asid, cd, GFP_KERNEL);
+   mutex_unlock(_smmu_asid_lock);
+
+   if (err)
+   goto out_free_asid;
+
+   tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, 64ULL - vabits_actual) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, ARM_LPAE_TCR_RGN_WBWA) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, ARM_LPAE_TCR_RGN_WBWA) |
+ FIELD_PREP(CTXDESC_CD_0_TCR_SH0, ARM_LPAE_TCR_SH_IS) |
+ CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
+
+   switch (PAGE_SIZE) {
+   case SZ_4K:
+   tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_4K);
+   break;
+   case SZ_16K:
+   tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_16K);
+   break;
+   case SZ_64K:
+   tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_64K);
+   break;
+   default:
+   WARN_ON(1);
+   err = -EINVAL;
+   goto out_free_asid;
+   }
+
+   reg = read_sanitised_ftr_reg(SYS_ID_AA64MMF

[PATCH RFC v1 05/11] iommu/virtio: Add SVA feature and related enable/disable callbacks

2021-04-23 Thread Vivek Gautam
Add a feature flag to virtio-iommu for Shared Virtual Addressing
(SVA). This feature indicates the availability of a path for handling
device page faults, and the provision for sending page responses.
Also add the necessary methods to enable and disable SVA so that the
masters can enable the SVA path. This also requires enabling the
PRI capability on the device.

Signed-off-by: Vivek Gautam 
---
 drivers/iommu/virtio-iommu.c  | 268 ++
 include/uapi/linux/virtio_iommu.h |   1 +
 2 files changed, 269 insertions(+)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 3da5f0807711..250c137a211b 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -26,6 +27,7 @@
 
 #include 
 #include "iommu-pasid-table.h"
+#include "iommu-sva-lib.h"
 
 #define MSI_IOVA_BASE  0x800
 #define MSI_IOVA_LENGTH0x10
@@ -37,6 +39,9 @@
 /* Some architectures need an Address Space ID for each page table */
 DEFINE_XARRAY_ALLOC1(viommu_asid_xa);
 
+static DEFINE_MUTEX(sva_lock);
+static DEFINE_MUTEX(iopf_lock);
+
 struct viommu_dev_pri_work {
struct work_struct  work;
struct viommu_dev   *dev;
@@ -71,6 +76,7 @@ struct viommu_dev {
 
boolhas_map:1;
boolhas_table:1;
+   boolhas_sva:1;
 };
 
 struct viommu_mapping {
@@ -124,6 +130,12 @@ struct viommu_endpoint {
void*pstf;
/* Preferred page table format */
void*pgtf;
+
+   /* sva */
+   boolats_supported;
+   boolpri_supported;
+   boolsva_enabled;
+   booliopf_enabled;
 };
 
 struct viommu_ep_entry {
@@ -582,6 +594,64 @@ static int viommu_add_pstf(struct viommu_endpoint *vdev, 
void *pstf, size_t len)
return 0;
 }
 
+static int viommu_init_ats_pri(struct viommu_endpoint *vdev)
+{
+   struct device *dev = vdev->dev;
+   struct pci_dev *pdev = to_pci_dev(dev);
+
+   if (!dev_is_pci(vdev->dev))
+   return -EINVAL;
+
+   if (pci_ats_supported(pdev))
+   vdev->ats_supported = true;
+
+   if (pci_pri_supported(pdev))
+   vdev->pri_supported = true;
+
+   return 0;
+}
+
+static int viommu_enable_pri(struct viommu_endpoint *vdev)
+{
+   int ret;
+   struct pci_dev *pdev;
+
+   /* Let's allow only 4 requests for PRI right now */
+   size_t max_inflight_pprs = 4;
+
+   if (!vdev->pri_supported || !vdev->ats_supported)
+   return -ENODEV;
+
+   pdev = to_pci_dev(vdev->dev);
+
+   ret = pci_reset_pri(pdev);
+   if (ret)
+   return ret;
+
+   ret = pci_enable_pri(pdev, max_inflight_pprs);
+   if (ret) {
+   dev_err(vdev->dev, "cannot enable PRI: %d\n", ret);
+   return ret;
+   }
+
+   return 0;
+}
+
+static void viommu_disable_pri(struct viommu_endpoint *vdev)
+{
+   struct pci_dev *pdev;
+
+   if (!dev_is_pci(vdev->dev))
+   return;
+
+   pdev = to_pci_dev(vdev->dev);
+
+   if (!pdev->pri_enabled)
+   return;
+
+   pci_disable_pri(pdev);
+}
+
 static int viommu_init_queues(struct viommu_dev *viommu)
 {
viommu->iopf_pri = iopf_queue_alloc(dev_name(viommu->dev));
@@ -684,6 +754,10 @@ static int viommu_probe_endpoint(struct viommu_dev 
*viommu, struct device *dev)
if (ret)
goto out_free_eps;
 
+   ret = viommu_init_ats_pri(vdev);
+   if (ret)
+   goto out_free_eps;
+
kfree(probe);
return 0;
 
@@ -1681,6 +1755,194 @@ static int viommu_of_xlate(struct device *dev, struct 
of_phandle_args *args)
return iommu_fwspec_add_ids(dev, args->args, 1);
 }
 
+static bool viommu_endpoint_iopf_supported(struct viommu_endpoint *vdev)
+{
+   /* TODO: support Stall model later */
+   return vdev->pri_supported;
+}
+
+bool viommu_endpoint_sva_supported(struct viommu_endpoint *vdev)
+{
+   struct viommu_dev *viommu = vdev->viommu;
+
+   if (!viommu->has_sva)
+   return false;
+
+   return vdev->pasid_bits;
+}
+
+bool viommu_endpoint_sva_enabled(struct viommu_endpoint *vdev)
+{
+   bool enabled;
+
+   mutex_lock(_lock);
+   enabled = vdev->sva_enabled;
+   mutex_unlock(_lock);
+   return enabled;
+}
+
+static int viommu_endpoint_sva_enable_iopf(struct viommu_endpoint *vdev)
+{
+   int ret;
+   struct device *dev = vdev->dev;
+
+   if (!viommu_endpoint_iopf_supported(vdev))
+   return 0;
+
+   

[PATCH RFC v1 06/11] iommu/pasid-table: Add pasid table ops for shared context management

2021-04-23 Thread Vivek Gautam
Add pasid table ops to allocate and free shared contexts. These helpers
interact using MMU notifiers, so add an MMU notifier implementation
structure to pasid tables as well. This change can help pull the
shared pasid (context-descriptor) implementation out of arm-smmu-v3.

Signed-off-by: Vivek Gautam 
---
 .../arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c  |  1 +
 drivers/iommu/iommu-pasid-table.h | 42 +++
 2 files changed, 43 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
index fa1a6a632559..ea94f57ad261 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
@@ -299,6 +299,7 @@ arm_smmu_register_cd_table(struct device *dev, void *cookie,
tbl->cookie = cookie;
tbl->ops = _cd_table_ops;
tbl->cfg.sync = sync_ops;
+   INIT_LIST_HEAD(>mmu_notifiers);
 
return tbl;
 }
diff --git a/drivers/iommu/iommu-pasid-table.h 
b/drivers/iommu/iommu-pasid-table.h
index 00b1c66e6a9e..4d6590e60f9b 100644
--- a/drivers/iommu/iommu-pasid-table.h
+++ b/drivers/iommu/iommu-pasid-table.h
@@ -8,6 +8,7 @@
 #define __IOMMU_PASID_TABLE_H
 
 #include 
+#include 
 
 #include "arm/arm-smmu-v3/arm-smmu-v3.h"
 
@@ -52,6 +53,21 @@ struct iommu_vendor_psdtable_cfg {
 
 struct iommu_vendor_psdtable_ops;
 
+/* In-line with 'struct arm_smmu_mmu_notifier' */
+struct iommu_psdtable_mmu_notifier {
+   struct mmu_notifier mn;
+   boolcleared;
+   refcount_t  refs;
+   struct list_headlist;
+   /* cookie captures the domain implementation */
+   void*cookie;
+   union {
+   struct arm_smmu_ctx_desc*cd;
+   } vendor;
+};
+#define mn_to_pstiommu(mn) \
+   container_of(mn, struct iommu_psdtable_mmu_notifier, mn)
+
 /**
  * struct iommu_pasid_table - describes a set of PASID tables
  *
@@ -64,6 +80,7 @@ struct iommu_pasid_table {
void*cookie;
struct iommu_vendor_psdtable_cfgcfg;
const struct iommu_vendor_psdtable_ops  *ops;
+   struct list_headmmu_notifiers;
 };
 
 #define pasid_table_cfg_to_table(pst_cfg) \
@@ -77,6 +94,11 @@ struct iommu_vendor_psdtable_ops {
int (*write)(struct iommu_vendor_psdtable_cfg *cfg, int ssid,
 void *cookie);
bool (*free_asid)(struct xarray *xa, void *cookie_cd);
+   struct iommu_psdtable_mmu_notifier *
+   (*alloc_shared)(struct iommu_pasid_table *tbl, struct mm_struct *mm,
+   struct xarray *xa, u32 asid_bits);
+   void (*free_shared)(struct iommu_pasid_table *tbl, struct xarray *xa,
+   void *cookie_cd);
 };
 
 /* CD manipulation ops */
@@ -129,6 +151,26 @@ static inline bool iommu_psdtable_free_asid(struct 
iommu_pasid_table *tbl,
return tbl->ops->free_asid(xa, cookie_cd);
 }
 
+static inline struct iommu_psdtable_mmu_notifier *
+iommu_psdtable_alloc_shared(struct iommu_pasid_table *tbl,
+   struct mm_struct *mm, struct xarray *xa,
+   u32 asid_bits)
+{
+   if (!tbl->ops->alloc_shared)
+   return false;
+
+   return tbl->ops->alloc_shared(tbl, mm, xa, asid_bits);
+}
+
+static inline void iommu_psdtable_free_shared(struct iommu_pasid_table *tbl,
+ struct xarray *xa, void *cookie)
+{
+   if (!tbl->ops->free_shared)
+   return;
+
+   tbl->ops->free_shared(tbl, xa, cookie);
+}
+
 /* Sync ops for CD cfg or tlb */
 static inline void iommu_psdtable_flush(struct iommu_pasid_table *tbl,
void *cookie, int ssid, bool leaf)
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RFC v1 03/11] iommu/virtio: Handle incoming page faults

2021-04-23 Thread Vivek Gautam
Redirect incoming page faults to the registered fault handler,
which can take fault information such as the PASID, page request
group ID, address, and PASID flags.

Signed-off-by: Vivek Gautam 
---
 drivers/iommu/virtio-iommu.c  | 80 ++-
 include/uapi/linux/virtio_iommu.h |  1 +
 2 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index c970f386f031..fd237cad1ce5 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -37,6 +37,13 @@
 /* Some architectures need an Address Space ID for each page table */
 DEFINE_XARRAY_ALLOC1(viommu_asid_xa);
 
+struct viommu_dev_pri_work {
+   struct work_struct  work;
+   struct viommu_dev   *dev;
+   struct virtio_iommu_fault   *vfault;
+   u32 endpoint;
+};
+
 struct viommu_dev {
struct iommu_device iommu;
struct device   *dev;
@@ -49,6 +56,8 @@ struct viommu_dev {
struct list_headrequests;
void*evts;
struct list_headendpoints;
+   struct workqueue_struct *pri_wq;
+   struct viommu_dev_pri_work  *pri_work;
 
/* Device configuration */
struct iommu_domain_geometrygeometry;
@@ -666,6 +675,58 @@ static int viommu_probe_endpoint(struct viommu_dev 
*viommu, struct device *dev)
return ret;
 }
 
+static void viommu_handle_ppr(struct work_struct *work)
+{
+   struct viommu_dev_pri_work *pwork =
+   container_of(work, struct viommu_dev_pri_work, 
work);
+   struct viommu_dev *viommu = pwork->dev;
+   struct virtio_iommu_fault *vfault = pwork->vfault;
+   struct viommu_endpoint *vdev;
+   struct viommu_ep_entry *ep;
+   struct iommu_fault_event fault_evt = {
+   .fault.type = IOMMU_FAULT_PAGE_REQ,
+   };
+   struct iommu_fault_page_request *prq = _evt.fault.prm;
+
+   u32 flags   = le32_to_cpu(vfault->flags);
+   u32 prq_flags   = le32_to_cpu(vfault->pr_evt_flags);
+   u32 endpoint= pwork->endpoint;
+
+   memset(prq, 0, sizeof(struct iommu_fault_page_request));
+   prq->addr = le64_to_cpu(vfault->address);
+
+   if (prq_flags & VIRTIO_IOMMU_FAULT_PRQ_F_LAST_PAGE)
+   prq->flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+   if (prq_flags & VIRTIO_IOMMU_FAULT_PRQ_F_PASID_VALID) {
+   prq->flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+   prq->pasid = le32_to_cpu(vfault->pasid);
+   prq->grpid = le32_to_cpu(vfault->grpid);
+   }
+
+   if (flags & VIRTIO_IOMMU_FAULT_F_READ)
+   prq->perm |= IOMMU_FAULT_PERM_READ;
+   if (flags & VIRTIO_IOMMU_FAULT_F_WRITE)
+   prq->perm |= IOMMU_FAULT_PERM_WRITE;
+   if (flags & VIRTIO_IOMMU_FAULT_F_EXEC)
+   prq->perm |= IOMMU_FAULT_PERM_EXEC;
+   if (flags & VIRTIO_IOMMU_FAULT_F_PRIV)
+   prq->perm |= IOMMU_FAULT_PERM_PRIV;
+
+   list_for_each_entry(ep, >endpoints, list) {
+   if (ep->eid == endpoint) {
+   vdev = ep->vdev;
+   break;
+   }
+   }
+
+   if ((prq_flags & VIRTIO_IOMMU_FAULT_PRQ_F_PASID_VALID) &&
+   (prq_flags & VIRTIO_IOMMU_FAULT_PRQ_F_NEEDS_PASID))
+   prq->flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
+
+   if (iommu_report_device_fault(vdev->dev, _evt))
+   dev_err(vdev->dev, "Couldn't handle page request\n");
+}
+
 static int viommu_fault_handler(struct viommu_dev *viommu,
struct virtio_iommu_fault *fault)
 {
@@ -679,7 +740,13 @@ static int viommu_fault_handler(struct viommu_dev *viommu,
u32 pasid   = le32_to_cpu(fault->pasid);
 
if (type == VIRTIO_IOMMU_FAULT_F_PAGE_REQ) {
-   dev_info(viommu->dev, "Page request fault - unhandled\n");
+   dev_info_ratelimited(viommu->dev,
+"Page request fault from EP %u\n",
+endpoint);
+
+   viommu->pri_work->vfault = fault;
+   viommu->pri_work->endpoint = endpoint;
+   queue_work(viommu->pri_wq, &viommu->pri_work->work);
return 0;
}
 
@@ -1683,6 +1750,17 @@ static int viommu_probe(struct virtio_device *vdev)
goto err_free_vqs;
}
 
+   viommu->pri_work = kzalloc(sizeof(*viommu->pri_work), GFP_KERNEL);
+   if (!viommu->pri_work)
+   return -ENOMEM;
+
+   viommu->pri_work->dev = viommu;
+
+   INIT_WORK(&viommu->pri_work->work, viommu_handle_ppr);
+   viommu->pri_w

[PATCH RFC v1 04/11] iommu/virtio: Add a io page fault queue

2021-04-23 Thread Vivek Gautam
Add an I/O page fault queue to the driver so that it can be used
to redirect faults to the handler.

Signed-off-by: Vivek Gautam 
---
 drivers/iommu/virtio-iommu.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index fd237cad1ce5..3da5f0807711 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -58,6 +58,7 @@ struct viommu_dev {
struct list_headendpoints;
struct workqueue_struct *pri_wq;
struct viommu_dev_pri_work  *pri_work;
+   struct iopf_queue   *iopf_pri;
 
/* Device configuration */
struct iommu_domain_geometrygeometry;
@@ -581,6 +582,15 @@ static int viommu_add_pstf(struct viommu_endpoint *vdev, 
void *pstf, size_t len)
return 0;
 }
 
+static int viommu_init_queues(struct viommu_dev *viommu)
+{
+   viommu->iopf_pri = iopf_queue_alloc(dev_name(viommu->dev));
+   if (!viommu->iopf_pri)
+   return -ENOMEM;
+
+   return 0;
+}
+
 static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
 {
int ret;
@@ -670,6 +680,15 @@ static int viommu_probe_endpoint(struct viommu_dev 
*viommu, struct device *dev)
 
list_add(&ep->list, &viommu->endpoints);
 
+   ret = viommu_init_queues(viommu);
+   if (ret)
+   goto out_free_eps;
+
+   kfree(probe);
+   return 0;
+
+out_free_eps:
+   kfree(ep);
 out_free:
kfree(probe);
return ret;
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RFC v1 02/11] iommu/virtio: Maintain a list of endpoints served by viommu_dev

2021-04-23 Thread Vivek Gautam
Keeping a list of the endpoints that are served by the virtio-iommu
device helps in redirecting page fault requests to the correct
endpoint device for handling.

Signed-off-by: Vivek Gautam 
---
 drivers/iommu/virtio-iommu.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 50039070e2aa..c970f386f031 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -48,6 +48,7 @@ struct viommu_dev {
spinlock_t  request_lock;
struct list_headrequests;
void*evts;
+   struct list_headendpoints;
 
/* Device configuration */
struct iommu_domain_geometrygeometry;
@@ -115,6 +116,12 @@ struct viommu_endpoint {
void*pgtf;
 };
 
+struct viommu_ep_entry {
+   u32 eid;
+   struct viommu_endpoint  *vdev;
+   struct list_headlist;
+};
+
 struct viommu_request {
struct list_headlist;
void*writeback;
@@ -573,6 +580,7 @@ static int viommu_probe_endpoint(struct viommu_dev *viommu, 
struct device *dev)
size_t probe_len;
struct virtio_iommu_req_probe *probe;
struct virtio_iommu_probe_property *prop;
+   struct viommu_ep_entry *ep;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
 
@@ -640,6 +648,18 @@ static int viommu_probe_endpoint(struct viommu_dev 
*viommu, struct device *dev)
prop = (void *)probe->properties + cur;
type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
}
+   if (ret)
+   goto out_free;
+
+   ep = kzalloc(sizeof(*ep), GFP_KERNEL);
+   if (!ep) {
+   ret = -ENOMEM;
+   goto out_free;
+   }
+   ep->eid = probe->endpoint;
+   ep->vdev = vdev;
+
+   list_add(&ep->list, &viommu->endpoints);
 
 out_free:
kfree(probe);
@@ -1649,6 +1669,7 @@ static int viommu_probe(struct virtio_device *vdev)
viommu->dev = dev;
viommu->vdev = vdev;
INIT_LIST_HEAD(&viommu->requests);
+   INIT_LIST_HEAD(&viommu->endpoints);
 
ret = viommu_init_vqs(viommu);
if (ret)
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RFC v1 01/11] uapi/virtio-iommu: Add page request grp-id and flags information

2021-04-23 Thread Vivek Gautam
Add fault information for group-id and necessary flags for page
request faults that can be handled by page fault handler in
virtio-iommu driver.

Signed-off-by: Vivek Gautam 
Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Robin Murphy 
Cc: Jean-Philippe Brucker 
Cc: Eric Auger 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Liu Yi L 
Cc: Lorenzo Pieralisi 
Cc: Shameerali Kolothum Thodi 
---
 include/uapi/linux/virtio_iommu.h | 13 +
 1 file changed, 13 insertions(+)

diff --git a/include/uapi/linux/virtio_iommu.h 
b/include/uapi/linux/virtio_iommu.h
index f8bf927a0689..accc3318ce46 100644
--- a/include/uapi/linux/virtio_iommu.h
+++ b/include/uapi/linux/virtio_iommu.h
@@ -307,14 +307,27 @@ struct virtio_iommu_req_invalidate {
 #define VIRTIO_IOMMU_FAULT_F_DMA_UNRECOV   1
 #define VIRTIO_IOMMU_FAULT_F_PAGE_REQ  2
 
+#define VIRTIO_IOMMU_FAULT_PRQ_F_PASID_VALID   (1 << 0)
+#define VIRTIO_IOMMU_FAULT_PRQ_F_LAST_PAGE (1 << 1)
+#define VIRTIO_IOMMU_FAULT_PRQ_F_PRIV_DATA (1 << 2)
+#define VIRTIO_IOMMU_FAULT_PRQ_F_NEEDS_PASID   (1 << 3)
+
+#define VIRTIO_IOMMU_FAULT_UNREC_F_PASID_VALID (1 << 0)
+#define VIRTIO_IOMMU_FAULT_UNREC_F_ADDR_VALID  (1 << 1)
+#define VIRTIO_IOMMU_FAULT_UNREC_F_FETCH_ADDR_VALID(1 << 2)
+
 struct virtio_iommu_fault {
__u8reason;
__u8reserved[3];
__le16  flt_type;
__u8reserved2[2];
+   /* flags is actually permission flags */
__le32  flags;
+   /* flags for PASID and Page request handling info */
+   __le32  pr_evt_flags;
__le32  endpoint;
__le32  pasid;
+   __le32  grpid;
__u8reserved3[4];
__le64  address;
__u8reserved4[8];
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RFC v1 00/11] iommu/virtio: vSVA support with Arm

2021-04-23 Thread Vivek Gautam
This patch series aims at enabling vSVA (Shared virtual addressing)
support in the virtio-iommu driver for devices assigned to the guest
kernel using PCI passthrough (also known as device assignment). The
proposed changes make the virtio-iommu driver capable of supporting
shared virtual addressing.

The virtio-iommu device serves as the para-virtualized iommu for
IO devices running in the guest. This work is done for PCI devices
that are able to generate Address Translation Service (ATS) and
Page Request Interface (PRI) requests.

With vSVA support now devices running in guest can start using process
address space (Guest virtual address, GVA) for DMA. The below diagram
shows a simple system layout using iommus in guest and host.

 --
 |  virtio-iommu  |
 |driver  |
 |  (front-end)   |
 ||
   Guest kernel
  --

 --
 |  virtio-iommu  |
 |driver  |
 |  (back-end)|
 ||
kvmtool
  --

  -
 |  arm-smmu-v3   |
 |driver  |
 ||
Host kernel
  --

  -
 |  arm-smmu-v3   |
 |hardware|
 ||
   Hardware
  --

The flow of various messages/requests looks like below:
a) The stage-1 page tables are prepared by virtio-iommu driver using
   CPU page table info. This page table data then flow from guest kernel
   to host kernel using VFIO uapi changes [1]. The stage-1 page tables
   are then programmed into the hardware by the arm-smmu-v3 driver.
b) The device can then start initiating DMA transactions using ATS
   request.
c) When using GVA, SMMU encounters a translation fault and responds
   to the device with ATS success - translation failure.
d) The device can then send a PRI request that eventually populates
   the PRIQ of arm-smmu-v3.
e) The page fault info is captured from PRIQ, and sent to the guest
   kernel using VFIO dma fault region as added in [1].
f) The page fault is received on the virt-queue by virtio-iommu driver
   and is then passed to io-page-fault handler. The io page fault
   handler talks to mm fault handling engine in guest kernel and gets
   the CPU page tables updated.
g) The virtio-iommu driver then sends a page_response to the back-end
   virtio-iommu in the vmm. From there this page_response info is passed
   to the host kernel.
h) The arm-smmu-v3 driver running in the host will then use this page
   response info and send a response to the requesting device.

The series use nested page table support [2] as the base to build vSVA
solution.

The changes are inspired from the SVA support in arm-smmu-v3 and are
making use of io-page-fault changes added by Jean Philippe [3].

The changes include:
- The mmu notifier structure, and allocation and freeing up of shared
  context descriptors are moved to arm-smmu-v3-cd-lib library driver,
  and changes have been made to arm-smmu-v3-sva driver to use these
  shared cd alloc/free helpers.
- In virtio-iommu driver:
  - changes have been added to add various iommu_ops to enable/disable
feature, and to perform sva_bind/unbind. 
  - An iopf queue has been added that accepts incoming page faults, and
works with the mm fault handler to make the page resident.
  - Incoming page fault information from vmm is parsed using a work-queue
and passed to the iopf fault handler.
  - A new virt-queue request has been added to send page response back
to the vmm back-end driver. The page response is populated from the
mm fault handler response, and includes information on Page Request
pasid, group-id, and response code, etc.

[1] 
https://lore.kernel.org/linux-iommu/2021044659.15051-1-eric.au...@redhat.com/

https://lore.kernel.org/linux-iommu/2021041228.14386-1-eric.au...@redhat.com/
[2] 
https://lore.kernel.org/linux-iommu/20210115121342.15093-1-vivek.gau...@arm.com/
[3] https://www.spinics.net/lists/arm-kernel/msg886518.html

Vivek Gautam (11):
  uapi/virtio-iommu: Add page request grp-id and flags information
  iommu/virtio: Maintain a list of endpoints served by viommu_dev
  iommu/virtio: Handle incoming page faults
  iommu/virtio: Add a io page fault queue
  iommu/virtio: Add SVA feature and related enable/disable callbacks
  iommu/pasid-table: Add pasid table ops for shared context management

Re: [PATCH 2/2] iommu: arm-smmu-v3: Report domain nesting info reuqired for stage1

2021-03-02 Thread Vivek Gautam
Hi Eric,

On Fri, Feb 12, 2021 at 11:44 PM Auger Eric  wrote:
>
> Hi Vivek,
>
> On 2/12/21 11:58 AM, Vivek Gautam wrote:
> > Update nested domain information required for stage1 page table.
>
> s/reuqired/required in the commit title

Oh! my bad.

> >
> > Signed-off-by: Vivek Gautam 
> > ---
> >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 16 ++--
> >  1 file changed, 14 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
> > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > index c11dd3940583..728018921fae 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > @@ -2555,6 +2555,7 @@ static int arm_smmu_domain_nesting_info(struct 
> > arm_smmu_domain *smmu_domain,
> >   void *data)
> >  {
> >   struct iommu_nesting_info *info = (struct iommu_nesting_info *)data;
> > + struct arm_smmu_device *smmu = smmu_domain->smmu;
> >   unsigned int size;
> >
> >   if (!info || smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
> > @@ -2571,9 +2572,20 @@ static int arm_smmu_domain_nesting_info(struct 
> > arm_smmu_domain *smmu_domain,
> >   return 0;
> >   }
> >
> > - /* report an empty iommu_nesting_info for now */
> > - memset(info, 0x0, size);
> > + /* Update the nesting info as required for stage1 page tables */
> > + info->addr_width = smmu->ias;
> > + info->format = IOMMU_PASID_FORMAT_ARM_SMMU_V3;
> > + info->features = IOMMU_NESTING_FEAT_BIND_PGTBL |
> I understood IOMMU_NESTING_FEAT_BIND_PGTBL advertises the requirement to
> bind tables per PASID, ie. passing iommu_gpasid_bind_data.
> In ARM case I guess you plan to use attach/detach_pasid_table API with
> iommu_pasid_table_config struct. So I understood we should add a new
> feature here.

Right, the idea is to let vfio know that we support pasid table binding, and
I thought we could use the same flag. But clearly that's not the case.
Will add a new feature.

> > +  IOMMU_NESTING_FEAT_PAGE_RESP |
> > +  IOMMU_NESTING_FEAT_CACHE_INVLD;
> > + info->pasid_bits = smmu->ssid_bits;
> > + info->vendor.smmuv3.asid_bits = smmu->asid_bits;
> > + info->vendor.smmuv3.pgtbl_fmt = ARM_64_LPAE_S1;
> > + memset(&info->padding, 0x0, 12);
> > + memset(&info->vendor.smmuv3.padding, 0x0, 9);
> > +
> >   info->argsz = size;
> > +
> spurious new line

Sure, will remove it.

Best regards
Vivek

> >   return 0;
> >  }
> >
> >
>
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/2] iommu: arm-smmu-v3: Report domain nesting info reuqired for stage1

2021-02-12 Thread Vivek Gautam
Update nested domain information required for stage1 page table.

Signed-off-by: Vivek Gautam 
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index c11dd3940583..728018921fae 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2555,6 +2555,7 @@ static int arm_smmu_domain_nesting_info(struct 
arm_smmu_domain *smmu_domain,
void *data)
 {
struct iommu_nesting_info *info = (struct iommu_nesting_info *)data;
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
unsigned int size;
 
if (!info || smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
@@ -2571,9 +2572,20 @@ static int arm_smmu_domain_nesting_info(struct 
arm_smmu_domain *smmu_domain,
return 0;
}
 
-   /* report an empty iommu_nesting_info for now */
-   memset(info, 0x0, size);
+   /* Update the nesting info as required for stage1 page tables */
+   info->addr_width = smmu->ias;
+   info->format = IOMMU_PASID_FORMAT_ARM_SMMU_V3;
+   info->features = IOMMU_NESTING_FEAT_BIND_PGTBL |
+IOMMU_NESTING_FEAT_PAGE_RESP |
+IOMMU_NESTING_FEAT_CACHE_INVLD;
+   info->pasid_bits = smmu->ssid_bits;
+   info->vendor.smmuv3.asid_bits = smmu->asid_bits;
+   info->vendor.smmuv3.pgtbl_fmt = ARM_64_LPAE_S1;
+   memset(&info->padding, 0x0, 12);
+   memset(&info->vendor.smmuv3.padding, 0x0, 9);
+
info->argsz = size;
+
return 0;
 }
 
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/2] iommu: Report domain nesting info for arm-smmu-v3

2021-02-12 Thread Vivek Gautam
Add a vendor specific structure for domain nesting info for
arm smmu-v3, and necessary info fields required to populate
stage1 page tables.

Signed-off-by: Vivek Gautam 
---
 include/uapi/linux/iommu.h | 31 +--
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
index 4d3d988fa353..5f059bcf7720 100644
--- a/include/uapi/linux/iommu.h
+++ b/include/uapi/linux/iommu.h
@@ -323,7 +323,8 @@ struct iommu_gpasid_bind_data {
 #define IOMMU_GPASID_BIND_VERSION_11
__u32 version;
 #define IOMMU_PASID_FORMAT_INTEL_VTD   1
-#define IOMMU_PASID_FORMAT_LAST2
+#define IOMMU_PASID_FORMAT_ARM_SMMU_V3 2
+#define IOMMU_PASID_FORMAT_LAST3
__u32 format;
__u32 addr_width;
 #define IOMMU_SVA_GPASID_VAL   (1 << 0) /* guest PASID valid */
@@ -409,6 +410,21 @@ struct iommu_nesting_info_vtd {
__u64   ecap_reg;
 };
 
+/*
+ * struct iommu_nesting_info_arm_smmuv3 - Arm SMMU-v3 nesting info.
+ */
+struct iommu_nesting_info_arm_smmuv3 {
+   __u32   flags;
+   __u16   asid_bits;
+
+   /* Arm LPAE page table format as per kernel */
+#define ARM_PGTBL_32_LPAE_S1   (0x0)
+#define ARM_PGTBL_64_LPAE_S1   (0x2)
+   __u8pgtbl_fmt;
+
+   __u8padding[9];
+};
+
 /*
  * struct iommu_nesting_info - Information for nesting-capable IOMMU.
  *userspace should check it before using
@@ -445,11 +461,13 @@ struct iommu_nesting_info_vtd {
  * +---+--+
  *
  * data struct types defined for @format:
- * ++=+
- * | @format| data struct |
- * ++=+
- * | IOMMU_PASID_FORMAT_INTEL_VTD   | struct iommu_nesting_info_vtd   |
- * ++-+
+ * ++==+
+ * | @format| data struct  |
+ * ++==+
+ * | IOMMU_PASID_FORMAT_INTEL_VTD   | struct iommu_nesting_info_vtd|
+ * +---+---+
+ * | IOMMU_PASID_FORMAT_ARM_SMMU_V3 | struct iommu_nesting_info_arm_smmuv3 |
+ * ++--+
  *
  */
 struct iommu_nesting_info {
@@ -466,6 +484,7 @@ struct iommu_nesting_info {
/* Vendor specific data */
union {
struct iommu_nesting_info_vtd vtd;
+   struct iommu_nesting_info_arm_smmuv3 smmuv3;
} vendor;
 };
 
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 0/2] Domain nesting info for arm-smmu

2021-02-12 Thread Vivek Gautam
These couple of patches are adding nesting information for arm
and are based on the domain nesting info patches by Yi [1,2,3].

Based on the discussion in the thread [4], sending these out as
I have been using in my tree [5] for nested translation based
on virtio-iommu on Arm reference platforms.

Thanks & regards
Vivek

[1] 
https://lore.kernel.org/kvm/1599734733-6431-2-git-send-email-yi.l@intel.com/
[2] 
https://lore.kernel.org/kvm/1599734733-6431-3-git-send-email-yi.l@intel.com/
[3] 
https://lore.kernel.org/kvm/1599734733-6431-4-git-send-email-yi.l@intel.com/
[4] https://lore.kernel.org/kvm/306e7dd2-9eb2-0ca3-6a93-7c9aa0821...@arm.com/
[5] 
https://github.com/vivek-arm/linux/tree/5.11-rc3-nested-pgtbl-arm-smmuv3-virtio-iommu

Vivek Gautam (2):
  iommu: Report domain nesting info for arm-smmu-v3
  iommu: arm-smmu-v3: Report domain nesting info reuqired for stage1

 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 16 +--
 include/uapi/linux/iommu.h  | 31 +
 2 files changed, 39 insertions(+), 8 deletions(-)

-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 02/16] iommu/smmu: Report empty domain nesting info

2021-02-11 Thread Vivek Gautam
Hi Yi,


On Sat, Jan 23, 2021 at 2:29 PM Liu, Yi L  wrote:
>
> Hi Eric,
>
> > From: Auger Eric 
> > Sent: Tuesday, January 19, 2021 6:03 PM
> >
> > Hi Yi, Vivek,
> >
> [...]
> > > I see. I think there needs a change in the code there. Should also expect
> > > a nesting_info returned instead of an int anymore. @Eric, how about your
> > > opinion?
> > >
> > > domain = iommu_get_domain_for_dev(>pdev->dev);
> > > ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_NESTING,
> > );
> > > if (ret || !(info.features & IOMMU_NESTING_FEAT_PAGE_RESP)) {
> > > /*
> > >  * No need go futher as no page request service support.
> > >  */
> > > return 0;
> > > }
> > Sure I think it is "just" a matter of synchro between the 2 series. Yi,
>
> exactly.
>
> > do you have plans to respin part of
> > [PATCH v7 00/16] vfio: expose virtual Shared Virtual Addressing to VMs
> > or would you allow me to embed this patch in my series.
>
> My v7 hasn’t touch the prq change yet. So I think it's better for you to
> embed it to  your series. ^_^
>

Can you please let me know if you have an updated series of these
patches? It will help me to work with virtio-iommu/arm side changes.

Thanks & regards
Vivek
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[PATCH RFC v1 15/15] iommu/virtio: Update fault type and reason info for viommu fault

2021-01-15 Thread Vivek Gautam
Fault type information can tell about a page request fault or
an unrecoverable fault, and further additions to fault reasons
and the related PASID information can help in handling faults
efficiently.

Signed-off-by: Vivek Gautam 
Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Michael S. Tsirkin 
Cc: Robin Murphy 
Cc: Jean-Philippe Brucker 
Cc: Eric Auger 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Liu Yi L 
Cc: Lorenzo Pieralisi 
Cc: Shameerali Kolothum Thodi 
---
 drivers/iommu/virtio-iommu.c  | 27 +--
 include/uapi/linux/virtio_iommu.h | 13 -
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 9cc3d35125e9..10ef9e98214a 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -652,9 +652,16 @@ static int viommu_fault_handler(struct viommu_dev *viommu,
char *reason_str;
 
u8 reason   = fault->reason;
+   u16 type= fault->flt_type;
u32 flags   = le32_to_cpu(fault->flags);
u32 endpoint= le32_to_cpu(fault->endpoint);
u64 address = le64_to_cpu(fault->address);
+   u32 pasid   = le32_to_cpu(fault->pasid);
+
+   if (type == VIRTIO_IOMMU_FAULT_F_PAGE_REQ) {
+   dev_info(viommu->dev, "Page request fault - unhandled\n");
+   return 0;
+   }
 
switch (reason) {
case VIRTIO_IOMMU_FAULT_R_DOMAIN:
@@ -663,6 +670,21 @@ static int viommu_fault_handler(struct viommu_dev *viommu,
case VIRTIO_IOMMU_FAULT_R_MAPPING:
reason_str = "page";
break;
+   case VIRTIO_IOMMU_FAULT_R_WALK_EABT:
+   reason_str = "page walk external abort";
+   break;
+   case VIRTIO_IOMMU_FAULT_R_PTE_FETCH:
+   reason_str = "pte fetch";
+   break;
+   case VIRTIO_IOMMU_FAULT_R_PERMISSION:
+   reason_str = "permission";
+   break;
+   case VIRTIO_IOMMU_FAULT_R_ACCESS:
+   reason_str = "access";
+   break;
+   case VIRTIO_IOMMU_FAULT_R_OOR_ADDRESS:
+   reason_str = "output address";
+   break;
case VIRTIO_IOMMU_FAULT_R_UNKNOWN:
default:
reason_str = "unknown";
@@ -671,8 +693,9 @@ static int viommu_fault_handler(struct viommu_dev *viommu,
 
/* TODO: find EP by ID and report_iommu_fault */
if (flags & VIRTIO_IOMMU_FAULT_F_ADDRESS)
-   dev_err_ratelimited(viommu->dev, "%s fault from EP %u at %#llx 
[%s%s%s]\n",
-   reason_str, endpoint, address,
+   dev_err_ratelimited(viommu->dev,
+   "%s fault from EP %u PASID %u at %#llx 
[%s%s%s]\n",
+   reason_str, endpoint, pasid, address,
flags & VIRTIO_IOMMU_FAULT_F_READ ? "R" : 
"",
flags & VIRTIO_IOMMU_FAULT_F_WRITE ? "W" : 
"",
flags & VIRTIO_IOMMU_FAULT_F_EXEC ? "X" : 
"");
diff --git a/include/uapi/linux/virtio_iommu.h 
b/include/uapi/linux/virtio_iommu.h
index 608c8d642e1f..a537d82777f7 100644
--- a/include/uapi/linux/virtio_iommu.h
+++ b/include/uapi/linux/virtio_iommu.h
@@ -290,19 +290,30 @@ struct virtio_iommu_req_invalidate {
 #define VIRTIO_IOMMU_FAULT_R_UNKNOWN   0
 #define VIRTIO_IOMMU_FAULT_R_DOMAIN1
 #define VIRTIO_IOMMU_FAULT_R_MAPPING   2
+#define VIRTIO_IOMMU_FAULT_R_WALK_EABT 3
+#define VIRTIO_IOMMU_FAULT_R_PTE_FETCH 4
+#define VIRTIO_IOMMU_FAULT_R_PERMISSION5
+#define VIRTIO_IOMMU_FAULT_R_ACCESS6
+#define VIRTIO_IOMMU_FAULT_R_OOR_ADDRESS   7
 
 #define VIRTIO_IOMMU_FAULT_F_READ  (1 << 0)
 #define VIRTIO_IOMMU_FAULT_F_WRITE (1 << 1)
 #define VIRTIO_IOMMU_FAULT_F_EXEC  (1 << 2)
 #define VIRTIO_IOMMU_FAULT_F_ADDRESS   (1 << 8)
 
+#define VIRTIO_IOMMU_FAULT_F_DMA_UNRECOV   1
+#define VIRTIO_IOMMU_FAULT_F_PAGE_REQ  2
+
 struct virtio_iommu_fault {
__u8reason;
-   __u8reserved[3];
+   __le16  flt_type;
+   __u8reserved;
__le32  flags;
__le32  endpoint;
__u8reserved2[4];
__le64  address;
+   __le32  pasid;
+   __u8reserved3[4];
 };
 
 #endif
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RFC v1 14/15] iommu/virtio: Add support for Arm LPAE page table format

2021-01-15 Thread Vivek Gautam
From: Jean-Philippe Brucker 

When PASID isn't supported, we can still register one set of tables.
Add support to register Arm LPAE based page table.

Signed-off-by: Jean-Philippe Brucker 
[Vivek: Clean-ups to add right tcr definitions and accommodate
with parent patches]
Signed-off-by: Vivek Gautam 
Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Michael S. Tsirkin 
Cc: Robin Murphy 
Cc: Jean-Philippe Brucker 
Cc: Eric Auger 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Liu Yi L 
Cc: Lorenzo Pieralisi 
Cc: Shameerali Kolothum Thodi 
---
 drivers/iommu/virtio-iommu.c  | 131 +-
 include/uapi/linux/virtio_iommu.h |  30 +++
 2 files changed, 139 insertions(+), 22 deletions(-)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index b5222da1dc74..9cc3d35125e9 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -135,6 +135,13 @@ struct viommu_event {
 #define to_viommu_domain(domain)   \
container_of(domain, struct viommu_domain, domain)
 
+#define VIRTIO_FIELD_PREP(_mask, _shift, _val) \
+   ({  \
+   (((_val) << VIRTIO_IOMMU_PGTF_ARM_ ## _shift) & \
+(VIRTIO_IOMMU_PGTF_ARM_ ## _mask <<\
+ VIRTIO_IOMMU_PGTF_ARM_ ## _shift));   \
+   })
+
 static int viommu_get_req_errno(void *buf, size_t len)
 {
struct virtio_iommu_req_tail *tail = buf + len - sizeof(*tail);
@@ -897,6 +904,76 @@ static int viommu_simple_attach(struct viommu_domain 
*vdomain,
return ret;
 }
 
+static int viommu_config_arm_pgt(struct viommu_endpoint *vdev,
+struct io_pgtable_cfg *cfg,
+struct virtio_iommu_req_attach_pgt_arm *req,
+u64 *asid)
+{
+   int id;
+   struct virtio_iommu_probe_table_format *pgtf = (void *)vdev->pgtf;
+   typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr;
+   u64 __tcr;
+
+   if (pgtf->asid_bits != 8 && pgtf->asid_bits != 16)
+   return -EINVAL;
+
+   id = ida_simple_get(&asid_ida, 1, 1 << pgtf->asid_bits, GFP_KERNEL);
+   if (id < 0)
+   return -ENOMEM;
+
+   __tcr = VIRTIO_FIELD_PREP(T0SZ_MASK, T0SZ_SHIFT, tcr->tsz) |
+   VIRTIO_FIELD_PREP(IRGN0_MASK, IRGN0_SHIFT, tcr->irgn) |
+   VIRTIO_FIELD_PREP(ORGN0_MASK, ORGN0_SHIFT, tcr->orgn) |
+   VIRTIO_FIELD_PREP(SH0_MASK, SH0_SHIFT, tcr->sh) |
+   VIRTIO_FIELD_PREP(TG0_MASK, TG0_SHIFT, tcr->tg) |
+   VIRTIO_IOMMU_PGTF_ARM_EPD1 | VIRTIO_IOMMU_PGTF_ARM_HPD0 |
+   VIRTIO_IOMMU_PGTF_ARM_HPD1;
+
+   req->format = cpu_to_le16(VIRTIO_IOMMU_FOMRAT_PGTF_ARM_LPAE);
+   req->ttbr   = cpu_to_le64(cfg->arm_lpae_s1_cfg.ttbr);
+   req->tcr= cpu_to_le64(__tcr);
+   req->mair   = cpu_to_le64(cfg->arm_lpae_s1_cfg.mair);
+   req->asid   = cpu_to_le16(id);
+
+   *asid = id;
+   return 0;
+}
+
+static int viommu_attach_pgtable(struct viommu_endpoint *vdev,
+struct viommu_domain *vdomain,
+enum io_pgtable_fmt fmt,
+struct io_pgtable_cfg *cfg,
+u64 *asid)
+{
+   int ret;
+   int i, eid;
+
+   struct virtio_iommu_req_attach_table req = {
+   .head.type  = VIRTIO_IOMMU_T_ATTACH_TABLE,
+   .domain = cpu_to_le32(vdomain->id),
+   };
+
+   switch (fmt) {
+   case ARM_64_LPAE_S1:
+   ret = viommu_config_arm_pgt(vdev, cfg, (void *)&req, asid);
+   if (ret)
+   return ret;
+   break;
+   default:
+   WARN_ON(1);
+   return -EINVAL;
+   }
+
+   vdev_for_each_id(i, eid, vdev) {
+   req.endpoint = cpu_to_le32(eid);
+   ret = viommu_send_req_sync(vdomain->viommu, &req, sizeof(req));
+   if (ret)
+   return ret;
+   }
+
+   return 0;
+}
+
 static int viommu_teardown_pgtable(struct viommu_domain *vdomain)
 {
struct iommu_vendor_psdtable_cfg *pst_cfg;
@@ -972,32 +1049,42 @@ static int viommu_setup_pgtable(struct viommu_endpoint 
*vdev,
if (!ops)
return -ENOMEM;
 
-   pst_cfg = &tbl->cfg;
-   cfgi = &pst_cfg->vendor.cfg;
-   id = ida_simple_get(&asid_ida, 1, 1 << desc->asid_bits, GFP_KERNEL);
-   if (id < 0) {
-   ret = id;
-   goto err_free_pgtable;
-   }
+   if (!tbl) {
+   /* No PASID support, send attach_table */
+   ret = viommu_attach_pgtable(vdev, vdomain, fmt, &cfg,
+   &vdomain->mm.archid);
+

[PATCH RFC v1 13/15] iommu/virtio: Attach Arm PASID tables when available

2021-01-15 Thread Vivek Gautam
From: Jean-Philippe Brucker 

When the ARM PASID table format is reported in a probe, send an attach
request and install the page tables for iommu_map/iommu_unmap use.
Architecture-specific components are already abstracted to libraries. We
just need to pass config bits around and setup an alternative mechanism to
the mapping tree.

We reuse the convention already adopted by other IOMMU architectures (ARM
SMMU and AMD IOMMU), that entry 0 in the PASID table is reserved for
non-PASID traffic. Bind the PASID table, and setup entry 0 to be modified
with iommu_map/unmap.

Signed-off-by: Jean-Philippe Brucker 
[Vivek: Bunch of refactoring and clean-ups to use iommu-pasid-table APIs,
creating iommu_pasid_table, and configuring based on reported
pasid format. Couple of additional methods have also been created
to configure vendor specific pasid configuration]
Signed-off-by: Vivek Gautam 
Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Michael S. Tsirkin 
Cc: Robin Murphy 
Cc: Jean-Philippe Brucker 
Cc: Eric Auger 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Liu Yi L 
Cc: Lorenzo Pieralisi 
Cc: Shameerali Kolothum Thodi 
---
 drivers/iommu/virtio-iommu.c | 314 +++
 1 file changed, 314 insertions(+)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 004ea94e3731..b5222da1dc74 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -25,6 +25,7 @@
 #include 
 
 #include 
+#include "iommu-pasid-table.h"
 
 #define MSI_IOVA_BASE  0x800
 #define MSI_IOVA_LENGTH0x10
@@ -33,6 +34,9 @@
 #define VIOMMU_EVENT_VQ1
 #define VIOMMU_NR_VQS  2
 
+/* Some architectures need an Address Space ID for each page table */
+static DEFINE_IDA(asid_ida);
+
 struct viommu_dev {
struct iommu_device iommu;
struct device   *dev;
@@ -55,6 +59,7 @@ struct viommu_dev {
u32 probe_size;
 
boolhas_map:1;
+   boolhas_table:1;
 };
 
 struct viommu_mapping {
@@ -76,6 +81,7 @@ struct viommu_domain {
struct mutexmutex; /* protects viommu pointer */
unsigned intid;
u32 map_flags;
+   struct iommu_pasid_table*pasid_tbl;
 
/* Default address space when a table is bound */
struct viommu_mmmm;
@@ -891,6 +897,285 @@ static int viommu_simple_attach(struct viommu_domain 
*vdomain,
return ret;
 }
 
+static int viommu_teardown_pgtable(struct viommu_domain *vdomain)
+{
+   struct iommu_vendor_psdtable_cfg *pst_cfg;
+   struct arm_smmu_cfg_info *cfgi;
+   u32 asid;
+
+   if (!vdomain->mm.ops)
+   return 0;
+
+   free_io_pgtable_ops(vdomain->mm.ops);
+   vdomain->mm.ops = NULL;
+
+   if (vdomain->pasid_tbl) {
+   pst_cfg = &vdomain->pasid_tbl->cfg;
+   cfgi = &pst_cfg->vendor.cfg;
+   asid = cfgi->s1_cfg->cd.asid;
+
+   iommu_psdtable_write(vdomain->pasid_tbl, pst_cfg, 0, NULL);
+   ida_simple_remove(&asid_ida, asid);
+   }
+
+   return 0;
+}
+
+static int viommu_setup_pgtable(struct viommu_endpoint *vdev,
+   struct viommu_domain *vdomain)
+{
+   int ret, id;
+   u32 asid;
+   enum io_pgtable_fmt fmt;
+   struct io_pgtable_ops *ops = NULL;
+   struct viommu_dev *viommu = vdev->viommu;
+   struct virtio_iommu_probe_table_format *desc = vdev->pgtf;
+   struct iommu_pasid_table *tbl = vdomain->pasid_tbl;
+   struct iommu_vendor_psdtable_cfg *pst_cfg;
+   struct arm_smmu_cfg_info *cfgi;
+   struct io_pgtable_cfg cfg = {
+   .iommu_dev  = viommu->dev->parent,
+   .tlb= _flush_ops,
+   .pgsize_bitmap  = vdev->pgsize_mask ? vdev->pgsize_mask :
+ vdomain->domain.pgsize_bitmap,
+   .ias= (vdev->input_end ? ilog2(vdev->input_end) :
+  
ilog2(vdomain->domain.geometry.aperture_end)) + 1,
+   .oas= vdev->output_bits,
+   };
+
+   if (!desc)
+   return -EINVAL;
+
+   if (!vdev->output_bits)
+   return -ENODEV;
+
+   switch (le16_to_cpu(desc->format)) {
+   case VIRTIO_IOMMU_FOMRAT_PGTF_ARM_LPAE:
+   fmt = ARM_64_LPAE_S1;
+   break;
+   default:
+   dev_err(vdev->dev, "unsupported page table format 0x%x\n",
+   le16_to_cpu(desc->format));
+   return -EINVAL;
+   }
+
+   if (vdomain->mm.ops) {
+   /*
+ 

[PATCH RFC v1 12/15] iommu/virtio: Add support for INVALIDATE request

2021-01-15 Thread Vivek Gautam
From: Jean-Philippe Brucker 

Add support for tlb invalidation ops that can send invalidation
requests to back-end virtio-iommu when stage-1 page tables are
supported.

Signed-off-by: Jean-Philippe Brucker 
[Vivek: Refactoring the iommu_flush_ops, and adding only one pasid sync
op that's needed with current iommu-pasid-table infrastructure.
Also updating uapi defines as required by latest changes]
Signed-off-by: Vivek Gautam 
Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Michael S. Tsirkin 
Cc: Robin Murphy 
Cc: Jean-Philippe Brucker 
Cc: Eric Auger 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Liu Yi L 
Cc: Lorenzo Pieralisi 
Cc: Shameerali Kolothum Thodi 
---
 drivers/iommu/virtio-iommu.c | 95 
 1 file changed, 95 insertions(+)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index ae5dfd3f8269..004ea94e3731 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -63,6 +64,8 @@ struct viommu_mapping {
 };
 
 struct viommu_mm {
+   int pasid;
+   u64 archid;
struct io_pgtable_ops   *ops;
struct viommu_domain*domain;
 };
@@ -692,6 +695,98 @@ static void viommu_event_handler(struct virtqueue *vq)
virtqueue_kick(vq);
 }
 
+/* PASID and pgtable APIs */
+
+static void __viommu_flush_pasid_tlb_all(struct viommu_domain *vdomain,
+int pasid, u64 arch_id, int type)
+{
+   struct virtio_iommu_req_invalidate req = {
+   .head.type  = VIRTIO_IOMMU_T_INVALIDATE,
+   .inv_gran   = cpu_to_le32(VIRTIO_IOMMU_INVAL_G_PASID),
+   .flags  = cpu_to_le32(VIRTIO_IOMMU_INVAL_F_PASID),
+   .inv_type   = cpu_to_le32(type),
+
+   .domain = cpu_to_le32(vdomain->id),
+   .pasid  = cpu_to_le32(pasid),
+   .archid = cpu_to_le64(arch_id),
+   };
+
+   if (viommu_send_req_sync(vdomain->viommu, &req, sizeof(req)))
+   pr_debug("could not send invalidate request\n");
+}
+
+static void viommu_flush_tlb_add(struct iommu_iotlb_gather *gather,
+unsigned long iova, size_t granule,
+void *cookie)
+{
+   struct viommu_mm *viommu_mm = cookie;
+   struct viommu_domain *vdomain = viommu_mm->domain;
+   struct iommu_domain *domain = &vdomain->domain;
+
+   iommu_iotlb_gather_add_page(domain, gather, iova, granule);
+}
+
+static void viommu_flush_tlb_walk(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+   struct viommu_mm *viommu_mm = cookie;
+   struct viommu_domain *vdomain = viommu_mm->domain;
+   struct virtio_iommu_req_invalidate req = {
+   .head.type  = VIRTIO_IOMMU_T_INVALIDATE,
+   .inv_gran   = cpu_to_le32(VIRTIO_IOMMU_INVAL_G_VA),
+   .inv_type   = cpu_to_le32(VIRTIO_IOMMU_INV_T_IOTLB),
+   .flags  = cpu_to_le32(VIRTIO_IOMMU_INVAL_F_ARCHID),
+
+   .domain = cpu_to_le32(vdomain->id),
+   .pasid  = cpu_to_le32(viommu_mm->pasid),
+   .archid = cpu_to_le64(viommu_mm->archid),
+   .virt_start = cpu_to_le64(iova),
+   .nr_pages   = cpu_to_le64(size / granule),
+   .granule= ilog2(granule),
+   };
+
+   if (viommu_add_req(vdomain->viommu, &req, sizeof(req)))
+   pr_debug("could not add invalidate request\n");
+}
+
+static void viommu_flush_tlb_all(void *cookie)
+{
+   struct viommu_mm *viommu_mm = cookie;
+
+   if (!viommu_mm->archid)
+   return;
+
+   __viommu_flush_pasid_tlb_all(viommu_mm->domain, viommu_mm->pasid,
+viommu_mm->archid,
+VIRTIO_IOMMU_INV_T_IOTLB);
+}
+
+static struct iommu_flush_ops viommu_flush_ops = {
+   .tlb_flush_all  = viommu_flush_tlb_all,
+   .tlb_flush_walk = viommu_flush_tlb_walk,
+   .tlb_add_page   = viommu_flush_tlb_add,
+};
+
+static void viommu_flush_pasid(void *cookie, int pasid, bool leaf)
+{
+   struct viommu_domain *vdomain = cookie;
+   struct virtio_iommu_req_invalidate req = {
+   .head.type  = VIRTIO_IOMMU_T_INVALIDATE,
+   .inv_gran   = cpu_to_le32(VIRTIO_IOMMU_INVAL_G_PASID),
+   .inv_type   = cpu_to_le32(VIRTIO_IOMMU_INV_T_PASID),
+   .flags  = cpu_to_le32(VIRTIO_IOMMU_INVAL_F_PASID),
+
+   .domain = cpu_to_le32(vdomain->id),
+   .pasid  = cpu_to_le32(pasid),
+   };
+
+ 

[PATCH RFC v1 11/15] iommu/virtio: Add headers for binding pasid table in iommu

2021-01-15 Thread Vivek Gautam
From: Jean-Philippe Brucker 

Add the required UAPI defines for binding pasid tables in virtio-iommu.
This mode allows handing stage-1 page tables over to the guest.

Signed-off-by: Jean-Philippe Brucker 
[Vivek: Refactor to cleanup headers for invalidation]
Signed-off-by: Vivek Gautam 
Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Michael S. Tsirkin 
Cc: Robin Murphy 
Cc: Jean-Philippe Brucker 
Cc: Eric Auger 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Liu Yi L 
Cc: Lorenzo Pieralisi 
Cc: Shameerali Kolothum Thodi 
---
 include/uapi/linux/virtio_iommu.h | 68 +++
 1 file changed, 68 insertions(+)

diff --git a/include/uapi/linux/virtio_iommu.h 
b/include/uapi/linux/virtio_iommu.h
index 8a0624bab4b2..3481e4a3dd24 100644
--- a/include/uapi/linux/virtio_iommu.h
+++ b/include/uapi/linux/virtio_iommu.h
@@ -16,6 +16,7 @@
 #define VIRTIO_IOMMU_F_BYPASS  3
 #define VIRTIO_IOMMU_F_PROBE   4
 #define VIRTIO_IOMMU_F_MMIO5
+#define VIRTIO_IOMMU_F_ATTACH_TABLE6
 
 struct virtio_iommu_range_64 {
__le64  start;
@@ -44,6 +45,8 @@ struct virtio_iommu_config {
 #define VIRTIO_IOMMU_T_MAP 0x03
 #define VIRTIO_IOMMU_T_UNMAP   0x04
 #define VIRTIO_IOMMU_T_PROBE   0x05
+#define VIRTIO_IOMMU_T_ATTACH_TABLE0x06
+#define VIRTIO_IOMMU_T_INVALIDATE  0x07
 
 /* Status types */
 #define VIRTIO_IOMMU_S_OK  0x00
@@ -82,6 +85,37 @@ struct virtio_iommu_req_detach {
struct virtio_iommu_req_tailtail;
 };
 
+struct virtio_iommu_req_attach_table {
+   struct virtio_iommu_req_headhead;
+   __le32  domain;
+   __le32  endpoint;
+   __le16  format;
+   __u8reserved[62];
+   struct virtio_iommu_req_tailtail;
+};
+
+#define VIRTIO_IOMMU_PSTF_ARM_SMMU_V3_LINEAR   0x0
+#define VIRTIO_IOMMU_PSTF_ARM_SMMU_V3_4KL2 0x1
+#define VIRTIO_IOMMU_PSTF_ARM_SMMU_V3_64KL20x2
+
+#define VIRTIO_IOMMU_PSTF_ARM_SMMU_V3_DSS_TERM 0x0
+#define VIRTIO_IOMMU_PSTF_ARM_SMMU_V3_DSS_BYPASS 0x1
+#define VIRTIO_IOMMU_PSTF_ARM_SMMU_V3_DSS_00x2
+
+/* Arm SMMUv3 PASID Table Descriptor */
+struct virtio_iommu_req_attach_pst_arm {
+   struct virtio_iommu_req_headhead;
+   __le32  domain;
+   __le32  endpoint;
+   __le16  format;
+   __u8s1fmt;
+   __u8s1dss;
+   __le64  s1contextptr;
+   __le32  s1cdmax;
+   __u8reserved[48];
+   struct virtio_iommu_req_tailtail;
+};
+
 #define VIRTIO_IOMMU_MAP_F_READ(1 << 0)
 #define VIRTIO_IOMMU_MAP_F_WRITE   (1 << 1)
 #define VIRTIO_IOMMU_MAP_F_MMIO(1 << 2)
@@ -188,6 +222,40 @@ struct virtio_iommu_req_probe {
 */
 };
 
+#define VIRTIO_IOMMU_INVAL_G_DOMAIN(1 << 0)
+#define VIRTIO_IOMMU_INVAL_G_PASID (1 << 1)
+#define VIRTIO_IOMMU_INVAL_G_VA(1 << 2)
+
+#define VIRTIO_IOMMU_INV_T_IOTLB   (1 << 0)
+#define VIRTIO_IOMMU_INV_T_DEV_IOTLB   (1 << 1)
+#define VIRTIO_IOMMU_INV_T_PASID   (1 << 2)
+
+#define VIRTIO_IOMMU_INVAL_F_PASID (1 << 0)
+#define VIRTIO_IOMMU_INVAL_F_ARCHID(1 << 1)
+#define VIRTIO_IOMMU_INVAL_F_LEAF  (1 << 2)
+
+struct virtio_iommu_req_invalidate {
+   struct virtio_iommu_req_headhead;
+   __le16  inv_gran;
+   __le16  inv_type;
+
+   __le16  flags;
+   __u8reserved1[2];
+   __le32  domain;
+
+   __le32  pasid;
+   __u8reserved2[4];
+
+   __le64  archid;
+   __le64  virt_start;
+   __le64  nr_pages;
+
+   /* Page size, in nr of bits, typically 12 for 4k, 21 for 2MB, etc. */
+   __u8granule;
+   __u8reserved3[11];
+   struct virtio_iommu_req_tailtail;
+};
+
 /* Fault types */
 #define VIRTIO_IOMMU_FAULT_R_UNKNOWN   0
 #define VIRTIO_IOMMU_FAULT_R_DOMAIN1
-- 
2.17.1

_

[PATCH RFC v1 10/15] iommu/virtio: Prepare to add attach pasid table infrastructure

2021-01-15 Thread Vivek Gautam
In preparation to add attach pasid table op, separate out the
existing attach request code to a separate method.

Signed-off-by: Vivek Gautam 
Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Michael S. Tsirkin 
Cc: Robin Murphy 
Cc: Jean-Philippe Brucker 
Cc: Eric Auger 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Liu Yi L 
Cc: Lorenzo Pieralisi 
Cc: Shameerali Kolothum Thodi 
---
 drivers/iommu/virtio-iommu.c | 73 +---
 1 file changed, 51 insertions(+), 22 deletions(-)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 12d73321dbf4..ae5dfd3f8269 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -52,6 +52,8 @@ struct viommu_dev {
/* Supported MAP flags */
u32 map_flags;
u32 probe_size;
+
+   boolhas_map:1;
 };
 
 struct viommu_mapping {
@@ -60,6 +62,11 @@ struct viommu_mapping {
u32 flags;
 };
 
+struct viommu_mm {
+   struct io_pgtable_ops   *ops;
+   struct viommu_domain*domain;
+};
+
 struct viommu_domain {
struct iommu_domain domain;
struct viommu_dev   *viommu;
@@ -67,12 +74,20 @@ struct viommu_domain {
unsigned intid;
u32 map_flags;
 
+   /* Default address space when a table is bound */
+   struct viommu_mmmm;
+
+   /* When no table is bound, use generic mappings */
spinlock_t  mappings_lock;
struct rb_root_cached   mappings;
 
unsigned long   nr_endpoints;
 };
 
+#define vdev_for_each_id(i, eid, vdev) \
+   for (i = 0; i < vdev->dev->iommu->fwspec->num_ids &&\
+   ({ eid = vdev->dev->iommu->fwspec->ids[i]; 1; }); i++)
+
 struct viommu_endpoint {
struct device   *dev;
struct viommu_dev   *viommu;
@@ -750,12 +765,40 @@ static void viommu_domain_free(struct iommu_domain 
*domain)
kfree(vdomain);
 }
 
+static int viommu_simple_attach(struct viommu_domain *vdomain,
+   struct viommu_endpoint *vdev)
+{
+   int i, eid, ret;
+   struct virtio_iommu_req_attach req = {
+   .head.type  = VIRTIO_IOMMU_T_ATTACH,
+   .domain = cpu_to_le32(vdomain->id),
+   };
+
+   if (!vdomain->viommu->has_map)
+   return -ENODEV;
+
+   vdev_for_each_id(i, eid, vdev) {
+   req.endpoint = cpu_to_le32(eid);
+
+   ret = viommu_send_req_sync(vdomain->viommu, &req, sizeof(req));
+   if (ret)
+   return ret;
+   }
+
+   if (!vdomain->nr_endpoints) {
+   /*
+* This endpoint is the first to be attached to the domain.
+* Replay existing mappings if any (e.g. SW MSI).
+*/
+   ret = viommu_replay_mappings(vdomain);
+   }
+
+   return ret;
+}
+
 static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
 {
-   int i;
int ret = 0;
-   struct virtio_iommu_req_attach req;
-   struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
struct viommu_domain *vdomain = to_viommu_domain(domain);
 
@@ -790,25 +833,9 @@ static int viommu_attach_dev(struct iommu_domain *domain, 
struct device *dev)
if (vdev->vdomain)
vdev->vdomain->nr_endpoints--;
 
-   req = (struct virtio_iommu_req_attach) {
-   .head.type  = VIRTIO_IOMMU_T_ATTACH,
-   .domain = cpu_to_le32(vdomain->id),
-   };
-
-   for (i = 0; i < fwspec->num_ids; i++) {
-   req.endpoint = cpu_to_le32(fwspec->ids[i]);
-
-   ret = viommu_send_req_sync(vdomain->viommu, &req, sizeof(req));
-   if (ret)
-   return ret;
-   }
-
-   if (!vdomain->nr_endpoints) {
-   /*
-* This endpoint is the first to be attached to the domain.
-* Replay existing mappings (e.g. SW MSI).
-*/
-   ret = viommu_replay_mappings(vdomain);
+   if (!vdomain->mm.ops) {
+   /* If we couldn't bind any table, use the mapping tree */
+   ret = viommu_simple_attach(vdomain, vdev);
if (ret)
return ret;
}
@@ -1142,6 +1169,8 @@ static int viommu_probe(struct virtio_device *vdev)
struct virtio_iommu_config, probe_size,
&viommu->probe_size);
 
+   viommu->has_map = virtio_has_feature(vdev,

[PATCH RFC v1 09/15] iommu/virtio: Update table format probing header

2021-01-15 Thread Vivek Gautam
Add info about asid_bits and additional flags to table format
probing header.

Signed-off-by: Vivek Gautam 
Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Michael S. Tsirkin 
Cc: Robin Murphy 
Cc: Jean-Philippe Brucker 
Cc: Eric Auger 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Liu Yi L 
Cc: Lorenzo Pieralisi 
Cc: Shameerali Kolothum Thodi 
---
 include/uapi/linux/virtio_iommu.h | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/virtio_iommu.h 
b/include/uapi/linux/virtio_iommu.h
index 43821e33e7af..8a0624bab4b2 100644
--- a/include/uapi/linux/virtio_iommu.h
+++ b/include/uapi/linux/virtio_iommu.h
@@ -169,7 +169,10 @@ struct virtio_iommu_probe_pasid_size {
 struct virtio_iommu_probe_table_format {
struct virtio_iommu_probe_property  head;
__le16  format;
-   __u8reserved[2];
+   __le16  asid_bits;
+
+   __le32  flags;
+   __u8reserved[4];
 };
 
 struct virtio_iommu_req_probe {
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RFC v1 08/15] iommu: Add asid_bits to arm smmu-v3 stage1 table info

2021-01-15 Thread Vivek Gautam
asid_bits data is required to prepare stage-1 tables for arm-smmu-v3.

Signed-off-by: Vivek Gautam 
Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Robin Murphy 
Cc: Jean-Philippe Brucker 
Cc: Eric Auger 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Liu Yi L 
Cc: Lorenzo Pieralisi 
Cc: Shameerali Kolothum Thodi 
---
 include/uapi/linux/iommu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
index 082d758dd016..96abbfc7c643 100644
--- a/include/uapi/linux/iommu.h
+++ b/include/uapi/linux/iommu.h
@@ -357,7 +357,7 @@ struct iommu_pasid_smmuv3 {
__u32   version;
__u8s1fmt;
__u8s1dss;
-   __u8padding[2];
+   __u16   asid_bits;
 };
 
 /**
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RFC v1 07/15] iommu/virtio: Add table format probing

2021-01-15 Thread Vivek Gautam
From: Jean-Philippe Brucker 

The device may provide information about hardware tables and additional
capabilities for each device. Parse the new probe fields.

Signed-off-by: Jean-Philippe Brucker 
[Vivek: Refactor to use "struct virtio_iommu_probe_table_format" rather
than separate structures for page table and pasid table format.]
Signed-off-by: Vivek Gautam 
Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Michael S. Tsirkin 
Cc: Robin Murphy 
Cc: Jean-Philippe Brucker 
Cc: Eric Auger 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Liu Yi L 
Cc: Lorenzo Pieralisi 
Cc: Shameerali Kolothum Thodi 
---
 drivers/iommu/virtio-iommu.c | 102 ++-
 1 file changed, 101 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 2bfdd5734844..12d73321dbf4 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -78,6 +78,17 @@ struct viommu_endpoint {
struct viommu_dev   *viommu;
struct viommu_domain*vdomain;
struct list_headresv_regions;
+
+   /* properties of the physical IOMMU */
+   u64 pgsize_mask;
+   u64 input_start;
+   u64 input_end;
+   u8  output_bits;
+   u8  pasid_bits;
+   /* Preferred PASID table format */
+   void*pstf;
+   /* Preferred page table format */
+   void*pgtf;
 };
 
 struct viommu_request {
@@ -457,6 +468,72 @@ static int viommu_add_resv_mem(struct viommu_endpoint 
*vdev,
return 0;
 }
 
+static int viommu_add_pgsize_mask(struct viommu_endpoint *vdev,
+ struct virtio_iommu_probe_page_size_mask 
*prop,
+ size_t len)
+{
+   if (len < sizeof(*prop))
+   return -EINVAL;
+   vdev->pgsize_mask = le64_to_cpu(prop->mask);
+   return 0;
+}
+
+static int viommu_add_input_range(struct viommu_endpoint *vdev,
+ struct virtio_iommu_probe_input_range *prop,
+ size_t len)
+{
+   if (len < sizeof(*prop))
+   return -EINVAL;
+   vdev->input_start   = le64_to_cpu(prop->start);
+   vdev->input_end = le64_to_cpu(prop->end);
+   return 0;
+}
+
+static int viommu_add_output_size(struct viommu_endpoint *vdev,
+ struct virtio_iommu_probe_output_size *prop,
+ size_t len)
+{
+   if (len < sizeof(*prop))
+   return -EINVAL;
+   vdev->output_bits = prop->bits;
+   return 0;
+}
+
+static int viommu_add_pasid_size(struct viommu_endpoint *vdev,
+struct virtio_iommu_probe_pasid_size *prop,
+size_t len)
+{
+   if (len < sizeof(*prop))
+   return -EINVAL;
+   vdev->pasid_bits = prop->bits;
+   return 0;
+}
+
+static int viommu_add_pgtf(struct viommu_endpoint *vdev, void *pgtf, size_t 
len)
+{
+   /* Select the first page table format available */
+   if (len < sizeof(struct virtio_iommu_probe_table_format) || vdev->pgtf)
+   return -EINVAL;
+
+   vdev->pgtf = kmemdup(pgtf, len, GFP_KERNEL);
+   if (!vdev->pgtf)
+   return -ENOMEM;
+
+   return 0;
+}
+
+static int viommu_add_pstf(struct viommu_endpoint *vdev, void *pstf, size_t 
len)
+{
+   if (len < sizeof(struct virtio_iommu_probe_table_format) || vdev->pstf)
+   return -EINVAL;
+
+   vdev->pstf = kmemdup(pstf, len, GFP_KERNEL);
+   if (!vdev->pstf)
+   return -ENOMEM;
+
+   return 0;
+}
+
 static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
 {
int ret;
@@ -493,11 +570,30 @@ static int viommu_probe_endpoint(struct viommu_dev 
*viommu, struct device *dev)
 
while (type != VIRTIO_IOMMU_PROBE_T_NONE &&
   cur < viommu->probe_size) {
+   void *value = prop;
len = le16_to_cpu(prop->length) + sizeof(*prop);
 
switch (type) {
case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
-   ret = viommu_add_resv_mem(vdev, (void *)prop, len);
+   ret = viommu_add_resv_mem(vdev, value, len);
+   break;
+   case VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK:
+   ret = viommu_add_pgsize_mask(vdev, value, len);
+   break;
+   case VIRTIO_IOMMU_PROBE_T_INPUT_RANGE:
+   ret = viommu_add_input_range(vdev, value, len);
+   break;
+   case VIRTIO_IOMMU_PROBE_T_OUTPUT_SIZE:
+   

[PATCH RFC v1 06/15] iommu/virtio: Add headers for table format probing

2021-01-15 Thread Vivek Gautam
From: Jean-Philippe Brucker 

Add required UAPI defines for probing table format for underlying
iommu hardware. The device may provide information about hardware
tables and additional capabilities for each device.
This allows the guest to correctly fabricate stage-1 page tables.

Signed-off-by: Jean-Philippe Brucker 
[Vivek: Use a single "struct virtio_iommu_probe_table_format" rather
than separate structures for page table and pasid table format.
Also update commit message.]
Signed-off-by: Vivek Gautam 
Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Michael S. Tsirkin 
Cc: Robin Murphy 
Cc: Jean-Philippe Brucker 
Cc: Eric Auger 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Liu Yi L 
Cc: Lorenzo Pieralisi 
Cc: Shameerali Kolothum Thodi 
---
 include/uapi/linux/virtio_iommu.h | 44 ++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/virtio_iommu.h 
b/include/uapi/linux/virtio_iommu.h
index 237e36a280cb..43821e33e7af 100644
--- a/include/uapi/linux/virtio_iommu.h
+++ b/include/uapi/linux/virtio_iommu.h
@@ -2,7 +2,7 @@
 /*
  * Virtio-iommu definition v0.12
  *
- * Copyright (C) 2019 Arm Ltd.
+ * Copyright (C) 2019-2021 Arm Ltd.
  */
 #ifndef _UAPI_LINUX_VIRTIO_IOMMU_H
 #define _UAPI_LINUX_VIRTIO_IOMMU_H
@@ -111,6 +111,12 @@ struct virtio_iommu_req_unmap {
 
 #define VIRTIO_IOMMU_PROBE_T_NONE  0
 #define VIRTIO_IOMMU_PROBE_T_RESV_MEM  1
+#define VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK2
+#define VIRTIO_IOMMU_PROBE_T_INPUT_RANGE   3
+#define VIRTIO_IOMMU_PROBE_T_OUTPUT_SIZE   4
+#define VIRTIO_IOMMU_PROBE_T_PASID_SIZE5
+#define VIRTIO_IOMMU_PROBE_T_PAGE_TABLE_FMT6
+#define VIRTIO_IOMMU_PROBE_T_PASID_TABLE_FMT   7
 
 #define VIRTIO_IOMMU_PROBE_T_MASK  0xfff
 
@@ -130,6 +136,42 @@ struct virtio_iommu_probe_resv_mem {
__le64  end;
 };
 
+struct virtio_iommu_probe_page_size_mask {
+   struct virtio_iommu_probe_property  head;
+   __u8reserved[4];
+   __le64  mask;
+};
+
+struct virtio_iommu_probe_input_range {
+   struct virtio_iommu_probe_property  head;
+   __u8reserved[4];
+   __le64  start;
+   __le64  end;
+};
+
+struct virtio_iommu_probe_output_size {
+   struct virtio_iommu_probe_property  head;
+   __u8bits;
+   __u8reserved[3];
+};
+
+struct virtio_iommu_probe_pasid_size {
+   struct virtio_iommu_probe_property  head;
+   __u8bits;
+   __u8reserved[3];
+};
+
+/* Arm LPAE page table format */
+#define VIRTIO_IOMMU_FOMRAT_PGTF_ARM_LPAE  1
+/* Arm smmu-v3 type PASID table format */
+#define VIRTIO_IOMMU_FORMAT_PSTF_ARM_SMMU_V3   2
+
+struct virtio_iommu_probe_table_format {
+   struct virtio_iommu_probe_property  head;
+   __le16  format;
+   __u8reserved[2];
+};
+
 struct virtio_iommu_req_probe {
struct virtio_iommu_req_headhead;
__le32  endpoint;
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RFC v1 05/15] iommu/arm-smmu-v3: Set sync op from consumer driver of cd-lib

2021-01-15 Thread Vivek Gautam
This change allows different consumers of arm-smmu-v3-cd-lib to set
their respective sync op for pasid entries.

Signed-off-by: Vivek Gautam 
Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Robin Murphy 
Cc: Jean-Philippe Brucker 
Cc: Eric Auger 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Liu Yi L 
Cc: Lorenzo Pieralisi 
Cc: Shameerali Kolothum Thodi 
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c | 1 -
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c| 7 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
index ec37476c8d09..acaa09acecdd 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
@@ -265,7 +265,6 @@ struct iommu_vendor_psdtable_ops arm_cd_table_ops = {
.free= arm_smmu_free_cd_tables,
.prepare = arm_smmu_prepare_cd,
.write   = arm_smmu_write_ctx_desc,
-   .sync= arm_smmu_sync_cd,
 };
 
 struct iommu_pasid_table *arm_smmu_register_cd_table(struct device *dev,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 2f86c6ac42b6..0c644be22b4b 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1869,6 +1869,13 @@ static int arm_smmu_domain_finalise_s1(struct 
arm_smmu_domain *smmu_domain,
if (ret)
goto out_free_cd_tables;
 
+   /*
+* Strange to setup an op here?
+* cd-lib is the actual user of sync op, and therefore the platform
+* drivers should assign this sync/maintenance ops as per need.
+*/
+   tbl->ops->sync = arm_smmu_sync_cd;
+
/*
 * Note that this will end up calling arm_smmu_sync_cd() before
 * the master has been added to the devices list for this domain.
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RFC v1 04/15] iommu/arm-smmu-v3: Update CD base address info for user-space

2021-01-15 Thread Vivek Gautam
Update base address information in vendor pasid table info to pass that
to user-space for stage1 table management.

Signed-off-by: Vivek Gautam 
Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Robin Murphy 
Cc: Jean-Philippe Brucker 
Cc: Eric Auger 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Liu Yi L 
Cc: Lorenzo Pieralisi 
Cc: Shameerali Kolothum Thodi 
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
index 8a7187534706..ec37476c8d09 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
@@ -55,6 +55,9 @@ static __le64 *arm_smmu_get_cd_ptr(struct 
iommu_vendor_psdtable_cfg *pst_cfg,
if (arm_smmu_alloc_cd_leaf_table(dev, l1_desc))
return NULL;
 
+   if (s1cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
+   pst_cfg->base = l1_desc->l2ptr_dma;
+
l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
/* An invalid L1CD can be cached */
@@ -211,6 +214,9 @@ static int arm_smmu_alloc_cd_tables(struct 
iommu_vendor_psdtable_cfg *pst_cfg)
goto err_free_l1;
}
 
+   if (s1cfg->s1fmt == STRTAB_STE_0_S1FMT_64K_L2)
+   pst_cfg->base = cdcfg->cdtab_dma;
+
return 0;
 
 err_free_l1:
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RFC v1 03/15] iommu/arm-smmu-v3: Update drivers to work with iommu-pasid-table

2021-01-15 Thread Vivek Gautam
Update arm-smmu-v3 context descriptor (CD) library driver to work
with iommu-pasid-table APIs. These APIs are then used in arm-smmu-v3
drivers to manage CD tables.

Signed-off-by: Vivek Gautam 
Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Robin Murphy 
Cc: Jean-Philippe Brucker 
Cc: Eric Auger 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Liu Yi L 
Cc: Lorenzo Pieralisi 
Cc: Shameerali Kolothum Thodi 
---
 .../arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c  | 127 +-
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   |  16 ++-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   |  47 ---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |   7 +-
 drivers/iommu/iommu-pasid-table.h |  10 +-
 5 files changed, 144 insertions(+), 63 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
index 97d1786a8a70..8a7187534706 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
@@ -8,17 +8,17 @@
 #include 
 
 #include "arm-smmu-v3.h"
+#include "../../iommu-pasid-table.h"
 
-static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
+static int arm_smmu_alloc_cd_leaf_table(struct device *dev,
struct arm_smmu_l1_ctx_desc *l1_desc)
 {
size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
 
-   l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
+   l1_desc->l2ptr = dmam_alloc_coherent(dev, size,
 &l1_desc->l2ptr_dma, GFP_KERNEL);
if (!l1_desc->l2ptr) {
-   dev_warn(smmu->dev,
-"failed to allocate context descriptor table\n");
+   dev_warn(dev, "failed to allocate context descriptor table\n");
return -ENOMEM;
}
return 0;
@@ -34,35 +34,39 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst,
WRITE_ONCE(*dst, cpu_to_le64(val));
 }
 
-static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
+static __le64 *arm_smmu_get_cd_ptr(struct iommu_vendor_psdtable_cfg *pst_cfg,
   u32 ssid)
 {
__le64 *l1ptr;
unsigned int idx;
+   struct device *dev = pst_cfg->iommu_dev;
+   struct arm_smmu_cfg_info *cfgi = &pst_cfg->vendor.cfg;
+   struct arm_smmu_s1_cfg *s1cfg = cfgi->s1_cfg;
+   struct arm_smmu_ctx_desc_cfg *cdcfg = &s1cfg->cdcfg;
struct arm_smmu_l1_ctx_desc *l1_desc;
-   struct arm_smmu_device *smmu = smmu_domain->smmu;
-   struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
+   struct iommu_pasid_table *tbl = pasid_table_cfg_to_table(pst_cfg);
 
-   if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
+   if (s1cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
 
idx = ssid >> CTXDESC_SPLIT;
l1_desc = &cdcfg->l1_desc[idx];
if (!l1_desc->l2ptr) {
-   if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
+   if (arm_smmu_alloc_cd_leaf_table(dev, l1_desc))
return NULL;
 
l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
/* An invalid L1CD can be cached */
-   arm_smmu_sync_cd(smmu_domain, ssid, false);
+   if (iommu_psdtable_sync(tbl, tbl->cookie, ssid, false))
+   return NULL;
}
idx = ssid & (CTXDESC_L2_ENTRIES - 1);
return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
 }
 
-int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
-   struct arm_smmu_ctx_desc *cd)
+static int arm_smmu_write_ctx_desc(struct iommu_vendor_psdtable_cfg *pst_cfg,
+  int ssid, void *cookie)
 {
/*
 * This function handles the following cases:
@@ -78,12 +82,15 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain 
*smmu_domain, int ssid,
u64 val;
bool cd_live;
__le64 *cdptr;
-   struct arm_smmu_device *smmu = smmu_domain->smmu;
+   struct arm_smmu_cfg_info *cfgi = &pst_cfg->vendor.cfg;
+   struct arm_smmu_s1_cfg *s1cfg = cfgi->s1_cfg;
+   struct iommu_pasid_table *tbl = pasid_table_cfg_to_table(pst_cfg);
+   struct arm_smmu_ctx_desc *cd = cookie;
 
-   if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
+   if (WARN_ON(ssid >= (1 << s1cfg->s1cdmax)))
return -E2BIG;
 
-   cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
+   cdptr = arm_smmu_get_cd_ptr(pst_cfg, ssid);
if (!cdptr)
return -ENOMEM;
 
@@ -111,7 +118,8 @@ int arm_smmu_write_ctx_desc

[PATCH RFC v1 02/15] iommu: Add a simple PASID table library

2021-01-15 Thread Vivek Gautam
Add a small API in iommu subsystem to handle PASID table allocation
requests from different consumer drivers, such as a paravirtualized
iommu driver. The API provides ops for allocating and freeing PASID
table, writing to it and managing the table caches.

This library also provides for registering a vendor API that attaches
to these ops. The vendor APIs would eventually perform arch level
implementations for these PASID tables.

Signed-off-by: Vivek Gautam 
Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Robin Murphy 
Cc: Jean-Philippe Brucker 
Cc: Eric Auger 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Liu Yi L 
Cc: Lorenzo Pieralisi 
Cc: Shameerali Kolothum Thodi 
---
 drivers/iommu/iommu-pasid-table.h | 134 ++
 1 file changed, 134 insertions(+)
 create mode 100644 drivers/iommu/iommu-pasid-table.h

diff --git a/drivers/iommu/iommu-pasid-table.h 
b/drivers/iommu/iommu-pasid-table.h
new file mode 100644
index ..bd4f57656f67
--- /dev/null
+++ b/drivers/iommu/iommu-pasid-table.h
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PASID table management for the IOMMU
+ *
+ * Copyright (C) 2021 Arm Ltd.
+ */
+#ifndef __IOMMU_PASID_TABLE_H
+#define __IOMMU_PASID_TABLE_H
+
+#include 
+
+#include "arm/arm-smmu-v3/arm-smmu-v3.h"
+
+enum pasid_table_fmt {
+   PASID_TABLE_ARM_SMMU_V3,
+   PASID_TABLE_NUM_FMTS,
+};
+
+/**
+ * struct arm_smmu_cfg_info - arm-smmu-v3 specific configuration data
+ *
+ * @s1_cfg: arm-smmu-v3 stage1 config data
+ * @feat_flag: features supported by arm-smmu-v3 implementation
+ */
+struct arm_smmu_cfg_info {
+   struct arm_smmu_s1_cfg  *s1_cfg;
+   u32 feat_flag;
+};
+
+/**
+ * struct iommu_vendor_psdtable_cfg - Configuration data for PASID tables
+ *
+ * @iommu_dev: device performing the DMA table walks
+ * @fmt: The PASID table format
+ * @base: DMA address of the allocated table, set by the vendor driver
+ * @cfg: arm-smmu-v3 specific config data
+ */
+struct iommu_vendor_psdtable_cfg {
+   struct device   *iommu_dev;
+   enum pasid_table_fmtfmt;
+   dma_addr_t  base;
+   union {
+   struct arm_smmu_cfg_infocfg;
+   } vendor;
+};
+
+struct iommu_vendor_psdtable_ops;
+
+/**
+ * struct iommu_pasid_table - describes a set of PASID tables
+ *
+ * @cookie: An opaque token provided by the IOMMU driver and passed back to any
+ * callback routine.
+ * @cfg: A copy of the PASID table configuration
+ * @ops: The PASID table operations in use for this set of page tables
+ */
+struct iommu_pasid_table {
+   void*cookie;
+   struct iommu_vendor_psdtable_cfgcfg;
+   struct iommu_vendor_psdtable_ops*ops;
+};
+
+#define pasid_table_cfg_to_table(pst_cfg) \
+   container_of((pst_cfg), struct iommu_pasid_table, cfg)
+
+struct iommu_vendor_psdtable_ops {
+   int (*alloc)(struct iommu_vendor_psdtable_cfg *cfg);
+   void (*free)(struct iommu_vendor_psdtable_cfg *cfg);
+   void (*prepare)(struct iommu_vendor_psdtable_cfg *cfg,
+   struct io_pgtable_cfg *pgtbl_cfg, u32 asid);
+   int (*write)(struct iommu_vendor_psdtable_cfg *cfg, int ssid,
+void *cookie);
+   void (*sync)(void *cookie, int ssid, bool leaf);
+};
+
+static inline int iommu_psdtable_alloc(struct iommu_pasid_table *tbl,
+  struct iommu_vendor_psdtable_cfg *cfg)
+{
+   if (!tbl->ops->alloc)
+   return -ENOSYS;
+
+   return tbl->ops->alloc(cfg);
+}
+
+static inline void iommu_psdtable_free(struct iommu_pasid_table *tbl,
+  struct iommu_vendor_psdtable_cfg *cfg)
+{
+   if (!tbl->ops->free)
+   return;
+
+   tbl->ops->free(cfg);
+}
+
+static inline int iommu_psdtable_prepare(struct iommu_pasid_table *tbl,
+struct iommu_vendor_psdtable_cfg *cfg,
+struct io_pgtable_cfg *pgtbl_cfg,
+u32 asid)
+{
+   if (!tbl->ops->prepare)
+   return -ENOSYS;
+
+   tbl->ops->prepare(cfg, pgtbl_cfg, asid);
+   return 0;
+}
+
+static inline int iommu_psdtable_write(struct iommu_pasid_table *tbl,
+  struct iommu_vendor_psdtable_cfg *cfg,
+  int ssid, void *cookie)
+{
+   if (!tbl->ops->write)
+   return -ENOSYS;
+
+   return tbl->ops->write(cfg, ssid, cookie);
+}
+
+static inline int iommu_psdtable_sync(struct iommu_pasid_table *tbl,
+ void *cookie, int ssid, bool leaf)
+{
+   if (!tbl->ops->sync)
+   return -ENOSYS;
+
+   tbl->ops->sync(cookie, ssid, leaf);
+   return 0;
+}
+
+/* A placeholder to register 

[PATCH RFC v1 01/15] iommu/arm-smmu-v3: Create a Context Descriptor library

2021-01-15 Thread Vivek Gautam
Para-virtualized iommu drivers in the guest may need to create and manage
context descriptor (CD) tables as part of PASID table allocations.
The PASID tables are passed to the host to configure stage-1 tables in
hardware.
Make way for a library driver for CD management so that para-virtualized
iommu drivers can call such code.

Signed-off-by: Vivek Gautam 
Cc: Joerg Roedel 
Cc: Will Deacon 
Cc: Robin Murphy 
Cc: Jean-Philippe Brucker 
Cc: Eric Auger 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Jacob Pan 
Cc: Liu Yi L 
Cc: Lorenzo Pieralisi 
Cc: Shameerali Kolothum Thodi 
---
 drivers/iommu/arm/arm-smmu-v3/Makefile|   2 +-
 .../arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c  | 223 ++
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 216 +
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |   3 +
 4 files changed, 228 insertions(+), 216 deletions(-)
 create mode 100644 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c

diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile 
b/drivers/iommu/arm/arm-smmu-v3/Makefile
index 54feb1ecccad..ca1a05b8b8ad 100644
--- a/drivers/iommu/arm/arm-smmu-v3/Makefile
+++ b/drivers/iommu/arm/arm-smmu-v3/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o
-arm_smmu_v3-objs-y += arm-smmu-v3.o
+arm_smmu_v3-objs-y += arm-smmu-v3.o arm-smmu-v3-cd-lib.o
 arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o
 arm_smmu_v3-objs := $(arm_smmu_v3-objs-y)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
new file mode 100644
index 000000000000..97d1786a8a70
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arm-smmu-v3 context descriptor handling library driver
+ *
+ * Copyright (C) 2021 Arm Ltd.
+ */
+
+#include 
+
+#include "arm-smmu-v3.h"
+
+static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
+   struct arm_smmu_l1_ctx_desc *l1_desc)
+{
+   size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
+
+   l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
+                                        &l1_desc->l2ptr_dma, GFP_KERNEL);
+   if (!l1_desc->l2ptr) {
+   dev_warn(smmu->dev,
+"failed to allocate context descriptor table\n");
+   return -ENOMEM;
+   }
+   return 0;
+}
+
+static void arm_smmu_write_cd_l1_desc(__le64 *dst,
+ struct arm_smmu_l1_ctx_desc *l1_desc)
+{
+   u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
+ CTXDESC_L1_DESC_V;
+
+   /* See comment in arm_smmu_write_ctx_desc() */
+   WRITE_ONCE(*dst, cpu_to_le64(val));
+}
+
+static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
+  u32 ssid)
+{
+   __le64 *l1ptr;
+   unsigned int idx;
+   struct arm_smmu_l1_ctx_desc *l1_desc;
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+   struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
+
+   if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
+   return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
+
+   idx = ssid >> CTXDESC_SPLIT;
+   l1_desc = &cdcfg->l1_desc[idx];
+   if (!l1_desc->l2ptr) {
+   if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
+   return NULL;
+
+   l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
+   arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
+   /* An invalid L1CD can be cached */
+   arm_smmu_sync_cd(smmu_domain, ssid, false);
+   }
+   idx = ssid & (CTXDESC_L2_ENTRIES - 1);
+   return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
+}
+
+int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
+   struct arm_smmu_ctx_desc *cd)
+{
+   /*
+* This function handles the following cases:
+*
+* (1) Install primary CD, for normal DMA traffic (SSID = 0).
+* (2) Install a secondary CD, for SID+SSID traffic.
+* (3) Update ASID of a CD. Atomically write the first 64 bits of the
+* CD, then invalidate the old entry and mappings.
+* (4) Quiesce the context without clearing the valid bit. Disable
+* translation, and ignore any translation fault.
+* (5) Remove a secondary CD.
+*/
+   u64 val;
+   bool cd_live;
+   __le64 *cdptr;
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+
+   if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
+   return -E2BIG;
+
+   cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
+   if (!cdptr)
+   return -ENOMEM;
+
+   va

[PATCH RFC v1 00/15] iommu/virtio: Nested stage support with Arm

2021-01-15 Thread Vivek Gautam
This patch-series aims at enabling Nested stage translation in guests
using virtio-iommu as the paravirtualized iommu. The backend is supported
with Arm SMMU-v3 that provides nested stage-1 and stage-2 translation.

This series derives its purpose from various efforts happening to add
support for Shared Virtual Addressing (SVA) in host and guest. On Arm,
most of the support for SVA has already landed. The support for nested
stage translation and fault reporting to guest has been proposed [1].
The related changes required in VFIO [2] framework have also been put
forward.

This series proposes changes in virtio-iommu to program PASID tables
and related stage-1 page tables. A simple iommu-pasid-table library
is added for this purpose that interacts with vendor drivers to
allocate and populate PASID tables.
In Arm SMMUv3 we propose to pull the Context Descriptor (CD) management
code out of the arm-smmu-v3 driver and add that as a glue vendor layer
to support allocating CD tables, and populating them with right values.
These CD tables are essentially the PASID tables and contain stage-1
page table configurations too.
A request to set up these CD tables comes from the virtio-iommu driver
using the iommu-pasid-table library when running on Arm. The virtio-iommu
driver then passes these PASID tables to the host using the right virtio
backend and support in the VMM.

For testing we have added necessary support in kvmtool. The changes in
kvmtool are based on virtio-iommu development branch by Jean-Philippe
Brucker [3].

The tested kernel branch contains the following, in order from bottom to
top of the git history -
a) v5.11-rc3
b) arm-smmu-v3 [1] and vfio [2] changes from Eric to add nested page
   table support for Arm.
c) Smmu test engine patches from Jean-Philippe's branch [4]
d) This series
e) Domain nesting info patches [5][6][7].
f) Changes to add arm-smmu-v3 specific nesting info (to be sent to
   the list).

This kernel is tested on Neoverse reference software stack with
Fixed virtual platform. Public version of the software stack and
FVP is available here[8][9].

A big thanks to Jean-Philippe for his contributions towards this work
and for his valuable guidance.

[1] 
https://lore.kernel.org/linux-iommu/20201118112151.25412-1-eric.au...@redhat.com/T/
[2] 
https://lore.kernel.org/kvmarm/20201116110030.32335-12-eric.au...@redhat.com/T/
[3] https://jpbrucker.net/git/kvmtool/log/?h=virtio-iommu/devel
[4] https://jpbrucker.net/git/linux/log/?h=sva/smmute
[5] 
https://lore.kernel.org/kvm/1599734733-6431-2-git-send-email-yi.l@intel.com/
[6] 
https://lore.kernel.org/kvm/1599734733-6431-3-git-send-email-yi.l@intel.com/
[7] 
https://lore.kernel.org/kvm/1599734733-6431-4-git-send-email-yi.l@intel.com/
[8] 
https://developer.arm.com/tools-and-software/open-source-software/arm-platforms-software/arm-ecosystem-fvps
[9] 
https://git.linaro.org/landing-teams/working/arm/arm-reference-platforms.git/about/docs/rdn1edge/user-guide.rst

Jean-Philippe Brucker (6):
  iommu/virtio: Add headers for table format probing
  iommu/virtio: Add table format probing
  iommu/virtio: Add headers for binding pasid table in iommu
  iommu/virtio: Add support for INVALIDATE request
  iommu/virtio: Attach Arm PASID tables when available
  iommu/virtio: Add support for Arm LPAE page table format

Vivek Gautam (9):
  iommu/arm-smmu-v3: Create a Context Descriptor library
  iommu: Add a simple PASID table library
  iommu/arm-smmu-v3: Update drivers to work with iommu-pasid-table
  iommu/arm-smmu-v3: Update CD base address info for user-space
  iommu/arm-smmu-v3: Set sync op from consumer driver of cd-lib
  iommu: Add asid_bits to arm smmu-v3 stage1 table info
  iommu/virtio: Update table format probing header
  iommu/virtio: Prepare to add attach pasid table infrastructure
  iommu/virtio: Update fault type and reason info for viommu fault

 drivers/iommu/arm/arm-smmu-v3/Makefile|   2 +-
 .../arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c  | 283 +++
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   |  16 +-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 268 +--
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |   4 +-
 drivers/iommu/iommu-pasid-table.h | 140 
 drivers/iommu/virtio-iommu.c  | 692 +-
 include/uapi/linux/iommu.h|   2 +-
 include/uapi/linux/virtio_iommu.h | 158 +++-
 9 files changed, 1303 insertions(+), 262 deletions(-)
 create mode 100644 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-cd-lib.c
 create mode 100644 drivers/iommu/iommu-pasid-table.h

-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 02/16] iommu/smmu: Report empty domain nesting info

2021-01-12 Thread Vivek Gautam
Hi Yi,


On Tue, Jan 12, 2021 at 2:51 PM Liu, Yi L  wrote:
>
> Hi Vivek,
>
> > From: Vivek Gautam 
> > Sent: Tuesday, January 12, 2021 2:50 PM
> >
> > Hi Yi,
> >
> >
> > On Thu, Sep 10, 2020 at 4:13 PM Liu Yi L  wrote:
> > >
> > > This patch is added as instead of returning a boolean for
> > DOMAIN_ATTR_NESTING,
> > > iommu_domain_get_attr() should return an iommu_nesting_info handle.
> > For
> > > now, return an empty nesting info struct for now as true nesting is not
> > > yet supported by the SMMUs.
> > >
> > > Cc: Will Deacon 
> > > Cc: Robin Murphy 
> > > Cc: Eric Auger 
> > > Cc: Jean-Philippe Brucker 
> > > Suggested-by: Jean-Philippe Brucker 
> > > Signed-off-by: Liu Yi L 
> > > Signed-off-by: Jacob Pan 
> > > Reviewed-by: Eric Auger 
> > > ---
> > > v5 -> v6:
> > > *) add review-by from Eric Auger.
> > >
> > > v4 -> v5:
> > > *) address comments from Eric Auger.
> > > ---
> > >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 29
> > +++--
> > >  drivers/iommu/arm/arm-smmu/arm-smmu.c   | 29
> > +++--
> > >  2 files changed, 54 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > > index 7196207..016e2e5 100644
> > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > > @@ -3019,6 +3019,32 @@ static struct iommu_group
> > *arm_smmu_device_group(struct device *dev)
> > > return group;
> > >  }
> > >
> > > +static int arm_smmu_domain_nesting_info(struct arm_smmu_domain
> > *smmu_domain,
> > > +   void *data)
> > > +{
> > > +   struct iommu_nesting_info *info = (struct iommu_nesting_info
> > *)data;
> > > +   unsigned int size;
> > > +
> > > +   if (!info || smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
> > > +   return -ENODEV;
> > > +
> > > +   size = sizeof(struct iommu_nesting_info);
> > > +
> > > +   /*
> > > +* if provided buffer size is smaller than expected, should
> > > +* return 0 and also the expected buffer size to caller.
> > > +*/
> > > +   if (info->argsz < size) {
> > > +   info->argsz = size;
> > > +   return 0;
> > > +   }
> > > +
> > > +   /* report an empty iommu_nesting_info for now */
> > > +   memset(info, 0x0, size);
> > > +   info->argsz = size;
> > > +   return 0;
> > > +}
> > > +
> > >  static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
> > > enum iommu_attr attr, void *data)
> > >  {
> > > @@ -3028,8 +3054,7 @@ static int arm_smmu_domain_get_attr(struct
> > iommu_domain *domain,
> > > case IOMMU_DOMAIN_UNMANAGED:
> > > switch (attr) {
> > > case DOMAIN_ATTR_NESTING:
> > > -   *(int *)data = (smmu_domain->stage ==
> > ARM_SMMU_DOMAIN_NESTED);
> > > -   return 0;
> > > +   return arm_smmu_domain_nesting_info(smmu_domain,
> > data);
> >
> > Thanks for the patch.
> > This would unnecessarily overflow 'data' for any caller that's expecting 
> > only
> > an int data. Dump from one such issue that I was seeing when testing
> > this change along with local kvmtool changes is pasted below [1].
> >
> > I could get around with the issue by adding another (iommu_attr) -
> > DOMAIN_ATTR_NESTING_INFO that returns (iommu_nesting_info).
>
> nice to hear from you. At first, we planned to have a separate iommu_attr
> for getting nesting_info. However, we considered there is no existing user
> which gets DOMAIN_ATTR_NESTING, so we decided to reuse it for iommu nesting
> info. Could you share me the code base you are using? If the error you
> encountered is due to this change, so there should be a place which gets
> DOMAIN_ATTR_NESTING.

I am currently working on top of Eric's tree for nested stage support [1].
My best guess was that the vfio_pci_dma_fault_init() method [2] that is
requesting DOMAIN_ATTR_NESTING causes stack overflow, and corruption.
That's when I added a new attribute.

I will soon publish my patches to the list for review. Let me know
your thoughts.

[1] https://github.com/eauger/linux/tree/5.10-rc4-2stage-v13
[2] 
https://github.com/eauger/linux/blob/5.10-rc4-2stage-v13/drivers/vfio/pci/vfio_pci.c#L494

Thanks
Vivek

>
> Regards,
> Yi Liu

[snip]
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 02/16] iommu/smmu: Report empty domain nesting info

2021-01-11 Thread Vivek Gautam
Hi Yi,


On Thu, Sep 10, 2020 at 4:13 PM Liu Yi L  wrote:
>
> This patch is added as instead of returning a boolean for DOMAIN_ATTR_NESTING,
> iommu_domain_get_attr() should return an iommu_nesting_info handle. For
> now, return an empty nesting info struct for now as true nesting is not
> yet supported by the SMMUs.
>
> Cc: Will Deacon 
> Cc: Robin Murphy 
> Cc: Eric Auger 
> Cc: Jean-Philippe Brucker 
> Suggested-by: Jean-Philippe Brucker 
> Signed-off-by: Liu Yi L 
> Signed-off-by: Jacob Pan 
> Reviewed-by: Eric Auger 
> ---
> v5 -> v6:
> *) add review-by from Eric Auger.
>
> v4 -> v5:
> *) address comments from Eric Auger.
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 29 
> +++--
>  drivers/iommu/arm/arm-smmu/arm-smmu.c   | 29 
> +++--
>  2 files changed, 54 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 7196207..016e2e5 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -3019,6 +3019,32 @@ static struct iommu_group 
> *arm_smmu_device_group(struct device *dev)
> return group;
>  }
>
> +static int arm_smmu_domain_nesting_info(struct arm_smmu_domain *smmu_domain,
> +   void *data)
> +{
> +   struct iommu_nesting_info *info = (struct iommu_nesting_info *)data;
> +   unsigned int size;
> +
> +   if (!info || smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
> +   return -ENODEV;
> +
> +   size = sizeof(struct iommu_nesting_info);
> +
> +   /*
> +* if provided buffer size is smaller than expected, should
> +* return 0 and also the expected buffer size to caller.
> +*/
> +   if (info->argsz < size) {
> +   info->argsz = size;
> +   return 0;
> +   }
> +
> +   /* report an empty iommu_nesting_info for now */
> +   memset(info, 0x0, size);
> +   info->argsz = size;
> +   return 0;
> +}
> +
>  static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
> enum iommu_attr attr, void *data)
>  {
> @@ -3028,8 +3054,7 @@ static int arm_smmu_domain_get_attr(struct iommu_domain 
> *domain,
> case IOMMU_DOMAIN_UNMANAGED:
> switch (attr) {
> case DOMAIN_ATTR_NESTING:
> -   *(int *)data = (smmu_domain->stage == 
> ARM_SMMU_DOMAIN_NESTED);
> -   return 0;
> +   return arm_smmu_domain_nesting_info(smmu_domain, 
> data);

Thanks for the patch.
This would unnecessarily overflow 'data' for any caller that's expecting only
an int data. Dump from one such issue that I was seeing when testing
this change along with local kvmtool changes is pasted below [1].

I could get around with the issue by adding another (iommu_attr) -
DOMAIN_ATTR_NESTING_INFO that returns (iommu_nesting_info).

Thanks & regards
Vivek

[1]--
[  811.756516] vfio-pci :08:00.1: vfio_ecap_init: hiding ecap 0x1b@0x108
[  811.756516] Kernel panic - not syncing: stack-protector: Kernel
stack is corrupted in: vfio_pci_open+0x644/0x648
[  811.756516] CPU: 0 PID: 175 Comm: lkvm-cleanup-ne Not tainted
5.10.0-rc5-00096-gf015061e14cf #43
[  811.756516] Call trace:
[  811.756516]  dump_backtrace+0x0/0x1b0
[  811.756516]  show_stack+0x18/0x68
[  811.756516]  dump_stack+0xd8/0x134
[  811.756516]  panic+0x174/0x33c
[  811.756516]  __stack_chk_fail+0x3c/0x40
[  811.756516]  vfio_pci_open+0x644/0x648
[  811.756516]  vfio_group_fops_unl_ioctl+0x4bc/0x648
[  811.756516]  0x0
[  811.756516] SMP: stopping secondary CPUs
[  811.756597] Kernel Offset: disabled
[  811.756597] CPU features: 0x0040006,6a00aa38
[  811.756602] Memory Limit: none
[  811.768497] ---[ end Kernel panic - not syncing: stack-protector:
Kernel stack is corrupted in: vfio_pci_open+0x644/0x648 ]
-
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 0/9] iommu: I/O page faults for SMMUv3

2020-12-03 Thread Vivek Gautam
Hi Jean,


On Thu, Nov 12, 2020 at 6:33 PM Jean-Philippe Brucker
 wrote:
>
> Add support for stall and PRI to the SMMUv3 driver, along with a common
> I/O Page Fault handler.
>
> These patches were last sent as part of v7 of the larger SVA series [1].
> Main changes since v7:
> * Dropped CONFIG_IOMMU_PAGE_FAULT, reuse CONFIG_IOMMU_SVA_LIB instead.
> * Extracted devicetree support into patch 4.
> * Added patch 5 for ACPI support.
> * Dropped event queue flush on unbind(). Since device drivers must
>   complete DMA transactions before calling unbind(), there cannot be any
>   pending stalled event.
> * A few small fixes.
>
> The series depends on "iommu/sva: Add PASID helpers" [2], since it
> provides the function to search an mm_struct by PASID.
>
> Has anyone been testing the PRI patches on hardware? I still only have a
> software model to test them, so as much as I'd like to cross this off my
> list, we could leave out patches 7-9 for now.
>
I have been testing this series for some time now with an RDN1Edge platform model.
The public model for N1-Edge rd FVP can be found at [A].
With reference software [B] and your kernel branch with smmute [C], I
was able to
test smmute by initiating different DMA transactions. With model logs I was able
to validate ATS and PRI command flows as well.
So I am happy to give my tested-by tag.

Tested-by: Vivek Gautam 

Best regards
Vivek

[A] 
https://developer.arm.com/tools-and-software/open-source-software/arm-platforms-software/arm-ecosystem-fvps
[B] 
https://git.linaro.org/landing-teams/working/arm/arm-reference-platforms.git/about/docs/rdn1edge/user-guide.rst
[C] https://jpbrucker.net/git/linux/log/?h=sva/smmute-2020-11-12

> [1] 
> https://lore.kernel.org/linux-iommu/20200519175502.2504091-1-jean-phili...@linaro.org/
> [2] 
> https://lore.kernel.org/linux-iommu/20201106155048.997886-1-jean-phili...@linaro.org/
>
> Jean-Philippe Brucker (9):
>   iommu: Add a page fault handler
>   iommu/arm-smmu-v3: Maintain a SID->device structure
>   dt-bindings: document stall property for IOMMU masters
>   of/iommu: Support dma-can-stall property
>   ACPI/IORT: Enable stall support for platform devices
>   iommu/arm-smmu-v3: Add stall support for platform devices
>   PCI/ATS: Add PRI stubs
>   PCI/ATS: Export PRI functions
>   iommu/arm-smmu-v3: Add support for PRI
>
>  drivers/iommu/Makefile|   1 +
>  .../devicetree/bindings/iommu/iommu.txt   |  18 +
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  69 +-
>  drivers/iommu/iommu-sva-lib.h |  53 ++
>  include/linux/iommu.h |   4 +
>  include/linux/pci-ats.h   |   7 +
>  drivers/acpi/arm64/iort.c |   1 +
>  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   |  52 +-
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 605 +++---
>  drivers/iommu/io-pgfault.c| 462 +
>  drivers/iommu/of_iommu.c  |   5 +-
>  drivers/pci/ats.c |   4 +
>  12 files changed, 1191 insertions(+), 90 deletions(-)
>  create mode 100644 drivers/iommu/io-pgfault.c
>
> --
> 2.29.1
>
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 3/3] iommu: arm-smmu-impl: Add sdm845 implementation hook

2019-09-06 Thread Vivek Gautam
On Fri, Aug 23, 2019 at 12:03 PM Vivek Gautam
 wrote:
>
> Add reset hook for sdm845 based platforms to turn off
> the wait-for-safe sequence.
>
> Understanding how wait-for-safe logic affects USB and UFS performance
> on MTP845 and DB845 boards:
>
> Qcom's implementation of arm,mmu-500 adds a WAIT-FOR-SAFE logic
> to address under-performance issues in real-time clients, such as
> Display, and Camera.
> On receiving an invalidation request, the SMMU forwards a SAFE request
> to these clients and waits for SAFE ack signal from real-time clients.
> The SAFE signal from such clients is used to qualify the start of
> invalidation.
> This logic is controlled by chicken bits, one for each - MDP (display),
> IFE0, and IFE1 (camera), that can be accessed only from secure software
> on sdm845.
>
> This configuration, however, degrades the performance of non-real time
> clients, such as USB, and UFS etc. This happens because, with wait-for-safe
> logic enabled the hardware tries to throttle non-real time clients while
> waiting for SAFE ack signals from real-time clients.
>
> On mtp845 and db845 devices, with wait-for-safe logic enabled by the
> bootloaders we see degraded performance of USB and UFS when kernel
> enables the smmu stage-1 translations for these clients.
> Turning off this wait-for-safe logic from the kernel gets us back the perf
> of USB and UFS devices until we re-visit this when we start seeing perf
> issues on display/camera on upstream supported SDM845 platforms.
> The bootloaders on these boards implement secure monitor callbacks to
> handle a specific command - QCOM_SCM_SVC_SMMU_PROGRAM with which the
> logic can be toggled.
>
> There are other boards such as cheza whose bootloaders don't enable this
> logic. Such boards don't implement callbacks to handle the specific SCM
> call so disabling this logic for such boards will be a no-op.
>
> This change is inspired by the downstream change from Patrick Daly
> to address performance issues with display and camera by handling
> this wait-for-safe within separate io-pagetable ops to do TLB
> maintenance. So a big thanks to him for the change and for all the
> offline discussions.
>
> Without this change the UFS reads are pretty slow:
> $ time dd if=/dev/sda of=/dev/zero bs=1048576 count=10 conv=sync
> 10+0 records in
> 10+0 records out
> 10485760 bytes (10.0MB) copied, 22.394903 seconds, 457.2KB/s
> real0m 22.39s
> user0m 0.00s
> sys 0m 0.01s
>
> With this change they are back to rock!
> $ time dd if=/dev/sda of=/dev/zero bs=1048576 count=300 conv=sync
> 300+0 records in
> 300+0 records out
> 314572800 bytes (300.0MB) copied, 1.030541 seconds, 291.1MB/s
> real0m 1.03s
> user0m 0.00s
> sys 0m 0.54s
>
> Signed-off-by: Vivek Gautam 
> ---
>  drivers/iommu/arm-smmu-impl.c | 27 ++-
>  1 file changed, 26 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c
> index 3f88cd078dd5..0aef87c41f9c 100644
> --- a/drivers/iommu/arm-smmu-impl.c
> +++ b/drivers/iommu/arm-smmu-impl.c
> @@ -6,6 +6,7 @@
>
>  #include 
>  #include 
> +#include <linux/qcom_scm.h>
>
>  #include "arm-smmu.h"
>
> @@ -102,7 +103,6 @@ static struct arm_smmu_device 
> *cavium_smmu_impl_init(struct arm_smmu_device *smm
> return &cs->smmu;
>  }
>
> -
>  #define ARM_MMU500_ACTLR_CPRE  (1 << 1)
>
>  #define ARM_MMU500_ACR_CACHE_LOCK  (1 << 26)
> @@ -147,6 +147,28 @@ static const struct arm_smmu_impl arm_mmu500_impl = {
> .reset = arm_mmu500_reset,
>  };
>
> +static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu)
> +{
> +   int ret;
> +
> +   arm_mmu500_reset(smmu);
> +
> +   /*
> +* To address performance degradation in non-real time clients,
> +* such as USB and UFS, turn off wait-for-safe on sdm845 based boards,
> +* such as MTP and db845, whose firmwares implement secure monitor
> +* call handlers to turn on/off the wait-for-safe logic.
> +*/
> +   ret = qcom_scm_qsmmu500_wait_safe_toggle(0);
> +   if (ret)
> +   dev_warn(smmu->dev, "Failed to turn off SAFE logic\n");
> +
> +   return 0;
> +}
> +
> +const struct arm_smmu_impl qcom_sdm845_smmu500_impl = {
> +   .reset = qcom_sdm845_smmu500_reset,
> +};
>
>  struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu)
>  {
> @@ -170,5 +192,8 @@ struct arm_smmu_device *arm_smmu_impl_init(struct 
> arm_smmu_device *smmu)
>   "calxeda,smmu-secure-config-access"))
> smmu->impl = &calxeda_impl;
>

[PATCH v4 3/3] iommu: arm-smmu-impl: Add sdm845 implementation hook

2019-08-23 Thread Vivek Gautam
Add reset hook for sdm845 based platforms to turn off
the wait-for-safe sequence.

Understanding how wait-for-safe logic affects USB and UFS performance
on MTP845 and DB845 boards:

Qcom's implementation of arm,mmu-500 adds a WAIT-FOR-SAFE logic
to address under-performance issues in real-time clients, such as
Display, and Camera.
On receiving an invalidation request, the SMMU forwards a SAFE request
to these clients and waits for SAFE ack signal from real-time clients.
The SAFE signal from such clients is used to qualify the start of
invalidation.
This logic is controlled by chicken bits, one for each - MDP (display),
IFE0, and IFE1 (camera), that can be accessed only from secure software
on sdm845.

This configuration, however, degrades the performance of non-real time
clients, such as USB, and UFS etc. This happens because, with wait-for-safe
logic enabled the hardware tries to throttle non-real time clients while
waiting for SAFE ack signals from real-time clients.

On mtp845 and db845 devices, with wait-for-safe logic enabled by the
bootloaders we see degraded performance of USB and UFS when kernel
enables the smmu stage-1 translations for these clients.
Turning off this wait-for-safe logic from the kernel gets us back the perf
of USB and UFS devices until we re-visit this when we start seeing perf
issues on display/camera on upstream supported SDM845 platforms.
The bootloaders on these boards implement secure monitor callbacks to
handle a specific command - QCOM_SCM_SVC_SMMU_PROGRAM with which the
logic can be toggled.

There are other boards such as cheza whose bootloaders don't enable this
logic. Such boards don't implement callbacks to handle the specific SCM
call so disabling this logic for such boards will be a no-op.

This change is inspired by the downstream change from Patrick Daly
to address performance issues with display and camera by handling
this wait-for-safe within separate io-pagetable ops to do TLB
maintenance. So a big thanks to him for the change and for all the
offline discussions.

Without this change the UFS reads are pretty slow:
$ time dd if=/dev/sda of=/dev/zero bs=1048576 count=10 conv=sync
10+0 records in
10+0 records out
10485760 bytes (10.0MB) copied, 22.394903 seconds, 457.2KB/s
real0m 22.39s
user0m 0.00s
sys 0m 0.01s

With this change they are back to rock!
$ time dd if=/dev/sda of=/dev/zero bs=1048576 count=300 conv=sync
300+0 records in
300+0 records out
314572800 bytes (300.0MB) copied, 1.030541 seconds, 291.1MB/s
real0m 1.03s
user0m 0.00s
sys 0m 0.54s

Signed-off-by: Vivek Gautam 
---
 drivers/iommu/arm-smmu-impl.c | 27 ++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c
index 3f88cd078dd5..0aef87c41f9c 100644
--- a/drivers/iommu/arm-smmu-impl.c
+++ b/drivers/iommu/arm-smmu-impl.c
@@ -6,6 +6,7 @@
 
 #include 
 #include 
+#include <linux/qcom_scm.h>
 
 #include "arm-smmu.h"
 
@@ -102,7 +103,6 @@ static struct arm_smmu_device *cavium_smmu_impl_init(struct 
arm_smmu_device *smm
return &cs->smmu;
 }
 
-
 #define ARM_MMU500_ACTLR_CPRE  (1 << 1)
 
 #define ARM_MMU500_ACR_CACHE_LOCK  (1 << 26)
@@ -147,6 +147,28 @@ static const struct arm_smmu_impl arm_mmu500_impl = {
.reset = arm_mmu500_reset,
 };
 
+static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu)
+{
+   int ret;
+
+   arm_mmu500_reset(smmu);
+
+   /*
+* To address performance degradation in non-real time clients,
+* such as USB and UFS, turn off wait-for-safe on sdm845 based boards,
+* such as MTP and db845, whose firmwares implement secure monitor
+* call handlers to turn on/off the wait-for-safe logic.
+*/
+   ret = qcom_scm_qsmmu500_wait_safe_toggle(0);
+   if (ret)
+   dev_warn(smmu->dev, "Failed to turn off SAFE logic\n");
+
+   return 0;
+}
+
+const struct arm_smmu_impl qcom_sdm845_smmu500_impl = {
+   .reset = qcom_sdm845_smmu500_reset,
+};
 
 struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu)
 {
@@ -170,5 +192,8 @@ struct arm_smmu_device *arm_smmu_impl_init(struct 
arm_smmu_device *smmu)
  "calxeda,smmu-secure-config-access"))
smmu->impl = &calxeda_impl;
 
+   if (of_device_is_compatible(smmu->dev->of_node, "qcom,sdm845-smmu-500"))
+   smmu->impl = &qcom_sdm845_smmu500_impl;
+
return smmu;
 }
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v4 1/3] firmware: qcom_scm-64: Add atomic version of qcom_scm_call

2019-08-23 Thread Vivek Gautam
There are scenarios where drivers are required to make a
scm call in atomic context, such as in one of the qcom's
arm-smmu-500 errata [1].

[1] ("https://source.codeaurora.org/quic/la/kernel/msm-4.9/
  tree/drivers/iommu/arm-smmu.c?h=msm-4.9#n4842")

Signed-off-by: Vivek Gautam 
Reviewed-by: Bjorn Andersson 
---
 drivers/firmware/qcom_scm-64.c | 136 -
 1 file changed, 92 insertions(+), 44 deletions(-)

diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c
index 91d5ad7cf58b..b6dca32c5ac4 100644
--- a/drivers/firmware/qcom_scm-64.c
+++ b/drivers/firmware/qcom_scm-64.c
@@ -62,32 +62,71 @@ static DEFINE_MUTEX(qcom_scm_lock);
 #define FIRST_EXT_ARG_IDX 3
 #define N_REGISTER_ARGS (MAX_QCOM_SCM_ARGS - N_EXT_QCOM_SCM_ARGS + 1)
 
-/**
- * qcom_scm_call() - Invoke a syscall in the secure world
- * @dev:   device
- * @svc_id:service identifier
- * @cmd_id:command identifier
- * @desc:  Descriptor structure containing arguments and return values
- *
- * Sends a command to the SCM and waits for the command to finish processing.
- * This should *only* be called in pre-emptible context.
-*/
-static int qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id,
-const struct qcom_scm_desc *desc,
-struct arm_smccc_res *res)
+static void __qcom_scm_call_do(const struct qcom_scm_desc *desc,
+  struct arm_smccc_res *res, u32 fn_id,
+  u64 x5, u32 type)
+{
+   u64 cmd;
+   struct arm_smccc_quirk quirk = {.id = ARM_SMCCC_QUIRK_QCOM_A6};
+
+   cmd = ARM_SMCCC_CALL_VAL(type, qcom_smccc_convention,
+ARM_SMCCC_OWNER_SIP, fn_id);
+
+   quirk.state.a6 = 0;
+
+   do {
+   arm_smccc_smc_quirk(cmd, desc->arginfo, desc->args[0],
+   desc->args[1], desc->args[2], x5,
+   quirk.state.a6, 0, res, &quirk);
+
+   if (res->a0 == QCOM_SCM_INTERRUPTED)
+   cmd = res->a0;
+
+   } while (res->a0 == QCOM_SCM_INTERRUPTED);
+}
+
+static void qcom_scm_call_do(const struct qcom_scm_desc *desc,
+struct arm_smccc_res *res, u32 fn_id,
+u64 x5, bool atomic)
+{
+   int retry_count = 0;
+
+   if (!atomic) {
+   do {
+   mutex_lock(&qcom_scm_lock);
+
+   __qcom_scm_call_do(desc, res, fn_id, x5,
+  ARM_SMCCC_STD_CALL);
+
+   mutex_unlock(&qcom_scm_lock);
+
+   if (res->a0 == QCOM_SCM_V2_EBUSY) {
+   if (retry_count++ > QCOM_SCM_EBUSY_MAX_RETRY)
+   break;
+   msleep(QCOM_SCM_EBUSY_WAIT_MS);
+   }
+   }  while (res->a0 == QCOM_SCM_V2_EBUSY);
+   } else {
+   __qcom_scm_call_do(desc, res, fn_id, x5, ARM_SMCCC_FAST_CALL);
+   }
+}
+
+static int ___qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id,
+   const struct qcom_scm_desc *desc,
+   struct arm_smccc_res *res, bool atomic)
 {
int arglen = desc->arginfo & 0xf;
-   int retry_count = 0, i;
+   int i;
u32 fn_id = QCOM_SCM_FNID(svc_id, cmd_id);
-   u64 cmd, x5 = desc->args[FIRST_EXT_ARG_IDX];
+   u64 x5 = desc->args[FIRST_EXT_ARG_IDX];
dma_addr_t args_phys = 0;
void *args_virt = NULL;
size_t alloc_len;
-   struct arm_smccc_quirk quirk = {.id = ARM_SMCCC_QUIRK_QCOM_A6};
+   gfp_t flag = atomic ? GFP_ATOMIC : GFP_KERNEL;
 
if (unlikely(arglen > N_REGISTER_ARGS)) {
alloc_len = N_EXT_QCOM_SCM_ARGS * sizeof(u64);
-   args_virt = kzalloc(PAGE_ALIGN(alloc_len), GFP_KERNEL);
+   args_virt = kzalloc(PAGE_ALIGN(alloc_len), flag);
 
if (!args_virt)
return -ENOMEM;
@@ -117,33 +156,7 @@ static int qcom_scm_call(struct device *dev, u32 svc_id, 
u32 cmd_id,
x5 = args_phys;
}
 
-   do {
-   mutex_lock(_scm_lock);
-
-   cmd = ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL,
-qcom_smccc_convention,
-ARM_SMCCC_OWNER_SIP, fn_id);
-
-   quirk.state.a6 = 0;
-
-   do {
-   arm_smccc_smc_quirk(cmd, desc->arginfo, desc->args[0],
- desc->args[1], desc->args[2], x5,
- quirk.state.a6, 0, res, );
-
-   if (res->a0 == QCOM_SCM_INTERRUPTED)
-   cmd = res->a0;
-
-   } while (res->a0 == QCOM_SCM_INTERRUPT

[PATCH v4 2/3] firmware/qcom_scm: Add scm call to handle smmu errata

2019-08-23 Thread Vivek Gautam
Qcom's smmu-500 needs to toggle the wait-for-safe sequence to
handle TLB invalidation syncs.
A few firmwares allow doing that through the SCM interface.
Add an API to toggle wait-for-safe from firmware through an
SCM call.

Signed-off-by: Vivek Gautam 
Reviewed-by: Bjorn Andersson 
---
 drivers/firmware/qcom_scm-32.c |  5 +
 drivers/firmware/qcom_scm-64.c | 13 +
 drivers/firmware/qcom_scm.c|  6 ++
 drivers/firmware/qcom_scm.h|  5 +
 include/linux/qcom_scm.h   |  2 ++
 5 files changed, 31 insertions(+)

diff --git a/drivers/firmware/qcom_scm-32.c b/drivers/firmware/qcom_scm-32.c
index 215061c581e1..bee8729525ec 100644
--- a/drivers/firmware/qcom_scm-32.c
+++ b/drivers/firmware/qcom_scm-32.c
@@ -614,3 +614,8 @@ int __qcom_scm_io_writel(struct device *dev, phys_addr_t 
addr, unsigned int val)
return qcom_scm_call_atomic2(QCOM_SCM_SVC_IO, QCOM_SCM_IO_WRITE,
 addr, val);
 }
+
+int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev, bool enable)
+{
+   return -ENODEV;
+}
diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c
index b6dca32c5ac4..41c06dcfa9e1 100644
--- a/drivers/firmware/qcom_scm-64.c
+++ b/drivers/firmware/qcom_scm-64.c
@@ -550,3 +550,16 @@ int __qcom_scm_io_writel(struct device *dev, phys_addr_t 
addr, unsigned int val)
return qcom_scm_call(dev, QCOM_SCM_SVC_IO, QCOM_SCM_IO_WRITE,
 , );
 }
+
+int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev, bool en)
+{
+   struct qcom_scm_desc desc = {0};
+   struct arm_smccc_res res;
+
+   desc.args[0] = QCOM_SCM_CONFIG_ERRATA1_CLIENT_ALL;
+   desc.args[1] = en;
+   desc.arginfo = QCOM_SCM_ARGS(2);
+
+   return qcom_scm_call_atomic(dev, QCOM_SCM_SVC_SMMU_PROGRAM,
+   QCOM_SCM_CONFIG_ERRATA1, , );
+}
diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c
index 2ddc118dba1b..2b3b7a8c4270 100644
--- a/drivers/firmware/qcom_scm.c
+++ b/drivers/firmware/qcom_scm.c
@@ -344,6 +344,12 @@ int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, 
u32 spare)
 }
 EXPORT_SYMBOL(qcom_scm_iommu_secure_ptbl_init);
 
+int qcom_scm_qsmmu500_wait_safe_toggle(bool en)
+{
+   return __qcom_scm_qsmmu500_wait_safe_toggle(__scm->dev, en);
+}
+EXPORT_SYMBOL(qcom_scm_qsmmu500_wait_safe_toggle);
+
 int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val)
 {
return __qcom_scm_io_readl(__scm->dev, addr, val);
diff --git a/drivers/firmware/qcom_scm.h b/drivers/firmware/qcom_scm.h
index 99506bd873c0..baee744dbcfe 100644
--- a/drivers/firmware/qcom_scm.h
+++ b/drivers/firmware/qcom_scm.h
@@ -91,10 +91,15 @@ extern int __qcom_scm_restore_sec_cfg(struct device *dev, 
u32 device_id,
  u32 spare);
 #define QCOM_SCM_IOMMU_SECURE_PTBL_SIZE3
 #define QCOM_SCM_IOMMU_SECURE_PTBL_INIT4
+#define QCOM_SCM_SVC_SMMU_PROGRAM  0x15
+#define QCOM_SCM_CONFIG_ERRATA10x3
+#define QCOM_SCM_CONFIG_ERRATA1_CLIENT_ALL 0x2
 extern int __qcom_scm_iommu_secure_ptbl_size(struct device *dev, u32 spare,
 size_t *size);
 extern int __qcom_scm_iommu_secure_ptbl_init(struct device *dev, u64 addr,
 u32 size, u32 spare);
+extern int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev,
+   bool enable);
 #define QCOM_MEM_PROT_ASSIGN_ID0x16
 extern int  __qcom_scm_assign_mem(struct device *dev,
  phys_addr_t mem_region, size_t mem_sz,
diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index 3f12cc77fb58..aee3d8580d89 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -57,6 +57,7 @@ extern int qcom_scm_set_remote_state(u32 state, u32 id);
 extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare);
 extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size);
 extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare);
+extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en);
 extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val);
 extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val);
 #else
@@ -96,6 +97,7 @@ qcom_scm_set_remote_state(u32 state,u32 id) { return -ENODEV; 
}
 static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare) { return 
-ENODEV; }
 static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size) { 
return -ENODEV; }
 static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 
spare) { return -ENODEV; }
+static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en) { return 
-ENODEV; }
 static inline int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val) { 
return -ENODEV; }
 static inline int qcom_scm_io_writel(phys_addr_t addr, unsigned int val) { 
return -ENODEV; }
 #endif
-- 
QUALCOMM INDIA, on 

[PATCH v4 0/3] Qcom smmu-500 wait-for-safe handling for sdm845

2019-08-23 Thread Vivek Gautam
Previous versions of the patches are at [1]:

Qcom's implementation of smmu-500 on sdm845 adds a hardware logic called
wait-for-safe. This logic helps in meeting the invalidation requirements
from 'real-time clients', such as display and camera. This wait-for-safe
logic ensures that the invalidations happen after getting an ack from these
devices.
In this patch-series we are disabling this wait-for-safe logic from the
arm-smmu driver's probe as with this enabled the hardware tries to
throttle invalidations from 'non-real-time clients', such as USB and UFS.

For detailed information please refer to patch [3/3] in this series.
I have included the device tree patch too in this series for someone who
would like to test out this. Here's a branch [2] that gets display on MTP
SDM845 device.

This patch series is inspired by downstream work to handle under-performance
issues on real-time clients on sdm845. Downstream, we add separate page table
ops to handle TLB maintenance and toggle wait-for-safe in the tlb_sync call so as
to achieve the required performance for display and camera [3, 4].

Changes since v3:
 * Based on arm-smmu implementation cleanup series [5] by Robin Murphy which is
   already merged in Will's tree [6].
 * Implemented the sdm845 specific reset hook which does arm_smmu_device_reset()
   followed by making SCM call to disable the wait-for-safe logic.
 * Removed the dependency of the SCM call on any dt flag. We invariably try to
   disable the wait-for-safe logic on sdm845. Platforms such as mtp845 and db845
   that implement handlers for this particular SCM call should be able to disable
   the wait-for-safe logic.
   Other platforms such as cheza don't enable the wait-for-safe logic at all
   from their bootloaders. So there's no need to disable the same.
 * No change in SCM call patches 1 & 2.

Changes since v2:
 * Dropped the patch to add atomic io_read/write scm API.
 * Removed support for any separate page table ops to handle wait-for-safe.
   Currently just disabling this wait-for-safe logic from 
arm_smmu_device_probe()
   to achieve performance on USB/UFS on sdm845.
 * Added a device tree patch to add smmu option for fw-implemented support
   for SCM call to take care of SAFE toggling.

Changes since v1:
 * Addressed Will and Robin's comments:
- Dropped the patch[4] that forked out __arm_smmu_tlb_inv_range_nosync(),
  and __arm_smmu_tlb_sync().
- Cleaned up the errata patch further to use downstream polling mechanism
  for tlb sync.
 * No change in SCM call patches - patches 1 to 3.

[1] https://lore.kernel.org/patchwork/cover/1087453/
[2] https://github.com/vivekgautam1/linux/tree/v5.2-rc4/sdm845-display-working
[3] 
https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/drivers/iommu/arm-smmu.c?h=CogSystems-msm-49/msm-4.9=da765c6c75266b38191b38ef086274943f353ea7
[4] 
https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/drivers/iommu/arm-smmu.c?h=CogSystems-msm-49/msm-4.9=8696005aaaf745de68f57793c1a534a34345c30a
[5] https://patchwork.kernel.org/patch/11096265/
[6] https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/

Vivek Gautam (3):
  firmware: qcom_scm-64: Add atomic version of qcom_scm_call
  firmware/qcom_scm: Add scm call to handle smmu errata
  iommu: arm-smmu-impl: Add sdm845 implementation hook

 drivers/firmware/qcom_scm-32.c |   5 ++
 drivers/firmware/qcom_scm-64.c | 149 +
 drivers/firmware/qcom_scm.c|   6 ++
 drivers/firmware/qcom_scm.h|   5 ++
 drivers/iommu/arm-smmu-impl.c  |  27 +++-
 include/linux/qcom_scm.h   |   2 +
 6 files changed, 149 insertions(+), 45 deletions(-)

-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 17/17] iommu/arm-smmu: Add context init implementation hook

2019-08-20 Thread Vivek Gautam
U_DOMAIN_S2)

-   cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
+   cfg->vmid = cfg->cbndx + 1;
else
-   cfg->asid = cfg->cbndx + smmu->cavium_id_base;
+   cfg->asid = cfg->cbndx;
+
+   smmu_domain->smmu = smmu;
+   if (smmu->impl && smmu->impl->init_context) {
+   ret = smmu->impl->init_context(smmu_domain);
+   if (ret)
+   goto out_unlock;
+   }
  
  	pgtbl_cfg = (struct io_pgtable_cfg) {

.pgsize_bitmap  = smmu->pgsize_bitmap,
@@ -765,7 +733,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain 
*domain,
if (smmu_domain->non_strict)
pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
  
-	smmu_domain->smmu = smmu;

pgtbl_ops = alloc_io_pgtable_ops(fmt, _cfg, smmu_domain);
if (!pgtbl_ops) {
ret = -ENOMEM;
diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h
index ddafe872a396..611ed742e56f 100644
--- a/drivers/iommu/arm-smmu.h
+++ b/drivers/iommu/arm-smmu.h
@@ -14,6 +14,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -270,14 +271,50 @@ struct arm_smmu_device {
struct clk_bulk_data*clks;
int num_clks;
  
-	u32cavium_id_base; /* Specific to Cavium */

-
spinlock_t  global_sync_lock;
  
  	/* IOMMU core code handle */

struct iommu_device iommu;
  };
  
+enum arm_smmu_context_fmt {

+   ARM_SMMU_CTX_FMT_NONE,
+   ARM_SMMU_CTX_FMT_AARCH64,
+   ARM_SMMU_CTX_FMT_AARCH32_L,
+   ARM_SMMU_CTX_FMT_AARCH32_S,
+};
+
+struct arm_smmu_cfg {
+   u8  cbndx;
+   u8  irptndx;
+   union {
+   u16 asid;
+   u16 vmid;
+   };
+   enum arm_smmu_cbar_type cbar;
+   enum arm_smmu_context_fmt   fmt;
+};
+#define INVALID_IRPTNDX0xff
+
+enum arm_smmu_domain_stage {
+   ARM_SMMU_DOMAIN_S1 = 0,
+   ARM_SMMU_DOMAIN_S2,
+   ARM_SMMU_DOMAIN_NESTED,
+   ARM_SMMU_DOMAIN_BYPASS,
+};
+
+struct arm_smmu_domain {
+   struct arm_smmu_device  *smmu;
+   struct io_pgtable_ops   *pgtbl_ops;
+   const struct iommu_gather_ops   *tlb_ops;
+   struct arm_smmu_cfg cfg;
+   enum arm_smmu_domain_stage  stage;
+   boolnon_strict;
+   struct mutexinit_mutex; /* Protects smmu pointer */
+   spinlock_t  cb_lock; /* Serialises ATS1* ops and 
TLB syncs */
+   struct iommu_domain domain;
+};
+
  
  /* Implementation details, yay! */

  struct arm_smmu_impl {
@@ -289,6 +326,7 @@ struct arm_smmu_impl {
u64 val);
int (*cfg_probe)(struct arm_smmu_device *smmu);
int (*reset)(struct arm_smmu_device *smmu);
+   int (*init_context)(struct arm_smmu_domain *smmu_domain);


Hi Robin,

Sorry for responding late to this series. I have a couple of doubts here 
that I wanted to discuss.


Are we standardizing these implementation-specific ops? Each vendor 
implementation will have something peculiar to take care of. Things are 
good right now with 'reset', 'cfg_probe', and 'init_context' hooks.
But, on top of vendor implementation details, there can be SoC specific 
errata changes that need to be added.
Moreover, adding implementation data based on __model__ may not suffice 
for long. Do you suggest adding any other data variable in the 
ARM_SMMU_MATCH_DATA?
To show SoC specific needs, I have the change attached in this email to 
take care of the SDM845 'wait-for-safe' sequence.

Please take a look.

Thanks & Regards
Vivek


  };
  
  static inline void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n)


From 3830ec7e22deb49de72b6bc29bd965f7b07b9669 Mon Sep 17 00:00:00 2001
From: Vivek Gautam 
Date: Tue, 20 Aug 2019 15:28:16 +0530
Subject: [PATCH 3/4] iommu: arm-smmu-impl: Add SDM845 specific implementation
 hook

Signed-off-by: Vivek Gautam 
---
 drivers/iommu/arm-smmu-impl.c | 31 +++
 drivers/iommu/arm-smmu.c  |  2 ++
 drivers/iommu/arm-smmu.h  |  1 +
 3 files changed, 34 insertions(+)

diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c
index 3f88cd078dd5..0e6f5ab0e0ce 100644
--- a/drivers/iommu/arm-smmu-impl.c
+++ b/drivers/iommu/arm-smmu-impl.c
@@ -6,6 +6,7 @@
 
 #include 
 #include 
+#include 
 
 #include "arm-smmu.h"
 
@@ -148,6 +149,32 @@ static const struct arm_smmu_impl arm_mmu500_impl = {
 };
 
 
+static int qcom_sdm845_smmu500_cfg_probe(struct arm_smmu_device *smmu)
+{
+   int ret;
+
+   /*
+* To address performance degradation in non-real time clien

Re: [PATCH v2 00/17] Arm SMMU refactoring

2019-08-20 Thread Vivek Gautam




On 8/16/2019 12:07 AM, Robin Murphy wrote:

Hi all,

v1 for context: https://patchwork.kernel.org/cover/11087347/

Here's a quick v2 attempting to address all the minor comments; I've
tweaked a whole bunch of names, added some verbosity in macros and
comments for clarity, and rejigged arm_smmu_impl_init() for a bit more
structure. The (new) patches #1 and #2 are up front as conceptual fixes,
although they're not actually critical - it turns out to be more of an
embarrassment than a real problem in practice.

For ease of reference, the overall diff against v1 is attached below.

Robin.


Hi,

I have given this series a shot with 5.3-rc5 kernel on MTP sdm845 
device, and smmu works as expected.


Tested-by: Vivek Gautam 

Best regards
Vivek



Robin Murphy (17):
   iommu/arm-smmu: Mask TLBI address correctly
   iommu/qcom: Mask TLBI addresses correctly
   iommu/arm-smmu: Convert GR0 registers to bitfields
   iommu/arm-smmu: Convert GR1 registers to bitfields
   iommu/arm-smmu: Convert context bank registers to bitfields
   iommu/arm-smmu: Rework cb_base handling
   iommu/arm-smmu: Split arm_smmu_tlb_inv_range_nosync()
   iommu/arm-smmu: Get rid of weird "atomic" write
   iommu/arm-smmu: Abstract GR1 accesses
   iommu/arm-smmu: Abstract context bank accesses
   iommu/arm-smmu: Abstract GR0 accesses
   iommu/arm-smmu: Rename arm-smmu-regs.h
   iommu/arm-smmu: Add implementation infrastructure
   iommu/arm-smmu: Move Secure access quirk to implementation
   iommu/arm-smmu: Add configuration implementation hook
   iommu/arm-smmu: Add reset implementation hook
   iommu/arm-smmu: Add context init implementation hook

  MAINTAINERS   |   3 +-
  drivers/iommu/Makefile|   2 +-
  drivers/iommu/arm-smmu-impl.c | 174 +++
  drivers/iommu/arm-smmu-regs.h | 210 -
  drivers/iommu/arm-smmu.c  | 573 +++---
  drivers/iommu/arm-smmu.h  | 394 +++
  drivers/iommu/qcom_iommu.c|  17 +-
  7 files changed, 764 insertions(+), 609 deletions(-)
  create mode 100644 drivers/iommu/arm-smmu-impl.c
  delete mode 100644 drivers/iommu/arm-smmu-regs.h
  create mode 100644 drivers/iommu/arm-smmu.h

->8-
diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c
index 3c731e087854..e22e9004f449 100644
--- a/drivers/iommu/arm-smmu-impl.c
+++ b/drivers/iommu/arm-smmu-impl.c
@@ -28,7 +28,7 @@ static int arm_smmu_gr0_ns(int offset)
  static u32 arm_smmu_read_ns(struct arm_smmu_device *smmu, int page,
int offset)
  {
-   if (page == 0)
+   if (page == ARM_SMMU_GR0)
offset = arm_smmu_gr0_ns(offset);
return readl_relaxed(arm_smmu_page(smmu, page) + offset);
  }
@@ -36,7 +36,7 @@ static u32 arm_smmu_read_ns(struct arm_smmu_device *smmu, int 
page,
  static void arm_smmu_write_ns(struct arm_smmu_device *smmu, int page,
  int offset, u32 val)
  {
-   if (page == 0)
+   if (page == ARM_SMMU_GR0)
offset = arm_smmu_gr0_ns(offset);
writel_relaxed(val, arm_smmu_page(smmu, page) + offset);
  }
@@ -52,18 +52,17 @@ struct cavium_smmu {
struct arm_smmu_device smmu;
u32 id_base;
  };
-#define to_csmmu(s)container_of(s, struct cavium_smmu, smmu)
  
  static int cavium_cfg_probe(struct arm_smmu_device *smmu)

  {
static atomic_t context_count = ATOMIC_INIT(0);
+   struct cavium_smmu *cs = container_of(smmu, struct cavium_smmu, smmu);
/*
 * Cavium CN88xx erratum #27704.
 * Ensure ASID and VMID allocation is unique across all SMMUs in
 * the system.
 */
-   to_csmmu(smmu)->id_base = atomic_fetch_add(smmu->num_context_banks,
-  _count);
+   cs->id_base = atomic_fetch_add(smmu->num_context_banks, _count);
dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 
27704\n");
  
  	return 0;

@@ -71,12 +70,13 @@ static int cavium_cfg_probe(struct arm_smmu_device *smmu)
  
  int cavium_init_context(struct arm_smmu_domain *smmu_domain)

  {
-   u32 id_base = to_csmmu(smmu_domain->smmu)->id_base;
+   struct cavium_smmu *cs = container_of(smmu_domain->smmu,
+ struct cavium_smmu, smmu);
  
  	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)

-   smmu_domain->cfg.vmid += id_base;
+   smmu_domain->cfg.vmid += cs->id_base;
else
-   smmu_domain->cfg.asid += id_base;
+   smmu_domain->cfg.asid += cs->id_base;
  
  	return 0;

  }
@@ -88,18 +88,18 @@ const struct arm_smmu_impl cavium_impl = {
  
  struct arm_smmu_device *cavium_smmu_impl_init(struct arm_smmu_device *smmu)

  {
-   struct cavium_smmu *csmmu;
+   struct cavium_smmu *cs;
  
-	csmmu = devm_kzalloc(smmu->dev, sizeof(*csmmu), GFP_KERNEL);

-   i

Re: [PATCH v3 4/4] arm64: dts/sdm845: Enable FW implemented safe sequence handler on MTP

2019-08-11 Thread Vivek Gautam
On Tue, Aug 6, 2019 at 3:56 AM Bjorn Andersson
 wrote:
>
> On Wed 12 Jun 00:15 PDT 2019, Vivek Gautam wrote:
>
> > Indicate on MTP SDM845 that firmware implements handler to
> > TLB invalidate erratum SCM call where SAFE sequence is toggled
> > to achieve optimum performance on real-time clients, such as
> > display and camera.
> >
> > Signed-off-by: Vivek Gautam 
> > ---
> >  arch/arm64/boot/dts/qcom/sdm845.dtsi | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi 
> > b/arch/arm64/boot/dts/qcom/sdm845.dtsi
> > index 78ec373a2b18..6a73d9744a71 100644
> > --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
> > +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
> > @@ -2368,6 +2368,7 @@
> >   compatible = "qcom,sdm845-smmu-500", "arm,mmu-500";
> >   reg = <0 0x1500 0 0x8>;
> >   #iommu-cells = <2>;
> > + qcom,smmu-500-fw-impl-safe-errata;
>
> Looked back at this series and started to wonder if there there is a
> case where this should not be set? I mean we're after all adding this to
> the top 845 dtsi...

My bad.
This is not valid in the case of cheza. Cheza firmware doesn't implement
the safe errata handling hook.
On cheza we just have the liberty of accessing the secure registers
through scm calls - this is what
we were doing in the earlier patch series handling this erratum.
So, a property like this should go to the mtp board's dts file.

Thanks

Vivek

>
> How about making it the default in the driver and opt out of the errata
> once there is a need?
>
> Regards,
> Bjorn
>
> >   #global-interrupts = <1>;
> >   interrupts = ,
> >,
> > --
> > QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
> > of Code Aurora Forum, hosted by The Linux Foundation
> >
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu



--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 1/4] firmware: qcom_scm-64: Add atomic version of qcom_scm_call

2019-08-08 Thread Vivek Gautam
On Tue, Aug 6, 2019 at 3:58 AM Bjorn Andersson
 wrote:
>
> On Wed 19 Jun 04:34 PDT 2019, Vivek Gautam wrote:
>
> > On Tue, Jun 18, 2019 at 11:25 PM Will Deacon  wrote:
> > >
> > > On Wed, Jun 12, 2019 at 12:45:51PM +0530, Vivek Gautam wrote:
> > > > > There are scenarios where drivers are required to make a
> > > > scm call in atomic context, such as in one of the qcom's
> > > > arm-smmu-500 errata [1].
> > > >
> > > > [1] ("https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/
> > > >   drivers/iommu/arm-smmu.c?h=CogSystems-msm-49/
> > > >   msm-4.9=da765c6c75266b38191b38ef086274943f353ea7")
> > > >
> > > > Signed-off-by: Vivek Gautam 
> > > > Reviewed-by: Bjorn Andersson 
> > > > ---
> > > >  drivers/firmware/qcom_scm-64.c | 136 
> > > > -
> > > >  1 file changed, 92 insertions(+), 44 deletions(-)
> > > >
> > > > diff --git a/drivers/firmware/qcom_scm-64.c 
> > > > b/drivers/firmware/qcom_scm-64.c
> > > > index 91d5ad7cf58b..b6dca32c5ac4 100644
> > > > --- a/drivers/firmware/qcom_scm-64.c
> > > > +++ b/drivers/firmware/qcom_scm-64.c
> >
> > [snip]
> >
> > > > +
> > > > +static void qcom_scm_call_do(const struct qcom_scm_desc *desc,
> > > > +  struct arm_smccc_res *res, u32 fn_id,
> > > > +  u64 x5, bool atomic)
> > > > +{
> > >
> > > Maybe pass in the call type (ARM_SMCCC_FAST_CALL vs ARM_SMCCC_STD_CALL)
> > > instead of "bool atomic"? Would certainly make the callsites easier to
> > > understand.
> >
> > Sure, will do that.
> >
> > >
> > > > + int retry_count = 0;
> > > > +
> > > > + if (!atomic) {
> > > > + do {
> > > > + mutex_lock(_scm_lock);
> > > > +
> > > > + __qcom_scm_call_do(desc, res, fn_id, x5,
> > > > +ARM_SMCCC_STD_CALL);
> > > > +
> > > > + mutex_unlock(_scm_lock);
> > > > +
> > > > + if (res->a0 == QCOM_SCM_V2_EBUSY) {
> > > > + if (retry_count++ > 
> > > > QCOM_SCM_EBUSY_MAX_RETRY)
> > > > + break;
> > > > + msleep(QCOM_SCM_EBUSY_WAIT_MS);
> > > > + }
> > > > + }  while (res->a0 == QCOM_SCM_V2_EBUSY);
> > > > + } else {
> > > > + __qcom_scm_call_do(desc, res, fn_id, x5, 
> > > > ARM_SMCCC_FAST_CALL);
> > > > + }
> > >
> > > Is it safe to make concurrent FAST calls?
> >
> > I better add a spinlock here.
> >
>
> Hi Vivek,
>
> Would you be able to respin this patch, so that we could unblock the
> introduction of the display nodes in the various device?

Will pointed [1] to the restructuring of arm-smmu to support
implementation specific details.
That hasn't been posted yet, and I haven't yet been able to work on that either.
I will be happy to respin this series with the comments addressed if
Will is okay to pull changes to unblock sdm845 devices. :)

[1] https://lore.kernel.org/patchwork/patch/1087457/

Thanks & Regards
Vivek

>
> Regards,
> Bjorn
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu



-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation


Re: [PATCH v3 3/4] iommu/arm-smmu: Add support to handle Qcom's wait-for-safe logic

2019-06-27 Thread Vivek Gautam
On Wed, Jun 26, 2019 at 8:18 PM Will Deacon  wrote:
>
> On Wed, Jun 26, 2019 at 12:03:02PM +0530, Vivek Gautam wrote:
> > On Tue, Jun 25, 2019 at 7:09 PM Will Deacon  wrote:
> > >
> > > On Tue, Jun 25, 2019 at 12:34:56PM +0530, Vivek Gautam wrote:
> > > > On Mon, Jun 24, 2019 at 10:33 PM Will Deacon  wrote:
> > > > > Instead, I think this needs to be part of a separate file that is 
> > > > > maintained
> > > > > by you, which follows on from the work that Krishna is doing for 
> > > > > nvidia
> > > > > built on top of Robin's prototype patches:
> > > > >
> > > > > http://linux-arm.org/git?p=linux-rm.git;a=shortlog;h=refs/heads/iommu/smmu-impl
> > > >
> > > > Looking at this branch quickly, it seem there can be separate 
> > > > implementation
> > > > level configuration file that can be added.
> > > > But will this also handle separate page table ops when required in 
> > > > future.
> > >
> > > Nothing's set in stone, but having the implementation-specific code
> > > constrain the page-table format (especially wrt quirks) sounds reasonable 
> > > to
> > > me. I'm currently waiting for Krishna to respin the nvidia changes [1] on
> > > top of this so that we can see how well the abstractions are holding up.
> >
> > Sure. Would you want me to try Robin's branch and take out the qualcomm
> > related stuff to its own implementation? Or, would you like me to respin 
> > this
> > series so that you can take it in to enable SDM845 boards such as, MTP
> > and dragonboard to have a sane build - debian, etc. so people benefit
> > out of it.
>
> I can't take this series without Acks on the firmware calling changes, and I
> plan to send my 5.3 patches to Joerg at the end of the week so they get some
> time in -next. In which case, I think it may be worth you having a play with
> the branch above so we can get a better idea of any additional smmu_impl hooks
> you may need.

Cool. I will play around with it and get something tangible and meaningful.

>
> > Qualcomm stuff is lying in qcom-smmu and arm-smmu and may take some
> > time to stub out the implementation related details.
>
> Not sure I follow you here. Are you talking about qcom_iommu.c?

That's right. The qcom_iommu.c solved a different issue of secure context bank
allocations, when Rob forked out this driver and reused some of the
arm-smmu.c stuff.

We will take a look at that once we start adding the qcom implementation.

Thanks
Vivek

>
> Will
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu



-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 3/4] iommu/arm-smmu: Add support to handle Qcom's wait-for-safe logic

2019-06-26 Thread Vivek Gautam
On Tue, Jun 25, 2019 at 7:09 PM Will Deacon  wrote:
>
> On Tue, Jun 25, 2019 at 12:34:56PM +0530, Vivek Gautam wrote:
> > On Mon, Jun 24, 2019 at 10:33 PM Will Deacon  wrote:
> > > Instead, I think this needs to be part of a separate file that is 
> > > maintained
> > > by you, which follows on from the work that Krishna is doing for nvidia
> > > built on top of Robin's prototype patches:
> > >
> > > http://linux-arm.org/git?p=linux-rm.git;a=shortlog;h=refs/heads/iommu/smmu-impl
> >
> > Looking at this branch quickly, it seem there can be separate implementation
> > level configuration file that can be added.
> > But will this also handle separate page table ops when required in future.
>
> Nothing's set in stone, but having the implementation-specific code
> constrain the page-table format (especially wrt quirks) sounds reasonable to
> me. I'm currently waiting for Krishna to respin the nvidia changes [1] on
> top of this so that we can see how well the abstractions are holding up.

Sure. Would you want me to try Robin's branch and take out the qualcomm
related stuff to its own implementation? Or, would you like me to respin this
series so that you can take it in to enable SDM845 boards such as MTP
and dragonboard to have a sane build - debian, etc. - so people benefit
from it?
Qualcomm stuff is lying in qcom-smmu and arm-smmu and may take some
time to stub out the implementation related details.
Let me know your take.

Thanks & regards
Vivek

>
> I certainly won't merge the stuff until we have a user.
>
> Will
>
> [1] 
> https://lkml.kernel.org/r/1543887414-18209-1-git-send-email-vdu...@nvidia.com
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu



-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation


Re: [PATCH v3 3/4] iommu/arm-smmu: Add support to handle Qcom's wait-for-safe logic

2019-06-25 Thread Vivek Gautam
Hi Will,


On Mon, Jun 24, 2019 at 10:33 PM Will Deacon  wrote:
>
> [+Krishna]
>
> Hi Vivek,
>
> On Mon, Jun 24, 2019 at 03:58:32PM +0530, Vivek Gautam wrote:
> > On Tue, Jun 18, 2019 at 11:22 PM Will Deacon  wrote:
> > > On Fri, Jun 14, 2019 at 02:48:07PM +0530, Vivek Gautam wrote:
> > > > On 6/14/2019 9:35 AM, Bjorn Andersson wrote:
> > > > > On Wed 12 Jun 00:15 PDT 2019, Vivek Gautam wrote:
> > > > > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> > > > > > index 0ad086da399c..3c3ad43eda97 100644
> > > > > > --- a/drivers/iommu/arm-smmu.c
> > > > > > +++ b/drivers/iommu/arm-smmu.c
> > > > > > @@ -39,6 +39,7 @@
> > > > > >   #include 
> > > > > >   #include 
> > > > > >   #include 
> > > > > > +#include 
> > > > > >   #include 
> > > > > >   #include 
> > > > > > @@ -177,6 +178,7 @@ struct arm_smmu_device {
> > > > > >   u32 features;
> > > > > >   #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
> > > > > > +#define ARM_SMMU_OPT_QCOM_FW_IMPL_SAFE_ERRATA (1 << 1)
> > > > > >   u32 options;
> > > > > >   enum arm_smmu_arch_version  version;
> > > > > >   enum arm_smmu_implementationmodel;
> > > > > > @@ -262,6 +264,7 @@ static bool using_legacy_binding, 
> > > > > > using_generic_binding;
> > > > > >   static struct arm_smmu_option_prop arm_smmu_options[] = {
> > > > > >   { ARM_SMMU_OPT_SECURE_CFG_ACCESS, 
> > > > > > "calxeda,smmu-secure-config-access" },
> > > > > > + { ARM_SMMU_OPT_QCOM_FW_IMPL_SAFE_ERRATA, 
> > > > > > "qcom,smmu-500-fw-impl-safe-errata" },
> > > > > This should be added to the DT binding as well.
> > > >
> > > > Ah right. I missed that. Will add this and respin unless Robin and Will 
> > > > have
> > > > concerns with this change.
> > >
> > > My only concern really is whether it's safe for us to turn this off. It's
> > > clear that somebody went to a lot of effort to add this extra goodness to
> > > the IP, but your benchmarks suggest they never actually tried it out after
> > > they finished building it.
> > >
> > > Is there some downside I'm not seeing from disabling this stuff?
> >
> > This wait-for-safe is a TLB invalidation enhancement to help display
> > and camera devices.
> > The SMMU hardware throttles the invalidations so that clients such as
> > display and camera can indicate when to start the invalidation.
> > So the SMMU essentially reduces the rate at which invalidations are
> > serviced from its queue. This also throttles the invalidations from
> > other masters too.
> >
> > On sdm845, the software is expected to serialize the invalidation
> > command loading into SMMU invalidation FIFO using hardware locks
> > (downstream code [2]), and is also expected to throttle non-real time
> > clients while waiting for SAFE==1 (downstream code[2]). We don't do
> > any of these yet, and as per my understanding as this wait-for-safe is
> > enabled by the bootloader in a one time config, this logic reduces
> > performance of devices such as usb and ufs.
> >
> > There isn't any downside from disabling this logic until we have all
> > the pieces together from downstream in upstream kernels, and until we
> > have sdm845 devices that are running with full display/gfx stack
> > running. That's when we plan to revisit this and enable all the pieces
> > to get display and USB/UFS working with their optimum performance.
>
> Generally, I'd agree that approaching this incrementally makes sense, but
> in this case you're adding new device-tree properties
> ("qcom,smmu-500-fw-impl-safe-errata") in order to do so, which seems
> questionable if they're only going to be used in the short-term and will
> be obsolete once Linux knows how to drive the device properly.

This device tree property will still be valid when we handle the wait-for-safe
properly for sdm845.
The "qcom,smmu-500-fw-impl-safe-errata" property just indicates that the
firmware has handlers to do the entire sequence -
* read the secure register
* set/reset the bits in the register to enable/disable wait-for-safe for certain
  devices.
And this is valid when firmware masks access to t

Re: [PATCH v3 3/4] iommu/arm-smmu: Add support to handle Qcom's wait-for-safe logic

2019-06-24 Thread Vivek Gautam
Hi Will,

On Tue, Jun 18, 2019 at 11:22 PM Will Deacon  wrote:
>
> Hi Vivek,
>
> On Fri, Jun 14, 2019 at 02:48:07PM +0530, Vivek Gautam wrote:
> > On 6/14/2019 9:35 AM, Bjorn Andersson wrote:
> > > On Wed 12 Jun 00:15 PDT 2019, Vivek Gautam wrote:
> > >
> > > > Qcom's implementation of arm,mmu-500 adds a WAIT-FOR-SAFE logic
> > > > to address under-performance issues in real-time clients, such as
> > > > Display, and Camera.
> > > > On receiving an invalidation requests, the SMMU forwards SAFE request
> > > > to these clients and waits for SAFE ack signal from real-time clients.
> > > > The SAFE signal from such clients is used to qualify the start of
> > > > invalidation.
> > > > This logic is controlled by chicken bits, one for each - MDP (display),
> > > > IFE0, and IFE1 (camera), that can be accessed only from secure software
> > > > on sdm845.
> > > >
> > > > This configuration, however, degrades the performance of non-real time
> > > > clients, such as USB, and UFS etc. This happens because, with 
> > > > wait-for-safe
> > > > logic enabled the hardware tries to throttle non-real time clients while
> > > > waiting for SAFE ack signals from real-time clients.
> > > >
> > > > On MTP sdm845 devices, with wait-for-safe logic enabled at the boot time
> > > > by the bootloaders we see degraded performance of USB and UFS when 
> > > > kernel
> > > > enables the smmu stage-1 translations for these clients.
> > > > Turn off this wait-for-safe logic from the kernel gets us back the perf
> > > > of USB and UFS devices until we re-visit this when we start seeing perf
> > > > issues on display/camera on upstream supported SDM845 platforms.
>
> Re-visit what exactly, and how?
>
> > > > Now, different bootloaders with their access control policies allow this
> > > > register access differently through secure monitor calls -
> > > > 1) With one we can issue io-read/write secure monitor call (qcom-scm)
> > > > to update the register, while,
> > > > 2) With other, such as one on MTP sdm845 we should use the specific
> > > > qcom-scm command to send request to do the complete register
> > > > configuration.
> > > > Adding a separate device tree flag for arm-smmu to identify which
> > > > firmware configuration of the two mentioned above we use.
> > > > Not adding code change to allow type-(1) bootloaders to toggle the
> > > > safe using io-read/write qcom-scm call.
> > > >
> > > > This change is inspired by the downstream change from Patrick Daly
> > > > to address performance issues with display and camera by handling
> > > > > this wait-for-safe within separate io-pagetable ops to do TLB
> > > > maintenance. So a big thanks to him for the change.
> > > >
> > > > Without this change the UFS reads are pretty slow:
> > > > $ time dd if=/dev/sda of=/dev/zero bs=1048576 count=10 conv=sync
> > > > 10+0 records in
> > > > 10+0 records out
> > > > 10485760 bytes (10.0MB) copied, 22.394903 seconds, 457.2KB/s
> > > > real0m 22.39s
> > > > user0m 0.00s
> > > > sys 0m 0.01s
> > > >
> > > > With this change they are back to rock!
> > > > $ time dd if=/dev/sda of=/dev/zero bs=1048576 count=300 conv=sync
> > > > 300+0 records in
> > > > 300+0 records out
> > > > 314572800 bytes (300.0MB) copied, 1.030541 seconds, 291.1MB/s
> > > > real0m 1.03s
> > > > user0m 0.00s
> > > > sys 0m 0.54s
> > > >
> > > > Signed-off-by: Vivek Gautam 
> > > > ---
> > > >   drivers/iommu/arm-smmu.c | 16 
> > > >   1 file changed, 16 insertions(+)
> > > >
> > > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> > > > index 0ad086da399c..3c3ad43eda97 100644
> > > > --- a/drivers/iommu/arm-smmu.c
> > > > +++ b/drivers/iommu/arm-smmu.c
> > > > @@ -39,6 +39,7 @@
> > > >   #include 
> > > >   #include 
> > > >   #include 
> > > > +#include 
> > > >   #include 
> > > >   #include 
> > > > @@ -177,6 +178,7 @@ struct arm_smmu_device {
> > > >   u32 features;
> > > >   #define ARM_SMMU_OPT_SECURE_

Re: [PATCH v3 1/4] firmware: qcom_scm-64: Add atomic version of qcom_scm_call

2019-06-19 Thread Vivek Gautam
On Tue, Jun 18, 2019 at 11:25 PM Will Deacon  wrote:
>
> On Wed, Jun 12, 2019 at 12:45:51PM +0530, Vivek Gautam wrote:
> > There are scenarios where drivers are required to make a
> > scm call in atomic context, such as in one of the qcom's
> > arm-smmu-500 errata [1].
> >
> > [1] ("https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/
> >   drivers/iommu/arm-smmu.c?h=CogSystems-msm-49/
> >   msm-4.9=da765c6c75266b38191b38ef086274943f353ea7")
> >
> > Signed-off-by: Vivek Gautam 
> > Reviewed-by: Bjorn Andersson 
> > ---
> >  drivers/firmware/qcom_scm-64.c | 136 
> > -
> >  1 file changed, 92 insertions(+), 44 deletions(-)
> >
> > diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c
> > index 91d5ad7cf58b..b6dca32c5ac4 100644
> > --- a/drivers/firmware/qcom_scm-64.c
> > +++ b/drivers/firmware/qcom_scm-64.c

[snip]

> > +
> > +static void qcom_scm_call_do(const struct qcom_scm_desc *desc,
> > +  struct arm_smccc_res *res, u32 fn_id,
> > +  u64 x5, bool atomic)
> > +{
>
> Maybe pass in the call type (ARM_SMCCC_FAST_CALL vs ARM_SMCCC_STD_CALL)
> instead of "bool atomic"? Would certainly make the callsites easier to
> understand.

Sure, will do that.

>
> > + int retry_count = 0;
> > +
> > + if (!atomic) {
> > + do {
> > + mutex_lock(_scm_lock);
> > +
> > + __qcom_scm_call_do(desc, res, fn_id, x5,
> > +ARM_SMCCC_STD_CALL);
> > +
> > + mutex_unlock(_scm_lock);
> > +
> > + if (res->a0 == QCOM_SCM_V2_EBUSY) {
> > + if (retry_count++ > QCOM_SCM_EBUSY_MAX_RETRY)
> > + break;
> > + msleep(QCOM_SCM_EBUSY_WAIT_MS);
> > + }
> > + }  while (res->a0 == QCOM_SCM_V2_EBUSY);
> > + } else {
> > + __qcom_scm_call_do(desc, res, fn_id, x5, ARM_SMCCC_FAST_CALL);
> > + }
>
> Is it safe to make concurrent FAST calls?

I better add a spinlock here.

Thanks & regards
Vivek

>
> Will
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu



-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 3/4] iommu/arm-smmu: Add support to handle Qcom's wait-for-safe logic

2019-06-14 Thread Vivek Gautam




On 6/14/2019 9:35 AM, Bjorn Andersson wrote:

On Wed 12 Jun 00:15 PDT 2019, Vivek Gautam wrote:


Qcom's implementation of arm,mmu-500 adds a WAIT-FOR-SAFE logic
to address under-performance issues in real-time clients, such as
Display, and Camera.
On receiving an invalidation requests, the SMMU forwards SAFE request
to these clients and waits for SAFE ack signal from real-time clients.
The SAFE signal from such clients is used to qualify the start of
invalidation.
This logic is controlled by chicken bits, one for each - MDP (display),
IFE0, and IFE1 (camera), that can be accessed only from secure software
on sdm845.

This configuration, however, degrades the performance of non-real time
clients, such as USB, and UFS etc. This happens because, with wait-for-safe
logic enabled the hardware tries to throttle non-real time clients while
waiting for SAFE ack signals from real-time clients.

On MTP sdm845 devices, with wait-for-safe logic enabled at the boot time
by the bootloaders we see degraded performance of USB and UFS when kernel
enables the smmu stage-1 translations for these clients.
Turning off this wait-for-safe logic from the kernel gets us back the perf
of USB and UFS devices until we re-visit this when we start seeing perf
issues on display/camera on upstream supported SDM845 platforms.

Now, different bootloaders with their access control policies allow this
register access differently through secure monitor calls -
1) With one we can issue io-read/write secure monitor call (qcom-scm)
to update the register, while,
2) With other, such as one on MTP sdm845 we should use the specific
qcom-scm command to send request to do the complete register
configuration.
Adding a separate device tree flag for arm-smmu to identify which
firmware configuration of the two mentioned above we use.
Not adding code change to allow type-(1) bootloaders to toggle the
safe using io-read/write qcom-scm call.

This change is inspired by the downstream change from Patrick Daly
to address performance issues with display and camera by handling
this wait-for-safe within separate io-pagetable ops to do TLB
maintenance. So a big thanks to him for the change.

Without this change the UFS reads are pretty slow:
$ time dd if=/dev/sda of=/dev/zero bs=1048576 count=10 conv=sync
10+0 records in
10+0 records out
10485760 bytes (10.0MB) copied, 22.394903 seconds, 457.2KB/s
real0m 22.39s
user0m 0.00s
sys 0m 0.01s

With this change they are back to rock!
$ time dd if=/dev/sda of=/dev/zero bs=1048576 count=300 conv=sync
300+0 records in
300+0 records out
314572800 bytes (300.0MB) copied, 1.030541 seconds, 291.1MB/s
real0m 1.03s
user0m 0.00s
sys 0m 0.54s

Signed-off-by: Vivek Gautam 
---
  drivers/iommu/arm-smmu.c | 16 
  1 file changed, 16 insertions(+)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 0ad086da399c..3c3ad43eda97 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -39,6 +39,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  
@@ -177,6 +178,7 @@ struct arm_smmu_device {

u32 features;
  
  #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)

+#define ARM_SMMU_OPT_QCOM_FW_IMPL_SAFE_ERRATA (1 << 1)
u32 options;
enum arm_smmu_arch_version  version;
enum arm_smmu_implementationmodel;
@@ -262,6 +264,7 @@ static bool using_legacy_binding, using_generic_binding;
  
  static struct arm_smmu_option_prop arm_smmu_options[] = {

{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
+   { ARM_SMMU_OPT_QCOM_FW_IMPL_SAFE_ERRATA, 
"qcom,smmu-500-fw-impl-safe-errata" },

This should be added to the DT binding as well.


Ah right. I missed that. Will add this and respin unless Robin and Will 
have concerns with this change.





{ 0, NULL},
  };
  
@@ -2292,6 +2295,19 @@ static int arm_smmu_device_probe(struct platform_device *pdev)

arm_smmu_device_reset(smmu);
arm_smmu_test_smr_masks(smmu);
  
+	/*

+* To address performance degradation in non-real time clients,
+* such as USB and UFS, turn off wait-for-safe on sdm845 platforms,
+* such as MTP, whose firmwares implement corresponding secure monitor
+* call handlers.
+*/
+   if (of_device_is_compatible(smmu->dev->of_node, "qcom,sdm845-smmu-500") 
&&
+   smmu->options & ARM_SMMU_OPT_QCOM_FW_IMPL_SAFE_ERRATA) {
+   err = qcom_scm_qsmmu500_wait_safe_toggle(0);
+   if (err)
+   dev_warn(dev, "Failed to turn off SAFE logic\n");
+   }
+

This looks good, I presume at some point we can profile things and
review if it's worth toggling this on the fly, but given that this is
conditioned on smmu->options that should be an implementation detail..

Review

Re: [PATCH v3 4/4] arm64: dts/sdm845: Enable FW implemented safe sequence handler on MTP

2019-06-14 Thread Vivek Gautam




On 6/14/2019 9:36 AM, Bjorn Andersson wrote:

On Wed 12 Jun 00:15 PDT 2019, Vivek Gautam wrote:


Indicate on MTP SDM845 that firmware implements handler to
TLB invalidate erratum SCM call where SAFE sequence is toggled
to achieve optimum performance on real-time clients, such as
display and camera.

Signed-off-by: Vivek Gautam 

Reviewed-by: Bjorn Andersson 


Thanks Bjorn for reviewing this.

Best regards
Vivek

[snip]
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v3 0/4] Qcom smmu-500 wait-for-safe handling for sdm845

2019-06-12 Thread Vivek Gautam
Subject changed, older subject was -
Qcom smmu-500 TLB invalidation errata for sdm845.
The previous version of the patches is at [1]:

Qcom's implementation of smmu-500 on sdm845 adds a hardware logic called
wait-for-safe. This logic helps in meeting the invalidation requirements
from 'real-time clients', such as display and camera. This wait-for-safe
logic ensures that the invalidations happen after getting an ack from these
devices.
In this patch-series we are disabling this wait-for-safe logic from the
arm-smmu driver's probe as with this enabled the hardware tries to
throttle invalidations from 'non-real-time clients', such as USB and UFS.

For detailed information please refer to patch [3/4] in this series.
I have included the device tree patch too in this series for someone who
would like to test out this. Here's a branch [2] that gets display on MTP
SDM845 device.

This patch series is inspired from downstream work to handle under-performance
issues on real-time clients on sdm845. In downstream we add separate page table
ops to handle TLB maintenance and toggle wait-for-safe in the tlb_sync call so that
we achieve the required performance for display and camera [3, 4].

Changes since v2:
 * Dropped the patch to add atomic io_read/write scm API.
 * Removed support for any separate page table ops to handle wait-for-safe.
   Currently just disabling this wait-for-safe logic from 
arm_smmu_device_probe()
   to achieve performance on USB/UFS on sdm845.
 * Added a device tree patch to add smmu option for fw-implemented support
   for SCM call to take care of SAFE toggling.

Changes since v1:
 * Addressed Will and Robin's comments:
- Dropped the patch[4] that forked out __arm_smmu_tlb_inv_range_nosync(),
  and __arm_smmu_tlb_sync().
- Cleaned up the errata patch further to use downstream polling mechanism
  for tlb sync.
 * No change in SCM call patches - patches 1 to 3.

[1] https://lore.kernel.org/patchwork/cover/983913/
[2] https://github.com/vivekgautam1/linux/tree/v5.2-rc4/sdm845-display-working
[3] 
https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/drivers/iommu/arm-smmu.c?h=CogSystems-msm-49/msm-4.9=da765c6c75266b38191b38ef086274943f353ea7
[4] 
https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/drivers/iommu/arm-smmu.c?h=CogSystems-msm-49/msm-4.9=8696005aaaf745de68f57793c1a534a34345c30a

Vivek Gautam (4):
  firmware: qcom_scm-64: Add atomic version of qcom_scm_call
  firmware/qcom_scm: Add scm call to handle smmu errata
  iommu/arm-smmu: Add support to handle Qcom's wait-for-safe logic
  arm64: dts/sdm845: Enable FW implemented safe sequence handler on MTP

 arch/arm64/boot/dts/qcom/sdm845.dtsi |   1 +
 drivers/firmware/qcom_scm-32.c   |   5 ++
 drivers/firmware/qcom_scm-64.c   | 149 ---
 drivers/firmware/qcom_scm.c  |   6 ++
 drivers/firmware/qcom_scm.h  |   5 ++
 drivers/iommu/arm-smmu.c |  16 
 include/linux/qcom_scm.h |   2 +
 7 files changed, 140 insertions(+), 44 deletions(-)

-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v3 2/4] firmware/qcom_scm: Add scm call to handle smmu errata

2019-06-12 Thread Vivek Gautam
Qcom's smmu-500 needs to toggle wait-for-safe logic to
handle TLB invalidations.
A few firmwares allow doing that through the SCM interface.
Add API to toggle wait for safe from firmware through a
SCM call.

Signed-off-by: Vivek Gautam 
Reviewed-by: Bjorn Andersson 
---
 drivers/firmware/qcom_scm-32.c |  5 +
 drivers/firmware/qcom_scm-64.c | 13 +
 drivers/firmware/qcom_scm.c|  6 ++
 drivers/firmware/qcom_scm.h|  5 +
 include/linux/qcom_scm.h   |  2 ++
 5 files changed, 31 insertions(+)

diff --git a/drivers/firmware/qcom_scm-32.c b/drivers/firmware/qcom_scm-32.c
index 215061c581e1..bee8729525ec 100644
--- a/drivers/firmware/qcom_scm-32.c
+++ b/drivers/firmware/qcom_scm-32.c
@@ -614,3 +614,8 @@ int __qcom_scm_io_writel(struct device *dev, phys_addr_t 
addr, unsigned int val)
return qcom_scm_call_atomic2(QCOM_SCM_SVC_IO, QCOM_SCM_IO_WRITE,
 addr, val);
 }
+
+int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev, bool enable)
+{
+   return -ENODEV;
+}
diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c
index b6dca32c5ac4..23de54b75cd7 100644
--- a/drivers/firmware/qcom_scm-64.c
+++ b/drivers/firmware/qcom_scm-64.c
@@ -550,3 +550,16 @@ int __qcom_scm_io_writel(struct device *dev, phys_addr_t 
addr, unsigned int val)
return qcom_scm_call(dev, QCOM_SCM_SVC_IO, QCOM_SCM_IO_WRITE,
 , );
 }
+
+int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev, bool en)
+{
+   struct qcom_scm_desc desc = {0};
+   struct arm_smccc_res res;
+
+   desc.args[0] = QCOM_SCM_CONFIG_SAFE_EN_CLIENT_ALL;
+   desc.args[1] = en;
+   desc.arginfo = QCOM_SCM_ARGS(2);
+
+   return qcom_scm_call_atomic(dev, QCOM_SCM_SVC_SMMU_PROGRAM,
+   QCOM_SCM_CONFIG_SAFE_EN, , );
+}
diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c
index 2ddc118dba1b..2b3b7a8c4270 100644
--- a/drivers/firmware/qcom_scm.c
+++ b/drivers/firmware/qcom_scm.c
@@ -344,6 +344,12 @@ int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, 
u32 spare)
 }
 EXPORT_SYMBOL(qcom_scm_iommu_secure_ptbl_init);
 
+int qcom_scm_qsmmu500_wait_safe_toggle(bool en)
+{
+   return __qcom_scm_qsmmu500_wait_safe_toggle(__scm->dev, en);
+}
+EXPORT_SYMBOL(qcom_scm_qsmmu500_wait_safe_toggle);
+
 int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val)
 {
return __qcom_scm_io_readl(__scm->dev, addr, val);
diff --git a/drivers/firmware/qcom_scm.h b/drivers/firmware/qcom_scm.h
index 99506bd873c0..0b63ded89b41 100644
--- a/drivers/firmware/qcom_scm.h
+++ b/drivers/firmware/qcom_scm.h
@@ -91,10 +91,15 @@ extern int __qcom_scm_restore_sec_cfg(struct device *dev, 
u32 device_id,
  u32 spare);
 #define QCOM_SCM_IOMMU_SECURE_PTBL_SIZE3
 #define QCOM_SCM_IOMMU_SECURE_PTBL_INIT4
+#define QCOM_SCM_SVC_SMMU_PROGRAM  0x15
+#define QCOM_SCM_CONFIG_SAFE_EN0x3
+#define QCOM_SCM_CONFIG_SAFE_EN_CLIENT_ALL 0x2
 extern int __qcom_scm_iommu_secure_ptbl_size(struct device *dev, u32 spare,
 size_t *size);
 extern int __qcom_scm_iommu_secure_ptbl_init(struct device *dev, u64 addr,
 u32 size, u32 spare);
+extern int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev,
+   bool enable);
 #define QCOM_MEM_PROT_ASSIGN_ID0x16
 extern int  __qcom_scm_assign_mem(struct device *dev,
  phys_addr_t mem_region, size_t mem_sz,
diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index 3f12cc77fb58..aee3d8580d89 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -57,6 +57,7 @@ extern int qcom_scm_set_remote_state(u32 state, u32 id);
 extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare);
 extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size);
 extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare);
+extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en);
 extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val);
 extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val);
 #else
@@ -96,6 +97,7 @@ qcom_scm_set_remote_state(u32 state,u32 id) { return -ENODEV; 
}
 static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare) { return 
-ENODEV; }
 static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size) { 
return -ENODEV; }
 static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 
spare) { return -ENODEV; }
+static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en) { return 
-ENODEV; }
 static inline int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val) { 
return -ENODEV; }
 static inline int qcom_scm_io_writel(phys_addr_t addr, unsigned int val) { 
return -ENODEV; }
 #endif
-- 
QUALCOMM INDIA, on behalf of Qu

[PATCH v3 3/4] iommu/arm-smmu: Add support to handle Qcom's wait-for-safe logic

2019-06-12 Thread Vivek Gautam
Qcom's implementation of arm,mmu-500 adds a WAIT-FOR-SAFE logic
to address under-performance issues in real-time clients, such as
Display, and Camera.
On receiving an invalidation request, the SMMU forwards a SAFE request
to these clients and waits for SAFE ack signal from real-time clients.
The SAFE signal from such clients is used to qualify the start of
invalidation.
This logic is controlled by chicken bits, one for each - MDP (display),
IFE0, and IFE1 (camera), that can be accessed only from secure software
on sdm845.

This configuration, however, degrades the performance of non-real time
clients, such as USB, and UFS etc. This happens because, with wait-for-safe
logic enabled the hardware tries to throttle non-real time clients while
waiting for SAFE ack signals from real-time clients.

On MTP sdm845 devices, with wait-for-safe logic enabled at the boot time
by the bootloaders we see degraded performance of USB and UFS when kernel
enables the smmu stage-1 translations for these clients.
Turning off this wait-for-safe logic from the kernel gets us back the perf
of USB and UFS devices until we re-visit this when we start seeing perf
issues on display/camera on upstream supported SDM845 platforms.

Now, different bootloaders with their access control policies allow this
register access differently through secure monitor calls -
1) With one we can issue io-read/write secure monitor call (qcom-scm)
   to update the register, while,
2) With other, such as one on MTP sdm845 we should use the specific
   qcom-scm command to send request to do the complete register
   configuration.
Adding a separate device tree flag for arm-smmu to identify which
firmware configuration of the two mentioned above we use.
Not adding code change to allow type-(1) bootloaders to toggle the
safe using io-read/write qcom-scm call.

This change is inspired by the downstream change from Patrick Daly
to address performance issues with display and camera by handling
this wait-for-safe within separate io-pagetable ops to do TLB
maintenance. So a big thanks to him for the change.

Without this change the UFS reads are pretty slow:
$ time dd if=/dev/sda of=/dev/zero bs=1048576 count=10 conv=sync
10+0 records in
10+0 records out
10485760 bytes (10.0MB) copied, 22.394903 seconds, 457.2KB/s
real    0m 22.39s
user    0m 0.00s
sys     0m 0.01s

With this change they are back to rock!
$ time dd if=/dev/sda of=/dev/zero bs=1048576 count=300 conv=sync
300+0 records in
300+0 records out
314572800 bytes (300.0MB) copied, 1.030541 seconds, 291.1MB/s
real    0m 1.03s
user    0m 0.00s
sys     0m 0.54s

Signed-off-by: Vivek Gautam 
---
 drivers/iommu/arm-smmu.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 0ad086da399c..3c3ad43eda97 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -39,6 +39,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -177,6 +178,7 @@ struct arm_smmu_device {
u32 features;
 
 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
+#define ARM_SMMU_OPT_QCOM_FW_IMPL_SAFE_ERRATA (1 << 1)
u32 options;
enum arm_smmu_arch_version  version;
enum arm_smmu_implementationmodel;
@@ -262,6 +264,7 @@ static bool using_legacy_binding, using_generic_binding;
 
 static struct arm_smmu_option_prop arm_smmu_options[] = {
{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
+   { ARM_SMMU_OPT_QCOM_FW_IMPL_SAFE_ERRATA, 
"qcom,smmu-500-fw-impl-safe-errata" },
{ 0, NULL},
 };
 
@@ -2292,6 +2295,19 @@ static int arm_smmu_device_probe(struct platform_device 
*pdev)
arm_smmu_device_reset(smmu);
arm_smmu_test_smr_masks(smmu);
 
+   /*
+* To address performance degradation in non-real time clients,
+* such as USB and UFS, turn off wait-for-safe on sdm845 platforms,
+* such as MTP, whose firmwares implement corresponding secure monitor
+* call handlers.
+*/
+   if (of_device_is_compatible(smmu->dev->of_node, "qcom,sdm845-smmu-500") 
&&
+   smmu->options & ARM_SMMU_OPT_QCOM_FW_IMPL_SAFE_ERRATA) {
+   err = qcom_scm_qsmmu500_wait_safe_toggle(0);
+   if (err)
+   dev_warn(dev, "Failed to turn off SAFE logic\n");
+   }
+
/*
 * We want to avoid touching dev->power.lock in fastpaths unless
 * it's really going to do something useful - pm_runtime_enabled()
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation



[PATCH v3 1/4] firmware: qcom_scm-64: Add atomic version of qcom_scm_call

2019-06-12 Thread Vivek Gautam
There are scenarios where drivers are required to make a
scm call in atomic context, such as in one of the qcom's
arm-smmu-500 errata [1].

[1] ("https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/
  drivers/iommu/arm-smmu.c?h=CogSystems-msm-49/
  msm-4.9=da765c6c75266b38191b38ef086274943f353ea7")

Signed-off-by: Vivek Gautam 
Reviewed-by: Bjorn Andersson 
---
 drivers/firmware/qcom_scm-64.c | 136 -
 1 file changed, 92 insertions(+), 44 deletions(-)

diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c
index 91d5ad7cf58b..b6dca32c5ac4 100644
--- a/drivers/firmware/qcom_scm-64.c
+++ b/drivers/firmware/qcom_scm-64.c
@@ -62,32 +62,71 @@ static DEFINE_MUTEX(qcom_scm_lock);
 #define FIRST_EXT_ARG_IDX 3
 #define N_REGISTER_ARGS (MAX_QCOM_SCM_ARGS - N_EXT_QCOM_SCM_ARGS + 1)
 
-/**
- * qcom_scm_call() - Invoke a syscall in the secure world
- * @dev:   device
- * @svc_id:service identifier
- * @cmd_id:command identifier
- * @desc:  Descriptor structure containing arguments and return values
- *
- * Sends a command to the SCM and waits for the command to finish processing.
- * This should *only* be called in pre-emptible context.
-*/
-static int qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id,
-const struct qcom_scm_desc *desc,
-struct arm_smccc_res *res)
+static void __qcom_scm_call_do(const struct qcom_scm_desc *desc,
+  struct arm_smccc_res *res, u32 fn_id,
+  u64 x5, u32 type)
+{
+   u64 cmd;
+   struct arm_smccc_quirk quirk = {.id = ARM_SMCCC_QUIRK_QCOM_A6};
+
+   cmd = ARM_SMCCC_CALL_VAL(type, qcom_smccc_convention,
+ARM_SMCCC_OWNER_SIP, fn_id);
+
+   quirk.state.a6 = 0;
+
+   do {
+   arm_smccc_smc_quirk(cmd, desc->arginfo, desc->args[0],
+   desc->args[1], desc->args[2], x5,
+   quirk.state.a6, 0, res, );
+
+   if (res->a0 == QCOM_SCM_INTERRUPTED)
+   cmd = res->a0;
+
+   } while (res->a0 == QCOM_SCM_INTERRUPTED);
+}
+
+static void qcom_scm_call_do(const struct qcom_scm_desc *desc,
+struct arm_smccc_res *res, u32 fn_id,
+u64 x5, bool atomic)
+{
+   int retry_count = 0;
+
+   if (!atomic) {
+   do {
+   mutex_lock(_scm_lock);
+
+   __qcom_scm_call_do(desc, res, fn_id, x5,
+  ARM_SMCCC_STD_CALL);
+
+   mutex_unlock(_scm_lock);
+
+   if (res->a0 == QCOM_SCM_V2_EBUSY) {
+   if (retry_count++ > QCOM_SCM_EBUSY_MAX_RETRY)
+   break;
+   msleep(QCOM_SCM_EBUSY_WAIT_MS);
+   }
+   }  while (res->a0 == QCOM_SCM_V2_EBUSY);
+   } else {
+   __qcom_scm_call_do(desc, res, fn_id, x5, ARM_SMCCC_FAST_CALL);
+   }
+}
+
+static int ___qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id,
+   const struct qcom_scm_desc *desc,
+   struct arm_smccc_res *res, bool atomic)
 {
int arglen = desc->arginfo & 0xf;
-   int retry_count = 0, i;
+   int i;
u32 fn_id = QCOM_SCM_FNID(svc_id, cmd_id);
-   u64 cmd, x5 = desc->args[FIRST_EXT_ARG_IDX];
+   u64 x5 = desc->args[FIRST_EXT_ARG_IDX];
dma_addr_t args_phys = 0;
void *args_virt = NULL;
size_t alloc_len;
-   struct arm_smccc_quirk quirk = {.id = ARM_SMCCC_QUIRK_QCOM_A6};
+   gfp_t flag = atomic ? GFP_ATOMIC : GFP_KERNEL;
 
if (unlikely(arglen > N_REGISTER_ARGS)) {
alloc_len = N_EXT_QCOM_SCM_ARGS * sizeof(u64);
-   args_virt = kzalloc(PAGE_ALIGN(alloc_len), GFP_KERNEL);
+   args_virt = kzalloc(PAGE_ALIGN(alloc_len), flag);
 
if (!args_virt)
return -ENOMEM;
@@ -117,33 +156,7 @@ static int qcom_scm_call(struct device *dev, u32 svc_id, 
u32 cmd_id,
x5 = args_phys;
}
 
-   do {
-   mutex_lock(_scm_lock);
-
-   cmd = ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL,
-qcom_smccc_convention,
-ARM_SMCCC_OWNER_SIP, fn_id);
-
-   quirk.state.a6 = 0;
-
-   do {
-   arm_smccc_smc_quirk(cmd, desc->arginfo, desc->args[0],
- desc->args[1], desc->args[2], x5,
- quirk.state.a6, 0, res, );
-
-   if (res->a0 == QCOM_SCM_INTERRUPTED)
-   cm

[PATCH v3 4/4] arm64: dts/sdm845: Enable FW implemented safe sequence handler on MTP

2019-06-12 Thread Vivek Gautam
Indicate on MTP SDM845 that the firmware implements a handler for the
TLB invalidate erratum SCM call, where the SAFE sequence is toggled
to achieve optimum performance on real-time clients, such as
display and camera.

Signed-off-by: Vivek Gautam 
---
 arch/arm64/boot/dts/qcom/sdm845.dtsi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi 
b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index 78ec373a2b18..6a73d9744a71 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -2368,6 +2368,7 @@
compatible = "qcom,sdm845-smmu-500", "arm,mmu-500";
reg = <0 0x1500 0 0x8>;
#iommu-cells = <2>;
+   qcom,smmu-500-fw-impl-safe-errata;
#global-interrupts = <1>;
interrupts = ,
 ,
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation



Re: [PATCH] of/device: add blacklist for iommu dma_ops

2019-06-03 Thread Vivek Gautam
On Mon, Jun 3, 2019 at 4:14 PM Rob Clark  wrote:
>
> On Mon, Jun 3, 2019 at 12:57 AM Vivek Gautam
>  wrote:
> >
> >
> >
> > On 6/3/2019 11:50 AM, Tomasz Figa wrote:
> > > On Mon, Jun 3, 2019 at 4:40 AM Rob Clark  wrote:
> > >> On Fri, May 10, 2019 at 7:35 AM Rob Clark  wrote:
> > >>> On Tue, Dec 4, 2018 at 2:29 PM Rob Herring  wrote:
> > >>>> On Sat, Dec 1, 2018 at 10:54 AM Rob Clark  wrote:
> > >>>>> This solves a problem we see with drm/msm, caused by getting
> > >>>>> iommu_dma_ops while we attach our own domain and manage it directly at
> > >>>>> the iommu API level:
> > >>>>>
> > >>>>>[0038] user address but active_mm is swapper
> > >>>>>Internal error: Oops: 9605 [#1] PREEMPT SMP
> > >>>>>Modules linked in:
> > >>>>>CPU: 7 PID: 70 Comm: kworker/7:1 Tainted: GW 
> > >>>>> 4.19.3 #90
> > >>>>>Hardware name: xxx (DT)
> > >>>>>Workqueue: events deferred_probe_work_func
> > >>>>>pstate: 80c9 (Nzcv daif +PAN +UAO)
> > >>>>>pc : iommu_dma_map_sg+0x7c/0x2c8
> > >>>>>lr : iommu_dma_map_sg+0x40/0x2c8
> > >>>>>sp : ff80095eb4f0
> > >>>>>x29: ff80095eb4f0 x28: 
> > >>>>>x27: ffc0f9431578 x26: 
> > >>>>>x25:  x24: 0003
> > >>>>>x23: 0001 x22: ffc0fa9ac010
> > >>>>>x21:  x20: ffc0fab40980
> > >>>>>x19: ffc0fab40980 x18: 0003
> > >>>>>x17: 01c4 x16: 0007
> > >>>>>x15: 000e x14: 
> > >>>>>x13:  x12: 0028
> > >>>>>x11: 0101010101010101 x10: 7f7f7f7f7f7f7f7f
> > >>>>>x9 :  x8 : ffc0fab409a0
> > >>>>>x7 :  x6 : 0002
> > >>>>>x5 : 0001 x4 : 
> > >>>>>x3 : 0001 x2 : 0002
> > >>>>>x1 : ffc0f9431578 x0 : 
> > >>>>>Process kworker/7:1 (pid: 70, stack limit = 0x17d08ffb)
> > >>>>>Call trace:
> > >>>>> iommu_dma_map_sg+0x7c/0x2c8
> > >>>>> __iommu_map_sg_attrs+0x70/0x84
> > >>>>> get_pages+0x170/0x1e8
> > >>>>> msm_gem_get_iova+0x8c/0x128
> > >>>>> _msm_gem_kernel_new+0x6c/0xc8
> > >>>>> msm_gem_kernel_new+0x4c/0x58
> > >>>>> dsi_tx_buf_alloc_6g+0x4c/0x8c
> > >>>>> msm_dsi_host_modeset_init+0xc8/0x108
> > >>>>> msm_dsi_modeset_init+0x54/0x18c
> > >>>>> _dpu_kms_drm_obj_init+0x430/0x474
> > >>>>> dpu_kms_hw_init+0x5f8/0x6b4
> > >>>>> msm_drm_bind+0x360/0x6c8
> > >>>>> try_to_bring_up_master.part.7+0x28/0x70
> > >>>>> component_master_add_with_match+0xe8/0x124
> > >>>>> msm_pdev_probe+0x294/0x2b4
> > >>>>> platform_drv_probe+0x58/0xa4
> > >>>>> really_probe+0x150/0x294
> > >>>>> driver_probe_device+0xac/0xe8
> > >>>>> __device_attach_driver+0xa4/0xb4
> > >>>>> bus_for_each_drv+0x98/0xc8
> > >>>>> __device_attach+0xac/0x12c
> > >>>>> device_initial_probe+0x24/0x30
> > >>>>> bus_probe_device+0x38/0x98
> > >>>>> deferred_probe_work_func+0x78/0xa4
> > >>>>> process_one_work+0x24c/0x3dc
> > >>>>> worker_thread+0x280/0x360
> > >>>>> kthread+0x134/0x13c
> > >>>>> ret_from_fork+0x10/0x18
> > >>>>>Code: d284 91000725 6b17039f 5400048a (f9401f40)
> > >>>>>---[ end trace f22dda57f3648e2c ]---
> > >>>>>Kernel panic - not syncing: Fatal exception
> > >>>>>SMP: stopping secondary CPUs
> > >>>>>Kernel Offset: disable

Re: [PATCH] of/device: add blacklist for iommu dma_ops

2019-06-03 Thread Vivek Gautam




On 6/3/2019 11:50 AM, Tomasz Figa wrote:

On Mon, Jun 3, 2019 at 4:40 AM Rob Clark  wrote:

On Fri, May 10, 2019 at 7:35 AM Rob Clark  wrote:

On Tue, Dec 4, 2018 at 2:29 PM Rob Herring  wrote:

On Sat, Dec 1, 2018 at 10:54 AM Rob Clark  wrote:

This solves a problem we see with drm/msm, caused by getting
iommu_dma_ops while we attach our own domain and manage it directly at
the iommu API level:

   [0038] user address but active_mm is swapper
   Internal error: Oops: 9605 [#1] PREEMPT SMP
   Modules linked in:
   CPU: 7 PID: 70 Comm: kworker/7:1 Tainted: GW 4.19.3 #90
   Hardware name: xxx (DT)
   Workqueue: events deferred_probe_work_func
   pstate: 80c9 (Nzcv daif +PAN +UAO)
   pc : iommu_dma_map_sg+0x7c/0x2c8
   lr : iommu_dma_map_sg+0x40/0x2c8
   sp : ff80095eb4f0
   x29: ff80095eb4f0 x28: 
   x27: ffc0f9431578 x26: 
   x25:  x24: 0003
   x23: 0001 x22: ffc0fa9ac010
   x21:  x20: ffc0fab40980
   x19: ffc0fab40980 x18: 0003
   x17: 01c4 x16: 0007
   x15: 000e x14: 
   x13:  x12: 0028
   x11: 0101010101010101 x10: 7f7f7f7f7f7f7f7f
   x9 :  x8 : ffc0fab409a0
   x7 :  x6 : 0002
   x5 : 0001 x4 : 
   x3 : 0001 x2 : 0002
   x1 : ffc0f9431578 x0 : 
   Process kworker/7:1 (pid: 70, stack limit = 0x17d08ffb)
   Call trace:
iommu_dma_map_sg+0x7c/0x2c8
__iommu_map_sg_attrs+0x70/0x84
get_pages+0x170/0x1e8
msm_gem_get_iova+0x8c/0x128
_msm_gem_kernel_new+0x6c/0xc8
msm_gem_kernel_new+0x4c/0x58
dsi_tx_buf_alloc_6g+0x4c/0x8c
msm_dsi_host_modeset_init+0xc8/0x108
msm_dsi_modeset_init+0x54/0x18c
_dpu_kms_drm_obj_init+0x430/0x474
dpu_kms_hw_init+0x5f8/0x6b4
msm_drm_bind+0x360/0x6c8
try_to_bring_up_master.part.7+0x28/0x70
component_master_add_with_match+0xe8/0x124
msm_pdev_probe+0x294/0x2b4
platform_drv_probe+0x58/0xa4
really_probe+0x150/0x294
driver_probe_device+0xac/0xe8
__device_attach_driver+0xa4/0xb4
bus_for_each_drv+0x98/0xc8
__device_attach+0xac/0x12c
device_initial_probe+0x24/0x30
bus_probe_device+0x38/0x98
deferred_probe_work_func+0x78/0xa4
process_one_work+0x24c/0x3dc
worker_thread+0x280/0x360
kthread+0x134/0x13c
ret_from_fork+0x10/0x18
   Code: d284 91000725 6b17039f 5400048a (f9401f40)
   ---[ end trace f22dda57f3648e2c ]---
   Kernel panic - not syncing: Fatal exception
   SMP: stopping secondary CPUs
   Kernel Offset: disabled
   CPU features: 0x0,22802a18
   Memory Limit: none

The problem is that when drm/msm does its own iommu_attach_device(),
now the domain returned by iommu_get_domain_for_dev() is drm/msm's
domain, and it doesn't have domain->iova_cookie.

We kind of avoided this problem prior to sdm845/dpu because the iommu
was attached to the mdp node in dt, which is a child of the toplevel
mdss node (which corresponds to the dev passed in dma_map_sg()).  But
with sdm845, now the iommu is attached at the mdss level so we hit the
iommu_dma_ops in dma_map_sg().

But auto allocating/attaching a domain before the driver is probed was
already a blocking problem for enabling per-context pagetables for the
GPU.  This problem is also now solved with this patch.

Fixes: 97890ba9289c dma-mapping: detect and configure IOMMU in of_dma_configure
Tested-by: Douglas Anderson 
Signed-off-by: Rob Clark 
---
This is an alternative/replacement for [1].  What it lacks in elegance
it makes up for in practicality ;-)

[1] https://patchwork.freedesktop.org/patch/264930/

  drivers/of/device.c | 22 ++
  1 file changed, 22 insertions(+)

diff --git a/drivers/of/device.c b/drivers/of/device.c
index 5957cd4fa262..15ffee00fb22 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -72,6 +72,14 @@ int of_device_add(struct platform_device *ofdev)
 return device_add(&ofdev->dev);
  }

+static const struct of_device_id iommu_blacklist[] = {
+   { .compatible = "qcom,mdp4" },
+   { .compatible = "qcom,mdss" },
+   { .compatible = "qcom,sdm845-mdss" },
+   { .compatible = "qcom,adreno" },
+   {}
+};

Not completely clear to me whether this is still needed or not, but this
really won't scale. Why can't the driver for these devices override
whatever has been setup by default?


fwiw, at the moment it is not needed, but it will become needed again
to implement per-context pagetables (although I suppose for this we
only need to blacklist qcom,adreno and not also the display nodes).

So, another case I've come across, on the display side.. I'm working
on handling the case where bootloader enables display (and takes iommu
out of reset).. as soon as DMA domain gets attached we get iommu
faults, because bootloader has 

Re: [PATCH v5 1/1] iommu/io-pgtable-arm: Add support to use system cache

2019-05-23 Thread Vivek Gautam
On Thu, May 23, 2019 at 4:11 PM Robin Murphy  wrote:
>
> On 2019-05-16 10:30 am, Vivek Gautam wrote:
> > Few Qualcomm platforms such as, sdm845 have an additional outer
> > cache called as System cache, aka. Last level cache (LLC) that
> > allows non-coherent devices to upgrade to using caching.
> > This cache sits right before the DDR, and is tightly coupled
> > with the memory controller. The clients using this cache request
> > their slices from this system cache, make it active, and can then
> > start using it.
> >
> > There is a fundamental assumption that non-coherent devices can't
> > access caches. This change adds an exception where they *can* use
> > some level of cache despite still being non-coherent overall.
> > The coherent devices that use cacheable memory, and CPU make use of
> > this system cache by default.
> >
> > Looking at memory types, we have following -
> > a) Normal uncached :- MAIR 0x44, inner non-cacheable,
> >outer non-cacheable;
> > b) Normal cached :-   MAIR 0xff, inner read write-back non-transient,
> >outer read write-back non-transient;
> > >attribute setting for coherent I/O devices.
> > and, for non-coherent i/o devices that can allocate in system cache
> > another type gets added -
> > c) Normal sys-cached :- MAIR 0xf4, inner non-cacheable,
> >  outer read write-back non-transient
> >
> > Coherent I/O devices use system cache by marking the memory as
> > normal cached.
> > Non-coherent I/O devices should mark the memory as normal
> > sys-cached in page tables to use system cache.
> >
> > Signed-off-by: Vivek Gautam 
>
> Acked-by: Robin Murphy 

Thanks a lot Robin.

regards
Vivek
>
> There's a remote possibility that the IOMMU prot flag might be able to
> be somewhat generalised in future for panfrost, as Mali appears to have
> some pretty funky notions of cacheability, but this certainly looks fine
> for now, thanks.
>
> Robin.
>
> > ---
> >
> > V3 version of this patch and related series can be found at [1].
> > V4 of this patch is available at [2].
> >
> > The example usage of how a smmu master can make use of this protection
> > flag and set the correct memory attributes to start using system cache,
> > can be found at [3]; and here at [3] IOMMU_UPSTREAM_HINT is same as
> > IOMMU_QCOM_SYS_CACHE.
> >
> > Changes since v4:
> >   - Changed ARM_LPAE_MAIR_ATTR_QCOM_SYS_CACHE to
> > ARM_LPAE_MAIR_ATTR_INC_OWBRWA.
> >   - Changed ARM_LPAE_MAIR_ATTR_IDX_QCOM_SYS_CACHE to
> > ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE.
> >   - Added comments to iommu protection flag - IOMMU_QCOM_SYS_CACHE.
> >
> > Changes since v3:
> >   - Dropping support to cache i/o page tables to system cache. Getting 
> > support
> > for data buffers is the first step.
> > Removed io-pgtable quirk and related change to add domain attribute.
> >
> > Glmark2 numbers on SDM845 based cheza board:
> >
> > S.No.|with LLC support   |without LLC support
> >   |   for data buffers   |
> > ---
> > 1|4480; 72.3fps  |4042; 65.2fps
> > 2|4500; 72.6fps  |4039; 65.1fps
> > 3|4523; 72.9fps  |4106; 66.2fps
> > 4|4489; 72.4fps  |4104; 66.2fps
> > 5|4518; 72.9fps  |4072; 65.7fps
> >
> > [1] https://patchwork.kernel.org/cover/10772629/
> > [2] https://lore.kernel.org/patchwork/patch/1072936/
> > [3] https://patchwork.kernel.org/patch/10302791/
> >
> >   drivers/iommu/io-pgtable-arm.c | 9 -
> >   include/linux/iommu.h  | 6 ++
> >   2 files changed, 14 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> > index 4e21efbc4459..2454ac11aa97 100644
> > --- a/drivers/iommu/io-pgtable-arm.c
> > +++ b/drivers/iommu/io-pgtable-arm.c
> > @@ -167,10 +167,12 @@
> >   #define ARM_LPAE_MAIR_ATTR_MASK 0xff
> >   #define ARM_LPAE_MAIR_ATTR_DEVICE   0x04
> >   #define ARM_LPAE_MAIR_ATTR_NC   0x44
> > +#define ARM_LPAE_MAIR_ATTR_INC_OWBRWA0xf4
> >   #define ARM_LPAE_MAIR_ATTR_WBRWA0xff
> >   #define ARM_LPAE_MAIR_ATTR_IDX_NC   0
> >   #define ARM_LPAE_MAIR_ATTR_IDX_CACHE1
> >   #define ARM_LPAE_MAIR_ATTR_IDX_DEV  2
> > +#define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE3
> >
> >   #define A

Re: [PATCH v5 1/1] iommu/io-pgtable-arm: Add support to use system cache

2019-05-23 Thread Vivek Gautam
Hi Robin,



On Thu, May 16, 2019 at 3:00 PM Vivek Gautam
 wrote:
>
> Few Qualcomm platforms such as, sdm845 have an additional outer
> cache called as System cache, aka. Last level cache (LLC) that
> allows non-coherent devices to upgrade to using caching.
> This cache sits right before the DDR, and is tightly coupled
> with the memory controller. The clients using this cache request
> their slices from this system cache, make it active, and can then
> start using it.
>
> There is a fundamental assumption that non-coherent devices can't
> access caches. This change adds an exception where they *can* use
> some level of cache despite still being non-coherent overall.
> The coherent devices that use cacheable memory, and CPU make use of
> this system cache by default.
>
> Looking at memory types, we have following -
> a) Normal uncached :- MAIR 0x44, inner non-cacheable,
>   outer non-cacheable;
> b) Normal cached :-   MAIR 0xff, inner read write-back non-transient,
>   outer read write-back non-transient;
>   attribute setting for coherent I/O devices.
> and, for non-coherent i/o devices that can allocate in system cache
> another type gets added -
> c) Normal sys-cached :- MAIR 0xf4, inner non-cacheable,
> outer read write-back non-transient
>
> Coherent I/O devices use system cache by marking the memory as
> normal cached.
> Non-coherent I/O devices should mark the memory as normal
> sys-cached in page tables to use system cache.
>
> Signed-off-by: Vivek Gautam 
> ---

Let me know if there's more to improve on this patch.

Best regards
Vivek

>
> V3 version of this patch and related series can be found at [1].
> V4 of this patch is available at [2].
>
> The example usage of how a smmu master can make use of this protection
> flag and set the correct memory attributes to start using system cache,
> can be found at [3]; and here at [3] IOMMU_UPSTREAM_HINT is same as
> IOMMU_QCOM_SYS_CACHE.
>
> Changes since v4:
>  - Changed ARM_LPAE_MAIR_ATTR_QCOM_SYS_CACHE to
>ARM_LPAE_MAIR_ATTR_INC_OWBRWA.
>  - Changed ARM_LPAE_MAIR_ATTR_IDX_QCOM_SYS_CACHE to
>ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE.
>  - Added comments to iommu protection flag - IOMMU_QCOM_SYS_CACHE.
>
> Changes since v3:
>  - Dropping support to cache i/o page tables to system cache. Getting support
>for data buffers is the first step.
>Removed io-pgtable quirk and related change to add domain attribute.
>
> Glmark2 numbers on SDM845 based cheza board:
>
> S.No.|  with LLC support   |without LLC support
>  |  for data buffers   |
> ---
> 1|  4480; 72.3fps  |4042; 65.2fps
> 2|  4500; 72.6fps  |4039; 65.1fps
> 3|  4523; 72.9fps  |4106; 66.2fps
> 4|  4489; 72.4fps  |4104; 66.2fps
> 5|  4518; 72.9fps  |4072; 65.7fps
>
> [1] https://patchwork.kernel.org/cover/10772629/
> [2] https://lore.kernel.org/patchwork/patch/1072936/
> [3] https://patchwork.kernel.org/patch/10302791/
>
>  drivers/iommu/io-pgtable-arm.c | 9 -
>  include/linux/iommu.h  | 6 ++
>  2 files changed, 14 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index 4e21efbc4459..2454ac11aa97 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -167,10 +167,12 @@
>  #define ARM_LPAE_MAIR_ATTR_MASK0xff
>  #define ARM_LPAE_MAIR_ATTR_DEVICE  0x04
>  #define ARM_LPAE_MAIR_ATTR_NC  0x44
> +#define ARM_LPAE_MAIR_ATTR_INC_OWBRWA  0xf4
>  #define ARM_LPAE_MAIR_ATTR_WBRWA   0xff
>  #define ARM_LPAE_MAIR_ATTR_IDX_NC  0
>  #define ARM_LPAE_MAIR_ATTR_IDX_CACHE   1
>  #define ARM_LPAE_MAIR_ATTR_IDX_DEV 2
> +#define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE  3
>
>  #define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0)
>  #define ARM_MALI_LPAE_TTBR_READ_INNER  BIT(2)
> @@ -470,6 +472,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct 
> arm_lpae_io_pgtable *data,
> else if (prot & IOMMU_CACHE)
> pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
> << ARM_LPAE_PTE_ATTRINDX_SHIFT);
> +   else if (prot & IOMMU_QCOM_SYS_CACHE)
> +   pte |= (ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE
> +   << ARM_LPAE_PTE_ATTRINDX_SHIFT);
> }
>
> if (prot & IOMMU_NOEXEC)
> @@ -857,7 +862,9 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, 
> void *cookie)
>   (A

[PATCH v5 1/1] iommu/io-pgtable-arm: Add support to use system cache

2019-05-16 Thread Vivek Gautam
Few Qualcomm platforms such as, sdm845 have an additional outer
cache called as System cache, aka. Last level cache (LLC) that
allows non-coherent devices to upgrade to using caching.
This cache sits right before the DDR, and is tightly coupled
with the memory controller. The clients using this cache request
their slices from this system cache, make it active, and can then
start using it.

There is a fundamental assumption that non-coherent devices can't
access caches. This change adds an exception where they *can* use
some level of cache despite still being non-coherent overall.
The coherent devices that use cacheable memory, and CPU make use of
this system cache by default.

Looking at memory types, we have following -
a) Normal uncached :- MAIR 0x44, inner non-cacheable,
  outer non-cacheable;
b) Normal cached :-   MAIR 0xff, inner read write-back non-transient,
  outer read write-back non-transient;
  attribute setting for coherent I/O devices.
and, for non-coherent i/o devices that can allocate in system cache
another type gets added -
c) Normal sys-cached :- MAIR 0xf4, inner non-cacheable,
outer read write-back non-transient

Coherent I/O devices use system cache by marking the memory as
normal cached.
Non-coherent I/O devices should mark the memory as normal
sys-cached in page tables to use system cache.

Signed-off-by: Vivek Gautam 
---

V3 version of this patch and related series can be found at [1].
V4 of this patch is available at [2].

The example usage of how a smmu master can make use of this protection
flag and set the correct memory attributes to start using system cache,
can be found at [3]; and here at [3] IOMMU_UPSTREAM_HINT is same as
IOMMU_QCOM_SYS_CACHE.

Changes since v4:
 - Changed ARM_LPAE_MAIR_ATTR_QCOM_SYS_CACHE to
   ARM_LPAE_MAIR_ATTR_INC_OWBRWA.
 - Changed ARM_LPAE_MAIR_ATTR_IDX_QCOM_SYS_CACHE to
   ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE.
 - Added comments to iommu protection flag - IOMMU_QCOM_SYS_CACHE.

Changes since v3:
 - Dropping support to cache i/o page tables to system cache. Getting support
   for data buffers is the first step.
   Removed io-pgtable quirk and related change to add domain attribute.

Glmark2 numbers on SDM845 based cheza board:

S.No.|  with LLC support   |without LLC support
 |  for data buffers   |
--- 
1|  4480; 72.3fps  |4042; 65.2fps
2|  4500; 72.6fps  |4039; 65.1fps
3|  4523; 72.9fps  |4106; 66.2fps
4|  4489; 72.4fps  |4104; 66.2fps
5|  4518; 72.9fps  |4072; 65.7fps

[1] https://patchwork.kernel.org/cover/10772629/
[2] https://lore.kernel.org/patchwork/patch/1072936/
[3] https://patchwork.kernel.org/patch/10302791/

 drivers/iommu/io-pgtable-arm.c | 9 -
 include/linux/iommu.h  | 6 ++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 4e21efbc4459..2454ac11aa97 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -167,10 +167,12 @@
 #define ARM_LPAE_MAIR_ATTR_MASK0xff
 #define ARM_LPAE_MAIR_ATTR_DEVICE  0x04
 #define ARM_LPAE_MAIR_ATTR_NC  0x44
+#define ARM_LPAE_MAIR_ATTR_INC_OWBRWA  0xf4
 #define ARM_LPAE_MAIR_ATTR_WBRWA   0xff
 #define ARM_LPAE_MAIR_ATTR_IDX_NC  0
 #define ARM_LPAE_MAIR_ATTR_IDX_CACHE   1
 #define ARM_LPAE_MAIR_ATTR_IDX_DEV 2
+#define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE  3
 
 #define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0)
 #define ARM_MALI_LPAE_TTBR_READ_INNER  BIT(2)
@@ -470,6 +472,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct 
arm_lpae_io_pgtable *data,
else if (prot & IOMMU_CACHE)
pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
+   else if (prot & IOMMU_QCOM_SYS_CACHE)
+   pte |= (ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE
+   << ARM_LPAE_PTE_ATTRINDX_SHIFT);
}
 
if (prot & IOMMU_NOEXEC)
@@ -857,7 +862,9 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, 
void *cookie)
  (ARM_LPAE_MAIR_ATTR_WBRWA
   << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
  (ARM_LPAE_MAIR_ATTR_DEVICE
-  << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));
+  << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) |
+ (ARM_LPAE_MAIR_ATTR_INC_OWBRWA
+  << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE));
 
cfg->arm_lpae_s1_cfg.mair[0] = reg;
cfg->arm_lpae_s1_cfg.mair[1] = 0;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index a815cf6f6f47..8ee3fbaf5855 100644
--- a/include/linux/iommu.h
+++ b/in

Re: [PATCH] iommu: io-pgtable: Support non-coherent page tables

2019-05-16 Thread Vivek Gautam
On Thu, May 16, 2019 at 5:03 AM Bjorn Andersson
 wrote:
>
> Describe the memory related to page table walks as non-cachable for iommu
> instances that are not DMA coherent.
>
> Signed-off-by: Bjorn Andersson 
> ---
>  drivers/iommu/io-pgtable-arm.c | 12 +---
>  1 file changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index 4e21efbc4459..68ff22ffd2cb 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -803,9 +803,15 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, 
> void *cookie)
> return NULL;
>
> /* TCR */
> -   reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) |
> - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) |
> - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT);
> +   if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) {
> +   reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) |
> + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) |
> + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT);
> +   } else {
> +   reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) |
> + (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) |
> + (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN0_SHIFT);
> +   }

This looks okay to me based on the discussion that we had on a similar
patch that I
posted. So,
Reviewed-by: Vivek Gautam 

[1] https://lore.kernel.org/patchwork/patch/1032939/

Thanks & regards
Vivek

>
> switch (ARM_LPAE_GRANULE(data)) {
> case SZ_4K:
> --
> 2.18.0
>
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu



-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 1/1] iommu/io-pgtable-arm: Add support to use system cache

2019-05-15 Thread Vivek Gautam
On Tue, May 14, 2019 at 12:26 PM Vivek Gautam
 wrote:
>
> Hi Robin,
>
>
> On Mon, May 13, 2019 at 5:02 PM Robin Murphy  wrote:
> >
> > On 13/05/2019 11:04, Vivek Gautam wrote:
> > > Few Qualcomm platforms such as, sdm845 have an additional outer
> > > cache called as System cache, aka. Last level cache (LLC) that
> > > allows non-coherent devices to upgrade to using caching.
> > > This cache sits right before the DDR, and is tightly coupled
> > > with the memory controller. The clients using this cache request
> > > their slices from this system cache, make it active, and can then
> > > start using it.
> > >
> > > There is a fundamental assumption that non-coherent devices can't
> > > access caches. This change adds an exception where they *can* use
> > > some level of cache despite still being non-coherent overall.
> > > The coherent devices that use cacheable memory, and CPU make use of
> > > this system cache by default.
> > >
> > > Looking at memory types, we have following -
> > > a) Normal uncached :- MAIR 0x44, inner non-cacheable,
> > >outer non-cacheable;
> > > b) Normal cached :-   MAIR 0xff, inner read write-back non-transient,
> > >outer read write-back non-transient;
> > > >attribute setting for coherent I/O devices.
> > > and, for non-coherent i/o devices that can allocate in system cache
> > > another type gets added -
> > > c) Normal sys-cached :- MAIR 0xf4, inner non-cacheable,
> > >  outer read write-back non-transient
> > >
> > > Coherent I/O devices use system cache by marking the memory as
> > > normal cached.
> > > Non-coherent I/O devices should mark the memory as normal
> > > sys-cached in page tables to use system cache.
> > >
> > > Signed-off-by: Vivek Gautam 
> > > ---
> > >
> > > V3 version of this patch and related series can be found at [1].
> > >
> > > This change is a realisation of following changes from downstream msm-4.9:
> > > iommu: io-pgtable-arm: Implement IOMMU_USE_UPSTREAM_HINT[2]
> > >
> > > Changes since v3:
> > >   - Dropping support to cache i/o page tables to system cache. Getting 
> > > support
> > > for data buffers is the first step.
> > > Removed io-pgtable quirk and related change to add domain attribute.
> > >
> > > Glmark2 numbers on SDM845 based cheza board:
> > >
> > > S.No.|with LLC support   |without LLC support
> > >   |   for data buffers   |
> > > ---
> > > 1|4480; 72.3fps  |4042; 65.2fps
> > > 2|4500; 72.6fps  |4039; 65.1fps
> > > 3|4523; 72.9fps  |4106; 66.2fps
> > > 4|4489; 72.4fps  |4104; 66.2fps
> > > 5|4518; 72.9fps  |4072; 65.7fps
> > >
> > > [1] https://patchwork.kernel.org/cover/10772629/
> > > [2] 
> > > https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9=d4c72c413ea27c43f60825193d4de9cb8ffd9602
> > >
> > >   drivers/iommu/io-pgtable-arm.c | 9 -
> > >   include/linux/iommu.h  | 1 +
> > >   2 files changed, 9 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/iommu/io-pgtable-arm.c 
> > > b/drivers/iommu/io-pgtable-arm.c
> > > index d3700ec15cbd..2dbafe697531 100644
> > > --- a/drivers/iommu/io-pgtable-arm.c
> > > +++ b/drivers/iommu/io-pgtable-arm.c
> > > @@ -167,10 +167,12 @@
> > >   #define ARM_LPAE_MAIR_ATTR_MASK 0xff
> > >   #define ARM_LPAE_MAIR_ATTR_DEVICE   0x04
> > >   #define ARM_LPAE_MAIR_ATTR_NC   0x44
> > > +#define ARM_LPAE_MAIR_ATTR_QCOM_SYS_CACHE0xf4

s/QCOM_SYS_CACHE/INC_OWBRWA/ looks more appropriate here.

> > >   #define ARM_LPAE_MAIR_ATTR_WBRWA0xff
> > >   #define ARM_LPAE_MAIR_ATTR_IDX_NC   0
> > >   #define ARM_LPAE_MAIR_ATTR_IDX_CACHE1
> > >   #define ARM_LPAE_MAIR_ATTR_IDX_DEV  2
> > > +#define ARM_LPAE_MAIR_ATTR_IDX_QCOM_SYS_CACHE3
> >
> > Here at the implementation level, I'd rather just call these what they
> > are, i.e. s/QCOM_SYS_CACHE/INC_OWBRWA/.

Allow me to change this to - s/QCOM_SYS_CACHE/INC_OC, or
s/QCOM_SYS_CACHE/INC_OCACHE to go inline with IDX_NC and IDX_CACHE.
Sounds okay?

> >
>
> Thanks for

Re: [PATCH v4 1/1] iommu/io-pgtable-arm: Add support to use system cache

2019-05-14 Thread Vivek Gautam
Hi Robin,


On Mon, May 13, 2019 at 5:02 PM Robin Murphy  wrote:
>
> On 13/05/2019 11:04, Vivek Gautam wrote:
> > Few Qualcomm platforms such as, sdm845 have an additional outer
> > cache called as System cache, aka. Last level cache (LLC) that
> > allows non-coherent devices to upgrade to using caching.
> > This cache sits right before the DDR, and is tightly coupled
> > with the memory controller. The clients using this cache request
> > their slices from this system cache, make it active, and can then
> > start using it.
> >
> > There is a fundamental assumption that non-coherent devices can't
> > access caches. This change adds an exception where they *can* use
> > some level of cache despite still being non-coherent overall.
> > The coherent devices that use cacheable memory, and CPU make use of
> > this system cache by default.
> >
> > Looking at memory types, we have following -
> > a) Normal uncached :- MAIR 0x44, inner non-cacheable,
> >outer non-cacheable;
> > b) Normal cached :-   MAIR 0xff, inner read write-back non-transient,
> >outer read write-back non-transient;
> > >attribute setting for coherent I/O devices.
> > and, for non-coherent i/o devices that can allocate in system cache
> > another type gets added -
> > c) Normal sys-cached :- MAIR 0xf4, inner non-cacheable,
> >  outer read write-back non-transient
> >
> > Coherent I/O devices use system cache by marking the memory as
> > normal cached.
> > Non-coherent I/O devices should mark the memory as normal
> > sys-cached in page tables to use system cache.
> >
> > Signed-off-by: Vivek Gautam 
> > ---
> >
> > V3 version of this patch and related series can be found at [1].
> >
> > This change is a realisation of following changes from downstream msm-4.9:
> > iommu: io-pgtable-arm: Implement IOMMU_USE_UPSTREAM_HINT[2]
> >
> > Changes since v3:
> >   - Dropping support to cache i/o page tables to system cache. Getting 
> > support
> > for data buffers is the first step.
> > Removed io-pgtable quirk and related change to add domain attribute.
> >
> > Glmark2 numbers on SDM845 based cheza board:
> >
> > S.No.|with LLC support   |without LLC support
> >   |   for data buffers   |
> > ---
> > 1|4480; 72.3fps  |4042; 65.2fps
> > 2|4500; 72.6fps  |4039; 65.1fps
> > 3|4523; 72.9fps  |4106; 66.2fps
> > 4|4489; 72.4fps  |4104; 66.2fps
> > 5|4518; 72.9fps  |4072; 65.7fps
> >
> > [1] https://patchwork.kernel.org/cover/10772629/
> > [2] 
> > https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9=d4c72c413ea27c43f60825193d4de9cb8ffd9602
> >
> >   drivers/iommu/io-pgtable-arm.c | 9 -
> >   include/linux/iommu.h  | 1 +
> >   2 files changed, 9 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> > index d3700ec15cbd..2dbafe697531 100644
> > --- a/drivers/iommu/io-pgtable-arm.c
> > +++ b/drivers/iommu/io-pgtable-arm.c
> > @@ -167,10 +167,12 @@
> >   #define ARM_LPAE_MAIR_ATTR_MASK 0xff
> >   #define ARM_LPAE_MAIR_ATTR_DEVICE   0x04
> >   #define ARM_LPAE_MAIR_ATTR_NC   0x44
> > +#define ARM_LPAE_MAIR_ATTR_QCOM_SYS_CACHE0xf4
> >   #define ARM_LPAE_MAIR_ATTR_WBRWA0xff
> >   #define ARM_LPAE_MAIR_ATTR_IDX_NC   0
> >   #define ARM_LPAE_MAIR_ATTR_IDX_CACHE1
> >   #define ARM_LPAE_MAIR_ATTR_IDX_DEV  2
> > +#define ARM_LPAE_MAIR_ATTR_IDX_QCOM_SYS_CACHE3
>
> Here at the implementation level, I'd rather just call these what they
> are, i.e. s/QCOM_SYS_CACHE/INC_OWBRWA/.
>

Thanks for the review.
Sure, will change this as suggested.

> >
> >   /* IOPTE accessors */
> >   #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
> > @@ -442,6 +444,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct 
> > arm_lpae_io_pgtable *data,
> >   else if (prot & IOMMU_CACHE)
> >   pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
> >   << ARM_LPAE_PTE_ATTRINDX_SHIFT);
> > + else if (prot & IOMMU_QCOM_SYS_CACHE)
>
> Where in the call stack is this going to be decided? (I don't recall the
> previous discussions ever really reaching a solid conclusion on how to
>

[PATCH v4 1/1] iommu/io-pgtable-arm: Add support to use system cache

2019-05-13 Thread Vivek Gautam
Few Qualcomm platforms such as, sdm845 have an additional outer
cache called as System cache, aka. Last level cache (LLC) that
allows non-coherent devices to upgrade to using caching.
This cache sits right before the DDR, and is tightly coupled
with the memory controller. The clients using this cache request
their slices from this system cache, make it active, and can then
start using it.

There is a fundamental assumption that non-coherent devices can't
access caches. This change adds an exception where they *can* use
some level of cache despite still being non-coherent overall.
The coherent devices that use cacheable memory, and CPU make use of
this system cache by default.

Looking at memory types, we have following -
a) Normal uncached :- MAIR 0x44, inner non-cacheable,
  outer non-cacheable;
b) Normal cached :-   MAIR 0xff, inner read write-back non-transient,
  outer read write-back non-transient;
  attribute setting for coherent I/O devices.
and, for non-coherent i/o devices that can allocate in system cache
another type gets added -
c) Normal sys-cached :- MAIR 0xf4, inner non-cacheable,
outer read write-back non-transient

Coherent I/O devices use system cache by marking the memory as
normal cached.
Non-coherent I/O devices should mark the memory as normal
sys-cached in page tables to use system cache.

Signed-off-by: Vivek Gautam 
---

V3 version of this patch and related series can be found at [1].

This change is a realisation of following changes from downstream msm-4.9:
iommu: io-pgtable-arm: Implement IOMMU_USE_UPSTREAM_HINT[2]

Changes since v3:
 - Dropping support to cache i/o page tables to system cache. Getting support
   for data buffers is the first step.
   Removed io-pgtable quirk and related change to add domain attribute.

Glmark2 numbers on SDM845 based cheza board:

S.No.|  with LLC support   |without LLC support
 |  for data buffers   |
--- 
1|  4480; 72.3fps  |4042; 65.2fps
2|  4500; 72.6fps  |4039; 65.1fps
3|  4523; 72.9fps  |4106; 66.2fps
4|  4489; 72.4fps  |4104; 66.2fps
5|  4518; 72.9fps  |4072; 65.7fps

[1] https://patchwork.kernel.org/cover/10772629/
[2] 
https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9=d4c72c413ea27c43f60825193d4de9cb8ffd9602

 drivers/iommu/io-pgtable-arm.c | 9 -
 include/linux/iommu.h  | 1 +
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index d3700ec15cbd..2dbafe697531 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -167,10 +167,12 @@
 #define ARM_LPAE_MAIR_ATTR_MASK0xff
 #define ARM_LPAE_MAIR_ATTR_DEVICE  0x04
 #define ARM_LPAE_MAIR_ATTR_NC  0x44
+#define ARM_LPAE_MAIR_ATTR_QCOM_SYS_CACHE  0xf4
 #define ARM_LPAE_MAIR_ATTR_WBRWA   0xff
 #define ARM_LPAE_MAIR_ATTR_IDX_NC  0
 #define ARM_LPAE_MAIR_ATTR_IDX_CACHE   1
 #define ARM_LPAE_MAIR_ATTR_IDX_DEV 2
+#define ARM_LPAE_MAIR_ATTR_IDX_QCOM_SYS_CACHE  3
 
 /* IOPTE accessors */
 #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
@@ -442,6 +444,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct 
arm_lpae_io_pgtable *data,
else if (prot & IOMMU_CACHE)
pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
+   else if (prot & IOMMU_QCOM_SYS_CACHE)
+   pte |= (ARM_LPAE_MAIR_ATTR_IDX_QCOM_SYS_CACHE
+   << ARM_LPAE_PTE_ATTRINDX_SHIFT);
} else {
pte = ARM_LPAE_PTE_HAP_FAULT;
if (prot & IOMMU_READ)
@@ -841,7 +846,9 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, 
void *cookie)
  (ARM_LPAE_MAIR_ATTR_WBRWA
   << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
  (ARM_LPAE_MAIR_ATTR_DEVICE
-  << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));
+  << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) |
+ (ARM_LPAE_MAIR_ATTR_QCOM_SYS_CACHE
+  << 
ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_QCOM_SYS_CACHE));
 
cfg->arm_lpae_s1_cfg.mair[0] = reg;
cfg->arm_lpae_s1_cfg.mair[1] = 0;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index a815cf6f6f47..29dd2c624348 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -31,6 +31,7 @@
 #define IOMMU_CACHE(1 << 2) /* DMA cache coherency */
 #define IOMMU_NOEXEC   (1 << 3)
 #define IOMMU_MMIO (1 << 4) /* e.g. things like MSI doorbells */
+#define IOMMU_QCOM_SYS_CACHE   (1 << 6)
 /*
  * Where the bus hardware includes a privilege level as pa

[PATCH v2 1/1] iommu/arm-smmu: Log CBFRSYNRA register on context fault

2019-04-22 Thread Vivek Gautam
Bits[15:0] in CBFRSYNRA register contain information about
StreamID of the incoming transaction that generated the
fault. Dump CBFRSYNRA register to get this info.
This is especially useful in a distributed SMMU architecture
where multiple masters are connected to the SMMU.
SID information helps to quickly identify the faulting
master device.

Signed-off-by: Vivek Gautam 
Reviewed-by: Bjorn Andersson 
---

Changes since v1:
 - Addressed review comments, given by Bjorn, for nits.

 drivers/iommu/arm-smmu-regs.h | 2 ++
 drivers/iommu/arm-smmu.c  | 7 +--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h
index a1226e4ab5f8..e9132a926761 100644
--- a/drivers/iommu/arm-smmu-regs.h
+++ b/drivers/iommu/arm-smmu-regs.h
@@ -147,6 +147,8 @@ enum arm_smmu_s2cr_privcfg {
 #define CBAR_IRPTNDX_SHIFT 24
 #define CBAR_IRPTNDX_MASK  0xff
 
+#define ARM_SMMU_GR1_CBFRSYNRA(n)  (0x400 + ((n) << 2))
+
 #define ARM_SMMU_GR1_CBA2R(n)  (0x800 + ((n) << 2))
 #define CBA2R_RW64_32BIT   (0 << 0)
 #define CBA2R_RW64_64BIT   (1 << 0)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 045d93884164..e000473f8205 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -575,7 +575,9 @@ static irqreturn_t arm_smmu_context_fault(int irq, void 
*dev)
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_cfg *cfg = _domain->cfg;
struct arm_smmu_device *smmu = smmu_domain->smmu;
+   void __iomem *gr1_base = ARM_SMMU_GR1(smmu);
void __iomem *cb_base;
+   u32 cbfrsynra;
 
cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
@@ -585,10 +587,11 @@ static irqreturn_t arm_smmu_context_fault(int irq, void 
*dev)
 
fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
+   cbfrsynra = readl_relaxed(gr1_base + 
ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx));
 
dev_err_ratelimited(smmu->dev,
-   "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
-   fsr, iova, fsynr, cfg->cbndx);
+   "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, 
cbfrsynra=0x%x, cb=%d\n",
+   fsr, iova, fsynr, cbfrsynra, cfg->cbndx);
 
writel(fsr, cb_base + ARM_SMMU_CB_FSR);
return IRQ_HANDLED;
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/1] iommu/arm-smmu: Log CBFRSYNRA register on context fault

2019-04-18 Thread Vivek Gautam
On Fri, Apr 19, 2019 at 5:55 AM Bjorn Andersson
 wrote:
>
> On Mon 15 Apr 10:37 PDT 2019, Vivek Gautam wrote:
>
> > Bits[15:0] in CBFRSYNRA register contain information about
> > StreamID of the incoming transaction that generated the
> > fault. Dump CBFRSYNRA register to get this info.
> > This is specially useful in a distributed SMMU architecture
> > where multiple masters are connected to the SMMU.
> > SID information helps to quickly identify the faulting
> > master device.
> >
> > Signed-off-by: Vivek Gautam 
> > ---
> >
> > V1 of the patch available @
> > https://lore.kernel.org/patchwork/patch/1061615/
> >
> > Changes from v1:
> >  - Dump the raw register value of CBFRSYNRA register in the
> >context fault log rather than extracting the SID information
> >and dumping that.
> >
> >  drivers/iommu/arm-smmu-regs.h | 2 ++
> >  drivers/iommu/arm-smmu.c  | 8 ++--
> >  2 files changed, 8 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h
> > index a1226e4ab5f8..e9132a926761 100644
> > --- a/drivers/iommu/arm-smmu-regs.h
> > +++ b/drivers/iommu/arm-smmu-regs.h
> > @@ -147,6 +147,8 @@ enum arm_smmu_s2cr_privcfg {
> >  #define CBAR_IRPTNDX_SHIFT   24
> >  #define CBAR_IRPTNDX_MASK0xff
> >
> > +#define ARM_SMMU_GR1_CBFRSYNRA(n)(0x400 + ((n) << 2))
> > +
> >  #define ARM_SMMU_GR1_CBA2R(n)(0x800 + ((n) << 2))
> >  #define CBA2R_RW64_32BIT (0 << 0)
> >  #define CBA2R_RW64_64BIT (1 << 0)
> > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> > index 045d93884164..a4773e8c6b0e 100644
> > --- a/drivers/iommu/arm-smmu.c
> > +++ b/drivers/iommu/arm-smmu.c
> > @@ -575,7 +575,9 @@ static irqreturn_t arm_smmu_context_fault(int irq, void 
> > *dev)
> >   struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> >   struct arm_smmu_cfg *cfg = _domain->cfg;
> >   struct arm_smmu_device *smmu = smmu_domain->smmu;
> > + void __iomem *gr1_base = ARM_SMMU_GR1(smmu);
> >   void __iomem *cb_base;
> > + u32 cbfrsynra;
> >
> >   cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
> >   fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
> > @@ -585,10 +587,12 @@ static irqreturn_t arm_smmu_context_fault(int irq, 
> > void *dev)
> >
> >   fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
> >   iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
> > + cbfrsynra = readl_relaxed(gr1_base +
> > +   ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx));
>
> The 80 char limit is more like a guideline anyways...please don't wrap
> this.
>
> >
> >   dev_err_ratelimited(smmu->dev,
> > - "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, 
> > cb=%d\n",
> > - fsr, iova, fsynr, cfg->cbndx);
> > + "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, 
> > cbfrsynra = 0x%x, cb=%d\n",
>
> Drop the spaces around '='.
>
> With those addressed, you have my
>
> Reviewed-by: Bjorn Andersson 

Thanks for the review Bjorn. Will address above comments and respin.

Best regards
Vivek

>
> Regards,
> Bjorn
>
> > + fsr, iova, fsynr, cbfrsynra, cfg->cbndx);
> >
> >   writel(fsr, cb_base + ARM_SMMU_CB_FSR);
> >   return IRQ_HANDLED;
> > --
> > QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
> > of Code Aurora Forum, hosted by The Linux Foundation
> >
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu



-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/1] iommu/arm-smmu: Log CBFRSYNRA register on context fault

2019-04-15 Thread Vivek Gautam
Bits[15:0] in CBFRSYNRA register contain information about
StreamID of the incoming transaction that generated the
fault. Dump CBFRSYNRA register to get this info.
This is especially useful in a distributed SMMU architecture
where multiple masters are connected to the SMMU.
SID information helps to quickly identify the faulting
master device.

Signed-off-by: Vivek Gautam 
---

V1 of the patch available @
https://lore.kernel.org/patchwork/patch/1061615/

Changes from v1:
 - Dump the raw register value of CBFRSYNRA register in the
   context fault log rather than extracting the SID information
   and dumping that.

 drivers/iommu/arm-smmu-regs.h | 2 ++
 drivers/iommu/arm-smmu.c  | 8 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h
index a1226e4ab5f8..e9132a926761 100644
--- a/drivers/iommu/arm-smmu-regs.h
+++ b/drivers/iommu/arm-smmu-regs.h
@@ -147,6 +147,8 @@ enum arm_smmu_s2cr_privcfg {
 #define CBAR_IRPTNDX_SHIFT 24
 #define CBAR_IRPTNDX_MASK  0xff
 
+#define ARM_SMMU_GR1_CBFRSYNRA(n)  (0x400 + ((n) << 2))
+
 #define ARM_SMMU_GR1_CBA2R(n)  (0x800 + ((n) << 2))
 #define CBA2R_RW64_32BIT   (0 << 0)
 #define CBA2R_RW64_64BIT   (1 << 0)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 045d93884164..a4773e8c6b0e 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -575,7 +575,9 @@ static irqreturn_t arm_smmu_context_fault(int irq, void 
*dev)
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_cfg *cfg = _domain->cfg;
struct arm_smmu_device *smmu = smmu_domain->smmu;
+   void __iomem *gr1_base = ARM_SMMU_GR1(smmu);
void __iomem *cb_base;
+   u32 cbfrsynra;
 
cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
@@ -585,10 +587,12 @@ static irqreturn_t arm_smmu_context_fault(int irq, void 
*dev)
 
fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
+   cbfrsynra = readl_relaxed(gr1_base +
+ ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx));
 
dev_err_ratelimited(smmu->dev,
-   "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
-   fsr, iova, fsynr, cfg->cbndx);
+   "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra 
= 0x%x, cb=%d\n",
+   fsr, iova, fsynr, cbfrsynra, cfg->cbndx);
 
writel(fsr, cb_base + ARM_SMMU_CB_FSR);
return IRQ_HANDLED;
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/1] iommu/arm-smmu: Add SID information to context fault log

2019-04-15 Thread Vivek Gautam


On 4/15/2019 3:11 PM, Robin Murphy wrote:

On 15/04/2019 09:07, Vivek Gautam wrote:

Extract the SID and add the information to context fault log.
This is especially useful in a distributed smmu architecture
where multiple masters are connected to smmu. SID information
helps to quickly identify the faulting master device.


Hmm, given how it's UNKNOWN for translation faults, which are arguably 
the most likely context fault, I reckon it probably makes more sense 
to just dump the raw register value for the user to interpret, as we 
do for fsr/fsynr.



Thanks Robin. Sure will update it to dump the raw register value.

Regards
Vivek



Robin.


Signed-off-by: Vivek Gautam 
---
  drivers/iommu/arm-smmu-regs.h |  4 
  drivers/iommu/arm-smmu.c  | 14 --
  2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm-smmu-regs.h 
b/drivers/iommu/arm-smmu-regs.h

index a1226e4ab5f8..e5be0344b610 100644
--- a/drivers/iommu/arm-smmu-regs.h
+++ b/drivers/iommu/arm-smmu-regs.h
@@ -147,6 +147,10 @@ enum arm_smmu_s2cr_privcfg {
  #define CBAR_IRPTNDX_SHIFT    24
  #define CBAR_IRPTNDX_MASK    0xff
  +#define ARM_SMMU_GR1_CBFRSYNRA(n)    (0x400 + ((n) << 2))
+#define CBFRSYNRA_V2_SID_MASK    0x
+#define CBFRSYNRA_V1_SID_MASK    0x7fff
+
  #define ARM_SMMU_GR1_CBA2R(n)    (0x800 + ((n) << 2))
  #define CBA2R_RW64_32BIT    (0 << 0)
  #define CBA2R_RW64_64BIT    (1 << 0)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 045d93884164..aa3426dc68d0 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -575,7 +575,10 @@ static irqreturn_t arm_smmu_context_fault(int 
irq, void *dev)

  struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
  struct arm_smmu_cfg *cfg = _domain->cfg;
  struct arm_smmu_device *smmu = smmu_domain->smmu;
+    void __iomem *gr1_base = ARM_SMMU_GR1(smmu);
  void __iomem *cb_base;
+    u32 cbfrsynra;
+    u16 sid;
    cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
  fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
@@ -586,9 +589,16 @@ static irqreturn_t arm_smmu_context_fault(int 
irq, void *dev)

  fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
  iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
  +    cbfrsynra = readl_relaxed(gr1_base +
+  ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx));
+    if (smmu->version > ARM_SMMU_V1)
+    sid = cbfrsynra & CBFRSYNRA_V2_SID_MASK;
+    else
+    sid = cbfrsynra & CBFRSYNRA_V1_SID_MASK;
+
  dev_err_ratelimited(smmu->dev,
-    "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, 
cb=%d\n",

-    fsr, iova, fsynr, cfg->cbndx);
+    "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, 
cb=%d sid = %u\n",

+    fsr, iova, fsynr, cfg->cbndx, sid);
    writel(fsr, cb_base + ARM_SMMU_CB_FSR);
  return IRQ_HANDLED;


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[PATCH 1/1] iommu/arm-smmu: Add SID information to context fault log

2019-04-15 Thread Vivek Gautam
Extract the SID and add the information to context fault log.
This is especially useful in a distributed smmu architecture
where multiple masters are connected to smmu. SID information
helps to quickly identify the faulting master device.

Signed-off-by: Vivek Gautam 
---
 drivers/iommu/arm-smmu-regs.h |  4 
 drivers/iommu/arm-smmu.c  | 14 --
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h
index a1226e4ab5f8..e5be0344b610 100644
--- a/drivers/iommu/arm-smmu-regs.h
+++ b/drivers/iommu/arm-smmu-regs.h
@@ -147,6 +147,10 @@ enum arm_smmu_s2cr_privcfg {
 #define CBAR_IRPTNDX_SHIFT 24
 #define CBAR_IRPTNDX_MASK  0xff
 
+#define ARM_SMMU_GR1_CBFRSYNRA(n)  (0x400 + ((n) << 2))
+#define CBFRSYNRA_V2_SID_MASK  0x
+#define CBFRSYNRA_V1_SID_MASK  0x7fff
+
 #define ARM_SMMU_GR1_CBA2R(n)  (0x800 + ((n) << 2))
 #define CBA2R_RW64_32BIT   (0 << 0)
 #define CBA2R_RW64_64BIT   (1 << 0)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 045d93884164..aa3426dc68d0 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -575,7 +575,10 @@ static irqreturn_t arm_smmu_context_fault(int irq, void 
*dev)
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_cfg *cfg = _domain->cfg;
struct arm_smmu_device *smmu = smmu_domain->smmu;
+   void __iomem *gr1_base = ARM_SMMU_GR1(smmu);
void __iomem *cb_base;
+   u32 cbfrsynra;
+   u16 sid;
 
cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
@@ -586,9 +589,16 @@ static irqreturn_t arm_smmu_context_fault(int irq, void 
*dev)
fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
 
+   cbfrsynra = readl_relaxed(gr1_base +
+ ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx));
+   if (smmu->version > ARM_SMMU_V1)
+   sid = cbfrsynra & CBFRSYNRA_V2_SID_MASK;
+   else
+   sid = cbfrsynra & CBFRSYNRA_V1_SID_MASK;
+
dev_err_ratelimited(smmu->dev,
-   "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
-   fsr, iova, fsynr, cfg->cbndx);
+   "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d sid 
= %u\n",
+   fsr, iova, fsynr, cfg->cbndx, sid);
 
writel(fsr, cb_base + ARM_SMMU_CB_FSR);
return IRQ_HANDLED;
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v1 2/3] arm64: dts: qcom: msm8998: Add PCIe SMMU node

2019-04-03 Thread Vivek Gautam



On 4/2/2019 7:24 PM, Robin Murphy wrote:

On 30/03/2019 14:18, Vivek Gautam wrote:

You should probably have some "bus" and "iface" clocks too, per the
requirement of "qcom,smmu-v2". Maybe Vivek might know what's relevant
for MSM8998?


As Jeffrey rightly mentioned, these clocks are not under the control 
of Linux.

So, we won't need to add clocks to this SMMU.


OK, in that case the "clock-names" part of binding doc probably wants 
refining to reflect which implementations do actually require clocks.


Certainly.
Marc, do you want to push a patch for the same? Or, let me know I can 
prepare one myself.


Thanks
Vivek



Robin.

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v1 2/3] arm64: dts: qcom: msm8998: Add PCIe SMMU node

2019-03-30 Thread Vivek Gautam
Hi Marc,

On Fri, Mar 29, 2019 at 11:59 PM Robin Murphy  wrote:
>
> On 29/03/2019 10:51, Marc Gonzalez wrote:
> > On 28/03/2019 18:05, Marc Gonzalez wrote:
> >
> >> ANOC1 SMMU filters PCIe accesses.
> >
> > I'm not sure this description is entirely accurate...
> >
> > ANOC likely stands for "Application Network-On-Chip"

How about simply saying - "Add device node for ANOC1 SMMU" for
commit title.
Commit text -
ANOC1 smmu services a list of peripherals - USB, UFS, BLSP2, and PCIe.
Add the device node for this smmu.

> >
> >
> >>   arch/arm64/boot/dts/qcom/msm8998.dtsi | 15 +++
> >>   1 file changed, 15 insertions(+)
> >>
> >> diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi 
> >> b/arch/arm64/boot/dts/qcom/msm8998.dtsi
> >> index f9a922fdae75..5a1c0961b281 100644
> >> --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi
> >> +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi
> >> @@ -606,6 +606,21 @@
> >>  #thermal-sensor-cells = <1>;
> >>  };
> >>
> >> +anoc1_smmu: arm,smmu@168 {
> >> +compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2";
> >
> > Maybe I should instead use "qcom,msm8998-smmu-v2", "qcom,smmu-v2";
> > and define "qcom,msm8998-smmu-v2" in
> > Documentation/devicetree/bindings/iommu/arm,smmu.txt ?
>
> Yes please.
>
> > (Would the Documentation change need to be in a separate patch?)
>
> That's generally preferred, yes.
>
> >
> >> +reg = <0x0168 0x1>;
> >> +#iommu-cells = <1>;
> >
> > I'm not sure about this property. IIRC, Robin said <0> is not valid,
> > but I don't have any iommus prop, only iommu-map.
>
> You have to join the dots between the various bindings a little, but the
> iommu-base part of each iommu-map entry is an IOMMU specifier, and IOMMU
> specifiers are #iommu-cells long.
>
> To cut a long story short, 1 is definitely appropriate, because
> arm-smmu's definition of a 2-cell specifier wouldn't make much sense in
> the iommu-map context (and the current code for parsing iommu-map
> actually just assumes 1 anyway).

Besides this,
Looking at the SID mappings for ANOC1 smmu, devices such as USB, and UFS
can very well enable iommu support, and thus iommu-cells = 1 seems
like the correct thingy.

>
> >> +
> >> +#global-interrupts = <0>;
> >> +interrupts =
> >> +,
> >> +,
> >> +,
> >> +,
> >> +,
> >> +;
> >> +};
> >> +
> >
> > The rest of the node looks fairly straight-forward.
>
> You should probably have some "bus" and "iface" clocks too, per the
> requirement of "qcom,smmu-v2". Maybe Vivek might know what's relevant
> for MSM8998?

As Jeffrey rightly mentioned, these clocks are not under the control of Linux.
So, we won't need to add clocks to this SMMU.

Thanks
Vivek

>
> >
> > DT code was adapted from:
> >
> > https://source.codeaurora.org/quic/la/kernel/msm-4.4/tree/arch/arm/boot/dts/qcom/msm-arm-smmu-8998.dtsi?h=LE.UM.1.3.r3.25#n18
> >
> > I left out the so-called implementation-defined init:
> >
> > https://source.codeaurora.org/quic/la/kernel/msm-4.4/tree/arch/arm/boot/dts/qcom/msm-arm-smmu-impl-defs-8998.dtsi?h=LE.UM.1.3.r3.25#n123
> >
> > Should I try to merge this part in mainline?
> > (I don't have any documentation for it though.)
>
> "pls no :("
>
> I'm not sure what route the path takes from "DT describes the platform"
> to get to "DT lists opaque register addresses and magic data to write
> into them", but I imagine it might involve getting hit in the head along
> the way.
>
> Robin.
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu



-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 1/4] firmware: qcom_scm-64: Add atomic version of qcom_scm_call

2019-03-26 Thread Vivek Gautam



On 3/26/2019 2:39 AM, Bjorn Andersson wrote:

On Sun 09 Sep 23:25 PDT 2018, Vivek Gautam wrote:


There are scenarios where drivers are required to make a
scm call in atomic context, such as in one of the qcom's
arm-smmu-500 errata [1].

[1] ("https://source.codeaurora.org/quic/la/kernel/msm-4.9/
   tree/drivers/iommu/arm-smmu.c?h=msm-4.9#n4842")

Signed-off-by: Vivek Gautam 

Reviewed-by: Bjorn Andersson 



Thanks Bjorn for reviewing and testing this series.
I will repost the series on latest head.

Best regards
Vivek



Regards,
Bjorn


---
  drivers/firmware/qcom_scm-64.c | 136 -
  1 file changed, 92 insertions(+), 44 deletions(-)

diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c
index 688525dd4aee..3a8c867cdf51 100644
--- a/drivers/firmware/qcom_scm-64.c
+++ b/drivers/firmware/qcom_scm-64.c
@@ -70,32 +70,71 @@ static DEFINE_MUTEX(qcom_scm_lock);
  #define FIRST_EXT_ARG_IDX 3
  #define N_REGISTER_ARGS (MAX_QCOM_SCM_ARGS - N_EXT_QCOM_SCM_ARGS + 1)
  
-/**

- * qcom_scm_call() - Invoke a syscall in the secure world
- * @dev:   device
- * @svc_id:service identifier
- * @cmd_id:command identifier
- * @desc:  Descriptor structure containing arguments and return values
- *
- * Sends a command to the SCM and waits for the command to finish processing.
- * This should *only* be called in pre-emptible context.
-*/
-static int qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id,
-const struct qcom_scm_desc *desc,
-struct arm_smccc_res *res)
+static void __qcom_scm_call_do(const struct qcom_scm_desc *desc,
+  struct arm_smccc_res *res, u32 fn_id,
+  u64 x5, u32 type)
+{
+   u64 cmd;
+   struct arm_smccc_quirk quirk = {.id = ARM_SMCCC_QUIRK_QCOM_A6};
+
+   cmd = ARM_SMCCC_CALL_VAL(type, qcom_smccc_convention,
+ARM_SMCCC_OWNER_SIP, fn_id);
+
+   quirk.state.a6 = 0;
+
+   do {
+   arm_smccc_smc_quirk(cmd, desc->arginfo, desc->args[0],
+   desc->args[1], desc->args[2], x5,
+   quirk.state.a6, 0, res, );
+
+   if (res->a0 == QCOM_SCM_INTERRUPTED)
+   cmd = res->a0;
+
+   } while (res->a0 == QCOM_SCM_INTERRUPTED);
+}
+
+static void qcom_scm_call_do(const struct qcom_scm_desc *desc,
+struct arm_smccc_res *res, u32 fn_id,
+u64 x5, bool atomic)
+{
+   int retry_count = 0;
+
+   if (!atomic) {
+   do {
+   mutex_lock(_scm_lock);
+
+   __qcom_scm_call_do(desc, res, fn_id, x5,
+  ARM_SMCCC_STD_CALL);
+
+   mutex_unlock(_scm_lock);
+
+   if (res->a0 == QCOM_SCM_V2_EBUSY) {
+   if (retry_count++ > QCOM_SCM_EBUSY_MAX_RETRY)
+   break;
+   msleep(QCOM_SCM_EBUSY_WAIT_MS);
+   }
+   }  while (res->a0 == QCOM_SCM_V2_EBUSY);
+   } else {
+   __qcom_scm_call_do(desc, res, fn_id, x5, ARM_SMCCC_FAST_CALL);
+   }
+}
+
+static int ___qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id,
+   const struct qcom_scm_desc *desc,
+   struct arm_smccc_res *res, bool atomic)
  {
int arglen = desc->arginfo & 0xf;
-   int retry_count = 0, i;
+   int i;
u32 fn_id = QCOM_SCM_FNID(svc_id, cmd_id);
-   u64 cmd, x5 = desc->args[FIRST_EXT_ARG_IDX];
+   u64 x5 = desc->args[FIRST_EXT_ARG_IDX];
dma_addr_t args_phys = 0;
void *args_virt = NULL;
size_t alloc_len;
-   struct arm_smccc_quirk quirk = {.id = ARM_SMCCC_QUIRK_QCOM_A6};
+   gfp_t flag = atomic ? GFP_ATOMIC : GFP_KERNEL;
  
  	if (unlikely(arglen > N_REGISTER_ARGS)) {

alloc_len = N_EXT_QCOM_SCM_ARGS * sizeof(u64);
-   args_virt = kzalloc(PAGE_ALIGN(alloc_len), GFP_KERNEL);
+   args_virt = kzalloc(PAGE_ALIGN(alloc_len), flag);
  
  		if (!args_virt)

return -ENOMEM;
@@ -125,33 +164,7 @@ static int qcom_scm_call(struct device *dev, u32 svc_id, 
u32 cmd_id,
x5 = args_phys;
}
  
-	do {

-   mutex_lock(_scm_lock);
-
-   cmd = ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL,
-qcom_smccc_convention,
-ARM_SMCCC_OWNER_SIP, fn_id);
-
-   quirk.state.a6 = 0;
-
-   do {
-   arm_smccc_smc_quirk(cmd, desc->arginfo, desc->args[0],
- desc->args[1], desc->args[

Re: [PATCH 0/3] iommu/arm-smmu: Add support to use Last level cache

2019-01-29 Thread Vivek Gautam
On Tue, Jan 29, 2019 at 8:34 PM Ard Biesheuvel
 wrote:
>
> (+ Bjorn)
>
> On Mon, 28 Jan 2019 at 12:27, Vivek Gautam  
> wrote:
> >
> > Hi Ard,
> >
> > On Thu, Jan 24, 2019 at 1:25 PM Ard Biesheuvel
> >  wrote:
> > >
> > > On Thu, 24 Jan 2019 at 07:58, Vivek Gautam  
> > > wrote:
> > > >
> > > > On Mon, Jan 21, 2019 at 7:55 PM Ard Biesheuvel
> > > >  wrote:
> > > > >
> > > > > On Mon, 21 Jan 2019 at 14:56, Robin Murphy  
> > > > > wrote:
> > > > > >
> > > > > > On 21/01/2019 13:36, Ard Biesheuvel wrote:
> > > > > > > On Mon, 21 Jan 2019 at 14:25, Robin Murphy  
> > > > > > > wrote:
> > > > > > >>
> > > > > > >> On 21/01/2019 10:50, Ard Biesheuvel wrote:
> > > > > > >>> On Mon, 21 Jan 2019 at 11:17, Vivek Gautam 
> > > > > > >>>  wrote:
> > > > > > >>>>
> > > > > > >>>> Hi,
> > > > > > >>>>
> > > > > > >>>>
> > > > > > >>>> On Mon, Jan 21, 2019 at 12:56 PM Ard Biesheuvel
> > > > > > >>>>  wrote:
> > > > > > >>>>>
> > > > > > >>>>> On Mon, 21 Jan 2019 at 06:54, Vivek Gautam 
> > > > > > >>>>>  wrote:
> > > > > > >>>>>>
> > > > > > >>>>>> Qualcomm SoCs have an additional level of cache called as
> > > > > > >>>>>> System cache, aka. Last level cache (LLC). This cache sits 
> > > > > > >>>>>> right
> > > > > > >>>>>> before the DDR, and is tightly coupled with the memory 
> > > > > > >>>>>> controller.
> > > > > > >>>>>> The clients using this cache request their slices from this
> > > > > > >>>>>> system cache, make it active, and can then start using it.
> > > > > > >>>>>> For these clients with smmu, to start using the system cache 
> > > > > > >>>>>> for
> > > > > > >>>>>> buffers and, related page tables [1], memory attributes need 
> > > > > > >>>>>> to be
> > > > > > >>>>>> set accordingly. This series add the required support.
> > > > > > >>>>>>
> > > > > > >>>>>
> > > > > > >>>>> Does this actually improve performance on reads from a 
> > > > > > >>>>> device? The
> > > > > > >>>>> non-cache coherent DMA routines perform an unconditional 
> > > > > > >>>>> D-cache
> > > > > > >>>>> invalidate by VA to the PoC before reading from the buffers 
> > > > > > >>>>> filled by
> > > > > > >>>>> the device, and I would expect the PoC to be defined as lying 
> > > > > > >>>>> beyond
> > > > > > >>>>> the LLC to still guarantee the architected behavior.
> > > > > > >>>>
> > > > > > >>>> We have seen performance improvements when running Manhattan
> > > > > > >>>> GFXBench benchmarks.
> > > > > > >>>>
> > > > > > >>>
> > > > > > >>> Ah ok, that makes sense, since in that case, the data flow is 
> > > > > > >>> mostly
> > > > > > >>> to the device, not from the device.
> > > > > > >>>
> > > > > > >>>> As for the PoC, from my knowledge on sdm845 the system cache, 
> > > > > > >>>> aka
> > > > > > >>>> Last level cache (LLC) lies beyond the point of coherency.
> > > > > > >>>> Non-cache coherent buffers will not be cached to system cache 
> > > > > > >>>> also, and
> > > > > > >>>> no additional software cache maintenance ops are required for 
> > > > > > >>>> system cache.
> > > > > > >>>> Pratik can add 

Re: [PATCH 2/2] iommu/arm-smmu: Add support for non-coherent page table mappings

2019-01-29 Thread Vivek Gautam
Hi Will,

On Tue, Jan 22, 2019 at 11:14 AM Will Deacon  wrote:
>
> On Mon, Jan 21, 2019 at 11:35:30AM +0530, Vivek Gautam wrote:
> > On Sun, Jan 20, 2019 at 5:31 AM Will Deacon  wrote:
> > > On Thu, Jan 17, 2019 at 02:57:18PM +0530, Vivek Gautam wrote:
> > > > Adding a device tree option for arm smmu to enable non-cacheable
> > > > memory for page tables.
> > > > We already enable a smmu feature for coherent walk based on
> > > > whether the smmu device is dma-coherent or not. Have an option
> > > > to enable non-cacheable page table memory to force set it for
> > > > particular smmu devices.
> > >
> > > Hmm, I must be missing something here. What is the difference between this
> > > new property, and simply omitting dma-coherent on the SMMU?
> >
> > So, this is what I understood from the email thread for Last level
> > cache support -
> > Robin pointed to the fact that we may need to add support for setting
> > non-cacheable
> > mappings in the TCR.
> > Currently, we don't do that for SMMUs that omit dma-coherent.
> > We rely on the interconnect to handle the configuration set in TCR,
> > and let interconnect
> > ignore the cacheability if it can't support.
>
> I think that's a bug. With that fixed, can you get what you want by omitting
> "dma-coherent"?

Based on the discussion on the first patch in this series [1], I can
update the series.
First thing can be -
if QUIRK_NO_DMA is set (i.e. the IOMMU _is_ coherent) then we use a
cacheable TCR;
So, we may need an additional check for this when setting the TCR.

For the second case -
IOMMUs that are *not* coherent, i.e. ones that are omitting
'dma-coherent' property,
anyways have to access the page table directly from memory. We take
care of the CPU
side of this by allocating non-coherent memory, and making sure that we sync the
PTEs from map call.
Shouldn't we mark TCR for these IOMMUs as non-cacheable for inner and outer
cacheability attribute?


[1] https://lore.kernel.org/patchwork/patch/1032939/

Regards
Vivek

>
> Will



-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/2] iommu/io-pgtable-arm: Add support for non-coherent page tables

2019-01-28 Thread Vivek Gautam
On Mon, Jan 21, 2019 at 6:43 PM Robin Murphy  wrote:
>
> On 17/01/2019 09:27, Vivek Gautam wrote:
> >  From Robin's comment [1] about touching TCR configurations -
> >
> > "TBH if we're going to touch the TCR attributes at all then we should
> > probably correct that sloppiness first - there's an occasional argument
> > for using non-cacheable pagetables even on a coherent SMMU if reducing
> > snoop traffic/latency on walks outweighs the cost of cache maintenance
> > on PTE updates, but anyone thinking they can get that by overriding
> > dma-coherent silently gets the worst of both worlds thanks to this
> > current TCR value."
> >
> > We have IO_PGTABLE_QUIRK_NO_DMA quirk present, but we don't force
> > anybody _not_ using dma-coherent smmu to have non-cacheable page table
> > mappings.
> > Having another quirk flag can help in having non-cacheable memory for
> > page tables once and for all.
> >
> > [1] https://lore.kernel.org/patchwork/patch/1020906/
> >
> > Signed-off-by: Vivek Gautam 
> > ---
> >   drivers/iommu/io-pgtable-arm.c | 17 -
> >   drivers/iommu/io-pgtable.h |  6 ++
> >   2 files changed, 18 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> > index 237cacd4a62b..c76919c30f1a 100644
> > --- a/drivers/iommu/io-pgtable-arm.c
> > +++ b/drivers/iommu/io-pgtable-arm.c
> > @@ -780,7 +780,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg 
> > *cfg, void *cookie)
> >   struct arm_lpae_io_pgtable *data;
> >
> >   if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA 
> > |
> > - IO_PGTABLE_QUIRK_NON_STRICT))
> > + IO_PGTABLE_QUIRK_NON_STRICT |
> > + IO_PGTABLE_QUIRK_NON_COHERENT))
> >   return NULL;
> >
> >   data = arm_lpae_alloc_pgtable(cfg);
> > @@ -788,9 +789,14 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg 
> > *cfg, void *cookie)
> >   return NULL;
> >
> >   /* TCR */
> > - reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) |
> > -   (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) |
> > -   (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT);
> > + reg = ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT;
> > +
> > + if (cfg->quirks & IO_PGTABLE_QUIRK_NON_COHERENT)
> > + reg |= ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT |
> > +ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN0_SHIFT;
> > + else
> > + reg |= ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT |
> > +ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT;
> >
> >   switch (ARM_LPAE_GRANULE(data)) {
> >   case SZ_4K:
> > @@ -873,7 +879,8 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg 
> > *cfg, void *cookie)
> >
> >   /* The NS quirk doesn't apply at stage 2 */
> >   if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NO_DMA |
> > - IO_PGTABLE_QUIRK_NON_STRICT))
> > + IO_PGTABLE_QUIRK_NON_STRICT |
> > + IO_PGTABLE_QUIRK_NON_COHERENT))
> >   return NULL;
> >
> >   data = arm_lpae_alloc_pgtable(cfg);
> > diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
> > index 47d5ae559329..46604cf7b017 100644
> > --- a/drivers/iommu/io-pgtable.h
> > +++ b/drivers/iommu/io-pgtable.h
> > @@ -75,6 +75,11 @@ struct io_pgtable_cfg {
> >* IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs
> >*  on unmap, for DMA domains using the flush queue mechanism for
> >*  delayed invalidation.
> > +  *
> > +  * IO_PGTABLE_QUIRK_NON_COHERENT: Enforce non-cacheable mappings for
> > +  *  pagetables even on a coherent SMMU for cases where reducing
> > +  *  snoop traffic/latency on walks outweighs the cost of cache
> > +  *  maintenance on PTE updates.
>
> Hmm, we can't actually "enforce" anything with this as-is - all we're
> doing is setting the attributes that the IOMMU will use for pagetable
> walks, and that has no impact on how the CPU actually writes PTEs to
> memory. In particular, in the case of a hardware-coherent IOMMU which is
> described as such, even if we make the dma_map/sync calls they still
> won't do anything since they 'know' that the IOMM

Re: [PATCH 0/3] iommu/arm-smmu: Add support to use Last level cache

2019-01-28 Thread Vivek Gautam
Hi Ard,

On Thu, Jan 24, 2019 at 1:25 PM Ard Biesheuvel
 wrote:
>
> On Thu, 24 Jan 2019 at 07:58, Vivek Gautam  
> wrote:
> >
> > On Mon, Jan 21, 2019 at 7:55 PM Ard Biesheuvel
> >  wrote:
> > >
> > > On Mon, 21 Jan 2019 at 14:56, Robin Murphy  wrote:
> > > >
> > > > On 21/01/2019 13:36, Ard Biesheuvel wrote:
> > > > > On Mon, 21 Jan 2019 at 14:25, Robin Murphy  
> > > > > wrote:
> > > > >>
> > > > >> On 21/01/2019 10:50, Ard Biesheuvel wrote:
> > > > >>> On Mon, 21 Jan 2019 at 11:17, Vivek Gautam 
> > > > >>>  wrote:
> > > > >>>>
> > > > >>>> Hi,
> > > > >>>>
> > > > >>>>
> > > > >>>> On Mon, Jan 21, 2019 at 12:56 PM Ard Biesheuvel
> > > > >>>>  wrote:
> > > > >>>>>
> > > > >>>>> On Mon, 21 Jan 2019 at 06:54, Vivek Gautam 
> > > > >>>>>  wrote:
> > > > >>>>>>
> > > > >>>>>> Qualcomm SoCs have an additional level of cache called as
> > > > >>>>>> System cache, aka. Last level cache (LLC). This cache sits right
> > > > >>>>>> before the DDR, and is tightly coupled with the memory 
> > > > >>>>>> controller.
> > > > >>>>>> The clients using this cache request their slices from this
> > > > >>>>>> system cache, make it active, and can then start using it.
> > > > >>>>>> For these clients with smmu, to start using the system cache for
> > > > >>>>>> buffers and, related page tables [1], memory attributes need to 
> > > > >>>>>> be
> > > > >>>>>> set accordingly. This series add the required support.
> > > > >>>>>>
> > > > >>>>>
> > > > >>>>> Does this actually improve performance on reads from a device? The
> > > > >>>>> non-cache coherent DMA routines perform an unconditional D-cache
> > > > >>>>> invalidate by VA to the PoC before reading from the buffers 
> > > > >>>>> filled by
> > > > >>>>> the device, and I would expect the PoC to be defined as lying 
> > > > >>>>> beyond
> > > > >>>>> the LLC to still guarantee the architected behavior.
> > > > >>>>
> > > > >>>> We have seen performance improvements when running Manhattan
> > > > >>>> GFXBench benchmarks.
> > > > >>>>
> > > > >>>
> > > > >>> Ah ok, that makes sense, since in that case, the data flow is mostly
> > > > >>> to the device, not from the device.
> > > > >>>
> > > > >>>> As for the PoC, from my knowledge on sdm845 the system cache, aka
> > > > >>>> Last level cache (LLC) lies beyond the point of coherency.
> > > > >>>> Non-cache coherent buffers will not be cached to system cache 
> > > > >>>> also, and
> > > > >>>> no additional software cache maintenance ops are required for 
> > > > >>>> system cache.
> > > > >>>> Pratik can add more if I am missing something.
> > > > >>>>
> > > > >>>> To take care of the memory attributes from DMA APIs side, we can 
> > > > >>>> add a
> > > > >>>> DMA_ATTR definition to take care of any dma non-coherent APIs 
> > > > >>>> calls.
> > > > >>>>
> > > > >>>
> > > > >>> So does the device use the correct inner non-cacheable, outer
> > > > >>> writeback cacheable attributes if the SMMU is in pass-through?
> > > > >>>
> > > > >>> We have been looking into another use case where the fact that the
> > > > >>> SMMU overrides memory attributes is causing issues (WC mappings used
> > > > >>> by the radeon and amdgpu driver). So if the SMMU would honour the
> > > > >>> existing attributes, would you still need the SMMU changes?
> > > > >>
> > > > >> Even if we could force a 

Re: [PATCH 0/3] iommu/arm-smmu: Add support to use Last level cache

2019-01-23 Thread Vivek Gautam
On Mon, Jan 21, 2019 at 7:55 PM Ard Biesheuvel
 wrote:
>
> On Mon, 21 Jan 2019 at 14:56, Robin Murphy  wrote:
> >
> > On 21/01/2019 13:36, Ard Biesheuvel wrote:
> > > On Mon, 21 Jan 2019 at 14:25, Robin Murphy  wrote:
> > >>
> > >> On 21/01/2019 10:50, Ard Biesheuvel wrote:
> > >>> On Mon, 21 Jan 2019 at 11:17, Vivek Gautam 
> > >>>  wrote:
> > >>>>
> > >>>> Hi,
> > >>>>
> > >>>>
> > >>>> On Mon, Jan 21, 2019 at 12:56 PM Ard Biesheuvel
> > >>>>  wrote:
> > >>>>>
> > >>>>> On Mon, 21 Jan 2019 at 06:54, Vivek Gautam 
> > >>>>>  wrote:
> > >>>>>>
> > >>>>>> Qualcomm SoCs have an additional level of cache called as
> > >>>>>> System cache, aka. Last level cache (LLC). This cache sits right
> > >>>>>> before the DDR, and is tightly coupled with the memory controller.
> > >>>>>> The clients using this cache request their slices from this
> > >>>>>> system cache, make it active, and can then start using it.
> > >>>>>> For these clients with smmu, to start using the system cache for
> > >>>>>> buffers and, related page tables [1], memory attributes need to be
> > >>>>>> set accordingly. This series add the required support.
> > >>>>>>
> > >>>>>
> > >>>>> Does this actually improve performance on reads from a device? The
> > >>>>> non-cache coherent DMA routines perform an unconditional D-cache
> > >>>>> invalidate by VA to the PoC before reading from the buffers filled by
> > >>>>> the device, and I would expect the PoC to be defined as lying beyond
> > >>>>> the LLC to still guarantee the architected behavior.
> > >>>>
> > >>>> We have seen performance improvements when running Manhattan
> > >>>> GFXBench benchmarks.
> > >>>>
> > >>>
> > >>> Ah ok, that makes sense, since in that case, the data flow is mostly
> > >>> to the device, not from the device.
> > >>>
> > >>>> As for the PoC, from my knowledge on sdm845 the system cache, aka
> > >>>> Last level cache (LLC) lies beyond the point of coherency.
> > >>>> Non-cache coherent buffers will not be cached to system cache also, and
> > >>>> no additional software cache maintenance ops are required for system 
> > >>>> cache.
> > >>>> Pratik can add more if I am missing something.
> > >>>>
> > >>>> To take care of the memory attributes from DMA APIs side, we can add a
> > >>>> DMA_ATTR definition to take care of any dma non-coherent APIs calls.
> > >>>>
> > >>>
> > >>> So does the device use the correct inner non-cacheable, outer
> > >>> writeback cacheable attributes if the SMMU is in pass-through?
> > >>>
> > >>> We have been looking into another use case where the fact that the
> > >>> SMMU overrides memory attributes is causing issues (WC mappings used
> > >>> by the radeon and amdgpu driver). So if the SMMU would honour the
> > >>> existing attributes, would you still need the SMMU changes?
> > >>
> > >> Even if we could force a stage 2 mapping with the weakest pagetable
> > >> attributes (such that combining would work), there would still need to
> > >> be a way to set the TCR attributes appropriately if this behaviour is
> > >> wanted for the SMMU's own table walks as well.
> > >>
> > >
> > > Isn't that just a matter of implementing support for SMMUs that lack
> > > the 'dma-coherent' attribute?
> >
> > Not quite - in general they need INC-ONC attributes in case there
> > actually is something in the architectural outer-cacheable domain.
>
> But is it a problem to use INC-ONC attributes for the SMMU PTW on this
> chip? AIUI, the reason for the SMMU changes is to avoid the
> performance hit of snooping, which is more expensive than cache
> maintenance of SMMU page tables. So are you saying the by-VA cache
> maintenance is not relayed to this system cache, resulting in page
> table updates to be invisible to masters using INC-ONC attributes?

The reason for this SMMU changes is that the non-

Re: [PATCH 1/3] iommu/arm-smmu: Move to bitmap for arm_smmu_domain atrributes

2019-01-22 Thread Vivek Gautam
On Mon, Jan 21, 2019 at 7:23 PM Robin Murphy  wrote:
>
> On 21/01/2019 05:53, Vivek Gautam wrote:
> > A number of arm_smmu_domain's attributes can be assigned based
> > on the iommu domains's attributes. These local attributes better
> > be managed by a bitmap.
> > So remove boolean flags and move to a 32-bit bitmap, and enable
> > each bits separtely.
> >
> > Signed-off-by: Vivek Gautam 
> > ---
> >   drivers/iommu/arm-smmu.c | 10 ++
> >   1 file changed, 6 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> > index 7ebbcf1b2eb3..52b300dfc096 100644
> > --- a/drivers/iommu/arm-smmu.c
> > +++ b/drivers/iommu/arm-smmu.c
> > @@ -257,10 +257,11 @@ struct arm_smmu_domain {
> >   const struct iommu_gather_ops   *tlb_ops;
> >   struct arm_smmu_cfg cfg;
> >   enum arm_smmu_domain_stage  stage;
> > - boolnon_strict;
> >   struct mutexinit_mutex; /* Protects smmu pointer 
> > */
> >   spinlock_t  cb_lock; /* Serialises ATS1* ops and 
> > TLB syncs */
> >   struct iommu_domain domain;
> > +#define ARM_SMMU_DOMAIN_ATTR_NON_STRICT  BIT(0)
> > + unsigned intattr;
> >   };
> >
> >   struct arm_smmu_option_prop {
> > @@ -901,7 +902,7 @@ static int arm_smmu_init_domain_context(struct 
> > iommu_domain *domain,
> >   if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
> >   pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
> >
> > - if (smmu_domain->non_strict)
> > + if (smmu_domain->attr & ARM_SMMU_DOMAIN_ATTR_NON_STRICT)
> >   pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
> >
> >   /* Non coherent page table mappings only for Stage-1 */
> > @@ -1598,7 +1599,8 @@ static int arm_smmu_domain_get_attr(struct 
> > iommu_domain *domain,
> >   case IOMMU_DOMAIN_DMA:
> >   switch (attr) {
> >   case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
> > - *(int *)data = smmu_domain->non_strict;
> > + *(int *)data = !!(smmu_domain->attr &
> > +   ARM_SMMU_DOMAIN_ATTR_NON_STRICT);
> >   return 0;
> >   default:
> >   return -ENODEV;
> > @@ -1638,7 +1640,7 @@ static int arm_smmu_domain_set_attr(struct 
> > iommu_domain *domain,
> >   case IOMMU_DOMAIN_DMA:
> >   switch (attr) {
> >   case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
> > - smmu_domain->non_strict = *(int *)data;
> > + smmu_domain->attr |= ARM_SMMU_DOMAIN_ATTR_NON_STRICT;
>
> But what if *data == 0?

Right, we need a check on the value of data here as well, similar to
what we are doing for QCOM_SYS_CACHE [1].

[1] https://lore.kernel.org/patchwork/patch/1033796/

Regards
Vivek

>
> Robin.
>
> >   break;
> >   default:
> >   ret = -ENODEV;
> >
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu



-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 0/3] iommu/arm-smmu: Add support to use Last level cache

2019-01-21 Thread Vivek Gautam
Hi,


On Mon, Jan 21, 2019 at 12:56 PM Ard Biesheuvel
 wrote:
>
> On Mon, 21 Jan 2019 at 06:54, Vivek Gautam  
> wrote:
> >
> > Qualcomm SoCs have an additional level of cache called as
> > System cache, aka. Last level cache (LLC). This cache sits right
> > before the DDR, and is tightly coupled with the memory controller.
> > The clients using this cache request their slices from this
> > system cache, make it active, and can then start using it.
> > For these clients with smmu, to start using the system cache for
> > buffers and, related page tables [1], memory attributes need to be
> > set accordingly. This series add the required support.
> >
>
> Does this actually improve performance on reads from a device? The
> non-cache coherent DMA routines perform an unconditional D-cache
> invalidate by VA to the PoC before reading from the buffers filled by
> the device, and I would expect the PoC to be defined as lying beyond
> the LLC to still guarantee the architected behavior.

We have seen performance improvements when running Manhattan
GFXBench benchmarks.

As for the PoC, to my knowledge, on sdm845 the system cache, aka
Last level cache (LLC), lies beyond the point of coherency.
Non-cache-coherent buffers will also not be cached in the system cache, and
no additional software cache maintenance ops are required for the system cache.
Pratik can add more if I am missing something.

To take care of the memory attributes from DMA APIs side, we can add a
DMA_ATTR definition to take care of any dma non-coherent APIs calls.

Regards
Vivek
>
>
>
> > This change is a realisation of following changes from downstream msm-4.9:
> > iommu: io-pgtable-arm: Support DOMAIN_ATTRIBUTE_USE_UPSTREAM_HINT[2]
> > iommu: io-pgtable-arm: Implement IOMMU_USE_UPSTREAM_HINT[3]
> >
> > Changes since v2:
> >  - Split the patches into io-pgtable-arm driver and arm-smmu driver.
> >  - Converted smmu domain attributes to a bitmap, so multiple attributes
> >can be managed easily.
> >  - With addition of non-coherent page table mapping support [4], this
> >patch series now aligns with the understanding of upgrading the
> >non-coherent devices to use some level of outer cache.
> >  - Updated the macros and comments to reflect the use of QCOM_SYS_CACHE.
> >  - QCOM_SYS_CACHE can still be used at stage 2, so that doens't depend on
> >stage-1 mapping.
> >  - Added change to disable the attribute from arm_smmu_domain_set_attr()
> >when needed.
> >  - Removed the page protection controls for QCOM_SYS_CACHE at the DMA API
> >level.
> >
> > Goes on top of the non-coherent page tables support patch series [4]
> >
> > [1] https://patchwork.kernel.org/patch/10302791/
> > [2] 
> > https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9=bf762276796e79ca90014992f4d9da5593fa7d51
> > [3] 
> > https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9=d4c72c413ea27c43f60825193d4de9cb8ffd9602
> > [4] https://lore.kernel.org/patchwork/cover/1032938/
> >
> > Vivek Gautam (3):
> >   iommu/arm-smmu: Move to bitmap for arm_smmu_domain atrributes
> >   iommu/io-pgtable-arm: Add support to use system cache
> >   iommu/arm-smmu: Add support to use system cache
> >
> >  drivers/iommu/arm-smmu.c   | 28 
> >  drivers/iommu/io-pgtable-arm.c | 15 +--
> >  drivers/iommu/io-pgtable.h |  4 
> >  include/linux/iommu.h  |  2 ++
> >  4 files changed, 43 insertions(+), 6 deletions(-)
> >
> > --
> > QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
> > of Code Aurora Forum, hosted by The Linux Foundation
> >
> >
> > ___
> > linux-arm-kernel mailing list
> > linux-arm-ker...@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel



-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 2/2] iommu/arm-smmu: Add support for non-coherent page table mappings

2019-01-20 Thread Vivek Gautam
Hi Will,


On Sun, Jan 20, 2019 at 5:31 AM Will Deacon  wrote:
>
> On Thu, Jan 17, 2019 at 02:57:18PM +0530, Vivek Gautam wrote:
> > Adding a device tree option for arm smmu to enable non-cacheable
> > memory for page tables.
> > We already enable a smmu feature for coherent walk based on
> > whether the smmu device is dma-coherent or not. Have an option
> > to enable non-cacheable page table memory to force set it for
> > particular smmu devices.
>
> Hmm, I must be missing something here. What is the difference between this
> new property, and simply omitting dma-coherent on the SMMU?

So, this is what I understood from the email thread for Last level
cache support -
Robin pointed to the fact that we may need to add support for setting
non-cacheable mappings in the TCR.
Currently, we don't do that for SMMUs that omit dma-coherent.
We rely on the interconnect to handle the configuration set in the TCR,
and let the interconnect ignore the cacheability if it can't support it.

Moreover, Robin suggested that we should take care of SMMUs, for which
removing snoop latency on walks by making mappings as non-cacheable
outweighs the cost of cache maintenance on PTE updates.

So, this change adds another property to set up these non-cacheable
mappings explicitly. As I pointed out, omitting 'dma-coherent', and the
corresponding 'IO_PGTABLE_QUIRK_NO_DMA', does take care of a few things.

Should we handle the TCR settings too with this quirk?

Regards
Vivek
>
> Will
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu



--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/3] iommu/io-pgtable-arm: Add support to use system cache

2019-01-20 Thread Vivek Gautam
A few Qualcomm platforms, such as sdm845, have an additional outer
cache called the System cache, aka Last level cache (LLC), that
allows non-coherent devices to upgrade to using caching.

There is a fundamental assumption that non-coherent devices can't
access caches. This change adds an exception where they *can* use
some level of cache despite still being non-coherent overall.
The coherent devices that use cacheable memory, and CPU make use of
this system cache by default.

Looking at memory types, we have the following -
a) Normal uncached :- MAIR 0x44, inner non-cacheable,
                      outer non-cacheable;
b) Normal cached :-   MAIR 0xff, inner read write-back non-transient,
                      outer read write-back non-transient;
                      attribute setting for coherent I/O devices.
and, for non-coherent I/O devices that can allocate in the system cache,
another type gets added -
c) Normal sys-cached :- MAIR 0xf4, inner non-cacheable,
                        outer read write-back non-transient

Coherent I/O devices use system cache by marking the memory as
normal cached.
Non-coherent I/O devices should mark the memory as normal
sys-cached in page tables to use system cache.

Signed-off-by: Vivek Gautam 
---
 drivers/iommu/io-pgtable-arm.c | 15 +--
 drivers/iommu/io-pgtable.h |  4 
 include/linux/iommu.h  |  2 ++
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index c76919c30f1a..0e55772702da 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -168,10 +168,12 @@
 #define ARM_LPAE_MAIR_ATTR_MASK0xff
 #define ARM_LPAE_MAIR_ATTR_DEVICE  0x04
 #define ARM_LPAE_MAIR_ATTR_NC  0x44
+#define ARM_LPAE_MAIR_ATTR_QCOM_SYS_CACHE  0xf4
 #define ARM_LPAE_MAIR_ATTR_WBRWA   0xff
 #define ARM_LPAE_MAIR_ATTR_IDX_NC  0
 #define ARM_LPAE_MAIR_ATTR_IDX_CACHE   1
 #define ARM_LPAE_MAIR_ATTR_IDX_DEV 2
+#define ARM_LPAE_MAIR_ATTR_IDX_QCOM_SYS_CACHE  3
 
 /* IOPTE accessors */
 #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
@@ -443,6 +445,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct 
arm_lpae_io_pgtable *data,
else if (prot & IOMMU_CACHE)
pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
+   else if (prot & IOMMU_QCOM_SYS_CACHE)
+   pte |= (ARM_LPAE_MAIR_ATTR_IDX_QCOM_SYS_CACHE
+   << ARM_LPAE_PTE_ATTRINDX_SHIFT);
} else {
pte = ARM_LPAE_PTE_HAP_FAULT;
if (prot & IOMMU_READ)
@@ -781,7 +786,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, 
void *cookie)
 
if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA |
IO_PGTABLE_QUIRK_NON_STRICT |
-   IO_PGTABLE_QUIRK_NON_COHERENT))
+   IO_PGTABLE_QUIRK_NON_COHERENT |
+   IO_PGTABLE_QUIRK_QCOM_SYS_CACHE))
return NULL;
 
data = arm_lpae_alloc_pgtable(cfg);
@@ -794,6 +800,9 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, 
void *cookie)
if (cfg->quirks & IO_PGTABLE_QUIRK_NON_COHERENT)
reg |= ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT |
   ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN0_SHIFT;
+   else if (cfg->quirks & IO_PGTABLE_QUIRK_QCOM_SYS_CACHE)
+   reg |= ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT |
+ ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT;
else
reg |= ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT |
   ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT;
@@ -848,7 +857,9 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, 
void *cookie)
  (ARM_LPAE_MAIR_ATTR_WBRWA
   << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
  (ARM_LPAE_MAIR_ATTR_DEVICE
-  << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));
+  << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) |
+ (ARM_LPAE_MAIR_ATTR_QCOM_SYS_CACHE
+  << 
ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_QCOM_SYS_CACHE));
 
cfg->arm_lpae_s1_cfg.mair[0] = reg;
cfg->arm_lpae_s1_cfg.mair[1] = 0;
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index 46604cf7b017..fb237e8aa9f1 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -80,6 +80,9 @@ struct io_pgtable_cfg {
 *  pagetables even on a coherent SMMU for cases where reducing
 *  snoop traffic/latency on walks outweighs the cost of cache
 *  maintenance on PTE updates

[PATCH 0/3] iommu/arm-smmu: Add support to use Last level cache

2019-01-20 Thread Vivek Gautam
Qualcomm SoCs have an additional level of cache called the
System cache, aka Last level cache (LLC). This cache sits right
before the DDR, and is tightly coupled with the memory controller.
The clients using this cache request their slices from this
system cache, make it active, and can then start using it.
For these clients with smmu, to start using the system cache for
buffers and related page tables [1], memory attributes need to be
set accordingly. This series adds the required support.

This change is a realisation of following changes from downstream msm-4.9:
iommu: io-pgtable-arm: Support DOMAIN_ATTRIBUTE_USE_UPSTREAM_HINT[2]
iommu: io-pgtable-arm: Implement IOMMU_USE_UPSTREAM_HINT[3]

Changes since v2:
 - Split the patches into io-pgtable-arm driver and arm-smmu driver.
 - Converted smmu domain attributes to a bitmap, so multiple attributes
   can be managed easily.
 - With addition of non-coherent page table mapping support [4], this
   patch series now aligns with the understanding of upgrading the
   non-coherent devices to use some level of outer cache.
 - Updated the macros and comments to reflect the use of QCOM_SYS_CACHE.
 - QCOM_SYS_CACHE can still be used at stage 2, so that doesn't depend on
   stage-1 mapping.
 - Added change to disable the attribute from arm_smmu_domain_set_attr()
   when needed.
 - Removed the page protection controls for QCOM_SYS_CACHE at the DMA API
   level.

Goes on top of the non-coherent page tables support patch series [4]

[1] https://patchwork.kernel.org/patch/10302791/
[2] https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9&id=bf762276796e79ca90014992f4d9da5593fa7d51
[3] https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9&id=d4c72c413ea27c43f60825193d4de9cb8ffd9602
[4] https://lore.kernel.org/patchwork/cover/1032938/

Vivek Gautam (3):
  iommu/arm-smmu: Move to bitmap for arm_smmu_domain atrributes
  iommu/io-pgtable-arm: Add support to use system cache
  iommu/arm-smmu: Add support to use system cache

 drivers/iommu/arm-smmu.c   | 28 
 drivers/iommu/io-pgtable-arm.c | 15 +--
 drivers/iommu/io-pgtable.h |  4 
 include/linux/iommu.h  |  2 ++
 4 files changed, 43 insertions(+), 6 deletions(-)

-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/3] iommu/arm-smmu: Move to bitmap for arm_smmu_domain atrributes

2019-01-20 Thread Vivek Gautam
A number of arm_smmu_domain's attributes can be assigned based
on the iommu domain's attributes. These local attributes are better
managed by a bitmap.
So remove the boolean flags and move to a 32-bit bitmap, and enable
each bit separately.

Signed-off-by: Vivek Gautam 
---
 drivers/iommu/arm-smmu.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 7ebbcf1b2eb3..52b300dfc096 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -257,10 +257,11 @@ struct arm_smmu_domain {
const struct iommu_gather_ops   *tlb_ops;
struct arm_smmu_cfg cfg;
enum arm_smmu_domain_stage  stage;
-   boolnon_strict;
struct mutexinit_mutex; /* Protects smmu pointer */
spinlock_t  cb_lock; /* Serialises ATS1* ops and 
TLB syncs */
struct iommu_domain domain;
+#define ARM_SMMU_DOMAIN_ATTR_NON_STRICTBIT(0)
+   unsigned intattr;
 };
 
 struct arm_smmu_option_prop {
@@ -901,7 +902,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain 
*domain,
if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
 
-   if (smmu_domain->non_strict)
+   if (smmu_domain->attr & ARM_SMMU_DOMAIN_ATTR_NON_STRICT)
pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
 
/* Non coherent page table mappings only for Stage-1 */
@@ -1598,7 +1599,8 @@ static int arm_smmu_domain_get_attr(struct iommu_domain 
*domain,
case IOMMU_DOMAIN_DMA:
switch (attr) {
case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
-   *(int *)data = smmu_domain->non_strict;
+   *(int *)data = !!(smmu_domain->attr &
+ ARM_SMMU_DOMAIN_ATTR_NON_STRICT);
return 0;
default:
return -ENODEV;
@@ -1638,7 +1640,7 @@ static int arm_smmu_domain_set_attr(struct iommu_domain 
*domain,
case IOMMU_DOMAIN_DMA:
switch (attr) {
case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
-   smmu_domain->non_strict = *(int *)data;
+   smmu_domain->attr |= ARM_SMMU_DOMAIN_ATTR_NON_STRICT;
break;
default:
ret = -ENODEV;
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/2] iommu/arm-smmu: Add support for non-coherent page table mappings

2019-01-17 Thread Vivek Gautam
Adding a device tree option for arm smmu to enable non-cacheable
memory for page tables.
We already enable a smmu feature for coherent walk based on
whether the smmu device is dma-coherent or not. Have an option
to enable non-cacheable page table memory to force set it for
particular smmu devices.

Signed-off-by: Vivek Gautam 
---
 drivers/iommu/arm-smmu.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index af18a7e7f917..7ebbcf1b2eb3 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -188,6 +188,7 @@ struct arm_smmu_device {
u32 features;
 
 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
+#define ARM_SMMU_OPT_PGTBL_NON_COHERENT (1 << 1)
u32 options;
enum arm_smmu_arch_version  version;
enum arm_smmu_implementationmodel;
@@ -273,6 +274,7 @@ static bool using_legacy_binding, using_generic_binding;
 
 static struct arm_smmu_option_prop arm_smmu_options[] = {
{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
+   { ARM_SMMU_OPT_PGTBL_NON_COHERENT, "arm,smmu-pgtable-non-coherent" },
{ 0, NULL},
 };
 
@@ -902,6 +904,11 @@ static int arm_smmu_init_domain_context(struct 
iommu_domain *domain,
if (smmu_domain->non_strict)
pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
 
+   /* Non coherent page table mappings only for Stage-1 */
+   if (smmu->options & ARM_SMMU_OPT_PGTBL_NON_COHERENT &&
+   smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
+   pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_COHERENT;
+
smmu_domain->smmu = smmu;
pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
if (!pgtbl_ops) {
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 0/2] iommu/arm: Add support for non-coherent page tables

2019-01-17 Thread Vivek Gautam
As discussed in the Qcom system cache support thread [1], it is
imperative that we enable the support for non-cacheable page tables
for SMMU implementations for which removing snoop latency on walks
by making mappings as non-cacheable, outweighs the cost of cache
maintenance on PTE updates.

This series adds a new SMMU device tree option to let a particular
SMMU configuration set up cacheable or non-cacheable mappings for
page tables out of the box. We set a new quirk for i/o page tables -
IO_PGTABLE_QUIRK_NON_COHERENT, that lets us set different TCR
configurations.

This quirk enables the non-cacheable page tables for all masters
sitting on SMMU. Should this control be available per smmu_domain
as each master may have a different perf requirement?
Enabling this for the entire SMMU may not be desirable for all
masters.

[1] https://lore.kernel.org/patchwork/patch/1020906/

Vivek Gautam (2):
  iommu/io-pgtable-arm: Add support for non-coherent page tables
  iommu/arm-smmu: Add support for non-coherent page table mappings

 drivers/iommu/arm-smmu.c   |  7 +++
 drivers/iommu/io-pgtable-arm.c | 17 -
 drivers/iommu/io-pgtable.h |  6 ++
 3 files changed, 25 insertions(+), 5 deletions(-)

-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/2] iommu/io-pgtable-arm: Add support for non-coherent page tables

2019-01-17 Thread Vivek Gautam
From Robin's comment [1] about touching TCR configurations -

"TBH if we're going to touch the TCR attributes at all then we should
probably correct that sloppiness first - there's an occasional argument
for using non-cacheable pagetables even on a coherent SMMU if reducing
snoop traffic/latency on walks outweighs the cost of cache maintenance
on PTE updates, but anyone thinking they can get that by overriding
dma-coherent silently gets the worst of both worlds thanks to this
current TCR value."

We have IO_PGTABLE_QUIRK_NO_DMA quirk present, but we don't force
anybody _not_ using dma-coherent smmu to have non-cacheable page table
mappings.
Having another quirk flag can help in having non-cacheable memory for
page tables once and for all.

[1] https://lore.kernel.org/patchwork/patch/1020906/

Signed-off-by: Vivek Gautam 
---
 drivers/iommu/io-pgtable-arm.c | 17 -
 drivers/iommu/io-pgtable.h |  6 ++
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 237cacd4a62b..c76919c30f1a 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -780,7 +780,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, 
void *cookie)
struct arm_lpae_io_pgtable *data;
 
if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA |
-   IO_PGTABLE_QUIRK_NON_STRICT))
+   IO_PGTABLE_QUIRK_NON_STRICT |
+   IO_PGTABLE_QUIRK_NON_COHERENT))
return NULL;
 
data = arm_lpae_alloc_pgtable(cfg);
@@ -788,9 +789,14 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, 
void *cookie)
return NULL;
 
/* TCR */
-   reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) |
- (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) |
- (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT);
+   reg = ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT;
+
+   if (cfg->quirks & IO_PGTABLE_QUIRK_NON_COHERENT)
+   reg |= ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT |
+  ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN0_SHIFT;
+   else
+   reg |= ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT |
+  ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT;
 
switch (ARM_LPAE_GRANULE(data)) {
case SZ_4K:
@@ -873,7 +879,8 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, 
void *cookie)
 
/* The NS quirk doesn't apply at stage 2 */
if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NO_DMA |
-   IO_PGTABLE_QUIRK_NON_STRICT))
+   IO_PGTABLE_QUIRK_NON_STRICT |
+   IO_PGTABLE_QUIRK_NON_COHERENT))
return NULL;
 
data = arm_lpae_alloc_pgtable(cfg);
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index 47d5ae559329..46604cf7b017 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -75,6 +75,11 @@ struct io_pgtable_cfg {
 * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs
 *  on unmap, for DMA domains using the flush queue mechanism for
 *  delayed invalidation.
+*
+* IO_PGTABLE_QUIRK_NON_COHERENT: Enforce non-cacheable mappings for
+*  pagetables even on a coherent SMMU for cases where reducing
+*  snoop traffic/latency on walks outweighs the cost of cache
+*  maintenance on PTE updates.
 */
#define IO_PGTABLE_QUIRK_ARM_NS BIT(0)
#define IO_PGTABLE_QUIRK_NO_PERMS   BIT(1)
@@ -82,6 +87,7 @@ struct io_pgtable_cfg {
#define IO_PGTABLE_QUIRK_ARM_MTK_4GBBIT(3)
#define IO_PGTABLE_QUIRK_NO_DMA BIT(4)
#define IO_PGTABLE_QUIRK_NON_STRICT BIT(5)
+   #define IO_PGTABLE_QUIRK_NON_COHERENT   BIT(6)
unsigned long   quirks;
unsigned long   pgsize_bitmap;
unsigned intias;
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 2/2] dts: arm64/sdm845: Add node for arm,mmu-500

2019-01-08 Thread Vivek Gautam


On 1/8/2019 12:29 PM, Bjorn Andersson wrote:

On Thu 11 Oct 02:49 PDT 2018, Vivek Gautam wrote:


Add device node for arm,mmu-500 available on sdm845.
This MMU-500 with single TCU and multiple TBU architecture
is shared among all the peripherals except gpu.


Hi Vivek,

Applying this patch together with UFS ([1] and [2]) on top of v5.0-rc1
causes my MTP to reboot once the UFSHCD module is inserted and probed.
Independently the patches seem to work fine.

Do you have any suggestion to why this would be?



Hi Bjorn,

Enabling the SMMU on sdm845 when UFS is also enabled requires adding an
'iommus' property to the UFS dt node.
You will need to add the following with ufs:

iommus = <&apps_smmu 0x100 0xf>;

Thanks
Vivek



[1] https://lore.kernel.org/lkml/20181210192826.241350-4-evgr...@chromium.org/
[2] https://lore.kernel.org/lkml/20181210192826.241350-5-evgr...@chromium.org/

Regards,
Bjorn


Signed-off-by: Vivek Gautam 
---

Changes since v3:
  - none.

  arch/arm64/boot/dts/qcom/sdm845.dtsi | 72 
  1 file changed, 72 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi 
b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index b72bdb0a31a5..0aace729643d 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -1297,6 +1297,78 @@
cell-index = <0>;
};
  
+		apps_smmu: iommu@1500 {

+   compatible = "qcom,sdm845-smmu-500", "arm,mmu-500";
+   reg = <0x1500 0x8>;
+   #iommu-cells = <2>;
+   #global-interrupts = <1>;
+   interrupts = ,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+,
+;
+   };
+
apss_shared: mailbox@1799 {
compatible = "qcom,sdm845-apss-shared";
reg = <0x1799 0x1000>;
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH 1/1] iommu/arm-smmu: Add support to use Last level cache

2019-01-01 Thread Vivek Gautam
Hi Robin,

On Fri, Dec 7, 2018 at 2:54 PM Vivek Gautam  wrote:
>
> Hi Robin,
>
> On Tue, Dec 4, 2018 at 8:51 PM Robin Murphy  wrote:
> >
> > On 04/12/2018 11:01, Vivek Gautam wrote:
> > > Qualcomm SoCs have an additional level of cache called as
> > > System cache, aka. Last level cache (LLC). This cache sits right
> > > before the DDR, and is tightly coupled with the memory controller.
> > > The cache is available to all the clients present in the SoC system.
> > > The clients request their slices from this system cache, make it
> > > active, and can then start using it.
> > > For these clients with smmu, to start using the system cache for
> > > buffers and, related page tables [1], memory attributes need to be
> > > set accordingly.
> > > This change updates the MAIR and TCR configurations with correct
> > > attributes to use this system cache.
> > >
> > > To explain a little about memory attribute requirements here:
> > >
> > > Non-coherent I/O devices can't look-up into inner caches. However,
> > > coherent I/O devices can. But both can allocate in the system cache
> > > based on system policy and configured memory attributes in page
> > > tables.
> > > CPUs can access both inner and outer caches (including system cache,
> > > aka. Last level cache), and can allocate into system cache too
> > > based on memory attributes, and system policy.
> > >
> > > Further looking at memory types, we have following -
> > > a) Normal uncached :- MAIR 0x44, inner non-cacheable,
> > >outer non-cacheable;
> > > b) Normal cached :-   MAIR 0xff, inner read write-back non-transient,
> > >outer read write-back non-transient;
> > > >attribute setting for coherent I/O devices.
> > >
> > > and, for non-coherent i/o devices that can allocate in system cache
> > > another type gets added -
> > > c) Normal sys-cached/non-inner-cached :-
> > >MAIR 0xf4, inner non-cacheable,
> > >outer read write-back non-transient
> > >
> > > So, CPU will automatically use the system cache for memory marked as
> > > normal cached. The normal sys-cached is downgraded to normal non-cached
> > > memory for CPUs.
> > > Coherent I/O devices can use system cache by marking the memory as
> > > normal cached.
> > > Non-coherent I/O devices, to use system cache, should mark the memory as
> > > normal sys-cached in page tables.
> > >
> > > This change is a realisation of following changes
> > > from downstream msm-4.9:
> > > iommu: io-pgtable-arm: Support DOMAIN_ATTRIBUTE_USE_UPSTREAM_HINT[2]
> > > iommu: io-pgtable-arm: Implement IOMMU_USE_UPSTREAM_HINT[3]
> > >
> > > [1] https://patchwork.kernel.org/patch/10302791/
> > > [2] 
> > > https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9=bf762276796e79ca90014992f4d9da5593fa7d51
> > > [3] 
> > > https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9=d4c72c413ea27c43f60825193d4de9cb8ffd9602
> > >
> > > Signed-off-by: Vivek Gautam 
> > > ---
> > >
> > > Changes since v1:
> > >   - Addressed Tomasz's comments for basing the change on
> > > "NO_INNER_CACHE" concept for non-coherent I/O devices
> > > rather than capturing "SYS_CACHE". This is to indicate
> > > clearly the intent of non-coherent I/O devices that
> > > can't access inner caches.
> >
> > That seems backwards to me - there is already a fundamental assumption
> > that non-coherent devices can't access caches. What we're adding here is
> > a weird exception where they *can* use some level of cache despite still
> > being non-coherent overall.
> >
> > In other words, it's not a case of downgrading coherent devices'
> > accesses to bypass inner caches, it's upgrading non-coherent devices'
> > accesses to hit the outer cache. That's certainly the understanding I
> > got from talking with Pratik at Plumbers, and it does appear to fit with
> > your explanation above despite the final conclusion you draw being
> > different.
>
> Thanks for the thorough review of the change.
> Right, I guess it's rather an upgrade for non-coherent devices to use
> an outer cache than a downgrade for coherent devices.
>
> >
> > I do see what Tomasz meant in terms of the TCR attributes, but what we
> > 

Re: [PATCH 1/1] iommu/arm-smmu: Add support to use Last level cache

2019-01-01 Thread Vivek Gautam
On Thu, Dec 13, 2018 at 9:20 AM Tomasz Figa  wrote:
>
> On Fri, Dec 7, 2018 at 6:25 PM Vivek Gautam  
> wrote:
> >
> > Hi Robin,
> >
> > On Tue, Dec 4, 2018 at 8:51 PM Robin Murphy  wrote:
> > >
> > > On 04/12/2018 11:01, Vivek Gautam wrote:
> > > > Qualcomm SoCs have an additional level of cache called as
> > > > System cache, aka. Last level cache (LLC). This cache sits right
> > > > before the DDR, and is tightly coupled with the memory controller.
> > > > The cache is available to all the clients present in the SoC system.
> > > > The clients request their slices from this system cache, make it
> > > > active, and can then start using it.
> > > > For these clients with smmu, to start using the system cache for
> > > > buffers and, related page tables [1], memory attributes need to be
> > > > set accordingly.
> > > > This change updates the MAIR and TCR configurations with correct
> > > > attributes to use this system cache.
> > > >
> > > > To explain a little about memory attribute requirements here:
> > > >
> > > > Non-coherent I/O devices can't look-up into inner caches. However,
> > > > coherent I/O devices can. But both can allocate in the system cache
> > > > based on system policy and configured memory attributes in page
> > > > tables.
> > > > CPUs can access both inner and outer caches (including system cache,
> > > > aka. Last level cache), and can allocate into system cache too
> > > > based on memory attributes, and system policy.
> > > >
> > > > Further looking at memory types, we have following -
> > > > a) Normal uncached :- MAIR 0x44, inner non-cacheable,
> > > >outer non-cacheable;
> > > > b) Normal cached :-   MAIR 0xff, inner read write-back non-transient,
> > > >outer read write-back non-transient;
> > > > >attribute setting for coherent I/O devices.
> > > >
> > > > and, for non-coherent i/o devices that can allocate in system cache
> > > > another type gets added -
> > > > c) Normal sys-cached/non-inner-cached :-
> > > >MAIR 0xf4, inner non-cacheable,
> > > >outer read write-back non-transient
> > > >
> > > > So, CPU will automatically use the system cache for memory marked as
> > > > normal cached. The normal sys-cached is downgraded to normal non-cached
> > > > memory for CPUs.
> > > > Coherent I/O devices can use system cache by marking the memory as
> > > > normal cached.
> > > > Non-coherent I/O devices, to use system cache, should mark the memory as
> > > > normal sys-cached in page tables.
> > > >
> > > > This change is a realisation of following changes
> > > > from downstream msm-4.9:
> > > > iommu: io-pgtable-arm: Support DOMAIN_ATTRIBUTE_USE_UPSTREAM_HINT[2]
> > > > iommu: io-pgtable-arm: Implement IOMMU_USE_UPSTREAM_HINT[3]
> > > >
> > > > [1] https://patchwork.kernel.org/patch/10302791/
> > > > [2] 
> > > > https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9=bf762276796e79ca90014992f4d9da5593fa7d51
> > > > [3] 
> > > > https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9=d4c72c413ea27c43f60825193d4de9cb8ffd9602
> > > >
> > > > Signed-off-by: Vivek Gautam 
> > > > ---
> > > >
> > > > Changes since v1:
> > > >   - Addressed Tomasz's comments for basing the change on
> > > > "NO_INNER_CACHE" concept for non-coherent I/O devices
> > > > rather than capturing "SYS_CACHE". This is to indicate
> > > > clearly the intent of non-coherent I/O devices that
> > > > can't access inner caches.
> > >
> > > That seems backwards to me - there is already a fundamental assumption
> > > that non-coherent devices can't access caches. What we're adding here is
> > > a weird exception where they *can* use some level of cache despite still
> > > being non-coherent overall.
> > >
> > > In other words, it's not a case of downgrading coherent devices'
> > > accesses to bypass inner caches, it's upgrading non-coherent devices'
> > > accesses to hit the outer cache. That's certainly the understanding I
> > > got from

Re: [RESEND PATCH v4 1/1] dt-bindings: arm-smmu: Add binding doc for Qcom smmu-500

2018-12-16 Thread Vivek Gautam
On Thu, Dec 13, 2018 at 4:16 PM Will Deacon  wrote:
>
> On Thu, Dec 13, 2018 at 02:35:07PM +0530, Vivek Gautam wrote:
> > Qcom's implementation of arm,mmu-500 works well with current
> > arm-smmu driver implementation. Adding a soc specific compatible
> > along with arm,mmu-500 makes the bindings future safe.
> >
> > Signed-off-by: Vivek Gautam 
> > Reviewed-by: Rob Herring 
> > Cc: Will Deacon 
> > ---
> >
> > Hi Joerg,
> > I am picking this out separately from the sdm845 smmu support
> > series [1], so that this can go through iommu tree.
> > The dt patch from the series [1] can be taken through arm-soc tree.
> >
> > Hi Will,
> > As asked [2], here's the resend version of dt binding patch for sdm845.
> > Kindly ack this so that Joerg can pull this in.
>
> Acked-by: Will Deacon 

Thanks a lot Will for the Ack.

Regards
Vivek

>
> Joerg -- please can you take this on top of the pull request I sent already?
> Vivek included it as part of a separate series which I thought was going
> via arm-soc, but actually it needs to go with the other arm-smmu patches
> in order to avoid conflicts.
>
> Cheers,
>
> Will
>
> >  Documentation/devicetree/bindings/iommu/arm,smmu.txt | 4 
> >  1 file changed, 4 insertions(+)
> >
> > diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt 
> > b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
> > index a6504b37cc21..3133f3ba7567 100644
> > --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
> > +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
> > @@ -27,6 +27,10 @@ conditions.
> >"qcom,msm8996-smmu-v2", "qcom,smmu-v2",
> >"qcom,sdm845-smmu-v2", "qcom,smmu-v2".
> >
> > +  Qcom SoCs implementing "arm,mmu-500" must also include,
> > +  as below, SoC-specific compatibles:
> > +  "qcom,sdm845-smmu-500", "arm,mmu-500"
> > +
> >  - reg   : Base address and size of the SMMU.
> >
> >  - #global-interrupts : The number of global interrupts exposed by the
> > --
> > QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
> > of Code Aurora Forum, hosted by The Linux Foundation
> >
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu



-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RESEND PATCH v4 1/1] dt-bindings: arm-smmu: Add binding doc for Qcom smmu-500

2018-12-13 Thread Vivek Gautam
Qcom's implementation of arm,mmu-500 works well with current
arm-smmu driver implementation. Adding a soc specific compatible
along with arm,mmu-500 makes the bindings future safe.

Signed-off-by: Vivek Gautam 
Reviewed-by: Rob Herring 
Cc: Will Deacon 
---

Hi Joerg,
I am picking this out separately from the sdm845 smmu support
series [1], so that this can go through iommu tree.
The dt patch from the series [1] can be taken through arm-soc tree.

Hi Will,
As asked [2], here's the resend version of dt binding patch for sdm845.
Kindly ack this so that Joerg can pull this in.

Thanks
Vivek

[1] https://patchwork.kernel.org/cover/10636359/
[2] https://patchwork.kernel.org/patch/10636363/

 Documentation/devicetree/bindings/iommu/arm,smmu.txt | 4 
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt 
b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index a6504b37cc21..3133f3ba7567 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -27,6 +27,10 @@ conditions.
   "qcom,msm8996-smmu-v2", "qcom,smmu-v2",
   "qcom,sdm845-smmu-v2", "qcom,smmu-v2".
 
+  Qcom SoCs implementing "arm,mmu-500" must also include,
+  as below, SoC-specific compatibles:
+  "qcom,sdm845-smmu-500", "arm,mmu-500"
+
 - reg   : Base address and size of the SMMU.
 
 - #global-interrupts : The number of global interrupts exposed by the
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 1/2] dt-bindings: arm-smmu: Add binding doc for Qcom smmu-500

2018-12-12 Thread Vivek Gautam
Hi Will,

On Fri, Oct 12, 2018 at 11:37 AM Vivek Gautam
 wrote:
>
>
>
> On 10/12/2018 3:46 AM, Rob Herring wrote:
> > On Thu, 11 Oct 2018 15:19:29 +0530, Vivek Gautam wrote:
> >> Qcom's implementation of arm,mmu-500 works well with current
> >> arm-smmu driver implementation. Adding a soc specific compatible
> >> along with arm,mmu-500 makes the bindings future safe.
> >>
> >> Signed-off-by: Vivek Gautam 
> >> ---
> >>
> >> Changes since v3:
> >>   - Refined language more to state things directly for the bindings
> >> description.
> >>
> >>   Documentation/devicetree/bindings/iommu/arm,smmu.txt | 4 
> >>   1 file changed, 4 insertions(+)
> >>
> > Reviewed-by: Rob Herring 
>
> Thank you Rob.
>

Can you please pick this one as well to your tree? This goes on top of the
bindings patch for "qcom,smmu-v2". So, it can't go through Andy's tree.
Will ask Andy to pick the second patch of the series, that adds the dt node.

I guess as I sent this one along with the dt patch, I would have
mistakenly added
you to 'cc' list rather than 'to' list.
Let me know if you would like me to resend it.

Thanks
Vivek

-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/1] iommu/arm-smmu: Add support to use Last level cache

2018-12-07 Thread Vivek Gautam
Hi Robin,

On Tue, Dec 4, 2018 at 8:51 PM Robin Murphy  wrote:
>
> On 04/12/2018 11:01, Vivek Gautam wrote:
> > Qualcomm SoCs have an additional level of cache called as
> > System cache, aka. Last level cache (LLC). This cache sits right
> > before the DDR, and is tightly coupled with the memory controller.
> > The cache is available to all the clients present in the SoC system.
> > The clients request their slices from this system cache, make it
> > active, and can then start using it.
> > For these clients with smmu, to start using the system cache for
> > buffers and, related page tables [1], memory attributes need to be
> > set accordingly.
> > This change updates the MAIR and TCR configurations with correct
> > attributes to use this system cache.
> >
> > To explain a little about memory attribute requirements here:
> >
> > Non-coherent I/O devices can't look-up into inner caches. However,
> > coherent I/O devices can. But both can allocate in the system cache
> > based on system policy and configured memory attributes in page
> > tables.
> > CPUs can access both inner and outer caches (including system cache,
> > aka. Last level cache), and can allocate into system cache too
> > based on memory attributes, and system policy.
> >
> > Further looking at memory types, we have following -
> > a) Normal uncached :- MAIR 0x44, inner non-cacheable,
> >outer non-cacheable;
> > b) Normal cached :-   MAIR 0xff, inner read write-back non-transient,
> >outer read write-back non-transient;
> > >attribute setting for coherent I/O devices.
> >
> > and, for non-coherent i/o devices that can allocate in system cache
> > another type gets added -
> > c) Normal sys-cached/non-inner-cached :-
> >MAIR 0xf4, inner non-cacheable,
> >outer read write-back non-transient
> >
> > So, CPU will automatically use the system cache for memory marked as
> > normal cached. The normal sys-cached is downgraded to normal non-cached
> > memory for CPUs.
> > Coherent I/O devices can use system cache by marking the memory as
> > normal cached.
> > Non-coherent I/O devices, to use system cache, should mark the memory as
> > normal sys-cached in page tables.
> >
> > This change is a realisation of following changes
> > from downstream msm-4.9:
> > iommu: io-pgtable-arm: Support DOMAIN_ATTRIBUTE_USE_UPSTREAM_HINT[2]
> > iommu: io-pgtable-arm: Implement IOMMU_USE_UPSTREAM_HINT[3]
> >
> > [1] https://patchwork.kernel.org/patch/10302791/
> > [2] 
> > https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9=bf762276796e79ca90014992f4d9da5593fa7d51
> > [3] 
> > https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9=d4c72c413ea27c43f60825193d4de9cb8ffd9602
> >
> > Signed-off-by: Vivek Gautam 
> > ---
> >
> > Changes since v1:
> >   - Addressed Tomasz's comments for basing the change on
> > "NO_INNER_CACHE" concept for non-coherent I/O devices
> > rather than capturing "SYS_CACHE". This is to indicate
> > clearly the intent of non-coherent I/O devices that
> > can't access inner caches.
>
> That seems backwards to me - there is already a fundamental assumption
> that non-coherent devices can't access caches. What we're adding here is
> a weird exception where they *can* use some level of cache despite still
> being non-coherent overall.
>
> In other words, it's not a case of downgrading coherent devices'
> accesses to bypass inner caches, it's upgrading non-coherent devices'
> accesses to hit the outer cache. That's certainly the understanding I
> got from talking with Pratik at Plumbers, and it does appear to fit with
> your explanation above despite the final conclusion you draw being
> different.

Thanks for the thorough review of the change.
Right, I guess it's rather an upgrade for non-coherent devices to use
an outer cache than a downgrade for coherent devices.

>
> I do see what Tomasz meant in terms of the TCR attributes, but what we
> currently do there is a little unintuitive and not at all representative
> of actual mapping attributes - I'll come back to that inline.
>
> >   drivers/iommu/arm-smmu.c   | 15 +++
> >   drivers/iommu/dma-iommu.c  |  3 +++
> >   drivers/iommu/io-pgtable-arm.c | 22 +-
> >   drivers/iommu/io-pgtable.h |  5 +
> >   include/linux/iommu.h  |  3 +++
> >   5 files changed, 43 insertions(+), 5 deletions(-)

[PATCH 1/1] iommu/arm-smmu: Add support to use Last level cache

2018-12-04 Thread Vivek Gautam
Qualcomm SoCs have an additional level of cache called as
System cache, aka. Last level cache (LLC). This cache sits right
before the DDR, and is tightly coupled with the memory controller.
The cache is available to all the clients present in the SoC system.
The clients request their slices from this system cache, make it
active, and can then start using it.
For these clients with smmu, to start using the system cache for
buffers and, related page tables [1], memory attributes need to be
set accordingly.
This change updates the MAIR and TCR configurations with correct
attributes to use this system cache.

To explain a little about memory attribute requirements here:

Non-coherent I/O devices can't look-up into inner caches. However,
coherent I/O devices can. But both can allocate in the system cache
based on system policy and configured memory attributes in page
tables.
CPUs can access both inner and outer caches (including system cache,
aka. Last level cache), and can allocate into system cache too
based on memory attributes, and system policy.

Further looking at memory types, we have following -
a) Normal uncached :- MAIR 0x44, inner non-cacheable,
  outer non-cacheable;
b) Normal cached :-   MAIR 0xff, inner read write-back non-transient,
  outer read write-back non-transient;
  attribute setting for coherent I/O devices.

and, for non-coherent i/o devices that can allocate in system cache
another type gets added -
c) Normal sys-cached/non-inner-cached :-
  MAIR 0xf4, inner non-cacheable,
  outer read write-back non-transient

So, CPU will automatically use the system cache for memory marked as
normal cached. The normal sys-cached is downgraded to normal non-cached
memory for CPUs.
Coherent I/O devices can use system cache by marking the memory as
normal cached.
Non-coherent I/O devices, to use system cache, should mark the memory as
normal sys-cached in page tables.

This change is a realisation of following changes
from downstream msm-4.9:
iommu: io-pgtable-arm: Support DOMAIN_ATTRIBUTE_USE_UPSTREAM_HINT[2]
iommu: io-pgtable-arm: Implement IOMMU_USE_UPSTREAM_HINT[3]

[1] https://patchwork.kernel.org/patch/10302791/
[2] 
https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9&id=bf762276796e79ca90014992f4d9da5593fa7d51
[3] 
https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?h=msm-4.9&id=d4c72c413ea27c43f60825193d4de9cb8ffd9602

Signed-off-by: Vivek Gautam 
---

Changes since v1:
 - Addressed Tomasz's comments for basing the change on
   "NO_INNER_CACHE" concept for non-coherent I/O devices
   rather than capturing "SYS_CACHE". This is to indicate
   clearly the intent of non-coherent I/O devices that
   can't access inner caches.

 drivers/iommu/arm-smmu.c   | 15 +++
 drivers/iommu/dma-iommu.c  |  3 +++
 drivers/iommu/io-pgtable-arm.c | 22 +-
 drivers/iommu/io-pgtable.h |  5 +
 include/linux/iommu.h  |  3 +++
 5 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index ba18d89d4732..047f7ff95b0d 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -255,6 +255,7 @@ struct arm_smmu_domain {
struct mutexinit_mutex; /* Protects smmu pointer */
spinlock_t  cb_lock; /* Serialises ATS1* ops and 
TLB syncs */
struct iommu_domain domain;
+   boolno_inner_cache;
 };
 
 struct arm_smmu_option_prop {
@@ -897,6 +898,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain 
*domain,
if (smmu_domain->non_strict)
pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
 
+   if (smmu_domain->no_inner_cache)
+   pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NO_IC;
+
smmu_domain->smmu = smmu;
pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
if (!pgtbl_ops) {
@@ -1579,6 +1583,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain 
*domain,
case DOMAIN_ATTR_NESTING:
*(int *)data = (smmu_domain->stage == 
ARM_SMMU_DOMAIN_NESTED);
return 0;
+   case DOMAIN_ATTR_NO_IC:
+   *((int *)data) = smmu_domain->no_inner_cache;
+   return 0;
default:
return -ENODEV;
}
@@ -1619,6 +1626,14 @@ static int arm_smmu_domain_set_attr(struct iommu_domain 
*domain,
else
smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
break;
+   case DOMAIN_ATTR_NO_IC:
+   if (smmu_domain->smmu) {
+   ret = -EPERM;
+   goto out_unlock;
+   }

Re: [PATCH] of/device: add blacklist for iommu dma_ops

2018-12-03 Thread Vivek Gautam
On Mon, Dec 3, 2018 at 7:56 PM Rob Clark  wrote:
>
> On Mon, Dec 3, 2018 at 7:45 AM Robin Murphy  wrote:
> >
> > Hi Rob,
> >
> > On 01/12/2018 16:53, Rob Clark wrote:
> > > This solves a problem we see with drm/msm, caused by getting
> > > iommu_dma_ops while we attach our own domain and manage it directly at
> > > the iommu API level:
> > >
> > >[0038] user address but active_mm is swapper
> > >Internal error: Oops: 9605 [#1] PREEMPT SMP
> > >Modules linked in:
> > >CPU: 7 PID: 70 Comm: kworker/7:1 Tainted: GW 4.19.3 #90
> > >Hardware name: xxx (DT)
> > >Workqueue: events deferred_probe_work_func
> > >pstate: 80c9 (Nzcv daif +PAN +UAO)
> > >pc : iommu_dma_map_sg+0x7c/0x2c8
> > >lr : iommu_dma_map_sg+0x40/0x2c8
> > >sp : ff80095eb4f0
> > >x29: ff80095eb4f0 x28: 
> > >x27: ffc0f9431578 x26: 
> > >x25:  x24: 0003
> > >x23: 0001 x22: ffc0fa9ac010
> > >x21:  x20: ffc0fab40980
> > >x19: ffc0fab40980 x18: 0003
> > >x17: 01c4 x16: 0007
> > >x15: 000e x14: 
> > >x13:  x12: 0028
> > >x11: 0101010101010101 x10: 7f7f7f7f7f7f7f7f
> > >x9 :  x8 : ffc0fab409a0
> > >x7 :  x6 : 0002
> > >x5 : 0001 x4 : 
> > >x3 : 0001 x2 : 0002
> > >x1 : ffc0f9431578 x0 : 
> > >Process kworker/7:1 (pid: 70, stack limit = 0x17d08ffb)
> > >Call trace:
> > > iommu_dma_map_sg+0x7c/0x2c8
> > > __iommu_map_sg_attrs+0x70/0x84
> > > get_pages+0x170/0x1e8
> > > msm_gem_get_iova+0x8c/0x128
> > > _msm_gem_kernel_new+0x6c/0xc8
> > > msm_gem_kernel_new+0x4c/0x58
> > > dsi_tx_buf_alloc_6g+0x4c/0x8c
> > > msm_dsi_host_modeset_init+0xc8/0x108
> > > msm_dsi_modeset_init+0x54/0x18c
> > > _dpu_kms_drm_obj_init+0x430/0x474
> > > dpu_kms_hw_init+0x5f8/0x6b4
> > > msm_drm_bind+0x360/0x6c8
> > > try_to_bring_up_master.part.7+0x28/0x70
> > > component_master_add_with_match+0xe8/0x124
> > > msm_pdev_probe+0x294/0x2b4
> > > platform_drv_probe+0x58/0xa4
> > > really_probe+0x150/0x294
> > > driver_probe_device+0xac/0xe8
> > > __device_attach_driver+0xa4/0xb4
> > > bus_for_each_drv+0x98/0xc8
> > > __device_attach+0xac/0x12c
> > > device_initial_probe+0x24/0x30
> > > bus_probe_device+0x38/0x98
> > > deferred_probe_work_func+0x78/0xa4
> > > process_one_work+0x24c/0x3dc
> > > worker_thread+0x280/0x360
> > > kthread+0x134/0x13c
> > > ret_from_fork+0x10/0x18
> > >Code: d284 91000725 6b17039f 5400048a (f9401f40)
> > >---[ end trace f22dda57f3648e2c ]---
> > >Kernel panic - not syncing: Fatal exception
> > >SMP: stopping secondary CPUs
> > >Kernel Offset: disabled
> > >CPU features: 0x0,22802a18
> > >Memory Limit: none
> > >
> > > > The problem is that when drm/msm does its own iommu_attach_device(),
> > > now the domain returned by iommu_get_domain_for_dev() is drm/msm's
> > > domain, and it doesn't have domain->iova_cookie.
> >
> > Does this crash still happen with 4.20-rc? Because as of 6af588fed391 it
> > really shouldn't.
> >
> > > We kind of avoided this problem prior to sdm845/dpu because the iommu
> > > was attached to the mdp node in dt, which is a child of the toplevel
> > > mdss node (which corresponds to the dev passed in dma_map_sg()).  But
> > > with sdm845, now the iommu is attached at the mdss level so we hit the
> > > iommu_dma_ops in dma_map_sg().
> > >
> > > But auto allocating/attaching a domain before the driver is probed was
> > > already a blocking problem for enabling per-context pagetables for the
> > > GPU.  This problem is also now solved with this patch.
> >
> > s/solved/worked around/
> >
> > If you want a guarantee of actually getting a specific hardware context
> > allocated for a given domain, there needs to be code in the IOMMU driver
> > to understand and honour that. Implicitly depending on whatever happens
> > to fall out of current driver behaviour on current systems is not a real
> > solution.
> >
> > > Fixes: 97890ba9289c dma-mapping: detect and configure IOMMU in 
> > > of_dma_configure
> >
> > That's rather misleading, since the crash described above depends on at
> > least two other major changes which came long after that commit.
> >
> > It's not that I don't understand exactly what you want here - just that
> > this commit message isn't a coherent justification for that ;)
> >
> > > Tested-by: Douglas Anderson 
> > > Signed-off-by: Rob Clark 
> > > ---
> > > This is an alternative/replacement for [1].  What it lacks in elegance
> > > it makes up for in practicality ;-)
> > >
> > > [1] 

[PATCH v19 4/5] dt-bindings: arm-smmu: Add bindings for qcom,smmu-v2

2018-12-03 Thread Vivek Gautam
Add bindings doc for Qcom's smmu-v2 implementation.

Signed-off-by: Vivek Gautam 
Reviewed-by: Tomasz Figa 
Tested-by: Srinivas Kandagatla 
Reviewed-by: Rob Herring 
Reviewed-by: Robin Murphy 
---

Changes since v18:
 None.

 .../devicetree/bindings/iommu/arm,smmu.txt | 39 ++
 1 file changed, 39 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt 
b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index 8a6ffce12af5..a6504b37cc21 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -17,10 +17,16 @@ conditions.
 "arm,mmu-401"
 "arm,mmu-500"
 "cavium,smmu-v2"
+"qcom,smmu-v2"
 
   depending on the particular implementation and/or the
   version of the architecture implemented.
 
+  Qcom SoCs must contain, as below, SoC-specific compatibles
+  along with "qcom,smmu-v2":
+  "qcom,msm8996-smmu-v2", "qcom,smmu-v2",
+  "qcom,sdm845-smmu-v2", "qcom,smmu-v2".
+
 - reg   : Base address and size of the SMMU.
 
 - #global-interrupts : The number of global interrupts exposed by the
@@ -71,6 +77,22 @@ conditions.
   or using stream matching with #iommu-cells = <2>, and
   may be ignored if present in such cases.
 
+- clock-names:List of the names of clocks input to the device. The
+  required list depends on particular implementation and
+  is as follows:
+  - for "qcom,smmu-v2":
+- "bus": clock required for downstream bus access and
+ for the smmu ptw,
+- "iface": clock required to access smmu's registers
+   through the TCU's programming interface.
+  - unspecified for other implementations.
+
+- clocks: Specifiers for all clocks listed in the clock-names property,
+  as per generic clock bindings.
+
+- power-domains:  Specifiers for power domains required to be powered on for
+  the SMMU to operate, as per generic power domain bindings.
+
 ** Deprecated properties:
 
 - mmu-masters (deprecated in favour of the generic "iommus" binding) :
@@ -137,3 +159,20 @@ conditions.
 iommu-map = <0 &smmu3 0 0x400>;
 ...
 };
+
+   /* Qcom's arm,smmu-v2 implementation */
+   smmu4: iommu@d00000 {
+   compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2";
+   reg = <0xd00000 0x10000>;
+
+   #global-interrupts = <1>;
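+   interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>,
+                <GIC_SPI 320 IRQ_TYPE_LEVEL_HIGH>,
+                <GIC_SPI 321 IRQ_TYPE_LEVEL_HIGH>;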
+   #iommu-cells = <1>;
+   power-domains = <&mmcc MDSS_GDSC>;
+
+   clocks = <&mmcc SMMU_MDP_AXI_CLK>,
+            <&mmcc SMMU_MDP_AHB_CLK>;
+   clock-names = "bus", "iface";
+   };
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v19 3/5] iommu/arm-smmu: Add the device_link between masters and smmu

2018-12-03 Thread Vivek Gautam
From: Sricharan R 

Finally add the device link between the master device and
smmu, so that the smmu gets runtime enabled/disabled only when the
master needs it. This is done from add_device callback which gets
called once when the master is added to the smmu.

Signed-off-by: Sricharan R 
Signed-off-by: Vivek Gautam 
Reviewed-by: Tomasz Figa 
Tested-by: Srinivas Kandagatla 
Reviewed-by: Robin Murphy 
---

Changes since v18:
 None.

 drivers/iommu/arm-smmu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 1917d214c4d9..b6b11642b3a9 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1500,6 +1500,9 @@ static int arm_smmu_add_device(struct device *dev)
 
iommu_device_link(&smmu->iommu, dev);
 
+   device_link_add(dev, smmu->dev,
+   DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
+
return 0;
 
 out_cfg_free:
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


  1   2   3   4   >