Re: [PATCH v8 6/8] perf tool: Add support for parsing HiSilicon PCIe Trace packet

2022-05-16 Thread Jonathan Cameron via iommu
On Mon, 16 May 2022 20:52:21 +0800
Yicong Yang  wrote:

> From: Qi Liu 
> 
> Add support for using 'perf report --dump-raw-trace' to parse PTT packet.
> 
> Example usage:
> 
> Output will contain raw PTT data and its textual representation, such
> as:
> 
> 0 0 0x5810 [0x30]: PERF_RECORD_AUXTRACE size: 0x40  offset: 0
> ref: 0xa5d50c725  idx: 0  tid: -1  cpu: 0
> .
> . ... HISI PTT data: size 4194304 bytes
> .  : 00 00 00 00 Prefix
> .  0004: 08 20 00 60 Header DW0
> .  0008: ff 02 00 01 Header DW1
> .  000c: 20 08 00 00 Header DW2
> .  0010: 10 e7 44 ab Header DW3
> .  0014: 2a a8 1e 01 Time
> .  0020: 00 00 00 00 Prefix
> .  0024: 01 00 00 60 Header DW0
> .  0028: 0f 1e 00 01 Header DW1
> .  002c: 04 00 00 00 Header DW2
> .  0030: 40 00 81 02 Header DW3
> .  0034: ee 02 00 00 Time
> 
> 
> Signed-off-by: Qi Liu 
> Signed-off-by: Yicong Yang 

From the point of view of a reviewer who doesn't know this code well, this
all looks sensible.  One trivial comment inline.

Thanks,

Jonathan

> diff --git a/tools/perf/util/hisi-ptt.c b/tools/perf/util/hisi-ptt.c
> new file mode 100644
> index ..2afc1a663c2a
> --- /dev/null
> +
> +static void hisi_ptt_free(struct perf_session *session)
> +{
> + struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt,
> + auxtrace);
> +
> + session->auxtrace = NULL;
> + free(ptt);
> +}
> +
> +static bool hisi_ptt_evsel_is_auxtrace(struct perf_session *session,
> +struct evsel *evsel)
> +{
> + struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt, 
> auxtrace);

Check for consistent wrapping of lines like this. This doesn't match the one 
just above.



___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 5/8] perf tool: Add support for HiSilicon PCIe Tune and Trace device driver

2022-05-16 Thread Jonathan Cameron via iommu
On Mon, 16 May 2022 20:52:20 +0800
Yicong Yang  wrote:

> From: Qi Liu 
> 
> HiSilicon PCIe tune and trace device (PTT) could dynamically tune
> the PCIe link's events, and trace the TLP headers.
> 
> This patch add support for PTT device in perf tool, so users could
> use 'perf record' to get TLP headers trace data.
> 
> Signed-off-by: Qi Liu 
> Signed-off-by: Yicong Yang 

One query inline.


> diff --git a/tools/perf/arch/arm/util/auxtrace.c 
> b/tools/perf/arch/arm/util/auxtrace.c
> index 384c7cfda0fd..297fffedf45e 100644
> --- a/tools/perf/arch/arm/util/auxtrace.c
> +++ b/tools/perf/arch/arm/util/auxtrace.c

...

>  static struct perf_pmu *find_pmu_for_event(struct perf_pmu **pmus,
>  int pmu_nr, struct evsel *evsel)
>  {
> @@ -71,17 +120,21 @@ struct auxtrace_record
>  {
>   struct perf_pmu *cs_etm_pmu = NULL;
>   struct perf_pmu **arm_spe_pmus = NULL;
> + struct perf_pmu **hisi_ptt_pmus = NULL;
>   struct evsel *evsel;
>   struct perf_pmu *found_etm = NULL;
>   struct perf_pmu *found_spe = NULL;
> + struct perf_pmu *found_ptt = NULL;
>   int auxtrace_event_cnt = 0;
>   int nr_spes = 0;
> + int nr_ptts = 0;
>  
>   if (!evlist)
>   return NULL;
>  
>   cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
> arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, &err);
> + hisi_ptt_pmus = find_all_hisi_ptt_pmus(&nr_ptts, &err);
>  
>   evlist__for_each_entry(evlist, evsel) {
>   if (cs_etm_pmu && !found_etm)
> @@ -89,9 +142,13 @@ struct auxtrace_record
>  
>   if (arm_spe_pmus && !found_spe)
>   found_spe = find_pmu_for_event(arm_spe_pmus, nr_spes, 
> evsel);
> +
> + if (arm_spe_pmus && !found_spe)

if (hisi_ptt_pmus && !found_ptt) ?

Otherwise, I'm not sure what the purpose of the checking against spe is.

> + found_ptt = find_pmu_for_event(hisi_ptt_pmus, nr_ptts, 
> evsel);
>   }
>  
>   free(arm_spe_pmus);
> + free(hisi_ptt_pmus);
>  
>   if (found_etm)
>   auxtrace_event_cnt++;
> @@ -99,6 +156,9 @@ struct auxtrace_record
>   if (found_spe)
>   auxtrace_event_cnt++;
>  
> + if (found_ptt)
> + auxtrace_event_cnt++;
> +
>   if (auxtrace_event_cnt > 1) {
>   pr_err("Concurrent AUX trace operation not currently 
> supported\n");
>   *err = -EOPNOTSUPP;
> @@ -111,6 +171,9 @@ struct auxtrace_record
>  #if defined(__aarch64__)
>   if (found_spe)
>   return arm_spe_recording_init(err, found_spe);
> +
> + if (found_ptt)
> + return hisi_ptt_recording_init(err, found_ptt);
>  #endif
>  
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 4/8] perf arm: Refactor event list iteration in auxtrace_record__init()

2022-05-16 Thread Jonathan Cameron via iommu
On Mon, 16 May 2022 20:52:19 +0800
Yicong Yang  wrote:

> From: Qi Liu 
> 
> Use find_pmu_for_event() to simplify logic in auxtrace_record__init().
Possibly reword as 

"Add find_pmu_for_event() and use to simplify logic in
auxtrace_record__init(). find_pmu_for_event() will be
reused in subsequent patches."

> 
> Signed-off-by: Qi Liu 
> Signed-off-by: Yicong Yang 
FWIW as this isn't an area I know much about. It seems
like a good cleanup and functionally equivalent.

Reviewed-by: Jonathan Cameron 
> ---
>  tools/perf/arch/arm/util/auxtrace.c | 53 ++---
>  1 file changed, 34 insertions(+), 19 deletions(-)
> 
> diff --git a/tools/perf/arch/arm/util/auxtrace.c 
> b/tools/perf/arch/arm/util/auxtrace.c
> index 5fc6a2a3dbc5..384c7cfda0fd 100644
> --- a/tools/perf/arch/arm/util/auxtrace.c
> +++ b/tools/perf/arch/arm/util/auxtrace.c
> @@ -50,16 +50,32 @@ static struct perf_pmu **find_all_arm_spe_pmus(int 
> *nr_spes, int *err)
>   return arm_spe_pmus;
>  }
>  
> +static struct perf_pmu *find_pmu_for_event(struct perf_pmu **pmus,
> +int pmu_nr, struct evsel *evsel)
> +{
> + int i;
> +
> + if (!pmus)
> + return NULL;
> +
> + for (i = 0; i < pmu_nr; i++) {
> + if (evsel->core.attr.type == pmus[i]->type)
> + return pmus[i];
> + }
> +
> + return NULL;
> +}
> +
>  struct auxtrace_record
>  *auxtrace_record__init(struct evlist *evlist, int *err)
>  {
> - struct perf_pmu *cs_etm_pmu;
> + struct perf_pmu *cs_etm_pmu = NULL;
> + struct perf_pmu **arm_spe_pmus = NULL;
>   struct evsel *evsel;
> - bool found_etm = false;
> + struct perf_pmu *found_etm = NULL;
>   struct perf_pmu *found_spe = NULL;
> - struct perf_pmu **arm_spe_pmus = NULL;
> + int auxtrace_event_cnt = 0;
>   int nr_spes = 0;
> - int i = 0;
>  
>   if (!evlist)
>   return NULL;
> @@ -68,24 +84,23 @@ struct auxtrace_record
>   arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, &err);
>  
>   evlist__for_each_entry(evlist, evsel) {
> - if (cs_etm_pmu &&
> - evsel->core.attr.type == cs_etm_pmu->type)
> - found_etm = true;
> -
> - if (!nr_spes || found_spe)
> - continue;
> -
> - for (i = 0; i < nr_spes; i++) {
> - if (evsel->core.attr.type == arm_spe_pmus[i]->type) {
> - found_spe = arm_spe_pmus[i];
> - break;
> - }
> - }
> + if (cs_etm_pmu && !found_etm)
> + found_etm = find_pmu_for_event(&cs_etm_pmu, 1, evsel);
> +
> + if (arm_spe_pmus && !found_spe)
> + found_spe = find_pmu_for_event(arm_spe_pmus, nr_spes, 
> evsel);
>   }
> +
>   free(arm_spe_pmus);
>  
> - if (found_etm && found_spe) {
> - pr_err("Concurrent ARM Coresight ETM and SPE operation not 
> currently supported\n");
> + if (found_etm)
> + auxtrace_event_cnt++;
> +
> + if (found_spe)
> + auxtrace_event_cnt++;
> +
> + if (auxtrace_event_cnt > 1) {
> + pr_err("Concurrent AUX trace operation not currently 
> supported\n");
>   *err = -EOPNOTSUPP;
>   return NULL;
>   }

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 2/8] hwtracing: hisi_ptt: Add trace function support for HiSilicon PCIe Tune and Trace device

2022-05-16 Thread Jonathan Cameron via iommu
On Mon, 16 May 2022 20:52:17 +0800
Yicong Yang  wrote:

> HiSilicon PCIe tune and trace device(PTT) is a PCIe Root Complex integrated
> Endpoint(RCiEP) device, providing the capability to dynamically monitor and
> tune the PCIe traffic and trace the TLP headers.
> 
> Add the driver for the device to enable the trace function. Register PMU
> device of PTT trace, then users can use trace through perf command. The
> driver makes use of perf AUX trace function and support the following
> events to configure the trace:
> 
> - filter: select Root port or Endpoint to trace
> - type: select the type of traced TLP headers
> - direction: select the direction of traced TLP headers
> - format: select the data format of the traced TLP headers
> 
> This patch initially add a basic driver of PTT trace.
> 
> Signed-off-by: Yicong Yang 

Hi Yicong,

It's been a while since I looked at this driver, so I'll admit
I can't remember if any of the things I've raised below were
previously discussed. 

All minor stuff (biggest is question of failing cleanly in unlikely
case of failing the allocation in the filter addition vs carrying
on anyway), so feel free to add

Reviewed-by: Jonathan Cameron 

> diff --git a/drivers/hwtracing/ptt/Makefile b/drivers/hwtracing/ptt/Makefile
> new file mode 100644
> index ..908c09a98161
> --- /dev/null
> +++ b/drivers/hwtracing/ptt/Makefile
> @@ -0,0 +1,2 @@
> +# SPDX-License-Identifier: GPL-2.0
> +obj-$(CONFIG_HISI_PTT) += hisi_ptt.o
> diff --git a/drivers/hwtracing/ptt/hisi_ptt.c 
> b/drivers/hwtracing/ptt/hisi_ptt.c
> new file mode 100644
> index ..ef25ce98f664
> --- /dev/null
> +++ b/drivers/hwtracing/ptt/hisi_ptt.c


...


> +
> +static int hisi_ptt_init_filters(struct pci_dev *pdev, void *data)
> +{
> + struct hisi_ptt_filter_desc *filter;
> + struct hisi_ptt *hisi_ptt = data;
> +
> + filter = kzalloc(sizeof(*filter), GFP_KERNEL);
> + if (!filter) {
> + pci_err(hisi_ptt->pdev, "failed to add filter %s\n", 
> pci_name(pdev));

If this fails we carry on anyway (no error checking on the bus_walk).
I think we should error out in that case (would need to use a flag placed
somewhere in hisi_ptt to tell we had an error).

That would complicate the unwind though.
Easiest way to do that unwind is probably to register a separate
devm_add_action_or_reset() callback for each filter.

If you prefer to carry on even with this allocation error, then maybe add a 
comment
here somewhere to make it clear that will happen.

> + return -ENOMEM;
> + }
> +
> + filter->devid = PCI_DEVID(pdev->bus->number, pdev->devfn);
> +
> + if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) {
> + filter->is_port = true;
> + list_add_tail(>list, _ptt->port_filters);
> +
> + /* Update the available port mask */
> + hisi_ptt->port_mask |= hisi_ptt_get_filter_val(filter->devid, 
> true);
> + } else {
> + list_add_tail(>list, _ptt->req_filters);
> + }
> +
> + return 0;
> +}
> +
> +static void hisi_ptt_release_filters(void *data)
> +{
> + struct hisi_ptt_filter_desc *filter, *tmp;
> + struct hisi_ptt *hisi_ptt = data;
> +
> + list_for_each_entry_safe(filter, tmp, _ptt->req_filters, list) {
> + list_del(>list);
> + kfree(filter);

I think with separate release per entry above, this bit become simpler as
we walk all the elements in the devm_ callback list rather than two lists here.

> + }
> +
> + list_for_each_entry_safe(filter, tmp, _ptt->port_filters, list) {
> + list_del(>list);
> + kfree(filter);
> + }
> +}
> +

...

> +
> +static int hisi_ptt_init_ctrls(struct hisi_ptt *hisi_ptt)
> +{
> + struct pci_dev *pdev = hisi_ptt->pdev;
> + struct pci_bus *bus;
> + int ret;
> + u32 reg;
> +
> + INIT_LIST_HEAD(_ptt->port_filters);
> + INIT_LIST_HEAD(_ptt->req_filters);
> +
> + ret = hisi_ptt_config_trace_buf(hisi_ptt);
> + if (ret)
> + return ret;
> +
> + /*
> +  * The device range register provides the information about the
> +  * root ports which the RCiEP can control and trace. The RCiEP
> +  * and the root ports it support are on the same PCIe core, with
> +  * same domain number but maybe different bus number. The device
> +  * range register will tell us which root ports we can support,
> +  * Bit[31:16] indicates the upper BDF numbers of the root port,
> +  * while Bit[15:0] indicates the lower.
> +  */
> + reg = readl(hisi_ptt->iobase + HISI_PTT_DEVICE_RANGE);
>

Re: [PATCH v5 3/8] hisi_ptt: Register PMU device for PTT trace

2022-03-08 Thread Jonathan Cameron via iommu
On Tue, 8 Mar 2022 19:13:08 +0800
Yicong Yang  wrote:

> On 2022/3/8 18:21, Jonathan Cameron wrote:
> > On Tue, 8 Mar 2022 16:49:25 +0800
> > Yicong Yang  wrote:
> >   
> >> Register PMU device of PTT trace, then users can use trace through perf
> >> command. The driver makes use of perf AUX trace and support following
> >> events to configure the trace:
> >>
> >> - filter: select Root port or Endpoint to trace
> >> - type: select the type of traced TLP headers
> >> - direction: select the direction of traced TLP headers
> >> - format: select the data format of the traced TLP headers
> >>
> >> This patch adds the PMU driver part of PTT trace. The perf command support
> >> of PTT trace is added in the following patch.
> >>
> >> Signed-off-by: Yicong Yang   
> > 
> > It seems to me that you ended up doing both suggestions for
> > how to clean up the remove order when it was meant to be
> > a question of picking one or the other.
> > 
> > Otherwise this looks good to me - so with that tidied up
> >   
> 
> Hi Jonathan,
> 
> Thanks for the comments. I'd like to illustrate the reason why I decide to
> manually unregister the PMU device.
> 
> The DMA buffers are devm allocated when necessary. They're only allocated
> when user is going to use the PTT in the first time after the driver's probe,
> so when driver removal the buffers are released prior to the PMU device's
> unregistration. I think there's a race condition.
> 
> IIUC, The PMU device(as the user interface) should be unregistered first then
> we're safe to free the DMA buffers. But unregister the PMU device by devm
> cannot keep that order.

Ok. Please add a comment in the remove() giving this reasoning.

Jonathan

> 
> Thanks,
> Yicong
> 
> > Reviewed-by: Jonathan Cameron 
> >   
> >> ---  
> >   
> >> +
> >> +static int hisi_ptt_register_pmu(struct hisi_ptt *hisi_ptt)
> >> +{
> >> +  u16 core_id, sicl_id;
> >> +  char *pmu_name;
> >> +  u32 reg;
> >> +
> >> +  hisi_ptt->hisi_ptt_pmu = (struct pmu) {
> >> +  .module = THIS_MODULE,
> >> +  .capabilities   = PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
> >> +  .task_ctx_nr= perf_sw_context,
> >> +  .attr_groups= hisi_ptt_pmu_groups,
> >> +  .event_init = hisi_ptt_pmu_event_init,
> >> +  .setup_aux  = hisi_ptt_pmu_setup_aux,
> >> +  .free_aux   = hisi_ptt_pmu_free_aux,
> >> +  .start  = hisi_ptt_pmu_start,
> >> +  .stop   = hisi_ptt_pmu_stop,
> >> +  .add= hisi_ptt_pmu_add,
> >> +  .del= hisi_ptt_pmu_del,
> >> +  };
> >> +
> >> +  reg = readl(hisi_ptt->iobase + HISI_PTT_LOCATION);
> >> +  core_id = FIELD_GET(HISI_PTT_CORE_ID, reg);
> >> +  sicl_id = FIELD_GET(HISI_PTT_SICL_ID, reg);
> >> +
> >> +  pmu_name = devm_kasprintf(_ptt->pdev->dev, GFP_KERNEL, 
> >> "hisi_ptt%u_%u",
> >> +sicl_id, core_id);
> >> +  if (!pmu_name)
> >> +  return -ENOMEM;
> >> +
> >> +  return perf_pmu_register(_ptt->hisi_ptt_pmu, pmu_name, -1);  
> > 
> > As below, you can put back the devm cleanup that you had in v4 now you
> > have modified how the filter cleanup is done to also be devm managed.
> >   
> >> +}
> >> +
> >>  /*
> >>   * The DMA of PTT trace can only use direct mapping, due to some
> >>   * hardware restriction. Check whether there is an IOMMU or the
> >> @@ -303,15 +825,32 @@ static int hisi_ptt_probe(struct pci_dev *pdev,
> >>  
> >>pci_set_master(pdev);
> >>  
> >> +  ret = hisi_ptt_register_irq(hisi_ptt);
> >> +  if (ret)
> >> +  return ret;
> >> +
> >>ret = hisi_ptt_init_ctrls(hisi_ptt);
> >>if (ret) {
> >>pci_err(pdev, "failed to init controls, ret = %d.\n", ret);
> >>return ret;
> >>}
> >>  
> >> +  ret = hisi_ptt_register_pmu(hisi_ptt);
> >> +  if (ret) {
> >> +  pci_err(pdev, "failed to register pmu device, ret = %d", ret);
> >> +  return ret;
> >> +  }
> >> +
> >>return 0;
> >>  }
> >>  
> >> +void hisi_ptt_remove(struct pci_dev *pdev)
> >> +{
> >> +  struct hisi_ptt *hisi_ptt = pci_get_drvdata(pdev);
> >> +
> >> +  perf_pmu_unregister(_ptt->hisi_ptt_pmu);  
> > 
> > Now you have the filter cleanup occurring using a devm_add_action_or_reset()
> > there is no need to have a manual cleanup of this - you can
> > use the approach of a devm_add_action_or_reset like you had in v4.
> > 
> > As it is the last call in the probe() order it will be the first one
> > called in the device managed cleanup.
> >   
> >> +}
> >> +  
> > 
> > 
> > .
> >   

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v5 8/8] MAINTAINERS: Add maintainer for HiSilicon PTT driver

2022-03-08 Thread Jonathan Cameron via iommu
On Tue, 8 Mar 2022 16:49:30 +0800
Yicong Yang  wrote:

> Add maintainer for driver and documentation of HiSilicon PTT device.
> 
> Signed-off-by: Yicong Yang 
FWIW
Reviewed-by: Jonathan Cameron 

I've left the perf tool and iommu patches without tags from me
as I don't have the background to do a thorough review.

Thanks,

Jonathan


> ---
>  MAINTAINERS | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index ea3e6c914384..237c618a74d5 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -8689,6 +8689,13 @@ F: Documentation/admin-guide/perf/hisi-pcie-pmu.rst
>  F:   Documentation/admin-guide/perf/hisi-pmu.rst
>  F:   drivers/perf/hisilicon
>  
> +HISILICON PTT DRIVER
> +M:   Yicong Yang 
> +L:   linux-ker...@vger.kernel.org
> +S:   Maintained
> +F:   Documentation/trace/hisi-ptt.rst
> +F:   drivers/hwtracing/ptt/
> +
>  HISILICON QM AND ZIP Controller DRIVER
>  M:   Zhou Wang 
>  L:   linux-cry...@vger.kernel.org

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v5 4/8] hisi_ptt: Add support for dynamically updating the filter list

2022-03-08 Thread Jonathan Cameron via iommu
On Tue, 8 Mar 2022 16:49:26 +0800
Yicong Yang  wrote:

> The PCIe devices supported by the PTT trace can be removed/rescanned by
> hotplug or through sysfs.  Add support for dynamically updating the
> available filter list by registering a PCI bus notifier block. Then user
> can always get latest information about available tracing filters and
> driver can block the invalid filters of which related devices no longer
> exist in the system.
> 
> Signed-off-by: Yicong Yang 
You've made the change I requested in v4 so

Reviewed-by: Jonathan Cameron 

> ---
>  drivers/hwtracing/ptt/hisi_ptt.c | 157 ---
>  drivers/hwtracing/ptt/hisi_ptt.h |  34 +++
>  2 files changed, 176 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/hwtracing/ptt/hisi_ptt.c 
> b/drivers/hwtracing/ptt/hisi_ptt.c
> index f06fbbb8a12a..953d36dfcc89 100644
> --- a/drivers/hwtracing/ptt/hisi_ptt.c
> +++ b/drivers/hwtracing/ptt/hisi_ptt.c
> @@ -270,25 +270,118 @@ static int hisi_ptt_register_irq(struct hisi_ptt 
> *hisi_ptt)
>   return 0;
>  }
>  
> -static int hisi_ptt_init_filters(struct pci_dev *pdev, void *data)
> +static void hisi_ptt_update_filters(struct work_struct *work)
>  {
> + struct delayed_work *delayed_work = to_delayed_work(work);
> + struct hisi_ptt_filter_update_info info;
>   struct hisi_ptt_filter_desc *filter;
> - struct hisi_ptt *hisi_ptt = data;
>   struct list_head *target_list;
> + struct hisi_ptt *hisi_ptt;
>  
> - target_list = pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT ?
> -   _ptt->port_filters : _ptt->req_filters;
> + hisi_ptt = container_of(delayed_work, struct hisi_ptt, work);
>  
> - filter = kzalloc(sizeof(*filter), GFP_KERNEL);
> - if (!filter)
> - return -ENOMEM;
> + if (!mutex_trylock(_ptt->mutex)) {
> + schedule_delayed_work(_ptt->work, HISI_PTT_WORK_DELAY_MS);
> + return;
> + }
> +
> + while (kfifo_get(_ptt->filter_update_kfifo, )) {
> + bool is_port = pci_pcie_type(info.pdev) == 
> PCI_EXP_TYPE_ROOT_PORT;
> + u16 val = hisi_ptt_get_filter_val(info.pdev);
> +
> + target_list = is_port ? _ptt->port_filters : 
> _ptt->req_filters;
> +
> + if (info.is_add) {
> + filter = kzalloc(sizeof(*filter), GFP_KERNEL);
> + if (!filter)
> + continue;
> +
> + filter->pdev = info.pdev;
> + list_add_tail(>list, target_list);
> + } else {
> + list_for_each_entry(filter, target_list, list)
> + if (hisi_ptt_get_filter_val(filter->pdev) == 
> val) {
> + list_del(>list);
> + kfree(filter);
> + break;
> + }
> + }
>  
> - filter->pdev = pdev;
> - list_add_tail(>list, target_list);
> + /* Update the available port mask */
> + if (!is_port)
> + continue;
>  
> - /* Update the available port mask */
> - if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT)
> - hisi_ptt->port_mask |= hisi_ptt_get_filter_val(pdev);
> + if (info.is_add)
> + hisi_ptt->port_mask |= val;
> + else
> + hisi_ptt->port_mask &= ~val;
> + }
> +
> + mutex_unlock(_ptt->mutex);
> +}
> +
> +static void hisi_ptt_update_fifo_in(struct hisi_ptt *hisi_ptt,
> + struct hisi_ptt_filter_update_info *info)
> +{
> + struct pci_dev *root_port = pcie_find_root_port(info->pdev);
> + u32 port_devid;
> +
> + if (!root_port)
> + return;
> +
> + port_devid = PCI_DEVID(root_port->bus->number, root_port->devfn);
> + if (port_devid < hisi_ptt->lower ||
> + port_devid > hisi_ptt->upper)
> + return;
> +
> + if (kfifo_in_spinlocked(_ptt->filter_update_kfifo, info, 1,
> + _ptt->filter_update_lock))
> + schedule_delayed_work(_ptt->work, 0);
> + else
> + pci_warn(hisi_ptt->pdev,
> +  "filter update fifo overflow for target %s\n",
> +  pci_name(info->pdev));
> +}
> +
> +/*
> + * A PCI bus notifier is used here for dynamically updating the filter
> + * list.
> + */
> +static int hisi_ptt_notifier_call(struct notifier_block *

Re: [PATCH v5 3/8] hisi_ptt: Register PMU device for PTT trace

2022-03-08 Thread Jonathan Cameron via iommu
On Tue, 8 Mar 2022 16:49:25 +0800
Yicong Yang  wrote:

> Register PMU device of PTT trace, then users can use trace through perf
> command. The driver makes use of perf AUX trace and support following
> events to configure the trace:
> 
> - filter: select Root port or Endpoint to trace
> - type: select the type of traced TLP headers
> - direction: select the direction of traced TLP headers
> - format: select the data format of the traced TLP headers
> 
> This patch adds the PMU driver part of PTT trace. The perf command support
> of PTT trace is added in the following patch.
> 
> Signed-off-by: Yicong Yang 

It seems to me that you ended up doing both suggestions for
how to clean up the remove order when it was meant to be
a question of picking one or the other.

Otherwise this looks good to me - so with that tidied up

Reviewed-by: Jonathan Cameron 

> ---

> +
> +static int hisi_ptt_register_pmu(struct hisi_ptt *hisi_ptt)
> +{
> + u16 core_id, sicl_id;
> + char *pmu_name;
> + u32 reg;
> +
> + hisi_ptt->hisi_ptt_pmu = (struct pmu) {
> + .module = THIS_MODULE,
> + .capabilities   = PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
> + .task_ctx_nr= perf_sw_context,
> + .attr_groups= hisi_ptt_pmu_groups,
> + .event_init = hisi_ptt_pmu_event_init,
> + .setup_aux  = hisi_ptt_pmu_setup_aux,
> + .free_aux   = hisi_ptt_pmu_free_aux,
> + .start  = hisi_ptt_pmu_start,
> + .stop   = hisi_ptt_pmu_stop,
> + .add= hisi_ptt_pmu_add,
> + .del= hisi_ptt_pmu_del,
> + };
> +
> + reg = readl(hisi_ptt->iobase + HISI_PTT_LOCATION);
> + core_id = FIELD_GET(HISI_PTT_CORE_ID, reg);
> + sicl_id = FIELD_GET(HISI_PTT_SICL_ID, reg);
> +
> + pmu_name = devm_kasprintf(_ptt->pdev->dev, GFP_KERNEL, 
> "hisi_ptt%u_%u",
> +   sicl_id, core_id);
> + if (!pmu_name)
> + return -ENOMEM;
> +
> + return perf_pmu_register(_ptt->hisi_ptt_pmu, pmu_name, -1);

As below, you can put back the devm cleanup that you had in v4 now you
have modified how the filter cleanup is done to also be devm managed.

> +}
> +
>  /*
>   * The DMA of PTT trace can only use direct mapping, due to some
>   * hardware restriction. Check whether there is an IOMMU or the
> @@ -303,15 +825,32 @@ static int hisi_ptt_probe(struct pci_dev *pdev,
>  
>   pci_set_master(pdev);
>  
> + ret = hisi_ptt_register_irq(hisi_ptt);
> + if (ret)
> + return ret;
> +
>   ret = hisi_ptt_init_ctrls(hisi_ptt);
>   if (ret) {
>   pci_err(pdev, "failed to init controls, ret = %d.\n", ret);
>   return ret;
>   }
>  
> + ret = hisi_ptt_register_pmu(hisi_ptt);
> + if (ret) {
> + pci_err(pdev, "failed to register pmu device, ret = %d", ret);
> + return ret;
> + }
> +
>   return 0;
>  }
>  
> +void hisi_ptt_remove(struct pci_dev *pdev)
> +{
> + struct hisi_ptt *hisi_ptt = pci_get_drvdata(pdev);
> +
> + perf_pmu_unregister(_ptt->hisi_ptt_pmu);

Now you have the filter cleanup occurring using a devm_add_action_or_reset()
there is no need to have a manual cleanup of this - you can
use the approach of a devm_add_action_or_reset like you had in v4.

As it is the last call in the probe() order it will be the first one
called in the device managed cleanup.

> +}
> +


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 2/8] hwtracing: Add trace function support for HiSilicon PCIe Tune and Trace device

2022-02-21 Thread Jonathan Cameron via iommu
On Mon, 21 Feb 2022 21:13:45 +0800
Yicong Yang  wrote:

> Hi Jonathan,
> 
> On 2022/2/21 19:18, Jonathan Cameron wrote:
> > On Mon, 21 Feb 2022 16:43:01 +0800
> > Yicong Yang  wrote:
> >   
> >> HiSilicon PCIe tune and trace device(PTT) is a PCIe Root Complex
> >> integrated Endpoint(RCiEP) device, providing the capability
> >> to dynamically monitor and tune the PCIe traffic, and trace
> >> the TLP headers.
> >>
> >> Add the driver for the device to enable the trace function.
> >> This patch adds basic function of trace, including the device's
> >> probe and initialization, functions for trace buffer allocation
> >> and trace enable/disable, register an interrupt handler to
> >> simply response to the DMA events. The user interface of trace
> >> will be added in the following patch.
> >>
> >> Signed-off-by: Yicong Yang   
> > 
> > Hi Yicong,
> > 
> > A few really minor things inline, particularly one place
> > where you can improve the error handling.
> > It's always fiddly to handle errors in a pci_walk_bus() but
> > in this case it's not too difficult as you just need to store
> > the retval somewhere in the private data then retrieve it
> > after the pci_walk_bus() call.
> >   
> 
> Thanks for the quick reply!
> 
> The pci_walk_bus() in this patch will fail only if the memory allocation
> of filter struct fails. We won't allocate memory in the pci_bus_walk()
> after Patch 4 so it will never fail. Maybe I can add some comments
> mentioning this.
Great. Given that answers my only significant question.

Reviewed-by: Jonathan Cameron 

> 
> I also expressed this inline.
> 
> > Thanks,
> > 
> > Jonathan
> > 
> > 
> >   
> >> ---
> >>  drivers/Makefile |   1 +
> >>  drivers/hwtracing/Kconfig|   2 +
> >>  drivers/hwtracing/ptt/Kconfig|  11 +
> >>  drivers/hwtracing/ptt/Makefile   |   2 +
> >>  drivers/hwtracing/ptt/hisi_ptt.c | 370 +++
> >>  drivers/hwtracing/ptt/hisi_ptt.h | 149 +
> >>  6 files changed, 535 insertions(+)
> >>  create mode 100644 drivers/hwtracing/ptt/Kconfig
> >>  create mode 100644 drivers/hwtracing/ptt/Makefile
> >>  create mode 100644 drivers/hwtracing/ptt/hisi_ptt.c
> >>  create mode 100644 drivers/hwtracing/ptt/hisi_ptt.h
> >>
> >> diff --git a/drivers/Makefile b/drivers/Makefile
> >> index a110338c860c..ab3411e4eba5 100644
> >> --- a/drivers/Makefile
> >> +++ b/drivers/Makefile
> >> @@ -175,6 +175,7 @@ obj-$(CONFIG_USB4) += thunderbolt/
> >>  obj-$(CONFIG_CORESIGHT)   += hwtracing/coresight/
> >>  obj-y += hwtracing/intel_th/
> >>  obj-$(CONFIG_STM) += hwtracing/stm/
> >> +obj-$(CONFIG_HISI_PTT)+= hwtracing/ptt/
> >>  obj-$(CONFIG_ANDROID) += android/
> >>  obj-$(CONFIG_NVMEM)   += nvmem/
> >>  obj-$(CONFIG_FPGA)+= fpga/
> >> diff --git a/drivers/hwtracing/Kconfig b/drivers/hwtracing/Kconfig
> >> index 13085835a636..911ee977103c 100644
> >> --- a/drivers/hwtracing/Kconfig
> >> +++ b/drivers/hwtracing/Kconfig
> >> @@ -5,4 +5,6 @@ source "drivers/hwtracing/stm/Kconfig"
> >>  
> >>  source "drivers/hwtracing/intel_th/Kconfig"
> >>  
> >> +source "drivers/hwtracing/ptt/Kconfig"
> >> +
> >>  endmenu
> >> diff --git a/drivers/hwtracing/ptt/Kconfig b/drivers/hwtracing/ptt/Kconfig
> >> new file mode 100644
> >> index ..41fa83921a07
> >> --- /dev/null
> >> +++ b/drivers/hwtracing/ptt/Kconfig
> >> @@ -0,0 +1,11 @@
> >> +# SPDX-License-Identifier: GPL-2.0-only
> >> +config HISI_PTT
> >> +  tristate "HiSilicon PCIe Tune and Trace Device"
> >> +  depends on ARM64 && PCI && HAS_DMA && HAS_IOMEM
> >> +  help
> >> +HiSilicon PCIe Tune and Trace Device exists as a PCIe RCiEP
> >> +device, and it provides support for PCIe traffic tuning and
> >> +tracing TLP headers to the memory.
> >> +
> >> +This driver can also be built as a module. If so, the module
> >> +will be called hisi_ptt.
> >> diff --git a/drivers/hwtracing/ptt/Makefile 
> >> b/drivers/hwtracing/ptt/Makefile
> >> new file mode 100644
> >> index ..908c09a98161
> >> --- 

Re: [PATCH v4 7/8] docs: Add HiSilicon PTT device driver documentation

2022-02-21 Thread Jonathan Cameron via iommu
On Mon, 21 Feb 2022 16:43:06 +0800
Yicong Yang  wrote:

> Document the introduction and usage of HiSilicon PTT device driver.
> 
> Signed-off-by: Yicong Yang 

Reviewed-by: Jonathan Cameron 

> ---
>  Documentation/trace/hisi-ptt.rst | 303 +++
>  1 file changed, 303 insertions(+)
>  create mode 100644 Documentation/trace/hisi-ptt.rst
> 
> diff --git a/Documentation/trace/hisi-ptt.rst 
> b/Documentation/trace/hisi-ptt.rst
> new file mode 100644
> index ..13677705ee1f
> --- /dev/null
> +++ b/Documentation/trace/hisi-ptt.rst
> @@ -0,0 +1,303 @@
> +.. SPDX-License-Identifier: GPL-2.0
> +
> +==
> +HiSilicon PCIe Tune and Trace device
> +==
> +
> +Introduction
> +
> +
> +HiSilicon PCIe tune and trace device (PTT) is a PCIe Root Complex
> +integrated Endpoint (RCiEP) device, providing the capability
> +to dynamically monitor and tune the PCIe link's events (tune),
> +and trace the TLP headers (trace). The two functions are independent,
> +but is recommended to use them together to analyze and enhance the
> +PCIe link's performance.
> +
> +On Kunpeng 930 SoC, the PCIe Root Complex is composed of several
> +PCIe cores. Each PCIe core includes several Root Ports and a PTT
> +RCiEP, like below. The PTT device is capable of tuning and
> +tracing the links of the PCIe core.
> +::
> +  +--Core 0---+
> +  |   |   [   PTT   ] |
> +  |   |   [Root Port]---[Endpoint]
> +  |   |   [Root Port]---[Endpoint]
> +  |   |   [Root Port]---[Endpoint]
> +Root Complex  |--Core 1---+
> +  |   |   [   PTT   ] |
> +  |   |   [Root Port]---[ Switch ]---[Endpoint]
> +  |   |   [Root Port]---[Endpoint] `-[Endpoint]
> +  |   |   [Root Port]---[Endpoint]
> +  +---+
> +
> +The PTT device driver registers one PMU device for each PTT device.
> +The name of each PTT device is composed of 'hisi_ptt' prefix with
> +the id of the SICL and the Core where it locates. The Kunpeng 930
> +SoC encapsulates multiple CPU dies (SCCL, Super CPU Cluster) and
> +IO dies (SICL, Super I/O Cluster), where there's one PCIe Root
> +Complex for each SICL.
> +::
> +/sys/devices/hisi_ptt<sicl_id>_<core_id>
> +
> +Tune
> +
> +
> +PTT tune is designed for monitoring and adjusting PCIe link parameters 
> (events).
> +Currently we support events in 4 classes. The scope of the events
> +covers the PCIe core to which the PTT device belongs.
> +
> +Each event is presented as a file under $(PTT PMU dir)/tune, and
> +a simple open/read/write/close cycle will be used to tune the event.
> +::
> +$ cd /sys/devices/hisi_ptt<sicl_id>_<core_id>/tune
> +$ ls
> +qos_tx_cplqos_tx_npqos_tx_p
> +tx_path_rx_req_alloc_buf_level
> +tx_path_tx_req_alloc_buf_level
> +$ cat qos_tx_dp
> +1
> +$ echo 2 > qos_tx_dp
> +$ cat qos_tx_dp
> +2
> +
> +Current value (numerical value) of the event can be simply read
> +from the file, and the desired value written to the file to tune.
> +
> +1. Tx path QoS control
> +
> +
> +The following files are provided to tune the QoS of the tx path of
> +the PCIe core.
> +
> +- qos_tx_cpl: weight of Tx completion TLPs
> +- qos_tx_np: weight of Tx non-posted TLPs
> +- qos_tx_p: weight of Tx posted TLPs
> +
> +The weight influences the proportion of certain packets on the PCIe link.
> +For example, for the storage scenario, increase the proportion
> +of the completion packets on the link to enhance the performance as
> +more completions are consumed.
> +
> +The available tune data of these events is [0, 1, 2].
> +Writing a negative value will return an error, and out of range
> +values will be converted to 2. Note that the event value just
> +indicates a probable level, but is not precise.
> +
> +2. Tx path buffer control
> +-
> +
> +Following files are provided to tune the buffer of tx path of the PCIe core.
> +
> +- tx_path_rx_req_alloc_buf_level: watermark of Rx requested
> +- tx_path_tx_req_alloc_buf_level: watermark of Tx requested
> +
> +These events influence the watermark of the buffer allocated for each
> +type. Rx means the inbound while Tx means outbound. The packets will
> +be stored in the buffer first and then transmitted either when the
> +watermark reached or when timed out. For a busy direction, you should
> +increase the related buffer watermark to avoid frequently posting and
> +thus enhance the performance. In most cases 

Re: [PATCH v4 4/8] hisi_ptt: Add support for dynamically updating the filter list

2022-02-21 Thread Jonathan Cameron via iommu
On Mon, 21 Feb 2022 16:43:03 +0800
Yicong Yang  wrote:

> The PCIe devices supported by the PTT trace can be removed/rescanned
> by hotplug or through sysfs.  Add support for dynamically updating
> the available filter list by registering a PCI bus notifier block.
> Then user can always get latest information about available tracing
> filters and driver can block the invalid filters of which related
> devices no longer exist in the system.
> 
> Signed-off-by: Yicong Yang 

One comment following on from ordering of mixed devm and manual cleanup
in earlier patches.

Otherwise looks fine to me.

> ---
>  drivers/hwtracing/ptt/hisi_ptt.c | 138 ---
>  drivers/hwtracing/ptt/hisi_ptt.h |  34 
>  2 files changed, 160 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/hwtracing/ptt/hisi_ptt.c 
> b/drivers/hwtracing/ptt/hisi_ptt.c
> index c2b6f8aa9f1e..50193a331faa 100644
> --- a/drivers/hwtracing/ptt/hisi_ptt.c
> +++ b/drivers/hwtracing/ptt/hisi_ptt.c
> @@ -269,25 +269,118 @@ static int hisi_ptt_register_irq(struct hisi_ptt 
> *hisi_ptt)
>   return 0;
>  }
>  


...

> @@ -313,6 +406,9 @@ static void hisi_ptt_init_ctrls(struct hisi_ptt *hisi_ptt)
>   struct pci_bus *bus;
>   u32 reg;
>  
> + INIT_DELAYED_WORK(_ptt->work, hisi_ptt_update_filters);
> + spin_lock_init(_ptt->filter_update_lock);
> + INIT_KFIFO(hisi_ptt->filter_update_kfifo);
>   INIT_LIST_HEAD(_ptt->port_filters);
>   INIT_LIST_HEAD(_ptt->req_filters);
>  
> @@ -329,6 +425,13 @@ static void hisi_ptt_init_ctrls(struct hisi_ptt 
> *hisi_ptt)
>   hisi_ptt->upper = FIELD_GET(HISI_PTT_DEVICE_RANGE_UPPER, reg);
>   hisi_ptt->lower = FIELD_GET(HISI_PTT_DEVICE_RANGE_LOWER, reg);
>  
> + /*
> +  * No need to fail if the bus is NULL here as the device
> +  * maybe hotplugged after the PTT driver probe, in which
> +  * case we can detect the event and update the list as
> +  * we register a bus notifier for dynamically updating
> +  * the filter list.
> +  */
>   bus = pci_find_bus(pci_domain_nr(pdev->bus), 
> PCI_BUS_NUM(hisi_ptt->upper));
>   if (bus)
>   pci_walk_bus(bus, hisi_ptt_init_filters, hisi_ptt);
> @@ -832,6 +935,12 @@ static int hisi_ptt_probe(struct pci_dev *pdev,
>   return ret;
>   }
>  
> + /* Register the bus notifier for dynamically updating the filter list */
> + hisi_ptt->hisi_ptt_nb.notifier_call = hisi_ptt_notifier_call;
> + ret = bus_register_notifier(_bus_type, _ptt->hisi_ptt_nb);
> + if (ret)
> + pci_warn(pdev, "failed to register filter update notifier, ret 
> = %d", ret);
> +
>   return 0;
>  }
>  
> @@ -839,6 +948,11 @@ void hisi_ptt_remove(struct pci_dev *pdev)
>  {
>   struct hisi_ptt *hisi_ptt = pci_get_drvdata(pdev);
>  
> + bus_unregister_notifier(_bus_type, _ptt->hisi_ptt_nb);
> +

wrt the earlier comment on ordering, you'll also need to move these to
a devm_action() call to keep the ordering clean wrt probe() vs remove().

> + /* Cancel any work that has been queued */
> + cancel_delayed_work_sync(_ptt->work);
> +
>   if (hisi_ptt->trace_ctrl.status == HISI_PTT_TRACE_STATUS_ON)
>   hisi_ptt_trace_end(hisi_ptt);
>  

...

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 3/8] hisi_ptt: Register PMU device for PTT trace

2022-02-21 Thread Jonathan Cameron via iommu
On Mon, 21 Feb 2022 16:43:02 +0800
Yicong Yang  wrote:

> Register PMU device of PTT trace, then users can use
> trace through perf command. The driver makes use of perf
> AUX trace and support following events to configure the
> trace:
> 
> - filter: select Root port or Endpoint to trace
> - type: select the type of traced TLP headers
> - direction: select the direction of traced TLP headers
> - format: select the data format of the traced TLP headers
> 
> This patch adds the PMU driver part of PTT trace. The perf
> command support of PTT trace is added in the following
> patch.
> 
> Signed-off-by: Yicong Yang 

A few minor comments inline.

Thanks,

Jonathan

> +static int hisi_ptt_trace_init_filter(struct hisi_ptt *hisi_ptt, u64 config)
> +{
> + unsigned long val, port_mask = hisi_ptt->port_mask;
> + struct hisi_ptt_filter_desc *filter;
> + int ret = -EINVAL;
> +
> + hisi_ptt->trace_ctrl.is_port = FIELD_GET(HISI_PTT_PMU_FILTER_IS_PORT, 
> config);
> + val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, config);
> +
> + /*
> +  * Port filters are defined as bit mask. For port filters, check
> +  * the bits in the @val are within the range of hisi_ptt->port_mask
> +  * and whether it's empty or not, otherwise user has specified
> +  * some unsupported root ports.
> +  *
> +  * For Requester ID filters, walk the available filter list to see
> +  * whether we have one matched.
> +  */
> + if (!hisi_ptt->trace_ctrl.is_port) {
> + list_for_each_entry(filter, _ptt->req_filters, list)
> + if (val == hisi_ptt_get_filter_val(filter->pdev)) {
> + ret = 0;
> + break;
> + }
> + } else if (bitmap_subset(, _mask, BITS_PER_LONG)) {
> + ret = 0;
> + }
> +
> + if (ret)
> + return ret;
> +
> + hisi_ptt->trace_ctrl.filter = val;
> + return 0;
> +}
> +
> +static int hisi_ptt_pmu_event_init(struct perf_event *event)
> +{
> + struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
> + struct hisi_ptt_trace_ctrl *ctrl = _ptt->trace_ctrl;
> + int ret;
> + u32 val;
> +
> + if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type)
> + return -ENOENT;
> +
> + mutex_lock(_ptt->mutex);
> +
> + ret = hisi_ptt_trace_init_filter(hisi_ptt, event->attr.config);
> + if (ret < 0)
> + goto out;
> +
> + val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
> + ret = hisi_ptt_trace_valid_config_onehot(val, 
> hisi_ptt_trace_available_direction,
> +  
> ARRAY_SIZE(hisi_ptt_trace_available_direction));
> + if (ret < 0)
> + goto out;
> + ctrl->direction = val;
> +
> + val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
> +

For consistency, no blank line here.

> + ret = hisi_ptt_trace_valid_config(val, hisi_ptt_trace_available_type,
> +   
> ARRAY_SIZE(hisi_ptt_trace_available_type));
> + if (ret < 0)
> + goto out;
> + ctrl->type = val;
> +
> + val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
> + ret = hisi_ptt_trace_valid_config_onehot(val, 
> hisi_ptt_trace_availble_format,
> +  
> ARRAY_SIZE(hisi_ptt_trace_availble_format));
> + if (ret < 0)
> + goto out;
> + ctrl->format = val;
> +
> +out:
> + mutex_unlock(_ptt->mutex);
> + return ret;
> +}

...

> +
> +static void hisi_ptt_pmu_start(struct perf_event *event, int flags)
> +{
> + struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
> + struct perf_output_handle *handle = _ptt->trace_ctrl.handle;
> + struct hw_perf_event *hwc = >hw;
> + struct hisi_ptt_pmu_buf *buf;
> + int cpu = event->cpu;
> + int ret;
> +
> + hwc->state = 0;
> + mutex_lock(_ptt->mutex);
> + if (hisi_ptt->trace_ctrl.status == HISI_PTT_TRACE_STATUS_ON) {
> + pci_dbg(hisi_ptt->pdev, "trace has already started\n");
> + goto stop;

If it is already started, setting the state to STOPPED without doing anything
to change the hardware state doesn't feel right.
I'm assuming we only get here as a result of a bug, so perhaps it's fine
to do this.

> + }
> +
> + if (cpu == -1)
> + cpu = hisi_ptt->trace_ctrl.default_cpu;
> +
> + /*
> +  * Handle the interrupt on the same cpu which starts the trace to avoid
> +  * context mismatch. Otherwise we'll trigger the WARN from the perf
> +  * core in event_function_local().
> +  */
> + WARN_ON(irq_set_affinity(pci_irq_vector(hisi_ptt->pdev, 
> HISI_PTT_TRACE_DMA_IRQ),
> +  cpumask_of(cpu)));
> +
> + ret = hisi_ptt_alloc_trace_buf(hisi_ptt);
> + if (ret) {
> + pci_dbg(hisi_ptt->pdev, "alloc trace buf failed, ret = %d\n", 
> ret);
> + goto 

Re: [PATCH v4 2/8] hwtracing: Add trace function support for HiSilicon PCIe Tune and Trace device

2022-02-21 Thread Jonathan Cameron via iommu
On Mon, 21 Feb 2022 16:43:01 +0800
Yicong Yang  wrote:

> HiSilicon PCIe tune and trace device(PTT) is a PCIe Root Complex
> integrated Endpoint(RCiEP) device, providing the capability
> to dynamically monitor and tune the PCIe traffic, and trace
> the TLP headers.
> 
> Add the driver for the device to enable the trace function.
> This patch adds basic function of trace, including the device's
> probe and initialization, functions for trace buffer allocation
> and trace enable/disable, register an interrupt handler to
> simply response to the DMA events. The user interface of trace
> will be added in the following patch.
> 
> Signed-off-by: Yicong Yang 

Hi Yicong,

A few really minor things inline, particularly one place
where you can improve the error handling.
It's always fiddly to handle errors in a pci_walk_bus() but
in this case it's not too difficult as you just need to store
the retval somewhere in the private data then retrieve it
after the pci_walk_bus() call.

Thanks,

Jonathan



> ---
>  drivers/Makefile |   1 +
>  drivers/hwtracing/Kconfig|   2 +
>  drivers/hwtracing/ptt/Kconfig|  11 +
>  drivers/hwtracing/ptt/Makefile   |   2 +
>  drivers/hwtracing/ptt/hisi_ptt.c | 370 +++
>  drivers/hwtracing/ptt/hisi_ptt.h | 149 +
>  6 files changed, 535 insertions(+)
>  create mode 100644 drivers/hwtracing/ptt/Kconfig
>  create mode 100644 drivers/hwtracing/ptt/Makefile
>  create mode 100644 drivers/hwtracing/ptt/hisi_ptt.c
>  create mode 100644 drivers/hwtracing/ptt/hisi_ptt.h
> 
> diff --git a/drivers/Makefile b/drivers/Makefile
> index a110338c860c..ab3411e4eba5 100644
> --- a/drivers/Makefile
> +++ b/drivers/Makefile
> @@ -175,6 +175,7 @@ obj-$(CONFIG_USB4)+= thunderbolt/
>  obj-$(CONFIG_CORESIGHT)  += hwtracing/coresight/
>  obj-y+= hwtracing/intel_th/
>  obj-$(CONFIG_STM)+= hwtracing/stm/
> +obj-$(CONFIG_HISI_PTT)   += hwtracing/ptt/
>  obj-$(CONFIG_ANDROID)+= android/
>  obj-$(CONFIG_NVMEM)  += nvmem/
>  obj-$(CONFIG_FPGA)   += fpga/
> diff --git a/drivers/hwtracing/Kconfig b/drivers/hwtracing/Kconfig
> index 13085835a636..911ee977103c 100644
> --- a/drivers/hwtracing/Kconfig
> +++ b/drivers/hwtracing/Kconfig
> @@ -5,4 +5,6 @@ source "drivers/hwtracing/stm/Kconfig"
>  
>  source "drivers/hwtracing/intel_th/Kconfig"
>  
> +source "drivers/hwtracing/ptt/Kconfig"
> +
>  endmenu
> diff --git a/drivers/hwtracing/ptt/Kconfig b/drivers/hwtracing/ptt/Kconfig
> new file mode 100644
> index ..41fa83921a07
> --- /dev/null
> +++ b/drivers/hwtracing/ptt/Kconfig
> @@ -0,0 +1,11 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +config HISI_PTT
> + tristate "HiSilicon PCIe Tune and Trace Device"
> + depends on ARM64 && PCI && HAS_DMA && HAS_IOMEM
> + help
> +   HiSilicon PCIe Tune and Trace Device exists as a PCIe RCiEP
> +   device, and it provides support for PCIe traffic tuning and
> +   tracing TLP headers to the memory.
> +
> +   This driver can also be built as a module. If so, the module
> +   will be called hisi_ptt.
> diff --git a/drivers/hwtracing/ptt/Makefile b/drivers/hwtracing/ptt/Makefile
> new file mode 100644
> index ..908c09a98161
> --- /dev/null
> +++ b/drivers/hwtracing/ptt/Makefile
> @@ -0,0 +1,2 @@
> +# SPDX-License-Identifier: GPL-2.0
> +obj-$(CONFIG_HISI_PTT) += hisi_ptt.o
> diff --git a/drivers/hwtracing/ptt/hisi_ptt.c 
> b/drivers/hwtracing/ptt/hisi_ptt.c
> new file mode 100644
> index ..a5b4f09ccd1e
> --- /dev/null
> +++ b/drivers/hwtracing/ptt/hisi_ptt.c
> @@ -0,0 +1,370 @@

...

> +static void hisi_ptt_free_trace_buf(struct hisi_ptt *hisi_ptt)
> +{
> + struct hisi_ptt_trace_ctrl *ctrl = _ptt->trace_ctrl;
> + struct device *dev = _ptt->pdev->dev;
> + int i;
> +
> + if (!ctrl->trace_buf)
> + return;
> +
> + for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++)
> + if (ctrl->trace_buf[i].addr)
> + dma_free_coherent(dev, HISI_PTT_TRACE_BUF_SIZE,
> +   ctrl->trace_buf[i].addr,
> +   ctrl->trace_buf[i].dma);
> +
> + kfree(ctrl->trace_buf);
> + ctrl->trace_buf = NULL;
> +}
> +
> +static int hisi_ptt_alloc_trace_buf(struct hisi_ptt *hisi_ptt)
> +{
> + struct hisi_ptt_trace_ctrl *ctrl = _ptt->trace_ctrl;
> + struct device *dev = _ptt->pdev->dev;
> + int i;
> +
> + hisi_ptt->trace_ctrl.buf_index = 0;
> +
> + /* If the trace buffer has already been allocated, zero it. */
> + if (ctrl->trace_buf) {
> + for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++)
> + memset(ctrl->trace_buf[i].addr, 0, 
> HISI_PTT_TRACE_BUF_SIZE);
> + return 0;
> + }
> +
> + ctrl->trace_buf = kcalloc(HISI_PTT_TRACE_BUF_CNT, sizeof(struct 
> hisi_ptt_dma_buffer),

Slight 

Re: [PATCH v3 6/8] docs: Add HiSilicon PTT device driver documentation

2022-02-07 Thread Jonathan Cameron via iommu
On Mon, 24 Jan 2022 21:11:16 +0800
Yicong Yang  wrote:

> Document the introduction and usage of HiSilicon PTT device driver.
> 
> Signed-off-by: Yicong Yang 
Nice document.  A few trivial typos inline.
I would give a RB except I've suggested you change a part of the
sysfs interface which will affect the relevant documentation.

Thanks,

Jonathan

> ---
>  Documentation/trace/hisi-ptt.rst | 304 +++
>  1 file changed, 304 insertions(+)
>  create mode 100644 Documentation/trace/hisi-ptt.rst
> 
> diff --git a/Documentation/trace/hisi-ptt.rst 
> b/Documentation/trace/hisi-ptt.rst
> new file mode 100644
> index ..f3269b11a2f6
> --- /dev/null
> +++ b/Documentation/trace/hisi-ptt.rst
> @@ -0,0 +1,304 @@
> +.. SPDX-License-Identifier: GPL-2.0
> +
> +==
> +HiSilicon PCIe Tune and Trace device
> +==
> +
> +Introduction
> +
> +
> +HiSilicon PCIe tune and trace device (PTT) is a PCIe Root Complex
> +integrated Endpoint (RCiEP) device, providing the capability
> +to dynamically monitor and tune the PCIe link's events (tune),
> +and trace the TLP headers (trace). The two functions are independent,
> +but is recommended to use them together to analyze and enhance the
> +PCIe link's performance.
> +
> +On Kunpeng 930 SoC, the PCIe Root Complex is composed of several
> +PCIe cores. Each PCIe core includes several Root Ports and a PTT
> +RCiEP, like below. The PTT device is capable of tuning and
> +tracing the link of the PCIe core.

links

> +::
> +  +--Core 0---+
> +  |   |   [   PTT   ] |
> +  |   |   [Root Port]---[Endpoint]
> +  |   |   [Root Port]---[Endpoint]
> +  |   |   [Root Port]---[Endpoint]
> +Root Complex  |--Core 1---+
> +  |   |   [   PTT   ] |
> +  |   |   [Root Port]---[ Switch ]---[Endpoint]
> +  |   |   [Root Port]---[Endpoint] `-[Endpoint]
> +  |   |   [Root Port]---[Endpoint]
> +  +---+
> +
> +The PTT device driver registers PMU device for each PTT device.

registers one PMU device ..

> +The name of each PTT device is composed of 'hisi_ptt' prefix with
> +the id of the SICL and the Core where it locates. The Kunpeng 930
> +SoC encapsulates multiple CPU dies (SCCL, Super CPU Cluster) and
> +IO dies (SICL, Super I/O Cluster), where there's one PCIe Root
> +Complex for each SICL.
> +::
> +/sys/devices/hisi_ptt_
> +
> +Tune
> +
> +
> +PTT tune is designed for monitoring and adjusting PCIe link parameters 
> (events).
> +Currently we support events in 4 classes. The scope of the events
> +covers the PCIe core to which the PTT device belongs.
> +
> +Each event is presented as a file under $(PTT PMU dir)/tune, and
> +mostly a simple open/read/write/close cycle will be used to tune

drop "mostly" as it doesn't add anything other than potential confusion.

> +the event.
> +::
> +$ cd /sys/devices/hisi_ptt_/tune
> +$ ls
> +qos_tx_cplqos_tx_npqos_tx_p
> +tx_path_rx_req_alloc_buf_level
> +tx_path_tx_req_alloc_buf_level
> +$ cat qos_tx_dp
> +1
> +$ echo 2 > qos_tx_dp
> +$ cat qos_tx_dp
> +2
> +
> +Current value (numerical value) of the event can be simply read
> +from the file, and the desired value written to the file to tune.
> +
> +1. Tx path QoS control
> +
> +
> +The following files are provided to tune the QoS of the tx path of
> +the PCIe core.
> +
> +- qos_tx_cpl: weight of Tx completion TLPs
> +- qos_tx_np: weight of Tx non-posted TLPs
> +- qos_tx_p: weight of Tx posted TLPs
> +
> +The weight influences the proportion of certain packets on the PCIe link.
> +For example, for the storage scenario, increase the proportion
> +of the completion packets on the link to enhance the performance as
> +more completions are consumed.
> +
> +The available tune data of these events is [0, 1, 2].
> +Writing a negative value will return an error, and out of range
> +values will be converted to 2. Note that the event value just
> +indicates a probable level, but is not precise.
> +
> +2. Tx path buffer control
> +-
> +
> +Following files are provided to tune the buffer of tx path of the PCIe core.
> +
> +- tx_path_rx_req_alloc_buf_level: watermark of Rx requested
> +- tx_path_tx_req_alloc_buf_level: watermark of Tx requested
> +
> +These events influence the watermark of the buffer allocated for each
> +type. Rx means the inbound while Tx means outbound. The packets will
> +be stored in the buffer first and then posted either when the watermark

Change "posted" to "transmitted" as posted has a special meaning in PCI
and I don't think that is what you mean here... (I could be wrong!)

> +reached or when timed out. For a busy direction, you should increase
> +the related buffer watermark to avoid frequently 

Re: [PATCH v3 5/8] perf tool: Add support for HiSilicon PCIe Tune and Trace device driver

2022-02-07 Thread Jonathan Cameron via iommu
On Mon, 24 Jan 2022 21:11:15 +0800
Yicong Yang  wrote:

> From: Qi Liu 
> 
> 'perf record' and 'perf report --dump-raw-trace' supported in this
> patch.
> 
> Example usage:
> 
> Output will contain raw PTT data and its textual representation, such
> as:
> 
> 0 0 0x5810 [0x30]: PERF_RECORD_AUXTRACE size: 0x40  offset: 0
> ref: 0xa5d50c725  idx: 0  tid: -1  cpu: 0
> .
> . ... HISI PTT data: size 4194304 bytes
> .  : 00 00 00 00 Prefix
> .  0004: 08 20 00 60 Header DW0
> .  0008: ff 02 00 01 Header DW1
> .  000c: 20 08 00 00 Header DW2
> .  0010: 10 e7 44 ab Header DW3
> .  0014: 2a a8 1e 01 Time
> .  0020: 00 00 00 00 Prefix
> .  0024: 01 00 00 60 Header DW0
> .  0028: 0f 1e 00 01 Header DW1
> .  002c: 04 00 00 00 Header DW2
> .  0030: 40 00 81 02 Header DW3
> .  0034: ee 02 00 00 Time
> 
> 
> Signed-off-by: Qi Liu 
> Signed-off-by: Yicong Yang 

Hi. This is unfortunately out of my areas of expertise, so I just
took a quick glance and noticed one generic c thing that could be
tidied up.

> diff --git a/tools/perf/util/hisi_ptt.c b/tools/perf/util/hisi_ptt.c
> new file mode 100644
> index ..75fa89f3fae3
> --- /dev/null
> +++ b/tools/perf/util/hisi_ptt.c

...

> +
> +static void hisi_ptt_free_queue(void *priv)
> +{
> + struct hisi_ptt_queue *pttq = priv;
> +
> + if (!pttq)
> + return;
> +
> + free(pttq);

free() is safe against a null ptr, so you don't need the 
if (!pttq) return;

See free(3) man page.

> +}
> +
> +static void hisi_ptt_free_events(struct perf_session *session)
> +{
> + struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt,
> + auxtrace);
> + struct auxtrace_queues *queues = >queues;
> + unsigned int i;
> +
> + for (i = 0; i < queues->nr_queues; i++) {
> + hisi_ptt_free_queue(queues->queue_array[i].priv);
> + queues->queue_array[i].priv = NULL;
> + }
> + auxtrace_queues__free(queues);
> +}
> +
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 4/8] hisi_ptt: Add tune function support for HiSilicon PCIe Tune and Trace device

2022-02-07 Thread Jonathan Cameron via iommu
On Mon, 24 Jan 2022 21:11:14 +0800
Yicong Yang  wrote:

> Add tune function for the HiSilicon Tune and Trace device. The interface
> of tune is exposed through sysfs attributes of PTT PMU device.
> 
> Signed-off-by: Yicong Yang 

A few trivial things inline, but looks good in general to me.
With those tidied up
Reviewed-by: Jonathan Cameron 


> ---
>  drivers/hwtracing/ptt/hisi_ptt.c | 154 +++
>  drivers/hwtracing/ptt/hisi_ptt.h |  19 
>  2 files changed, 173 insertions(+)
> 
> diff --git a/drivers/hwtracing/ptt/hisi_ptt.c 
> b/drivers/hwtracing/ptt/hisi_ptt.c
> index 2994354e690b..b11e702eb506 100644
> --- a/drivers/hwtracing/ptt/hisi_ptt.c
> +++ b/drivers/hwtracing/ptt/hisi_ptt.c
> @@ -21,6 +21,159 @@
>  
>  #include "hisi_ptt.h"
>  
> +static int hisi_ptt_wait_tuning_finish(struct hisi_ptt *hisi_ptt)
> +{
> + u32 val;
> +
> + return readl_poll_timeout(hisi_ptt->iobase + HISI_PTT_TUNING_INT_STAT,
> +   val, !(val & HISI_PTT_TUNING_INT_STAT_MASK),
> +   HISI_PTT_WAIT_POLL_INTERVAL_US,
> +   HISI_PTT_WAIT_TIMEOUT_US);
> +}
> +
> +static int hisi_ptt_tune_data_get(struct hisi_ptt *hisi_ptt,
> +   u32 event, u16 *data)
> +{
> + u32 reg;
> +
> + reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
> + reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
> + reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
> +   event);
> + writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
> +
> + /* Write all 1 to indicates it's the read process */
> + writel(~0UL, hisi_ptt->iobase + HISI_PTT_TUNING_DATA);

Just to check, this includes the bits above the DATA_VAL_MASK?
Fine if so, just seems odd to define a field but then write 
parts of the register that aren't part of that field.

> +
> + if (hisi_ptt_wait_tuning_finish(hisi_ptt))
> + return -ETIMEDOUT;
> +
> + reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
> + reg &= HISI_PTT_TUNING_DATA_VAL_MASK;
> + *data = (u16)reg;

As below, prefer a FIELD_GET() for this.

> +
> + return 0;
> +}
> +
> +static int hisi_ptt_tune_data_set(struct hisi_ptt *hisi_ptt,
> +   u32 event, u16 data)
> +{
> + u32 reg;
> +
> + reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
> + reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
> + reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
> +   event);
> + writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
> +
> + reg = data;
Given you defined HISI_PTT_TUNING_DATA_VAL_MASK why not use it here

writel(FIELD_PREP(..), ...)? 

> + writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
> +
> + if (hisi_ptt_wait_tuning_finish(hisi_ptt))
> + return -ETIMEDOUT;
> +
> + return 0;
> +}
> +


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 1/8] hwtracing: Add trace function support for HiSilicon PCIe Tune and Trace device

2022-02-07 Thread Jonathan Cameron via iommu
On Mon, 24 Jan 2022 21:11:11 +0800
Yicong Yang  wrote:

> HiSilicon PCIe tune and trace device(PTT) is a PCIe Root Complex
> integrated Endpoint(RCiEP) device, providing the capability
> to dynamically monitor and tune the PCIe traffic, and trace
> the TLP headers.
> 
> Add the driver for the device to enable the trace function.
> This patch adds basic function of trace, including the device's
> probe and initialization, functions for trace buffer allocation
> and trace enable/disable, register an interrupt handler to
> simply response to the DMA events. The user interface of trace
> will be added in the following patch.
> 
> Signed-off-by: Yicong Yang 
Hi Yicong,

I've not been following all the earlier discussion on this driver closely
so I may well raise something that has already been addressed. If so
just ignore the comment.

Thanks,

Jonathan

> ---
>  drivers/Makefile |   1 +
>  drivers/hwtracing/Kconfig|   2 +
>  drivers/hwtracing/ptt/Kconfig|  11 +
>  drivers/hwtracing/ptt/Makefile   |   2 +
>  drivers/hwtracing/ptt/hisi_ptt.c | 398 +++
>  drivers/hwtracing/ptt/hisi_ptt.h | 159 
>  6 files changed, 573 insertions(+)
>  create mode 100644 drivers/hwtracing/ptt/Kconfig
>  create mode 100644 drivers/hwtracing/ptt/Makefile
>  create mode 100644 drivers/hwtracing/ptt/hisi_ptt.c
>  create mode 100644 drivers/hwtracing/ptt/hisi_ptt.h
> 
> diff --git a/drivers/Makefile b/drivers/Makefile
> index a110338c860c..ab3411e4eba5 100644
> --- a/drivers/Makefile
> +++ b/drivers/Makefile
> @@ -175,6 +175,7 @@ obj-$(CONFIG_USB4)+= thunderbolt/
>  obj-$(CONFIG_CORESIGHT)  += hwtracing/coresight/
>  obj-y+= hwtracing/intel_th/
>  obj-$(CONFIG_STM)+= hwtracing/stm/
> +obj-$(CONFIG_HISI_PTT)   += hwtracing/ptt/
>  obj-$(CONFIG_ANDROID)+= android/
>  obj-$(CONFIG_NVMEM)  += nvmem/
>  obj-$(CONFIG_FPGA)   += fpga/
> diff --git a/drivers/hwtracing/Kconfig b/drivers/hwtracing/Kconfig
> index 13085835a636..911ee977103c 100644
> --- a/drivers/hwtracing/Kconfig
> +++ b/drivers/hwtracing/Kconfig
> @@ -5,4 +5,6 @@ source "drivers/hwtracing/stm/Kconfig"
>  
>  source "drivers/hwtracing/intel_th/Kconfig"
>  
> +source "drivers/hwtracing/ptt/Kconfig"
> +
>  endmenu
> diff --git a/drivers/hwtracing/ptt/Kconfig b/drivers/hwtracing/ptt/Kconfig
> new file mode 100644
> index ..4f4f2459ac47
> --- /dev/null
> +++ b/drivers/hwtracing/ptt/Kconfig
> @@ -0,0 +1,11 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +config HISI_PTT
> + tristate "HiSilicon PCIe Tune and Trace Device"
> + depends on ARM64 && PCI && HAS_DMA && HAS_IOMEM
> + help
> +   HiSilicon PCIe Tune and Trace Device exist as a PCIe RCiEP
> +   device, provides support for PCIe traffic tuning and
> +   tracing TLP headers to the memory.
> +
> +   This driver can also be built as a module. If so, the module
> +   will be called hisi_ptt.
> diff --git a/drivers/hwtracing/ptt/Makefile b/drivers/hwtracing/ptt/Makefile
> new file mode 100644
> index ..908c09a98161
> --- /dev/null
> +++ b/drivers/hwtracing/ptt/Makefile
> @@ -0,0 +1,2 @@
> +# SPDX-License-Identifier: GPL-2.0
> +obj-$(CONFIG_HISI_PTT) += hisi_ptt.o
> diff --git a/drivers/hwtracing/ptt/hisi_ptt.c 
> b/drivers/hwtracing/ptt/hisi_ptt.c
> new file mode 100644
> index ..6d0a0ca5c0a9
> --- /dev/null
> +++ b/drivers/hwtracing/ptt/hisi_ptt.c
> @@ -0,0 +1,398 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Driver for HiSilicon PCIe tune and trace device
> + *
> + * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
> + * Author: Yicong Yang 
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include "hisi_ptt.h"
> +
> +static u16 hisi_ptt_get_filter_val(struct pci_dev *pdev)
> +{
> + if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT)
> + return BIT(HISI_PCIE_CORE_PORT_ID(PCI_SLOT(pdev->devfn)));
> +
> + return PCI_DEVID(pdev->bus->number, pdev->devfn);
> +}
> +
> +static int hisi_ptt_wait_trace_hw_idle(struct hisi_ptt *hisi_ptt)
> +{
> + u32 val;
> +
> + return readl_poll_timeout(hisi_ptt->iobase + HISI_PTT_TRACE_STS, val,
> +   val & HISI_PTT_TRACE_IDLE,
> +   HISI_PTT_WAIT_POLL_INTERVAL_US,
> +   HISI_PTT_WAIT_TIMEOUT_US);
> +}
> +
> +static void hisi_ptt_free_trace_buf(struct hisi_ptt *hisi_ptt)
> +{
> + struct hisi_ptt_trace_ctrl *ctrl = _ptt->trace_ctrl;
> + struct device *dev = _ptt->pdev->dev;
> + struct hisi_ptt_dma_buffer *buffer, *tbuffer;
> +
> + list_for_each_entry_safe(buffer, tbuffer, >trace_buf, list) {
> + list_del(>list);
> + dma_free_coherent(dev, buffer->size, buffer->addr,
> +

Re: [PATCH v3 2/8] hisi_ptt: Register PMU device for PTT trace

2022-02-07 Thread Jonathan Cameron via iommu
On Mon, 24 Jan 2022 21:11:12 +0800
Yicong Yang  wrote:

> Register PMU device of PTT trace, then users can use
> trace through perf command. The driver makes use of perf
> AUX trace and support following events to configure the
> trace:
> 
> - filter: select Root port or Endpoint to trace
> - type: select the type of traced TLP headers
> - direction: select the direction of traced TLP headers
> - format: select the data format of the traced TLP headers
> 
> This patch adds the PMU driver part of PTT trace. The perf
> command support of PTT trace is added in the following
> patch.
> 
> Signed-off-by: Yicong Yang 
> ---


> @@ -294,6 +346,405 @@ static void hisi_ptt_init_ctrls(struct hisi_ptt 
> *hisi_ptt)
>   hisi_ptt->trace_ctrl.default_cpu = 
> cpumask_first(cpumask_of_node(dev_to_node(>dev)));
>  }
>  
> +#define HISI_PTT_PMU_FILTER_IS_PORT  BIT(19)
> +#define HISI_PTT_PMU_FILTER_VAL_MASK GENMASK(15, 0)
> +#define HISI_PTT_PMU_DIRECTION_MASK  GENMASK(23, 20)
> +#define HISI_PTT_PMU_TYPE_MASK   GENMASK(31, 24)
> +#define HISI_PTT_PMU_FORMAT_MASK GENMASK(35, 32)
> +
> +static ssize_t available_filters_show(struct device *dev,
> +   struct device_attribute *attr,
> +   char *buf)
> +{
> + struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
> + struct hisi_ptt_filter_desc *filter;
> + int pos = 0;
> +
> + if (list_empty(_ptt->port_filters))
> + return sysfs_emit(buf, " No available filter \n");
> +

This is a very unusual sysfs attribute.
They are supposed to be one "thing" per file, so I'd have expected this to
be at least two files

root_ports_available_filters
request_available_filters
and no available filter is indicated by these attribute returning an empty
string.

However you need to match convention for hwtracing drivers so if
this is common approach perhaps you could point me to a similar
example? My grep skills didn't find me one.

> + mutex_lock(_ptt->mutex);
> + pos += sysfs_emit_at(buf, pos, " Root Ports \n");
> + list_for_each_entry(filter, _ptt->port_filters, list)
> + pos += sysfs_emit_at(buf, pos, "%s  0x%05lx\n",
> +  pci_name(filter->pdev),
> +  hisi_ptt_get_filter_val(filter->pdev) |
> +  HISI_PTT_PMU_FILTER_IS_PORT);
> +
> + pos += sysfs_emit_at(buf, pos, " Requesters \n");
> + list_for_each_entry(filter, _ptt->req_filters, list)
> + pos += sysfs_emit_at(buf, pos, "%s  0x%05x\n",
> +  pci_name(filter->pdev),
> +  hisi_ptt_get_filter_val(filter->pdev));
> +
> + mutex_unlock(_ptt->mutex);
> + return pos;
> +}
> +static DEVICE_ATTR_ADMIN_RO(available_filters);
> +

...


> +static int hisi_ptt_trace_valid_config_onehot(u32 val, u32 *available_list, 
> u32 list_size)
> +{
> + int i, ret = -EINVAL;
> +
> + for (i = 0; i < list_size; i++)
> + if (val == available_list[i]) {
> + ret = 0;

return 0;

> + break;
> + }
> +
> + return ret;

return -EINVAL;

> +}
> +

> +
> +static void hisi_ptt_pmu_free_aux(void *aux)
> +{
> + struct hisi_ptt_pmu_buf *buf = aux;
> +
> + vunmap(buf->base);
> + kfree(buf);
> +}
> +


...

> +static int hisi_ptt_pmu_add(struct perf_event *event, int flags)
> +{
> + struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
> + struct hw_perf_event *hwc = >hw;
> + int cpu = event->cpu;
> +
> + if (cpu == -1 && smp_processor_id() != hisi_ptt->trace_ctrl.default_cpu)

This check is not entirely obvious to me. Perhaps a comment would help
readers understand why this condition is successful, but doesn't involve
actually starting the pmu?

> + return 0;
> +
> + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
> +
> + if (flags & PERF_EF_START) {
> + hisi_ptt_pmu_start(event, PERF_EF_RELOAD);
> + if (hwc->state & PERF_HES_STOPPED)
> + return -EINVAL;
> + }
> +
> + return 0;
> +}

...

>  /*
>   * The DMA of PTT trace can only use direct mapping, due to some
>   * hardware restriction. Check whether there is an iommu or the
> @@ -359,6 +810,12 @@ static int hisi_ptt_probe(struct pci_dev *pdev,
>  
>   hisi_ptt_init_ctrls(hisi_ptt);
>  
> + ret = hisi_ptt_register_pmu(hisi_ptt);
> + if (ret) {
> + pci_err(pdev, "failed to register pmu device, ret = %d", ret);

Given I think this exposes userspace interfaces, it should be the very
last thing done in probe(). Otherwise we have a race condition (at least in
theory) where someone starts using it before we then fail the iommu mapping 
check.


> + return ret;
> + }
> +
>   ret = hisi_ptt_check_iommu_mapping(hisi_ptt);
>   if (ret) {
>   

Re: [patch 08/37] genirq/msi: Provide msi_device_populate/destroy_sysfs()

2021-11-30 Thread Jonathan Cameron via iommu
On Sat, 27 Nov 2021 02:20:19 +0100 (CET)
Thomas Gleixner  wrote:

> Add new allocation functions which can be activated by domain info
> flags. They store the groups pointer in struct msi_device_data.
> 
> Signed-off-by: Thomas Gleixner 

A few trivial comments...

> ---
>  include/linux/msi.h |   12 +++-
>  kernel/irq/msi.c|   42 --
>  2 files changed, 51 insertions(+), 3 deletions(-)
> 
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -174,9 +174,11 @@ struct msi_desc {
>  /**
>   * msi_device_data - MSI per device data
>   * @lock:Spinlock to protect register access
> + * @attrs:   Pointer to the sysfs attribute group
>   */
>  struct msi_device_data {
> - raw_spinlock_t  lock;
> + raw_spinlock_t  lock;

Trivial: Move the alignment change back to patch 2.

> + const struct attribute_group**attrs;
>  };
>  
>  int msi_setup_device_data(struct device *dev);
> @@ -242,10 +244,16 @@ void pci_msi_mask_irq(struct irq_data *d
>  void pci_msi_unmask_irq(struct irq_data *data);
>  
>  #ifdef CONFIG_SYSFS
> +int msi_device_populate_sysfs(struct device *dev);
> +void msi_device_destroy_sysfs(struct device *dev);
> +
>  const struct attribute_group **msi_populate_sysfs(struct device *dev);
>  void msi_destroy_sysfs(struct device *dev,
>  const struct attribute_group **msi_irq_groups);
>  #else
> +static inline int msi_device_populate_sysfs(struct device *dev) { return 0; }
> +static inline void msi_device_destroy_sysfs(struct device *dev) { }
> +
>  static inline const struct attribute_group **msi_populate_sysfs(struct 
> device *dev)
>  {
>   return NULL;
> @@ -393,6 +401,8 @@ enum {
>   MSI_FLAG_MUST_REACTIVATE= (1 << 5),
>   /* Is level-triggered capable, using two messages */
>   MSI_FLAG_LEVEL_CAPABLE  = (1 << 6),
> + /* Populate sysfs on alloc() and destroy it on free() */
> + MSI_FLAG_DEV_SYSFS  = (1 << 7),
>  };
>  
>  int msi_domain_set_affinity(struct irq_data *data, const struct cpumask 
> *mask,
> --- a/kernel/irq/msi.c
> +++ b/kernel/irq/msi.c
> @@ -214,6 +214,20 @@ const struct attribute_group **msi_popul
>  }
>  
>  /**
> + * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device
> + * @dev: The device(PCI, platform etc) which will get sysfs entries

Space after device

> + */
> +int msi_device_populate_sysfs(struct device *dev)
> +{
> + const struct attribute_group **group = msi_populate_sysfs(dev);
> +
> + if (IS_ERR(group))
> + return PTR_ERR(group);
> + dev->msi.data->attrs = group;
> + return 0;
> +}
> +
> +/**
>   * msi_destroy_sysfs - Destroy msi_irqs sysfs entries for devices
>   * @dev: The device(PCI, platform etc) who will remove sysfs 
> entries
>   * @msi_irq_groups:  attribute_group for device msi_irqs entries
> @@ -239,6 +253,17 @@ void msi_destroy_sysfs(struct device *de
>   kfree(msi_irq_groups);
>   }
>  }
> +
> +/**
> + * msi_device_destroy_sysfs - Destroy msi_irqs sysfs entries for a device
> + * @dev: The device(PCI, platform etc) for which to remove
> + *   sysfs entries
> + */
> +void msi_device_destroy_sysfs(struct device *dev)
> +{
> + msi_destroy_sysfs(dev, dev->msi.data->attrs);
> + dev->msi.data->attrs = NULL;
> +}
>  #endif
>  
>  #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
> @@ -686,8 +711,19 @@ int msi_domain_alloc_irqs(struct irq_dom
>  {
>   struct msi_domain_info *info = domain->host_data;
>   struct msi_domain_ops *ops = info->ops;
> + int ret;
>  
> - return ops->domain_alloc_irqs(domain, dev, nvec);
> + ret = ops->domain_alloc_irqs(domain, dev, nvec);
> + if (ret)
> + return ret;
> +
> + if (!(info->flags & MSI_FLAG_DEV_SYSFS))
> + return 0;
> +
> + ret = msi_device_populate_sysfs(dev);
> + if (ret)
> + msi_domain_free_irqs(domain, dev);
> + return ret;
>  }
>  
>  void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
> @@ -726,7 +762,9 @@ void msi_domain_free_irqs(struct irq_dom
>   struct msi_domain_info *info = domain->host_data;
>   struct msi_domain_ops *ops = info->ops;
>  
> - return ops->domain_free_irqs(domain, dev);
> + if (info->flags & MSI_FLAG_DEV_SYSFS)
> + msi_device_destroy_sysfs(dev);
> + ops->domain_free_irqs(domain, dev);
>  }
>  
>  /**
> 
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 6/9] PCI: Add pci_find_dvsec_capability to find designated VSEC

2021-10-01 Thread Jonathan Cameron
On Thu, 23 Sep 2021 10:26:44 -0700
Ben Widawsky  wrote:

> Add pci_find_dvsec_capability to locate a Designated Vendor-Specific
> Extended Capability with the specified DVSEC ID.
> 
> The Designated Vendor-Specific Extended Capability (DVSEC) allows one or
> more vendor specific capabilities that aren't tied to the vendor ID of
> the PCI component.
> 
> DVSEC is critical for both the Compute Express Link (CXL) driver as well
> as the driver for OpenCAPI coherent accelerator (OCXL).
> 
> Cc: David E. Box 
> Cc: Jonathan Cameron 
> Cc: Bjorn Helgaas 
> Cc: Dan Williams 
> Cc: linux-...@vger.kernel.org
> Cc: linuxppc-...@lists.ozlabs.org
> Cc: Andrew Donnellan 
> Cc: Lu Baolu 
> Reviewed-by: Frederic Barrat 
> Signed-off-by: Ben Widawsky 

Great to see this cleaned up.

Reviewed-by: Jonathan Cameron 

> ---
>  drivers/pci/pci.c   | 32 
>  include/linux/pci.h |  1 +
>  2 files changed, 33 insertions(+)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index ce2ab62b64cf..94ac86ff28b0 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -732,6 +732,38 @@ u16 pci_find_vsec_capability(struct pci_dev *dev, u16 
> vendor, int cap)
>  }
>  EXPORT_SYMBOL_GPL(pci_find_vsec_capability);
>  
> +/**
> + * pci_find_dvsec_capability - Find DVSEC for vendor
> + * @dev: PCI device to query
> + * @vendor: Vendor ID to match for the DVSEC
> + * @dvsec: Designated Vendor-specific capability ID
> + *
> + * If DVSEC has Vendor ID @vendor and DVSEC ID @dvsec return the capability
> + * offset in config space; otherwise return 0.
> + */
> +u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec)
> +{
> + int pos;
> +
> + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DVSEC);
> + if (!pos)
> + return 0;
> +
> + while (pos) {
> + u16 v, id;
> +
> + pci_read_config_word(dev, pos + PCI_DVSEC_HEADER1, &v);
> + pci_read_config_word(dev, pos + PCI_DVSEC_HEADER2, &id);
> + if (vendor == v && dvsec == id)
> + return pos;
> +
> + pos = pci_find_next_ext_capability(dev, pos, 
> PCI_EXT_CAP_ID_DVSEC);
> + }
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(pci_find_dvsec_capability);
> +
>  /**
>   * pci_find_parent_resource - return resource region of parent bus of given
>   * region
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index cd8aa6fce204..c93ccfa4571b 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1130,6 +1130,7 @@ u16 pci_find_ext_capability(struct pci_dev *dev, int 
> cap);
>  u16 pci_find_next_ext_capability(struct pci_dev *dev, u16 pos, int cap);
>  struct pci_bus *pci_find_next_bus(const struct pci_bus *from);
>  u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap);
> +u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec);
>  
>  u64 pci_get_dsn(struct pci_dev *dev);
>  

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] dt-bindings: Drop redundant minItems/maxItems

2021-06-16 Thread Jonathan Cameron
On Tue, 15 Jun 2021 13:15:43 -0600
Rob Herring  wrote:

> If a property has an 'items' list, then a 'minItems' or 'maxItems' with the
> same size as the list is redundant and can be dropped. Note that is DT
> schema specific behavior and not standard json-schema behavior. The tooling
> will fixup the final schema adding any unspecified minItems/maxItems.
> 
> This condition is partially checked with the meta-schema already, but
> only if both 'minItems' and 'maxItems' are equal to the 'items' length.
> An improved meta-schema is pending.
> 

...

>  .../devicetree/bindings/iio/adc/amlogic,meson-saradc.yaml   | 1 -

For this one, the fact it overrides maxItems elsewhere makes this a little
bit odd.  I guess we can get used to it being implicit.

>  .../devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml | 2 --

Acked-by: Jonathan Cameron 


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v11 10/10] iommu/arm-smmu-v3: Add stall support for platform devices

2021-01-25 Thread Jonathan Cameron
On Mon, 25 Jan 2021 12:06:51 +0100
Jean-Philippe Brucker  wrote:

> The SMMU provides a Stall model for handling page faults in platform
> devices. It is similar to PCIe PRI, but doesn't require devices to have
> their own translation cache. Instead, faulting transactions are parked
> and the OS is given a chance to fix the page tables and retry the
> transaction.
> 
> Enable stall for devices that support it (opt-in by firmware). When an
> event corresponds to a translation error, call the IOMMU fault handler.
> If the fault is recoverable, it will call us back to terminate or
> continue the stall.
> 
> To use stall device drivers need to enable IOMMU_DEV_FEAT_IOPF, which
> initializes the fault queue for the device.
> 
> Tested-by: Zhangfei Gao 
> Signed-off-by: Jean-Philippe Brucker 

Hi Jean-Phillipe, 

Just one query below.  Either fix that or tell me why you don't need it and
then I'm happy.  With that resolved

Reviewed-by: Jonathan Cameron 

> ---

> git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c 
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> index bb251cab61f3..ee66d1f4cb81 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> @@ -435,9 +435,13 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
>   return true;
>  }
>  
> -static bool arm_smmu_iopf_supported(struct arm_smmu_master *master)
> +bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master)
>  {
> - return false;
> + /* We're not keeping track of SIDs in fault events */
> + if (master->num_streams != 1)
> + return false;
> +
> + return master->stall_enabled;
>  }
>  
>  bool arm_smmu_master_sva_supported(struct arm_smmu_master *master)
> @@ -445,8 +449,8 @@ bool arm_smmu_master_sva_supported(struct arm_smmu_master 
> *master)
>   if (!(master->smmu->features & ARM_SMMU_FEAT_SVA))
>   return false;
>  
> - /* SSID and IOPF support are mandatory for the moment */
> - return master->ssid_bits && arm_smmu_iopf_supported(master);
> + /* SSID support is mandatory for the moment */
> + return master->ssid_bits;
>  }
>  
>  bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
> @@ -459,13 +463,55 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master 
> *master)
>   return enabled;
>  }
>  
> +static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master)
> +{
> + int ret;
> + struct device *dev = master->dev;
> +
> + /*
> +  * Drivers for devices supporting PRI or stall should enable IOPF first.
> +  * Others have device-specific fault handlers and don't need IOPF.
> +  */
> + if (!arm_smmu_master_iopf_supported(master))

So if we have master->iopf_enabled and this happens. Then I'm not totally sure
what prevents the disable below running its cleanup on stuff that was never
configured.

> + return 0;
> +
> + if (!master->iopf_enabled)
> + return -EINVAL;
> +
> + ret = iopf_queue_add_device(master->smmu->evtq.iopf, dev);
> + if (ret)
> + return ret;
> +
> + ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
> + if (ret) {
> + iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
> + return ret;
> + }
> + return 0;
> +}
> +
> +static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master)
> +{
> + struct device *dev = master->dev;
> +
> + if (!master->iopf_enabled)
> + return;

As above, I think you need a sanity check on

!arm_smmu_master_iopf_supported(master) before clearing the following.

I may well be missing something that stops us getting here though.

Alternative is probably to sanity check iopf_enabled = true is supported
before letting a driver set it.


> +
> + iommu_unregister_device_fault_handler(dev);
> + iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
> +}
> +
>  int arm_smmu_master_enable_sva(struct arm_smmu_master *master)
>  {
> + int ret;
> +
>   mutex_lock(&sva_lock);
> - master->sva_enabled = true;
> + ret = arm_smmu_master_sva_enable_iopf(master);
> + if (!ret)
> + master->sva_enabled = true;
>   mutex_unlock(&sva_lock);
>  
> - return 0;
> + return ret;
>  }
>  
>  int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
> @@ -476,6 +522,7 @@ int arm_smmu_master_disable_sva(struct arm_smmu_master 
> *master)
>   mutex_unlock(&sva_lock);
>   return -EBUSY;
>   }
> + 

Re: [PATCH 0/3] iommu/arm-smmu-v3: TLB invalidation for SVA

2021-01-22 Thread Jonathan Cameron
On Fri, 22 Jan 2021 12:52:55 +0100
Jean-Philippe Brucker  wrote:

> To support sharing page tables with the CPU, the SMMU can participate in
> Broadcast TLB Maintenance (BTM), where TLB invalidate instructions from
> the CPU are received by the SMMU. For platforms that do not implement BTM
> [1], it is still possible to use SVA, by sending all TLB invalidations
> through the command queue. Patch 2 implements this.
> 
> This series also enables SVA for platforms that do support BTM, as an
> intermediate step because properly supporting BTM requires cooperating
> with KVM to allocate VMIDs [2]. With BTM enabled, the SMMU applies
> broadcast invalidations by VMID to any matching TLB entry, because there
> is no distinction between private and shared VMIDs like there is for
> ASIDs. Therefore a stage-2 domain will need a VMID that doesn't conflict
> with one allocated by KVM (or use the one from the corresponding VM,
> pinned).
> 
> These patches, along with the IOPF series [3] and the quirks [4], enable
> SVA for the hisi accelerator that's already supported upstream. My quick
> performance comparison between BTM and !BTM on that platform were
> inconclusive. Doing invalidations via cmdq seemed to slightly reduce
> performance of some heavy compression jobs, but there was too much noise
> and not enough invalidations in my tests.
> 
> This series does not depend on the IOPF one [3].
> 
> [1] 
> https://lore.kernel.org/linux-iommu/by5pr12mb37641e84d516054387fee330b3...@by5pr12mb3764.namprd12.prod.outlook.com/
> [2] https://lore.kernel.org/linux-iommu/20200522101755.GA3453945@myrica/
> [3] 
> https://lore.kernel.org/linux-iommu/20210121123623.2060416-1-jean-phili...@linaro.org/
> [4] 
> https://lore.kernel.org/linux-pci/1610960316-28935-1-git-send-email-zhangfei@linaro.org/

Whole series looks good to me so FWIW
Reviewed-by: Jonathan Cameron 

Will be very nice to have mainline support for SVA with those accelerators :)

> 
> Jean-Philippe Brucker (3):
>   iommu/arm-smmu-v3: Split arm_smmu_tlb_inv_range()
>   iommu/arm-smmu-v3: Make BTM optional for SVA
>   iommu/arm-smmu-v3: Add support for VHE
> 
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |   6 +
>  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   |  14 ++-
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 104 --
>  3 files changed, 89 insertions(+), 35 deletions(-)
> 

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v10 10/10] iommu/arm-smmu-v3: Add stall support for platform devices

2021-01-22 Thread Jonathan Cameron
On Fri, 22 Jan 2021 09:51:20 +0100
Jean-Philippe Brucker  wrote:

> On Thu, Jan 21, 2021 at 07:12:36PM +0000, Jonathan Cameron wrote:
> > > @@ -2502,6 +2647,7 @@ static void arm_smmu_release_device(struct device 
> > > *dev)
> > >  
> > >   master = dev_iommu_priv_get(dev);
> > >   WARN_ON(arm_smmu_master_sva_enabled(master));
> > > + iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
> > >   arm_smmu_detach_dev(master);
> > >   arm_smmu_disable_pasid(master);
> > >   arm_smmu_remove_master(master);  
> > 
> > The lack of symmetry here bothers me a bit, but it's already true, so I 
> > guess
> > this case is fine as well.  
> 
> Normally the device driver calls iommu_dev_feat_disable(SVA) which does
> iopf_queue_remove_device(). This is just a safety net in case the device
> gets removed without the driver properly cleaning up (which will WARN as
> well) 

Ah makes sense.  Maybe it's worth a comment in the code for future generations
of tired code readers?

> 
> > 
> > ...  
> > >  
> > > @@ -2785,6 +2946,7 @@ static int arm_smmu_cmdq_init(struct 
> > > arm_smmu_device *smmu)
> > >  static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
> > >  {
> > >   int ret;
> > > + bool sva = smmu->features & ARM_SMMU_FEAT_STALLS;  
> > 
> > FEAT_SVA?  
> 
> Ugh yes, thanks. I left this as a bool instead of moving into the test
> below because the PRI patch reuses it, but I think I'll just move it down
> when resending.

Makes sense.

> 
> Thanks,
> Jean
> 
> >   
> > >  
> > >   /* cmdq */
>   ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
> > > @@ -2804,6 +2966,12 @@ static int arm_smmu_init_queues(struct 
> > > arm_smmu_device *smmu)
> > >   if (ret)
> > >   return ret;
> > >  
> > > + if (sva && smmu->features & ARM_SMMU_FEAT_STALLS) {  
> > 
> > Isn't this checking same thing twice?
> >   
> > > + smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
> > > + if (!smmu->evtq.iopf)
> > > + return -ENOMEM;
> > > + }
> > > +
> > >   /* priq */
> > >   if (!(smmu->features & ARM_SMMU_FEAT_PRI))
> > >   return 0;
> > > @@ -3718,6 +3886,7 @@ static int arm_smmu_device_remove(struct 
> > > platform_device *pdev)
> > >   iommu_device_unregister(&smmu->iommu);
> > >   iommu_device_sysfs_remove(&smmu->iommu);
> > >   arm_smmu_device_disable(smmu);
> > > + iopf_queue_free(smmu->evtq.iopf);
> > >  
> > >   return 0;
> > >  }  
> >   

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v10 10/10] iommu/arm-smmu-v3: Add stall support for platform devices

2021-01-21 Thread Jonathan Cameron
On Thu, 21 Jan 2021 13:36:24 +0100
Jean-Philippe Brucker  wrote:

> The SMMU provides a Stall model for handling page faults in platform
> devices. It is similar to PCIe PRI, but doesn't require devices to have
> their own translation cache. Instead, faulting transactions are parked
> and the OS is given a chance to fix the page tables and retry the
> transaction.
> 
> Enable stall for devices that support it (opt-in by firmware). When an
> event corresponds to a translation error, call the IOMMU fault handler.
> If the fault is recoverable, it will call us back to terminate or
> continue the stall.
> 
> To use stall device drivers need to enable IOMMU_DEV_FEAT_IOPF, which
> initializes the fault queue for the device.
> 
> Tested-by: Zhangfei Gao 
> Signed-off-by: Jean-Philippe Brucker 

One thing inline + one comment which was mostly a case of I ran
out of time to walk through why probe and release aren't symmetric...

> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  43 
>  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   |  59 +-
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 185 +-
>  3 files changed, 273 insertions(+), 14 deletions(-)
> 

...



> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index db5d6aa76c3a..af6982aca42e 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -32,6 +32,7 @@


...
>  
>   master->domain = smmu_domain;
> @@ -2484,6 +2624,11 @@ static struct iommu_device 
> *arm_smmu_probe_device(struct device *dev)
>   master->ssid_bits = min_t(u8, master->ssid_bits,
> CTXDESC_LINEAR_CDMAX);
>  
> + if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
> +  device_property_read_bool(dev, "dma-can-stall")) ||
> + smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
> + master->stall_enabled = true;
> +
>   return &master->iommu;
>  
>  err_free_master:
> @@ -2502,6 +2647,7 @@ static void arm_smmu_release_device(struct device *dev)
>  
>   master = dev_iommu_priv_get(dev);
>   WARN_ON(arm_smmu_master_sva_enabled(master));
> + iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
>   arm_smmu_detach_dev(master);
>   arm_smmu_disable_pasid(master);
>   arm_smmu_remove_master(master);

The lack of symmetry here bothers me a bit, but it's already true, so I guess
this case is fine as well.

...
>  
> @@ -2785,6 +2946,7 @@ static int arm_smmu_cmdq_init(struct arm_smmu_device 
> *smmu)
>  static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
>  {
>   int ret;
> + bool sva = smmu->features & ARM_SMMU_FEAT_STALLS;

FEAT_SVA?

>  
>   /* cmdq */
>   ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
> @@ -2804,6 +2966,12 @@ static int arm_smmu_init_queues(struct arm_smmu_device 
> *smmu)
>   if (ret)
>   return ret;
>  
> + if (sva && smmu->features & ARM_SMMU_FEAT_STALLS) {

Isn't this checking same thing twice?

> + smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
> + if (!smmu->evtq.iopf)
> + return -ENOMEM;
> + }
> +
>   /* priq */
>   if (!(smmu->features & ARM_SMMU_FEAT_PRI))
>   return 0;
> @@ -3718,6 +3886,7 @@ static int arm_smmu_device_remove(struct 
> platform_device *pdev)
>   iommu_device_unregister(&smmu->iommu);
>   iommu_device_sysfs_remove(&smmu->iommu);
>   arm_smmu_device_disable(smmu);
> + iopf_queue_free(smmu->evtq.iopf);
>  
>   return 0;
>  }

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v10 09/10] ACPI/IORT: Enable stall support for platform devices

2021-01-21 Thread Jonathan Cameron
On Thu, 21 Jan 2021 13:36:23 +0100
Jean-Philippe Brucker  wrote:

> Copy the "Stall supported" bit, that tells whether a named component
> supports stall, into the dma-can-stall device property.
> 
> Signed-off-by: Jean-Philippe Brucker 

FWIW given how simple this is :
Acked-by: Jonathan Cameron 

> ---
>  drivers/acpi/arm64/iort.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
> index c9a8bbb74b09..42820d7eb869 100644
> --- a/drivers/acpi/arm64/iort.c
> +++ b/drivers/acpi/arm64/iort.c
> @@ -968,13 +968,15 @@ static int iort_pci_iommu_init(struct pci_dev *pdev, 
> u16 alias, void *data)
>  static void iort_named_component_init(struct device *dev,
> struct acpi_iort_node *node)
>  {
> - struct property_entry props[2] = {};
> + struct property_entry props[3] = {};
>   struct acpi_iort_named_component *nc;
>  
>   nc = (struct acpi_iort_named_component *)node->node_data;
>   props[0] = PROPERTY_ENTRY_U32("pasid-num-bits",
> FIELD_GET(ACPI_IORT_NC_PASID_BITS,
>   nc->node_flags));
> + if (nc->node_flags & ACPI_IORT_NC_STALL_SUPPORTED)
> + props[1] = PROPERTY_ENTRY_BOOL("dma-can-stall");
>  
>   if (device_add_properties(dev, props))
>   dev_warn(dev, "Could not add device properties\n");

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 07/10] iommu/arm-smmu-v3: Maintain a SID->device structure

2021-01-19 Thread Jonathan Cameron
On Fri, 8 Jan 2021 15:52:15 +0100
Jean-Philippe Brucker  wrote:

> When handling faults from the event or PRI queue, we need to find the
> struct device associated with a SID. Add a rb_tree to keep track of
> SIDs.
> 
> Signed-off-by: Jean-Philippe Brucker 
One totally trivial point if you happen to be spinning again.

Acked-by: Jonathan Cameron 
with or without that.

> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  13 +-
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 161 
>  2 files changed, 144 insertions(+), 30 deletions(-)
> 

...

>  
> +static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
> +   struct arm_smmu_master *master)
> +{
> + int i;
> + int ret = 0;
> + struct arm_smmu_stream *new_stream, *cur_stream;
> + struct rb_node **new_node, *parent_node = NULL;
> + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
> +
> + master->streams = kcalloc(fwspec->num_ids,
> +   sizeof(struct arm_smmu_stream), GFP_KERNEL);
  sizeof(*master->streams)

nitpick :) Saves reviewer going to check that master->streams is of the type 
they expect.


> + if (!master->streams)
> + return -ENOMEM;
> + master->num_streams = fwspec->num_ids;
> +
> + mutex_lock(&smmu->streams_mutex);
> + for (i = 0; i < fwspec->num_ids && !ret; i++) {
> + u32 sid = fwspec->ids[i];
> +
> + new_stream = &master->streams[i];
> + new_stream->id = sid;
> + new_stream->master = master;
> +
> + /*
> +  * Check the SIDs are in range of the SMMU and our stream table
> +  */
> + if (!arm_smmu_sid_in_range(smmu, sid)) {
> + ret = -ERANGE;
> + break;
> + }
> +
> + /* Ensure l2 strtab is initialised */
> + if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
> + ret = arm_smmu_init_l2_strtab(smmu, sid);
> + if (ret)
> + break;
> + }
> +
> + /* Insert into SID tree */
> + new_node = &(smmu->streams.rb_node);
> + while (*new_node) {
> + cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
> +   node);
> + parent_node = *new_node;
> + if (cur_stream->id > new_stream->id) {
> + new_node = &((*new_node)->rb_left);
> + } else if (cur_stream->id < new_stream->id) {
> + new_node = &((*new_node)->rb_right);
> + } else {
> + dev_warn(master->dev,
> +  "stream %u already in tree\n",
> +  cur_stream->id);
> + ret = -EINVAL;
> + break;
> + }
> + }
> +
> + if (!ret) {
> + rb_link_node(&new_stream->node, parent_node, new_node);
> + rb_insert_color(&new_stream->node, &smmu->streams);
> + }
> + }
> +
> + if (ret) {
> + for (; i > 0; i--)
> + rb_erase(&master->streams[i].node, &smmu->streams);
> + kfree(master->streams);
> + }
> + mutex_unlock(&smmu->streams_mutex);
> +
> + return ret;
> +}
...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 06/10] iommu: Add a page fault handler

2021-01-19 Thread Jonathan Cameron
On Fri, 8 Jan 2021 15:52:14 +0100
Jean-Philippe Brucker  wrote:

> Some systems allow devices to handle I/O Page Faults in the core mm. For
> example systems implementing the PCIe PRI extension or Arm SMMU stall
> model. Infrastructure for reporting these recoverable page faults was
> added to the IOMMU core by commit 0c830e6b3282 ("iommu: Introduce device
> fault report API"). Add a page fault handler for host SVA.
> 
> IOMMU driver can now instantiate several fault workqueues and link them
> to IOPF-capable devices. Drivers can choose between a single global
> workqueue, one per IOMMU device, one per low-level fault queue, one per
> domain, etc.
> 
> When it receives a fault event, supposedly in an IRQ handler, the IOMMU

Why "supposedly"? Do you mean "most commonly" 

> driver reports the fault using iommu_report_device_fault(), which calls
> the registered handler. The page fault handler then calls the mm fault
> handler, and reports either success or failure with iommu_page_response().
> When the handler succeeds, the IOMMU retries the access.

For PRI that description is perhaps a bit misleading.  IIRC the IOMMU
will only retry when it gets a new ATS query.

> 
> The iopf_param pointer could be embedded into iommu_fault_param. But
> putting iopf_param into the iommu_param structure allows us not to care
> about ordering between calls to iopf_queue_add_device() and
> iommu_register_device_fault_handler().
> 
> Signed-off-by: Jean-Philippe Brucker 

One really minor inconsistency inline that made me look twice..
With or without that tided up FWIW.

Reviewed-by: Jonathan Cameron 

...

> +/**
> + * iopf_queue_add_device - Add producer to the fault queue
> + * @queue: IOPF queue
> + * @dev: device to add
> + *
> + * Return: 0 on success and <0 on error.
> + */
> +int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
> +{
> + int ret = -EBUSY;
> + struct iopf_device_param *iopf_param;
> + struct dev_iommu *param = dev->iommu;
> +
> + if (!param)
> + return -ENODEV;
> +
> + iopf_param = kzalloc(sizeof(*iopf_param), GFP_KERNEL);
> + if (!iopf_param)
> + return -ENOMEM;
> +
> + INIT_LIST_HEAD(&iopf_param->partial);
> + iopf_param->queue = queue;
> + iopf_param->dev = dev;
> +
> + mutex_lock(&queue->lock);
> + mutex_lock(&param->lock);
> + if (!param->iopf_param) {
> + list_add(&iopf_param->queue_list, &queue->devices);
> + param->iopf_param = iopf_param;
> + ret = 0;
> + }
> + mutex_unlock(&param->lock);
> + mutex_unlock(&queue->lock);
> +
> + if (ret)
> + kfree(iopf_param);
> +
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(iopf_queue_add_device);
> +
> +/**
> + * iopf_queue_remove_device - Remove producer from fault queue
> + * @queue: IOPF queue
> + * @dev: device to remove
> + *
> + * Caller makes sure that no more faults are reported for this device.
> + *
> + * Return: 0 on success and <0 on error.
> + */
> +int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
> +{
> + int ret = 0;
I'm not that keen that the logic of ret is basically the opposite
of that in the previous function.
There we had it init to error then set to good, here we do the opposite.

Not that important which but right now it just made me do a double take
whilst reading.

> + struct iopf_fault *iopf, *next;
> + struct iopf_device_param *iopf_param;
> + struct dev_iommu *param = dev->iommu;
> +
> + if (!param || !queue)
> + return -EINVAL;
> +
> + mutex_lock(&queue->lock);
> + mutex_lock(&param->lock);
> + iopf_param = param->iopf_param;
> + if (iopf_param && iopf_param->queue == queue) {
> + list_del(&iopf_param->queue_list);
> + param->iopf_param = NULL;
> + } else {
> + ret = -EINVAL;
> + }
> + mutex_unlock(&param->lock);
> + mutex_unlock(&queue->lock);
> + if (ret)
> + return ret;
> +
> + /* Just in case some faults are still stuck */
> + list_for_each_entry_safe(iopf, next, &iopf_param->partial, list)
> + kfree(iopf);
> +
> + kfree(iopf_param);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(iopf_queue_remove_device);
> +

...

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 05/10] uacce: Enable IOMMU_DEV_FEAT_IOPF

2021-01-19 Thread Jonathan Cameron
On Fri, 8 Jan 2021 15:52:13 +0100
Jean-Philippe Brucker  wrote:

> The IOPF (I/O Page Fault) feature is now enabled independently from the
> SVA feature, because some IOPF implementations are device-specific and
> do not require IOMMU support for PCIe PRI or Arm SMMU stall.
> 
> Enable IOPF unconditionally when enabling SVA for now. In the future, if
> a device driver implementing a uacce interface doesn't need IOPF
> support, it will need to tell the uacce module, for example with a new
> flag.
> 
> Signed-off-by: Jean-Philippe Brucker 
Hi Jean-Philippe,

A minor suggestion inline but I'm not that bothered so either way
looks good to me.

> ---
> Cc: Arnd Bergmann 
> Cc: Greg Kroah-Hartman 
> Cc: Zhangfei Gao 
> Cc: Zhou Wang 
> ---
>  drivers/misc/uacce/uacce.c | 32 +---
>  1 file changed, 25 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
> index d07af4edfcac..41ef1eb62a14 100644
> --- a/drivers/misc/uacce/uacce.c
> +++ b/drivers/misc/uacce/uacce.c
> @@ -385,6 +385,24 @@ static void uacce_release(struct device *dev)
>   kfree(uacce);
>  }
>  
> +static unsigned int uacce_enable_sva(struct device *parent, unsigned int 
> flags)
> +{
> + if (!(flags & UACCE_DEV_SVA))
> + return flags;
> +
> + flags &= ~UACCE_DEV_SVA;
> +
> + if (iommu_dev_enable_feature(parent, IOMMU_DEV_FEAT_IOPF))
> + return flags;
> +
> + if (iommu_dev_enable_feature(parent, IOMMU_DEV_FEAT_SVA)) {
> + iommu_dev_disable_feature(parent, IOMMU_DEV_FEAT_IOPF);
> + return flags;
> + }
> +
> + return flags | UACCE_DEV_SVA;
> +}

I'm a great fan of paired enable / disable functions.
Whilst it would be trivial, maybe it is worth introducing

uacce_disable_sva()?
Also make that do the flags check internally to make it match
up with the enable path.


> +
>  /**
>   * uacce_alloc() - alloc an accelerator
>   * @parent: pointer of uacce parent device
> @@ -404,11 +422,7 @@ struct uacce_device *uacce_alloc(struct device *parent,
>   if (!uacce)
>   return ERR_PTR(-ENOMEM);
>  
> - if (flags & UACCE_DEV_SVA) {
> - ret = iommu_dev_enable_feature(parent, IOMMU_DEV_FEAT_SVA);
> - if (ret)
> - flags &= ~UACCE_DEV_SVA;
> - }
> + flags = uacce_enable_sva(parent, flags);
>  
>   uacce->parent = parent;
>   uacce->flags = flags;
> @@ -432,8 +446,10 @@ struct uacce_device *uacce_alloc(struct device *parent,
>   return uacce;
>  
>  err_with_uacce:
> - if (flags & UACCE_DEV_SVA)
> + if (flags & UACCE_DEV_SVA) {
>   iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_SVA);
> + iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_IOPF);
> + }
>   kfree(uacce);
>   return ERR_PTR(ret);
>  }
> @@ -487,8 +503,10 @@ void uacce_remove(struct uacce_device *uacce)
>   mutex_unlock(&uacce->queues_lock);
>  
>   /* disable sva now since no opened queues */
> - if (uacce->flags & UACCE_DEV_SVA)
> + if (uacce->flags & UACCE_DEV_SVA) {
>   iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_SVA);
> + iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_IOPF);
> + }
>  
>   if (uacce->cdev)
>   cdev_device_del(uacce->cdev, &uacce->dev);

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 02/10] iommu/arm-smmu-v3: Use device properties for pasid-num-bits

2021-01-19 Thread Jonathan Cameron
On Fri, 8 Jan 2021 15:52:10 +0100
Jean-Philippe Brucker  wrote:

> The pasid-num-bits property shouldn't need a dedicated fwspec field,
> it's a job for device properties. Add properties for IORT, and access
> the number of PASID bits using device_property_read_u32().
> 
> Suggested-by: Robin Murphy 
> Signed-off-by: Jean-Philippe Brucker 

Nice

Acked-by: Jonathan Cameron 

Looks like we are fine not checking for missing properties because
ssid_bits == 0 corresponds to pasid off anyway.


> ---
>  include/linux/iommu.h   |  2 --
>  drivers/acpi/arm64/iort.c   | 13 +++--
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  3 ++-
>  drivers/iommu/of_iommu.c|  5 -
>  4 files changed, 9 insertions(+), 14 deletions(-)
> 
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 26bcde5e7746..583c734b2e87 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -570,7 +570,6 @@ struct iommu_group *fsl_mc_device_group(struct device 
> *dev);
>   * struct iommu_fwspec - per-device IOMMU instance data
>   * @ops: ops for this device's IOMMU
>   * @iommu_fwnode: firmware handle for this device's IOMMU
> - * @num_pasid_bits: number of PASID bits supported by this device
>   * @num_ids: number of associated device IDs
>   * @ids: IDs which this device may present to the IOMMU
>   */
> @@ -578,7 +577,6 @@ struct iommu_fwspec {
>   const struct iommu_ops  *ops;
>   struct fwnode_handle*iommu_fwnode;
>   u32 flags;
> - u32 num_pasid_bits;
>   unsigned intnum_ids;
>   u32 ids[];
>  };
> diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
> index d4eac6d7e9fb..c9a8bbb74b09 100644
> --- a/drivers/acpi/arm64/iort.c
> +++ b/drivers/acpi/arm64/iort.c
> @@ -968,15 +968,16 @@ static int iort_pci_iommu_init(struct pci_dev *pdev, 
> u16 alias, void *data)
>  static void iort_named_component_init(struct device *dev,
> struct acpi_iort_node *node)
>  {
> + struct property_entry props[2] = {};
>   struct acpi_iort_named_component *nc;
> - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> -
> - if (!fwspec)
> - return;
>  
>   nc = (struct acpi_iort_named_component *)node->node_data;
> - fwspec->num_pasid_bits = FIELD_GET(ACPI_IORT_NC_PASID_BITS,
> -nc->node_flags);
> + props[0] = PROPERTY_ENTRY_U32("pasid-num-bits",
> +   FIELD_GET(ACPI_IORT_NC_PASID_BITS,
> + nc->node_flags));
> +
> + if (device_add_properties(dev, props))
> + dev_warn(dev, "Could not add device properties\n");
>  }
>  
>  static int iort_nc_iommu_map(struct device *dev, struct acpi_iort_node *node)
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 8ca7415d785d..6a53b4edf054 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -2366,7 +2366,8 @@ static struct iommu_device 
> *arm_smmu_probe_device(struct device *dev)
>   }
>   }
>  
> - master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
> + device_property_read_u32(dev, "pasid-num-bits", >ssid_bits);
> + master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
>  
>   /*
>* Note that PASID must be enabled before, and disabled after ATS:
> diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
> index e505b9130a1c..a9d2df001149 100644
> --- a/drivers/iommu/of_iommu.c
> +++ b/drivers/iommu/of_iommu.c
> @@ -210,11 +210,6 @@ const struct iommu_ops *of_iommu_configure(struct device 
> *dev,
>of_pci_iommu_init, &info);
>   } else {
>   err = of_iommu_configure_device(master_np, dev, id);
> -
> - fwspec = dev_iommu_fwspec_get(dev);
> - if (!err && fwspec)
> - of_property_read_u32(master_np, "pasid-num-bits",
> -  &fwspec->num_pasid_bits);
>   }
>  
>   /*

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 01/10] iommu: Remove obsolete comment

2021-01-19 Thread Jonathan Cameron
On Fri, 8 Jan 2021 15:52:09 +0100
Jean-Philippe Brucker  wrote:

> Commit 986d5ecc5699 ("iommu: Move fwspec->iommu_priv to struct
> dev_iommu") removed iommu_priv from fwspec. Update the struct doc.
> 
> Signed-off-by: Jean-Philippe Brucker 

Jonathan


> ---
>  include/linux/iommu.h | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index b3f0e2018c62..26bcde5e7746 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -570,7 +570,6 @@ struct iommu_group *fsl_mc_device_group(struct device 
> *dev);
>   * struct iommu_fwspec - per-device IOMMU instance data
>   * @ops: ops for this device's IOMMU
>   * @iommu_fwnode: firmware handle for this device's IOMMU
> - * @iommu_priv: IOMMU driver private data for this device
>   * @num_pasid_bits: number of PASID bits supported by this device
>   * @num_ids: number of associated device IDs
>   * @ids: IDs which this device may present to the IOMMU

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 14/14] iommu/amd: Adopt IO page table framework

2020-10-05 Thread Jonathan Cameron
On Sun, 4 Oct 2020 01:45:49 +
Suravee Suthikulpanit  wrote:

> Switch to using IO page table framework for AMD IOMMU v1 page table.
> 
> Signed-off-by: Suravee Suthikulpanit 

One minor thing inline.


> ---
>  drivers/iommu/amd/iommu.c | 26 ++
>  1 file changed, 26 insertions(+)
> 
> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
> index 77f44b927ae7..6f8316206fb8 100644
> --- a/drivers/iommu/amd/iommu.c
> +++ b/drivers/iommu/amd/iommu.c
> @@ -32,6 +32,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -1573,6 +1574,22 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev)
>   return ret;
>  }
>  
> +struct io_pgtable_ops *
> +amd_iommu_setup_io_pgtable_ops(struct iommu_dev_data *dev_data,
> +struct protection_domain *domain)
> +{
> + struct amd_iommu *iommu = amd_iommu_rlookup_table[dev_data->devid];
> +
> + domain->iop.pgtbl_cfg = (struct io_pgtable_cfg) {
> + .pgsize_bitmap  = AMD_IOMMU_PGSIZES,
> + .ias= IOMMU_IN_ADDR_BIT_SIZE,
> + .oas= IOMMU_OUT_ADDR_BIT_SIZE,
> + .iommu_dev  = >dev->dev,
> + };
> +
> + return alloc_io_pgtable_ops(AMD_IOMMU_V1, >iop.pgtbl_cfg, 
> domain);
> +}
> +
>  /*
>   * If a device is not yet associated with a domain, this function makes the
>   * device visible in the domain
> @@ -1580,6 +1597,7 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev)
>  static int attach_device(struct device *dev,
>struct protection_domain *domain)
>  {
> + struct io_pgtable_ops *pgtbl_ops;
>   struct iommu_dev_data *dev_data;
>   struct pci_dev *pdev;
>   unsigned long flags;
> @@ -1623,6 +1641,12 @@ static int attach_device(struct device *dev,
>  skip_ats_check:
>   ret = 0;
>  
> + pgtbl_ops = amd_iommu_setup_io_pgtable_ops(dev_data, domain);
> + if (!pgtbl_ops) {

Perhaps cleaner to not store in a local variable if you aren't going to use it?

if (!amd_iommu_setup_io_pgtable_ops(dev_data, domain)) {

> + ret = -ENOMEM;
> + goto out;
> + }
> +
>   do_attach(dev_data, domain);
>  
>   /*
> @@ -1958,6 +1982,8 @@ static void amd_iommu_domain_free(struct iommu_domain 
> *dom)
>   if (domain->dev_cnt > 0)
>   cleanup_domain(domain);
>  
> + free_io_pgtable_ops(&domain->iop.iop.ops);
> +
>   BUG_ON(domain->dev_cnt != 0);
>  
>   if (!dom)


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v10 00/13] iommu: Shared Virtual Addressing for SMMUv3 (PT sharing part)

2020-09-18 Thread Jonathan Cameron
On Fri, 18 Sep 2020 12:18:40 +0200
Jean-Philippe Brucker  wrote:

> This is version 10 of the page table sharing support for Arm SMMUv3.
> Patch 1 still needs an Ack from mm maintainers. However patches 4-11 do
> not depend on it, and could get merged for v5.10 regardless.

Hi Jean-Philippe,

It's been a rather long time since I last looked at this stuff (about v4
I think!), but I just had a read through this set and they all look good to me.

FWIW:
Reviewed-by: Jonathan Cameron 

Thanks,

Jonathan

> 
> v10:
> * Fix patches 3, 8, 9 and 10 following Eric's review.
> 
> v9 resend: 
> https://lore.kernel.org/linux-iommu/20200817171558.325917-1-jean-phili...@linaro.org/
> * Rebased onto v5.9-rc1. Moved to drivers/iommu/arm/arm-smmu-v3/ as a
>   result.
> 
> v9: 
> https://lore.kernel.org/linux-iommu/20200723145724.3014766-1-jean-phili...@linaro.org/
> * Moved most of the SVA code to arm-smmu-v3-sva.c. This required moving
>   struct definitions and macros to arm-smmu-v3.h (patch 7), hence the
>   new 700 insertions/deletions in the diffstat.
> * Updated patches 4 and 8 following review.
> * Fixed a bug when replacing a private ASID.
> 
> v8: 
> https://lore.kernel.org/linux-iommu/20200618155125.1548969-1-jean-phili...@linaro.org/
> * Split SVA series into three parts: page table sharing, I/O page
>   faults, and additional features (DVM, VHE and HTTU).
> 
> Fenghua Yu (1):
>   mm: Define pasid in mm
> 
> Jean-Philippe Brucker (12):
>   iommu/ioasid: Add ioasid references
>   iommu/sva: Add PASID helpers
>   arm64: mm: Pin down ASIDs for sharing mm with devices
>   iommu/io-pgtable-arm: Move some definitions to a header
>   arm64: cpufeature: Export symbol read_sanitised_ftr_reg()
>   iommu/arm-smmu-v3: Move definitions to a header
>   iommu/arm-smmu-v3: Share process page tables
>   iommu/arm-smmu-v3: Seize private ASID
>   iommu/arm-smmu-v3: Check for SVA features
>   iommu/arm-smmu-v3: Add SVA device feature
>   iommu/arm-smmu-v3: Implement iommu_sva_bind/unbind()
>   iommu/arm-smmu-v3: Hook up ATC invalidation to mm ops
> 
>  drivers/iommu/Kconfig |  17 +
>  drivers/iommu/Makefile|   1 +
>  drivers/iommu/arm/arm-smmu-v3/Makefile|   5 +-
>  arch/arm64/include/asm/mmu.h  |   3 +
>  arch/arm64/include/asm/mmu_context.h  |  11 +-
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   | 752 +++
>  drivers/iommu/io-pgtable-arm.h|  30 +
>  drivers/iommu/iommu-sva-lib.h |  15 +
>  include/linux/ioasid.h|  10 +-
>  include/linux/mm_types.h  |   4 +
>  arch/arm64/kernel/cpufeature.c|   1 +
>  arch/arm64/mm/context.c   | 105 ++-
>  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   | 488 ++
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 860 --
>  drivers/iommu/intel/iommu.c   |   4 +-
>  drivers/iommu/intel/svm.c |   6 +-
>  drivers/iommu/io-pgtable-arm.c|  27 +-
>  drivers/iommu/ioasid.c|  38 +-
>  drivers/iommu/iommu-sva-lib.c |  86 ++
>  MAINTAINERS   |   3 +-
>  20 files changed, 1731 insertions(+), 735 deletions(-)
>  create mode 100644 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>  create mode 100644 drivers/iommu/io-pgtable-arm.h
>  create mode 100644 drivers/iommu/iommu-sva-lib.h
>  create mode 100644 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
>  create mode 100644 drivers/iommu/iommu-sva-lib.c
> 


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v10 09/13] iommu/arm-smmu-v3: Seize private ASID

2020-09-18 Thread Jonathan Cameron
On Fri, 18 Sep 2020 12:18:49 +0200
Jean-Philippe Brucker  wrote:

> The SMMU has a single ASID space, the union of shared and private ASID
> sets. This means that the SMMU driver competes with the arch allocator
> for ASIDs. Shared ASIDs are those of Linux processes, allocated by the
> arch, and contribute in broadcast TLB maintenance. Private ASIDs are
> allocated by the SMMU driver and used for "classic" map/unmap DMA. They
> require command-queue TLB invalidations.
> 
> When we pin down an mm_context and get an ASID that is already in use by
> the SMMU, it belongs to a private context. We used to simply abort the
> bind, but this is unfair to users that would be unable to bind a few
> seemingly random processes. Try to allocate a new private ASID for the
> context, and make the old ASID shared.
> 
> Signed-off-by: Jean-Philippe Brucker 
Hi,

One totally trivial comment inline that might have ever so slightly
improved reviewability of the patch.

However it is only minor so don't bother respinning for that.

Thanks,

Jonathan

> ---
> v10: fix ASID limit, small comment update
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  3 ++
>  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   | 35 +--
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 34 +++---
>  3 files changed, 57 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h 
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 6b06a6f19604..90c08f156b43 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -678,6 +678,9 @@ struct arm_smmu_domain {
>  extern struct xarray arm_smmu_asid_xa;
>  extern struct mutex arm_smmu_asid_lock;
>  
> +int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
> + struct arm_smmu_ctx_desc *cd);
> +void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
>  bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd);
>  
>  #endif /* _ARM_SMMU_V3_H */
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c 
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> index 6c1113059632..ef3fcfa72187 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> @@ -10,10 +10,18 @@
>  #include "arm-smmu-v3.h"
>  #include "../../io-pgtable-arm.h"
>  
> +/*
> + * Check if the CPU ASID is available on the SMMU side. If a private context
> + * descriptor is using it, try to replace it.
> + */
>  static struct arm_smmu_ctx_desc *
>  arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
>  {
> + int ret;
> + u32 new_asid;
>   struct arm_smmu_ctx_desc *cd;
> + struct arm_smmu_device *smmu;
> + struct arm_smmu_domain *smmu_domain;
>  
>   cd = xa_load(&arm_smmu_asid_xa, asid);
>   if (!cd)
> @@ -27,8 +35,31 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
>   return cd;
>   }
>  
> - /* Ouch, ASID is already in use for a private cd. */
> - return ERR_PTR(-EBUSY);
> + smmu_domain = container_of(cd, struct arm_smmu_domain, s1_cfg.cd);
> + smmu = smmu_domain->smmu;
> +
> + ret = xa_alloc(&arm_smmu_asid_xa, &new_asid, cd,
> +XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
> + if (ret)
> + return ERR_PTR(-ENOSPC);
> + /*
> +  * Race with unmap: TLB invalidations will start targeting the new ASID,
> +  * which isn't assigned yet. We'll do an invalidate-all on the old ASID
> +  * later, so it doesn't matter.
> +  */
> + cd->asid = new_asid;
> + /*
> +  * Update ASID and invalidate CD in all associated masters. There will
> +  * be some overlap between use of both ASIDs, until we invalidate the
> +  * TLB.
> +  */
> + arm_smmu_write_ctx_desc(smmu_domain, 0, cd);
> +
> + /* Invalidate TLB entries previously associated with that context */
> + arm_smmu_tlb_inv_asid(smmu, asid);
> +
> + xa_erase(&arm_smmu_asid_xa, asid);
> + return NULL;
>  }
>  
>  __maybe_unused
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 19af27fd183b..e99ebdd4c841 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -872,6 +872,17 @@ static int arm_smmu_cmdq_batch_submit(struct 
> arm_smmu_device *smmu,
>  }
>  
>  /* Context descriptor manipulation functions */
> +void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
> +{
> + struct arm_smmu_cmdq_ent cmd = {
> + .opcode = CMDQ_OP_TLBI_NH_ASID,
> + .tlbi.asid = asid,
> + };
> +
> + arm_smmu_cmdq_issue_cmd(smmu, &cmd);
> + arm_smmu_cmdq_issue_sync(smmu);
> +}
> +
>  static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
>int ssid, bool leaf)
>  {
> @@ -952,8 +963,8 @@ static __le64 *arm_smmu_get_cd_ptr(struct 

Re: [PATCH 04/15] arm64: numa: simplify dummy_numa_init()

2020-07-29 Thread Jonathan Cameron
On Tue, 28 Jul 2020 08:11:42 +0300
Mike Rapoport  wrote:

> From: Mike Rapoport 
> 
> dummy_numa_init() loops over memblock.memory and passes nid=0 to
> numa_add_memblk() which essentially wraps memblock_set_node(). However,
> memblock_set_node() can cope with entire memory span itself, so the loop
> over memblock.memory regions is redundant.
> 
> Replace the loop with a single call to memblock_set_node() to the entire
> memory.

Hi Mike,

I had a similar patch I was going to post shortly so can add a bit more
on the advantages of this one.

Beyond cleaning up, it also fixes an issue with a buggy ACPI firmware in which 
the SRAT
table covers some but not all of the memory in the EFI memory map.  Stealing 
bits
from the draft cover letter I had for that...

> This issue can be easily triggered by having an SRAT table which fails
> to cover all elements of the EFI memory map.
> 
> This firmware error is detected and a warning printed. e.g.
> "NUMA: Warning: invalid memblk node 64 [mem 0x24000-0x27fff]"
> At that point we fall back to dummy_numa_init().
> 
> However, the failed ACPI init has left us with our memblocks all broken
> up as we split them when trying to assign them to NUMA nodes.
> 
> We then iterate over the memblocks and add them to node 0.
> 
> for_each_memblock(memory, mblk) {
>   ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size);
>   if (!ret)
>   continue;
>   pr_err("NUMA init failed\n");
>   return ret;
> }
> 
> numa_add_memblk() calls memblock_set_node() which merges regions that
> were previously split up during the earlier attempt to add them to different
> nodes during parsing of SRAT.
> 
> This means elements are moved in the memblock array and we can end up
> in a different memblock after the call to numa_add_memblk().
> Result is:
> 
> Unable to handle kernel paging request at virtual address 3a40
> Mem abort info:
>   ESR = 0x9604
>   EC = 0x25: DABT (current EL), IL = 32 bits
>   SET = 0, FnV = 0
>   EA = 0, S1PTW = 0
> Data abort info:
>   ISV = 0, ISS = 0x0004
>   CM = 0, WnR = 0
> [3a40] user address but active_mm is swapper
> Internal error: Oops: 9604 [#1] PREEMPT SMP
> 
> ...
> 
> Call trace:
>   sparse_init_nid+0x5c/0x2b0
>   sparse_init+0x138/0x170
>   bootmem_init+0x80/0xe0
>   setup_arch+0x2a0/0x5fc
>   start_kernel+0x8c/0x648
> 
> As an illustrative example:
> EFI table has one block of memory.
> memblks[0] = [0...0x2f]  so we start with a single memblock.
> 
> SRAT has
> [0x00...0x0f] in node 0
> [0x10...0x1f] in node 1
> but no entry covering 
> [0x20...0x2f].
> 
> Whilst parsing SRAT the single memblock is broken into 3.
> memblks[0] = [0x00...0x0f] in node 0
> memblks[1] = [0x10...0x1f] in node 1
> memblks[2] = [0x20...0x2f] in node MAX_NUM_NODES (invalid value)
> 
> A sanity check parse then detects the invalid section and acpi_numa_init
> fails.  We then fall back to the dummy path.
> 
> That iterates over the memblocks.  We'll use i an index in the array of 
> memblocks
> 
> i = 0;
> memblks[0] = [0x00...0x0f] set to node0.
>merge doesn't do anything because the neighbouring memblock is still in 
> node1.
> 
> i = 1
> memblks[1] = [0x10...0x1f] set to node 0.
>merge combines memblock 0 and 1 to give a new set of memblocks.
> 
> memblks[0] = [0x00..0x1f] in node 0
> memblks[1] = [0x20..0x2f] in node MAX_NUM_NODES.
> 
> i = 2 off the end of the now reduced array of memblocks, so exit the loop.
> (if we restart the loop here everything will be fine).
> 
> Later sparse_init_nid tries to use the node of the second memblock to index
> somethings and boom.


> 
> Signed-off-by: Mike Rapoport 

Acked-by: Jonathan Cameron 

> ---
>  arch/arm64/mm/numa.c | 13 +
>  1 file changed, 5 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
> index aafcee3e3f7e..0cbdbcc885fb 100644
> --- a/arch/arm64/mm/numa.c
> +++ b/arch/arm64/mm/numa.c
> @@ -423,19 +423,16 @@ static int __init numa_init(int (*init_func)(void))
>   */
>  static int __init dummy_numa_init(void)
>  {
> + phys_addr_t start = memblock_start_of_DRAM();
> + phys_addr_t end = memblock_end_of_DRAM();
>   int ret;
> - struct memblock_region *mblk;
>  
>   if (numa_off)
>   pr_info("NUMA disabled\n"); /* Forced off on command line. */
> - pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n",
> - memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1);
> -
> - for_each_memblock(memory, mblk) {
> - ret = numa_a

Re: [PATCH] iommu/arm-smmu-v3: expose numa_node attribute to users in sysfs

2020-07-06 Thread Jonathan Cameron


+CC Brice.  

On Sun, 5 Jul 2020 09:53:58 +
"Song Bao Hua (Barry Song)"  wrote:

> > -Original Message-
> > From: Will Deacon [mailto:w...@kernel.org]
> > Sent: Saturday, July 4, 2020 4:22 AM
> > To: Song Bao Hua (Barry Song) 
> > Cc: robin.mur...@arm.com; h...@lst.de; m.szyprow...@samsung.com;
> > iommu@lists.linux-foundation.org; linux-arm-ker...@lists.infradead.org;
> > Linuxarm 
> > Subject: Re: [PATCH] iommu/arm-smmu-v3: expose numa_node attribute to
> > users in sysfs
> > 
> > On Sat, May 30, 2020 at 09:15:05PM +1200, Barry Song wrote:  
> > > As tests show the latency of dma_unmap can increase dramatically while
> > > calling them cross NUMA nodes, especially cross CPU packages, eg.
> > > 300ns vs 800ns while waiting for the completion of CMD_SYNC in an
> > > empty command queue. The large latency causing by remote node will
> > > in turn make contention of the command queue more serious, and enlarge
> > > the latency of DMA users within local NUMA nodes.
> > >
> > > Users might intend to enforce NUMA locality with the consideration of
> > > the position of SMMU. The patch provides minor benefit by presenting
> > > this information to users directly, as they might want to know it without
> > > checking hardware spec at all.  
> > 
> > I don't think that's a very good reason to expose things to userspace.
> > I know sysfs shouldn't be treated as ABI, but the grim reality is that
> > once somebody relies on this stuff then we can't change it, so I'd
> > rather avoid exposing it unless it's absolutely necessary.  
> 
> Will, thanks for taking a look!
> 
> I am not sure if it is absolutely necessary, but it is useful to users. The 
> whole story started
> from some users who wanted to know the hardware topology very clear by 
> reading some
> sysfs node just like they are able to do that for pci devices. The intention 
> is that users can
> know hardware topology of various devices easily from linux since they maybe 
> don't know
> all the hardware details.
> 
> For pci devices, kernel has done that. And there are some other drivers out 
> of pci
> exposing numa_node as well. It seems it is hard to say it is absolutely 
> necessary
> for them too since sysfs shouldn't be treated as ABI. 
Brice,

Given hwloc is probably the most demanding user of topology information
currently...

How useful would this info be for hwloc and hwloc users?
Sort of feels like it might be useful in some cases.

The very brief description of what we have here is exposing the numa node
of an IOMMU.  The discussion also diverted into whether it just makes sense
to expose this for all platform devices or even do it at the device level.

Jonathan


> 
> I got some input from Linux users who also wanted to know the numa node for
> other devices which are not PCI, for example, platform devices. And I thought 
> the
> requirement is kind of reasonable. So I also had another patch to generally 
> support
> this kind of requirements, with the below patch, this smmu patch is not 
> necessary
> any more:
> https://lkml.org/lkml/2020/6/18/1257
> 
> for platform device created by ARM ACPI/IORT and general 
> acpi_create_platform_device()
> drivers/acpi/scan.c:
> static void acpi_default_enumeration(struct acpi_device *device)
> {
>   ...
>   if (!device->flags.enumeration_by_parent) {
>   acpi_create_platform_device(device, NULL);
>   acpi_device_set_enumerated(device);
>   }
> }
> 
> struct platform_device *acpi_create_platform_device(struct acpi_device *adev,
>   struct property_entry *properties)
> {
>   ...
> 
>   pdev = platform_device_register_full();
>   if (IS_ERR(pdev))
>   ...
>   else {
>   set_dev_node(&pdev->dev, acpi_get_node(adev->handle));
>   ...
>   }
>   ...
> }
> numa_node is set for this kind of devices.
> 
> Anyway, just want to explain to you the background some people want to know 
> the 
> hardware topology from Linux in same simple way. And it seems it is a 
> reasonable
> requirement to me :-)
> 
> > 
> > Thanks,
> > 
> > Will  
> 
> Thanks
> barry
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 1/3] docs: IOMMU user API

2020-06-11 Thread Jonathan Cameron
On Wed, 10 Jun 2020 21:12:13 -0700
Jacob Pan  wrote:

> IOMMU UAPI is newly introduced to support communications between guest
> virtual IOMMU and host IOMMU. There has been lots of discussions on how
> it should work with VFIO UAPI and userspace in general.
> 
> This document is indended to clarify the UAPI design and usage. The
> mechenics of how future extensions should be achieved are also covered

mechanics 

> in this documentation.
> 
> Signed-off-by: Liu Yi L 
> Signed-off-by: Jacob Pan 
Mostly seems sensible.  A few comments / queries inline.

Jonathan

> ---
>  Documentation/userspace-api/iommu.rst | 210 
> ++
>  1 file changed, 210 insertions(+)
>  create mode 100644 Documentation/userspace-api/iommu.rst
> 
> diff --git a/Documentation/userspace-api/iommu.rst 
> b/Documentation/userspace-api/iommu.rst
> new file mode 100644
> index ..e95dc5a04a41
> --- /dev/null
> +++ b/Documentation/userspace-api/iommu.rst
> @@ -0,0 +1,210 @@
> +.. SPDX-License-Identifier: GPL-2.0
> +.. iommu:
> +
> +=
> +IOMMU Userspace API
> +=
> +
> +IOMMU UAPI is used for virtualization cases where communications are
> +needed between physical and virtual IOMMU drivers. For native
> +usage, IOMMU is a system device which does not need to communicate
> +with user space directly.
> +
> +The primary use cases are guest Shared Virtual Address (SVA) and
> +guest IO virtual address (IOVA), wherein virtual IOMMU (vIOMMU) is

wherein _a_ virtual IOMMU 

> +required to communicate with the physical IOMMU in the host.
> +
> +.. contents:: :local:
> +
> +Functionalities
> +
> +Communications of user and kernel involve both directions. The
> +supported user-kernel APIs are as follows:
> +
> +1. Alloc/Free PASID
> +2. Bind/unbind guest PASID (e.g. Intel VT-d)
> +3. Bind/unbind guest PASID table (e.g. ARM sMMU)
> +4. Invalidate IOMMU caches
> +5. Service page request
> +
> +Requirements
> +
> +The IOMMU UAPIs are generic and extensible to meet the following
> +requirements:
> +
> +1. Emulated and para-virtualised vIOMMUs
> +2. Multiple vendors (Intel VT-d, ARM sMMU, etc.)
> +3. Extensions to the UAPI shall not break existing user space
> +
> +Interfaces
> +
> +Although the data structures defined in IOMMU UAPI are self-contained,
> +there is no user API functions introduced. Instead, IOMMU UAPI is
> +designed to work with existing user driver frameworks such as VFIO.
> +
> +Extension Rules & Precautions
> +-
> +When IOMMU UAPI gets extended, the data structures can *only* be
> +modified in two ways:
> +
> +1. Adding new fields by re-purposing the padding[] field. No size change.
> +2. Adding new union members at the end. May increase in size.
> +
> +No new fields can be added *after* the variable size union in that it
> +will break backward compatibility when offset moves. In both cases, a
> +new flag must be accompanied with a new field such that the IOMMU
> +driver can process the data based on the new flag. Version field is
> +only reserved for the unlikely event of UAPI upgrade at its entirety.
> +
> +It's *always* the caller's responsibility to indicate the size of the
> +structure passed by setting argsz appropriately.
> +
> +When IOMMU UAPI extension results in size increase, user such as VFIO
> +has to handle the following scenarios:
> +
> +1. User and kernel has exact size match
> +2. An older user with older kernel header (smaller UAPI size) running on a
> +   newer kernel (larger UAPI size)
> +3. A newer user with newer kernel header (larger UAPI size) running
> +   on a older kernel.
> +4. A malicious/misbehaving user pass illegal/invalid size but within
> +   range. The data may contain garbage.
> +
> +
> +Feature Checking
> +
> +While launching a guest with vIOMMU, it is important to ensure that host
> +can support the UAPI data structures to be used for vIOMMU-pIOMMU
> +communications. Without the upfront compatibility checking, future
> +faults are difficult to report even in normal conditions. For example,
> +TLB invalidations should always succeed from vIOMMU's
> +perspective. 

This statement has me concerned.  If a TLB invalidation fails, but
is reported to the guest as successful do we have possible breaking of iommu
isolation guarantees?

If you get a TLB invalidation not happening, for some reason, that's a critical
fault, isolate the device using the IOMMU or kill the VM.

I'd reword it as "TLB invalidations should always succeed."

As you mention, we should never get to this state anyway!

> There is no architectural way to report back to the vIOMMU
> +if the UAPI data is incompatible. For this reason the following IOMMU
> +UAPIs cannot fail:
> +
> +1. Free PASID
> +2. Unbind guest PASID
> +3. Unbind 

Re: [PATCH v2] of_device: removed #include that caused a recursion in included headers

2020-04-18 Thread Jonathan Cameron
On Thu, 16 Apr 2020 12:49:03 +0300
Hadar Gat  wrote:

> Both of_platform.h and of_device.h were included each other.
> In of_device.h, removed unneeded #include to of_platform.h
> and added include to of_platform.h in the files that needs it.
> 
> Signed-off-by: Hadar Gat 

Hmm.  I guess the chances of this causing merge problems are fairly low so
perhaps not worth doing additions of headers via individual subsystems and
actually dropping the header include after another cycle.

So on that basis

Acked-by: Jonathan Cameron  #for-iio

> ---
> v2: add include to of_platform.h in more files. (reported due other builds)
> 
>  arch/sparc/mm/io-unit.c   | 1 +
>  arch/sparc/mm/iommu.c | 1 +
>  drivers/base/platform.c   | 1 +
>  drivers/bus/imx-weim.c| 1 +
>  drivers/bus/vexpress-config.c | 1 +
>  drivers/clk/mediatek/clk-mt7622-aud.c | 1 +
>  drivers/dma/at_hdmac.c| 1 +
>  drivers/dma/stm32-dmamux.c| 1 +
>  drivers/dma/ti/dma-crossbar.c | 1 +
>  drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 1 +
>  drivers/gpu/drm/msm/hdmi/hdmi.c   | 1 +
>  drivers/gpu/drm/msm/msm_drv.c | 1 +
>  drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c   | 1 +
>  drivers/gpu/drm/sun4i/sun4i_tcon.c| 1 +
>  drivers/iio/adc/stm32-adc-core.c  | 1 +
>  drivers/iio/adc/stm32-dfsdm-adc.c | 1 +
>  drivers/iio/adc/stm32-dfsdm-core.c| 1 +
>  drivers/iommu/tegra-smmu.c| 1 +
>  drivers/memory/atmel-ebi.c| 1 +
>  drivers/mfd/palmas.c  | 1 +
>  drivers/mfd/ssbi.c| 1 +
>  drivers/mtd/nand/raw/omap2.c  | 1 +
>  drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 1 +
>  drivers/net/ethernet/ti/cpsw.c| 1 +
>  drivers/phy/tegra/xusb.c  | 1 +
>  drivers/pinctrl/freescale/pinctrl-imx1-core.c | 1 +
>  drivers/pinctrl/nomadik/pinctrl-nomadik.c | 1 +
>  drivers/soc/samsung/exynos-pmu.c  | 1 +
>  drivers/soc/sunxi/sunxi_sram.c| 1 +
>  include/linux/of_device.h | 2 --
>  lib/genalloc.c| 1 +
>  31 files changed, 30 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c
> index 289276b..5638399 100644
> --- a/arch/sparc/mm/io-unit.c
> +++ b/arch/sparc/mm/io-unit.c
> @@ -15,6 +15,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
> index b00dde1..9cbb2e7 100644
> --- a/arch/sparc/mm/iommu.c
> +++ b/arch/sparc/mm/iommu.c
> @@ -16,6 +16,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> diff --git a/drivers/base/platform.c b/drivers/base/platform.c
> index 520..f549274b 100644
> --- a/drivers/base/platform.c
> +++ b/drivers/base/platform.c
> @@ -12,6 +12,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c
> index 28bb65a..8c786da 100644
> --- a/drivers/bus/imx-weim.c
> +++ b/drivers/bus/imx-weim.c
> @@ -11,6 +11,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> diff --git a/drivers/bus/vexpress-config.c b/drivers/bus/vexpress-config.c
> index ff70575..12b8b0b 100644
> --- a/drivers/bus/vexpress-config.c
> +++ b/drivers/bus/vexpress-config.c
> @@ -8,6 +8,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  
> diff --git a/drivers/clk/mediatek/clk-mt7622-aud.c 
> b/drivers/clk/mediatek/clk-mt7622-aud.c
> index 2bd4295..8cbb68f 100644
> --- a/drivers/clk/mediatek/clk-mt7622-aud.c
> +++ b/drivers/clk/mediatek/clk-mt7622-aud.c
> @@ -9,6 +9,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include "clk-mtk.h"
> diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
> index 73a2078..388f8e10 100644
> --- a/drivers/dma/at_hdmac.c
> +++ b/drivers/dma/at_hdmac.c
> @@ -20,6 +20,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include "at_hdmac_regs.h"
> diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c
> index 12f7637..b704896 100644
> --- a/drivers/dma/stm32-dma

Re: [PATCH v4 23/26] iommu/arm-smmu-v3: Add stall support for platform devices

2020-03-09 Thread Jonathan Cameron
On Wed, 4 Mar 2020 15:08:33 +0100
Jean-Philippe Brucker  wrote:

> On Thu, Feb 27, 2020 at 06:17:26PM +0000, Jonathan Cameron wrote:
> > On Mon, 24 Feb 2020 19:23:58 +0100
> > Jean-Philippe Brucker  wrote:
> >   
> > > From: Jean-Philippe Brucker 
> > > 
> > > The SMMU provides a Stall model for handling page faults in platform
> > > devices. It is similar to PCI PRI, but doesn't require devices to have
> > > their own translation cache. Instead, faulting transactions are parked and
> > > the OS is given a chance to fix the page tables and retry the transaction.
> > > 
> > > Enable stall for devices that support it (opt-in by firmware). When an
> > > event corresponds to a translation error, call the IOMMU fault handler. If
> > > the fault is recoverable, it will call us back to terminate or continue
> > > the stall.
> > > 
> > > Signed-off-by: Jean-Philippe Brucker   
> > One question inline.
> > 
> > Thanks,
> >   
> > > ---
> > >  drivers/iommu/arm-smmu-v3.c | 271 ++--
> > >  drivers/iommu/of_iommu.c|   5 +-
> > >  include/linux/iommu.h   |   2 +
> > >  3 files changed, 269 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> > > index 6a5987cce03f..da5dda5ba26a 100644
> > > --- a/drivers/iommu/arm-smmu-v3.c
> > > +++ b/drivers/iommu/arm-smmu-v3.c
> > > @@ -374,6 +374,13 @@  
> > 
> >   
> > > +/*
> > > + * arm_smmu_flush_evtq - wait until all events currently in the queue 
> > > have been
> > > + *   consumed.
> > > + *
> > > + * Wait until the evtq thread finished a batch, or until the queue is 
> > > empty.
> > > + * Note that we don't handle overflows on q->batch. If it occurs, just 
> > > wait for
> > > + * the queue to be empty.
> > > + */
> > > +static int arm_smmu_flush_evtq(void *cookie, struct device *dev, int 
> > > pasid)
> > > +{
> > > + int ret;
> > > + u64 batch;
> > > + struct arm_smmu_device *smmu = cookie;
> > > + struct arm_smmu_queue *q = &smmu->evtq.q;
> > > +
> > > + spin_lock(&q->wq.lock);
> > > + if (queue_sync_prod_in(q) == -EOVERFLOW)
> > > + dev_err(smmu->dev, "evtq overflow detected -- requests lost\n");
> > > +
> > > + batch = q->batch;  
> > 
> > So this is trying to be sure we have advanced the queue 2 spots?  
> 
> So we call arm_smmu_flush_evtq() before decommissioning a PASID, to make
> sure that there aren't any pending event for this PASID languishing in the
> fault queues.
> 
> The main test is queue_empty(). If that succeeds then we know that there
> aren't any pending event (and the PASID is safe to reuse). But if new
> events are constantly added to the queue then we wait for the evtq thread
> to handle a full batch, where one batch corresponds to the queue size. For
> that we take the batch number when entering flush(), and wait for the evtq
> thread to increment it twice.
> 
> > Is there a potential race here?  q->batch could have updated before we take
> > a local copy.  
> 
> Yes we're just checking on the progress of the evtq thread. All accesses
> to batch are made while holding the wq lock.
> 
> Flush is a rare event so the lock isn't contended, but the wake_up() that
> this patch introduces in arm_smmu_evtq_thread() does add some overhead
> (0.85% of arm_smmu_evtq_thread(), according to perf). It would be nice to
> get rid of it but I haven't found anything clever yet.
> 

Thanks.  Maybe worth a few comments in the code as this is a bit esoteric.

Thanks,

Jonathan

> Thanks,
> Jean
> 
> >   
> > > + ret = wait_event_interruptible_locked(q->wq, queue_empty(&q->llq) ||
> > > +   q->batch >= batch + 2);
> > > + spin_unlock(&q->wq.lock);
> > > +
> > > + return ret;
> > > +}
> > > +  
> > ...
> >   


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 02/26] iommu/sva: Manage process address spaces

2020-02-28 Thread Jonathan Cameron
On Fri, 28 Feb 2020 15:43:04 +0100
Jean-Philippe Brucker  wrote:

> On Wed, Feb 26, 2020 at 12:35:06PM +0000, Jonathan Cameron wrote:
> > > + * A single Process Address Space ID (PASID) is allocated for each mm. 
> > > In the
> > > + * example, devices use PASID 1 to read/write into address space X and 
> > > PASID 2
> > > + * to read/write into address space Y. Calling iommu_sva_get_pasid() on 
> > > bond 1
> > > + * returns 1, and calling it on bonds 2-4 returns 2.
> > > + *
> > > + * Hardware tables describing this configuration in the IOMMU would 
> > > typically
> > > + * look like this:
> > > + *
> > > + *PASID tables
> > > + * of domain A
> > > + *  .->++
> > > + * / 0 ||---> io_pgtable
> > > + */++
> > > + *Device tables  /   1 ||---> pgd X
> > > + *  ++  /  ++
> > > + *  00:00.0 |  A |-' 2 ||--.
> > > + *  ++ ++   \
> > > + *  ::   3 ||\
> > > + *  ++ ++ --> pgd Y
> > > + *  00:01.0 |  B |--./
> > > + *  ++   \  |
> > > + *  00:01.1 |  B |+   PASID tables  |
> > > + *  ++ \   of domain B  |
> > > + *  '->++   |
> > > + *   0 ||-- | --> io_pgtable
> > > + * ++   |
> > > + *   1 ||   |
> > > + * ++   |
> > > + *   2 ||---'
> > > + * ++
> > > + *   3 ||
> > > + * ++
> > > + *
> > > + * With this model, a single call binds all devices in a given domain to 
> > > an
> > > + * address space. Other devices in the domain will get the same bond 
> > > implicitly.
> > > + * However, users must issue one bind() for each device, because IOMMUs 
> > > may
> > > + * implement SVA differently. Furthermore, mandating one bind() per 
> > > device
> > > + * allows the driver to perform sanity-checks on device capabilities.  
> >   
> > > + *
> > > + * In some IOMMUs, one entry of the PASID table (typically the first 
> > > one) can
> > > + * hold non-PASID translations. In this case PASID 0 is reserved and the 
> > > first
> > > + * entry points to the io_pgtable pointer. In other IOMMUs the io_pgtable
> > > + * pointer is held in the device table and PASID 0 is available to the
> > > + * allocator.  
> > 
> > Is it worth hammering home in here that we can only do this because the 
> > PASID space
> > is global (with exception of PASID 0)?  It's a convenient simplification 
> > but not
> > necessarily a hardware restriction so perhaps we should remind people 
> > somewhere in here?  
> 
> I could add this four paragraphs up:
> 
> "A single Process Address Space ID (PASID) is allocated for each mm. It is
> a choice made for the Linux SVA implementation, not a hardware
> restriction."

Perfect.

> 
> > > + */
> > > +
> > > +struct io_mm {
> > > + struct list_headdevices;
> > > + struct mm_struct*mm;
> > > + struct mmu_notifier notifier;
> > > +
> > > + /* Late initialization */
> > > + const struct io_mm_ops  *ops;
> > > + void*ctx;
> > > + int pasid;
> > > +};
> > > +
> > > +#define to_io_mm(mmu_notifier)   container_of(mmu_notifier, struct 
> > > io_mm, notifier)
> > > +#define to_iommu_bond(handle)container_of(handle, struct iommu_bond, 
> > > sva)  
> > 
> > Code ordering wise, do we want this after the definition of iommu_bond?
> > 
> > For both of these it's a bit non obvious what they come 'from'.
> > I wouldn't naturally assume to_io_mm gets me from notifier to the io_mm
> > for example.  Not sure it matters though if these are only 

Re: [PATCH v4 00/26] iommu: Shared Virtual Addressing and SMMUv3 support

2020-02-27 Thread Jonathan Cameron
On Mon, 24 Feb 2020 19:23:35 +0100
Jean-Philippe Brucker  wrote:

> Shared Virtual Addressing (SVA) allows to share process page tables with
> devices using the IOMMU. Add a generic implementation of the IOMMU SVA
> API, and add support in the Arm SMMUv3 driver.
> 
> Previous versions of this patchset were sent over a year ago [1][2] but
> we've made a lot of progress since then:
> 
> * ATS support for SMMUv3 was merged in v5.2.
> * The bind() and fault reporting APIs have been merged in v5.3.
> * IOASID were added in v5.5.
> * SMMUv3 PASID was added in v5.6, with some pending for v5.7.
> 
> * The first user of the bind() API will be merged in v5.7 [3]. The zip
>   accelerator is also the first piece of hardware that I've been able to
>   use for testing (previous versions were developed with software models)
>   and I now have tools for evaluating SVA performance. Unfortunately I
>   still don't have hardware that supports ATS and PRI; the zip accelerator
>   uses stall.
> 
> These are the remaining changes for SVA support in SMMUv3. Since v3 [1]
> I fixed countless bugs and - I think - addressed everyone's comments.
> Thanks to recent MMU notifier rework, iommu-sva.c is a lot more
> straightforward. I'm still unhappy with the complicated locking in the
> SMMUv3 driver resulting from patch 12 (Seize private ASID), but I
> haven't found anything better.
> 
> Please find all SVA patches on branches sva/current and sva/zip-devel at
> https://jpbrucker.net/git/linux
> 
> [1] 
> https://lore.kernel.org/linux-iommu/20180920170046.20154-1-jean-philippe.bruc...@arm.com/
> [2] 
> https://lore.kernel.org/linux-iommu/20180511190641.23008-1-jean-philippe.bruc...@arm.com/
> [3] 
> https://lore.kernel.org/linux-iommu/1581407665-13504-1-git-send-email-zhangfei@linaro.org/

Hi Jean-Phillippe.

Great to see this progressing.  Other than the few places I've commented
it all looks good to me.

Thanks,

Jonathan

> 
> Jean-Philippe Brucker (26):
>   mm/mmu_notifiers: pass private data down to alloc_notifier()
>   iommu/sva: Manage process address spaces
>   iommu: Add a page fault handler
>   iommu/sva: Search mm by PASID
>   iommu/iopf: Handle mm faults
>   iommu/sva: Register page fault handler
>   arm64: mm: Pin down ASIDs for sharing mm with devices
>   iommu/io-pgtable-arm: Move some definitions to a header
>   iommu/arm-smmu-v3: Manage ASIDs with xarray
>   arm64: cpufeature: Export symbol read_sanitised_ftr_reg()
>   iommu/arm-smmu-v3: Share process page tables
>   iommu/arm-smmu-v3: Seize private ASID
>   iommu/arm-smmu-v3: Add support for VHE
>   iommu/arm-smmu-v3: Enable broadcast TLB maintenance
>   iommu/arm-smmu-v3: Add SVA feature checking
>   iommu/arm-smmu-v3: Add dev_to_master() helper
>   iommu/arm-smmu-v3: Implement mm operations
>   iommu/arm-smmu-v3: Hook up ATC invalidation to mm ops
>   iommu/arm-smmu-v3: Add support for Hardware Translation Table Update
>   iommu/arm-smmu-v3: Maintain a SID->device structure
>   iommu/arm-smmu-v3: Ratelimit event dump
>   dt-bindings: document stall property for IOMMU masters
>   iommu/arm-smmu-v3: Add stall support for platform devices
>   PCI/ATS: Add PRI stubs
>   PCI/ATS: Export symbols of PRI functions
>   iommu/arm-smmu-v3: Add support for PRI
> 
>  .../devicetree/bindings/iommu/iommu.txt   |   18 +
>  arch/arm64/include/asm/mmu.h  |1 +
>  arch/arm64/include/asm/mmu_context.h  |   11 +-
>  arch/arm64/kernel/cpufeature.c|1 +
>  arch/arm64/mm/context.c   |  103 +-
>  drivers/iommu/Kconfig |   13 +
>  drivers/iommu/Makefile|2 +
>  drivers/iommu/arm-smmu-v3.c   | 1354 +++--
>  drivers/iommu/io-pgfault.c|  533 +++
>  drivers/iommu/io-pgtable-arm.c|   27 +-
>  drivers/iommu/io-pgtable-arm.h|   30 +
>  drivers/iommu/iommu-sva.c |  596 
>  drivers/iommu/iommu-sva.h |   64 +
>  drivers/iommu/iommu.c |1 +
>  drivers/iommu/of_iommu.c  |5 +-
>  drivers/misc/sgi-gru/grutlbpurge.c|4 +-
>  drivers/pci/ats.c |4 +
>  include/linux/iommu.h |   73 +
>  include/linux/mmu_notifier.h  |   10 +-
>  include/linux/pci-ats.h   |8 +
>  mm/mmu_notifier.c |6 +-
>  21 files changed, 2699 insertions(+), 165 deletions(-)
>  create mode 100644 drivers/iommu/io-pgfault.c
>  create mode 100644 drivers/iommu/io-pgtable-arm.h
>  create mode 100644 drivers/iommu/iommu-sva.c
>  create mode 100644 drivers/iommu/iommu-sva.h
> 


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 23/26] iommu/arm-smmu-v3: Add stall support for platform devices

2020-02-27 Thread Jonathan Cameron
On Mon, 24 Feb 2020 19:23:58 +0100
Jean-Philippe Brucker  wrote:

> From: Jean-Philippe Brucker 
> 
> The SMMU provides a Stall model for handling page faults in platform
> devices. It is similar to PCI PRI, but doesn't require devices to have
> their own translation cache. Instead, faulting transactions are parked and
> the OS is given a chance to fix the page tables and retry the transaction.
> 
> Enable stall for devices that support it (opt-in by firmware). When an
> event corresponds to a translation error, call the IOMMU fault handler. If
> the fault is recoverable, it will call us back to terminate or continue
> the stall.
> 
> Signed-off-by: Jean-Philippe Brucker 
One question inline.

Thanks,

> ---
>  drivers/iommu/arm-smmu-v3.c | 271 ++--
>  drivers/iommu/of_iommu.c|   5 +-
>  include/linux/iommu.h   |   2 +
>  3 files changed, 269 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 6a5987cce03f..da5dda5ba26a 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -374,6 +374,13 @@


> +/*
> + * arm_smmu_flush_evtq - wait until all events currently in the queue have 
> been
> + *   consumed.
> + *
> + * Wait until the evtq thread finished a batch, or until the queue is empty.
> + * Note that we don't handle overflows on q->batch. If it occurs, just wait 
> for
> + * the queue to be empty.
> + */
> +static int arm_smmu_flush_evtq(void *cookie, struct device *dev, int pasid)
> +{
> + int ret;
> + u64 batch;
> + struct arm_smmu_device *smmu = cookie;
> + struct arm_smmu_queue *q = &smmu->evtq.q;
> +
> + spin_lock(&q->wq.lock);
> + if (queue_sync_prod_in(q) == -EOVERFLOW)
> + dev_err(smmu->dev, "evtq overflow detected -- requests lost\n");
> +
> + batch = q->batch;

So this is trying to be sure we have advanced the queue 2 spots?

Is there a potential race here?  q->batch could have updated before we take
a local copy.

> + ret = wait_event_interruptible_locked(q->wq, queue_empty(&q->llq) ||
> +   q->batch >= batch + 2);
> + spin_unlock(&q->wq.lock);
> +
> + return ret;
> +}
> +
...

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 07/26] arm64: mm: Pin down ASIDs for sharing mm with devices

2020-02-27 Thread Jonathan Cameron
On Mon, 24 Feb 2020 19:23:42 +0100
Jean-Philippe Brucker  wrote:

> From: Jean-Philippe Brucker 
> 
> To enable address space sharing with the IOMMU, introduce mm_context_get()
> and mm_context_put(), that pin down a context and ensure that it will keep
> its ASID after a rollover. Export the symbols to let the modular SMMUv3
> driver use them.
> 
> Pinning is necessary because a device constantly needs a valid ASID,
> unlike tasks that only require one when running. Without pinning, we would
> need to notify the IOMMU when we're about to use a new ASID for a task,
> and it would get complicated when a new task is assigned a shared ASID.
> Consider the following scenario with no ASID pinned:
> 
> 1. Task t1 is running on CPUx with shared ASID (gen=1, asid=1)
> 2. Task t2 is scheduled on CPUx, gets ASID (1, 2)
> 3. Task tn is scheduled on CPUy, a rollover occurs, tn gets ASID (2, 1)
>We would now have to immediately generate a new ASID for t1, notify
>the IOMMU, and finally enable task tn. We are holding the lock during
>all that time, since we can't afford having another CPU trigger a
>rollover. The IOMMU issues invalidation commands that can take tens of
>milliseconds.
> 
> It gets needlessly complicated. All we wanted to do was schedule task tn,
> that has no business with the IOMMU. By letting the IOMMU pin tasks when
> needed, we avoid stalling the slow path, and let the pinning fail when
> we're out of shareable ASIDs.
> 
> After a rollover, the allocator expects at least one ASID to be available
> in addition to the reserved ones (one per CPU). So (NR_ASIDS - NR_CPUS -
> 1) is the maximum number of ASIDs that can be shared with the IOMMU.
> 
> Signed-off-by: Jean-Philippe Brucker 
A few more trivial points.

Thanks,

Jonathan

> ---
> v2->v4: handle KPTI
> ---
>  arch/arm64/include/asm/mmu.h |   1 +
>  arch/arm64/include/asm/mmu_context.h |  11 ++-
>  arch/arm64/mm/context.c  | 103 +--
>  3 files changed, 109 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
> index e4d862420bb4..70ac3d4cbd3e 100644
> --- a/arch/arm64/include/asm/mmu.h
> +++ b/arch/arm64/include/asm/mmu.h
> @@ -18,6 +18,7 @@
>  
>  typedef struct {
>   atomic64_t  id;
> + unsigned long   pinned;
>   void*vdso;
>   unsigned long   flags;
>  } mm_context_t;
> diff --git a/arch/arm64/include/asm/mmu_context.h 
> b/arch/arm64/include/asm/mmu_context.h
> index 3827ff4040a3..70715c10c02a 100644
> --- a/arch/arm64/include/asm/mmu_context.h
> +++ b/arch/arm64/include/asm/mmu_context.h
> @@ -175,7 +175,13 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp)
>  #define destroy_context(mm)  do { } while(0)
>  void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
>  
> -#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 
> 0; })
> +static inline int
> +init_new_context(struct task_struct *tsk, struct mm_struct *mm)
> +{
> + atomic64_set(&mm->context.id, 0);
> + mm->context.pinned = 0;
> + return 0;
> +}
>  
>  #ifdef CONFIG_ARM64_SW_TTBR0_PAN
>  static inline void update_saved_ttbr0(struct task_struct *tsk,
> @@ -248,6 +254,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
>  void verify_cpu_asid_bits(void);
>  void post_ttbr_update_workaround(void);
>  
> +unsigned long mm_context_get(struct mm_struct *mm);
> +void mm_context_put(struct mm_struct *mm);
> +
>  #endif /* !__ASSEMBLY__ */
>  
>  #endif /* !__ASM_MMU_CONTEXT_H */
> diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
> index 121aba5b1941..5558de88b67d 100644
> --- a/arch/arm64/mm/context.c
> +++ b/arch/arm64/mm/context.c
> @@ -26,6 +26,10 @@ static DEFINE_PER_CPU(atomic64_t, active_asids);
>  static DEFINE_PER_CPU(u64, reserved_asids);
>  static cpumask_t tlb_flush_pending;
>  
> +static unsigned long max_pinned_asids;
> +static unsigned long nr_pinned_asids;
> +static unsigned long *pinned_asid_map;
> +
> +  #define ASID_MASK (~GENMASK(asid_bits - 1, 0))
>  #define ASID_FIRST_VERSION   (1UL << asid_bits)
>  
> @@ -73,6 +77,9 @@ void verify_cpu_asid_bits(void)
>  
>  static void set_kpti_asid_bits(void)
>  {
> + unsigned int k;
> + u8 *dst = (u8 *)asid_map;
> + u8 *src = (u8 *)pinned_asid_map;
>   unsigned int len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned 
> long);
>   /*
>* In case of KPTI kernel/user ASIDs are allocated in
> @@ -80,7 +87,8 @@ static void set_kpti_asid_bits(void)
>* is set, then the ASID will map only userspace. Thus
>* mark even as reserved for kernel.
>*/
> - memset(asid_map, 0xaa, len);
> + for (k = 0; k < len; k++)
> + dst[k] = src[k] | 0xaa;
>  }
>  
>  static void set_reserved_asid_bits(void)
> @@ -88,9 +96,12 @@ static void set_reserved_asid_bits(void)
>   if (arm64_kernel_unmapped_at_el0())
>   set_kpti_asid_bits();
> 

Re: [PATCH v4 03/26] iommu: Add a page fault handler

2020-02-26 Thread Jonathan Cameron
On Mon, 24 Feb 2020 19:23:38 +0100
Jean-Philippe Brucker  wrote:

> From: Jean-Philippe Brucker 
> 
> Some systems allow devices to handle I/O Page Faults in the core mm. For
> example systems implementing the PCI PRI extension or Arm SMMU stall
> model. Infrastructure for reporting these recoverable page faults was
> recently added to the IOMMU core. Add a page fault handler for host SVA.
> 
> IOMMU driver can now instantiate several fault workqueues and link them to
> IOPF-capable devices. Drivers can choose between a single global
> workqueue, one per IOMMU device, one per low-level fault queue, one per
> domain, etc.
> 
> When it receives a fault event, supposedly in an IRQ handler, the IOMMU
> driver reports the fault using iommu_report_device_fault(), which calls
> the registered handler. The page fault handler then calls the mm fault
> handler, and reports either success or failure with iommu_page_response().
> When the handler succeeded, the IOMMU retries the access.
> 
> The iopf_param pointer could be embedded into iommu_fault_param. But
> putting iopf_param into the iommu_param structure allows us not to care
> about ordering between calls to iopf_queue_add_device() and
> iommu_register_device_fault_handler().
> 
> Signed-off-by: Jean-Philippe Brucker 
A few more minor comments...

> ---
>  drivers/iommu/Kconfig  |   4 +
>  drivers/iommu/Makefile |   1 +
>  drivers/iommu/io-pgfault.c | 451 +
>  include/linux/iommu.h  |  59 +
>  4 files changed, 515 insertions(+)
>  create mode 100644 drivers/iommu/io-pgfault.c
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index acca20e2da2f..e4a42e1708b4 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -109,6 +109,10 @@ config IOMMU_SVA
>   select IOMMU_API
>   select MMU_NOTIFIER
>  
> +config IOMMU_PAGE_FAULT
> + bool
> + select IOMMU_API
> +
>  config FSL_PAMU
>   bool "Freescale IOMMU support"
>   depends on PCI
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> index 40c800dd4e3e..bf5cb4ee8409 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -4,6 +4,7 @@ obj-$(CONFIG_IOMMU_API) += iommu-traces.o
>  obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
>  obj-$(CONFIG_IOMMU_DEBUGFS) += iommu-debugfs.o
>  obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
> +obj-$(CONFIG_IOMMU_PAGE_FAULT) += io-pgfault.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
> diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
> new file mode 100644
> index ..76e153c59fe3
> --- /dev/null
> +++ b/drivers/iommu/io-pgfault.c
> @@ -0,0 +1,451 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Handle device page faults
> + *
> + * Copyright (C) 2018 ARM Ltd.

As before. Date update perhaps?

> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +/**
> + * struct iopf_queue - IO Page Fault queue
> + * @wq: the fault workqueue
> + * @flush: low-level flush callback
> + * @flush_arg: flush() argument
> + * @devices: devices attached to this queue
> + * @lock: protects the device list
> + */
> +struct iopf_queue {
> + struct workqueue_struct *wq;
> + iopf_queue_flush_t  flush;
> + void*flush_arg;
> + struct list_headdevices;
> + struct mutexlock;
> +};
> +
> +/**
> + * struct iopf_device_param - IO Page Fault data attached to a device
> + * @dev: the device that owns this param
> + * @queue: IOPF queue
> + * @queue_list: index into queue->devices
> + * @partial: faults that are part of a Page Request Group for which the last
> + *   request hasn't been submitted yet.
> + * @busy: the param is being used
> + * @wq_head: signal a change to @busy
> + */
> +struct iopf_device_param {
> + struct device   *dev;
> + struct iopf_queue   *queue;
> + struct list_headqueue_list;
> + struct list_headpartial;
> + boolbusy;
> + wait_queue_head_t   wq_head;
> +};
> +
> +struct iopf_fault {
> + struct iommu_fault  fault;
> + struct list_headhead;
> +};
> +
> +struct iopf_group {
> + struct iopf_fault   last_fault;
> + struct list_headfaults;
> + struct work_struct  work;
> + struct device   *dev;
> +};
> +
> +static int iopf_complete(struct device *dev, struct iopf_fault *iopf,
> +  enum iommu_page_response_code status)

This is called once per group.  Should name reflect that?

> +{
> + struct iommu_page_response resp = {
> + .version= IOMMU_PAGE_RESP_VERSION_1,
> + .pasid  = 

Re: [PATCH v4 02/26] iommu/sva: Manage process address spaces

2020-02-26 Thread Jonathan Cameron
On Mon, 24 Feb 2020 19:23:37 +0100
Jean-Philippe Brucker  wrote:

> From: Jean-Philippe Brucker 
> 
> Add a small library to help IOMMU drivers manage process address spaces
> bound to their devices. Register an MMU notifier to track modification
> on each address space bound to one or more devices.
> 
> IOMMU drivers must implement the io_mm_ops and can then use the helpers
> provided by this library to easily implement the SVA API introduced by
> commit 26b25a2b98e4. The io_mm_ops are:
> 
> void *alloc(struct mm_struct *)
>   Allocate a PASID context private to the IOMMU driver. There is a
>   single context per mm. IOMMU drivers may perform arch-specific
>   operations in there, for example pinning down a CPU ASID (on Arm).
> 
> int attach(struct device *, int pasid, void *ctx, bool attach_domain)
>   Attach a context to the device, by setting up the PASID table entry.
> 
> int invalidate(struct device *, int pasid, void *ctx,
>unsigned long vaddr, size_t size)
>   Invalidate TLB entries for this address range.
> 
> int detach(struct device *, int pasid, void *ctx, bool detach_domain)
>   Detach a context from the device, by clearing the PASID table entry
>   and invalidating cached entries.
> 
> void free(void *ctx)
>   Free a context.
> 
> Signed-off-by: Jean-Philippe Brucker 

Hi Jean-Phillippe,

A few trivial comments from me in line.  Otherwise this all seems sensible.

Jonathan

> ---
>  drivers/iommu/Kconfig |   7 +
>  drivers/iommu/Makefile|   1 +
>  drivers/iommu/iommu-sva.c | 561 ++
>  drivers/iommu/iommu-sva.h |  64 +
>  drivers/iommu/iommu.c |   1 +
>  include/linux/iommu.h |   3 +
>  6 files changed, 637 insertions(+)
>  create mode 100644 drivers/iommu/iommu-sva.c
>  create mode 100644 drivers/iommu/iommu-sva.h
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index d2fade984999..acca20e2da2f 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -102,6 +102,13 @@ config IOMMU_DMA
>   select IRQ_MSI_IOMMU
>   select NEED_SG_DMA_LENGTH
>  
> +# Shared Virtual Addressing library
> +config IOMMU_SVA
> + bool
> + select IOASID
> + select IOMMU_API
> + select MMU_NOTIFIER
> +
>  config FSL_PAMU
>   bool "Freescale IOMMU support"
>   depends on PCI
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> index 9f33fdb3bb05..40c800dd4e3e 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -37,3 +37,4 @@ obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
>  obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
>  obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
>  obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
> +obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o
> diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
> new file mode 100644
> index ..64f1d1c82383
> --- /dev/null
> +++ b/drivers/iommu/iommu-sva.c
> @@ -0,0 +1,561 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Manage PASIDs and bind process address spaces to devices.
> + *
> + * Copyright (C) 2018 ARM Ltd.

Worth updating the date?

> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include "iommu-sva.h"
> +
> +/**
> + * DOC: io_mm model
> + *
> + * The io_mm keeps track of process address spaces shared between CPU and 
> IOMMU.
> + * The following example illustrates the relation between structures
> + * iommu_domain, io_mm and iommu_sva. The iommu_sva struct is a bond between
> + * io_mm and device. A device can have multiple io_mm and an io_mm may be 
> bound
> + * to multiple devices.
> + *  ___
> + * |  IOMMU domain A   |
> + * |   |
> + * | |  IOMMU group   |+--- io_pgtables
> + * | |||
> + * | |   dev 00:00.0 +--- bond 1 --- io_mm X
> + * | ||   \|
> + * |   '- bond 2 ---.
> + * |___| \
> + *  ___   \
> + * |  IOMMU domain B   | io_mm Y
> + * |   | / /
> + * | |  IOMMU group   ||/ /
> + * | |||   / /
> + * | |   dev 00:01.0  bond 3 -' /
> + * | |   dev 00:01.1  bond 4 --'
> + * | |||
> + * |   +--- io_pgtables
> + * |___|
> + *
> + * In this example, device 00:00.0 is in domain A, devices 00:01.* are in 
> domain
> + * B. All devices within the same domain access the same address spaces. 
> Device
> + * 00:00.0 accesses address spaces X and Y, each corresponding to an 
> mm_struct.
> + 

Re: [PATCH v10 2/4] uacce: add uacce driver

2020-01-10 Thread Jonathan Cameron
On Fri, 10 Jan 2020 14:55:39 +0800
"zhangfei@foxmail.com"  wrote:

> On 2020/1/10 上午1:38, Jonathan Cameron wrote:
> > On Mon, 16 Dec 2019 11:08:15 +0800
> > Zhangfei Gao  wrote:
> >  
> >> From: Kenneth Lee 
> >>
> >> Uacce (Unified/User-space-access-intended Accelerator Framework) targets to
> >> provide Shared Virtual Addressing (SVA) between accelerators and processes.
> >> So accelerator can access any data structure of the main cpu.
> >> This differs from the data sharing between cpu and io device, which share
> >> only data content rather than address.
> >> Since unified address, hardware and user space of process can share the
> >> same virtual address in the communication.
> >>
> >> Uacce create a chrdev for every registration, the queue is allocated to
> >> the process when the chrdev is opened. Then the process can access the
> >> hardware resource by interact with the queue file. By mmap the queue
> >> file space to user space, the process can directly put requests to the
> >> hardware without syscall to the kernel space.
> >>
> >> The IOMMU core only tracks mm<->device bonds at the moment, because it
> >> only needs to handle IOTLB invalidation and PASID table entries. However
> >> uacce needs a finer granularity since multiple queues from the same
> >> device can be bound to an mm. When the mm exits, all bound queues must
> >> be stopped so that the IOMMU can safely clear the PASID table entry and
> >> reallocate the PASID.
> >>
> >> An intermediate struct uacce_mm links uacce devices and queues.
> >> Note that an mm may be bound to multiple devices but an uacce_mm
> >> structure only ever belongs to a single device, because we don't need
> >> anything more complex (if multiple devices are bound to one mm, then
> >> we'll create one uacce_mm for each bond).
> >>
> >>  uacce_device --+-- uacce_mm --+-- uacce_queue
> >> |  '-- uacce_queue
> >> |
> >> '-- uacce_mm --+-- uacce_queue
> >>+-- uacce_queue
> >>        '-- uacce_queue
> >>
> >> Signed-off-by: Kenneth Lee 
> >> Signed-off-by: Zaibo Xu 
> >> Signed-off-by: Zhou Wang 
> >> Signed-off-by: Jean-Philippe Brucker 
> >> Signed-off-by: Zhangfei Gao   
> > Hi,
> >
> > Two small things I'd missed previously.  Fix those and for
> > what it's worth
> >
> > Reviewed-by: Jonathan Cameron   
> Thanks Jonathan
> >  
> >> ---
> >>   Documentation/ABI/testing/sysfs-driver-uacce |  37 ++
> >>   drivers/misc/Kconfig |   1 +
> >>   drivers/misc/Makefile|   1 +
> >>   drivers/misc/uacce/Kconfig   |  13 +
> >>   drivers/misc/uacce/Makefile  |   2 +
> >>   drivers/misc/uacce/uacce.c   | 628 
> >> +++
> >>   include/linux/uacce.h| 161 +++
> >>   include/uapi/misc/uacce/uacce.h  |  38 ++
> >>   8 files changed, 881 insertions(+)
> >>   create mode 100644 Documentation/ABI/testing/sysfs-driver-uacce
> >>   create mode 100644 drivers/misc/uacce/Kconfig
> >>   create mode 100644 drivers/misc/uacce/Makefile
> >>   create mode 100644 drivers/misc/uacce/uacce.c
> >>   create mode 100644 include/linux/uacce.h
> >>   create mode 100644 include/uapi/misc/uacce/uacce.h
> >>  
> > ...  
> >> +
> >> +What:   /sys/class/uacce//available_instances
> >> +Date:   Dec 2019
> >> +KernelVersion:  5.6
> >> +Contact:linux-accelerat...@lists.ozlabs.org
> >> +Description:Available instances left of the device
> >> +Return -ENODEV if uacce_ops get_available_instances is 
> >> not provided
> >> +  
> > See below.  It doesn't "return" it prints it currently.  
> Will update to
> 'unknown' if uacce_ops get_available_instances is not provided
> >
> > ...
> >  
> >> +static int uacce_fops_mmap(struct file *filep, struct vm_area_struct *vma)
> >> +{
> >> +  struct uacce_queue *q = filep->private_data;
> >> +  struct uacce_device *uacce = q->uacce;
> >> +  struct uacce_qfile_region *qfr;
> >> +  enum uacce_qfrt type = UACCE_MAX_R

Re: [PATCH v10 0/4] Add uacce module for Accelerator

2020-01-10 Thread Jonathan Cameron
On Fri, 10 Jan 2020 15:03:25 +0800
zhangfei  wrote:

> On 2020/1/10 上午1:49, Jonathan Cameron wrote:
> > On Mon, 16 Dec 2019 11:08:13 +0800
> > Zhangfei Gao  wrote:
> >  
> >> Uacce (Unified/User-space-access-intended Accelerator Framework) targets to
> >> provide Shared Virtual Addressing (SVA) between accelerators and processes.
> >> So accelerator can access any data structure of the main cpu.
> >> This differs from the data sharing between cpu and io device, which share
> >> data content rather than address.
> >> Because of unified address, hardware and user space of process can share
> >> the same virtual address in the communication.
> >>
> >> Uacce is intended to be used with Jean Philippe Brucker's SVA
> >> patchset[1], which enables IO side page fault and PASID support.
> >> We have keep verifying with Jean's sva patchset [2]
> >> We also keep verifying with Eric's SMMUv3 Nested Stage patches [3]  
> > Hi Zhangfei Gao,
> >
> > Just to check my understanding...
> >
> > This patch set is not dependent on either 2 or 3?
> >
> > To use it on our hardware, we need 2, but the interfaces used are already
> > upstream, so this could move forwards in parallel.
> >
> >  
> Yes,
> patch 1, 2 is for uacce.
> patch 3, 4 is an example using uacce, which happen to be crypto.
Sorry, I wasn't clear enough.

Question is whether we need Jean's sva patch set [2] to merge this?

> 
> Thanks


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v10 0/4] Add uacce module for Accelerator

2020-01-09 Thread Jonathan Cameron
On Mon, 16 Dec 2019 11:08:13 +0800
Zhangfei Gao  wrote:

> Uacce (Unified/User-space-access-intended Accelerator Framework) targets to
> provide Shared Virtual Addressing (SVA) between accelerators and processes.
> So accelerator can access any data structure of the main cpu.
> This differs from the data sharing between cpu and io device, which share
> data content rather than address.
> Because of unified address, hardware and user space of process can share
> the same virtual address in the communication.
> 
> Uacce is intended to be used with Jean Philippe Brucker's SVA
> patchset[1], which enables IO side page fault and PASID support. 
> We have keep verifying with Jean's sva patchset [2]
> We also keep verifying with Eric's SMMUv3 Nested Stage patches [3]

Hi Zhangfei Gao,

Just to check my understanding...

This patch set is not dependent on either 2 or 3?

To use it on our hardware, we need 2, but the interfaces used are already
upstream, so this could move forwards in parallel.

Given interest from Dave it would be great if it can!

Thanks,

Jonathan

> 
> This series and related zip & qm driver
> https://github.com/Linaro/linux-kernel-warpdrive/tree/v5.5-rc1-uacce-v10
> 
> The library and user application:
> https://github.com/Linaro/warpdrive/tree/wdprd-upstream-v10
> 
> References:
> [1] http://jpbrucker.net/sva/
> [2] http://jpbrucker.net/git/linux/log/?h=sva/zip-devel
> [3] https://github.com/eauger/linux/tree/v5.3.0-rc0-2stage-v9
> 
> Change History:
> v10:
> Modify the include header to fix kbuild test erorr in other arch.
> 
> v9:
> Suggested by Jonathan
> 1. Remove sysfs: numa_distance, node_id, id, also add is_visible callback
> 2. Split the api to solve the potential race
> struct uacce_device *uacce_alloc(struct device *parent,
>struct uacce_interface *interface)
> int uacce_register(struct uacce_device *uacce)
> void uacce_remove(struct uacce_device *uacce)
> 3. Split clean up patch 03
> 
> v8:
> Address some comments from Jonathan
> Merge Jean's patch, using uacce_mm instead of pid for sva_exit
> 
> v7:
> As suggested by Jean and Jerome
> Only consider sva case and remove unused dma apis for the first patch.
> Also add mm_exit for sva and vm_ops.close etc
> 
> 
> v6: https://lkml.org/lkml/2019/10/16/231
> Change sys qfrs_size to different file, suggested by Jonathan
> Fix crypto daily build issue and based on crypto code base, also 5.4-rc1.
> 
> v5: https://lkml.org/lkml/2019/10/14/74
> Add an example patch using the uacce interface, suggested by Greg
> 0003-crypto-hisilicon-register-zip-engine-to-uacce.patch
> 
> v4: https://lkml.org/lkml/2019/9/17/116
> Based on 5.4-rc1
> Considering other driver integrating uacce, 
> if uacce not compiled, uacce_register return error and uacce_unregister is 
> empty.
> Simplify uacce flag: UACCE_DEV_SVA.
> Address Greg's comments: 
> Fix state machine, remove potential syslog triggered from user space etc.
> 
> v3: https://lkml.org/lkml/2019/9/2/990
> Recommended by Greg, use sturct uacce_device instead of struct uacce,
> and use struct *cdev in struct uacce_device, as a result, 
> cdev can be released by itself when refcount decreased to 0.
> So the two structures are decoupled and self-maintained by themsleves.
> Also add dev.release for put_device.
> 
> v2: https://lkml.org/lkml/2019/8/28/565
> Address comments from Greg and Jonathan
> Modify interface uacce_register
> Drop noiommu mode first
> 
> v1: https://lkml.org/lkml/2019/8/14/277
> 1. Rebase to 5.3-rc1
> 2. Build on iommu interface
> 3. Verifying with Jean's sva and Eric's nested mode iommu.
> 4. User library has developed a lot: support zlib, openssl etc.
> 5. Move to misc first
> 
> RFC3:
> https://lkml.org/lkml/2018/11/12/1951
> 
> RFC2:
> https://lwn.net/Articles/763990/
> 
> 
> Background of why Uacce:
> Von Neumann processor is not good at general data manipulation.
> It is designed for control-bound rather than data-bound application.
> The latter need less control path facility and more/specific ALUs.
> So there are more and more heterogeneous processors, such as
> encryption/decryption accelerators, TPUs, or
> EDGE (Explicated Data Graph Execution) processors, introduced to gain
> better performance or power efficiency for particular applications
> these days.
> 
> There are generally two ways to make use of these heterogeneous processors:
> 
> The first is to make them co-processors, just like FPU.
> This is good for some application but it has its own cons:
> It changes the ISA set permanently.
> You must save all state elements when the process is switched out.
> But most data-bound processors have a huge set of state elements.
> It makes the kernel scheduler more complex.
> 
> The second is Accelerator.
> It is taken as a IO device from the CPU's point of view
> (but it need not to be physically). The process, running on CPU,
> hold a context of the accelerator and send instructions to it as if
> it calls a function or thread running with FPU.

Re: [PATCH v10 4/4] crypto: hisilicon - register zip engine to uacce

2020-01-09 Thread Jonathan Cameron
On Mon, 16 Dec 2019 11:08:17 +0800
Zhangfei Gao  wrote:

> Register qm to uacce framework for user crypto driver
> 
> Signed-off-by: Zhangfei Gao 
> Signed-off-by: Zhou Wang 

Very nice to see how minimal the changes are.

Whilst uacce in general isn't crypto specific, as we are looking
at changes in a crypto driver, this will need a crypto Ack.

Herbert, this is about as non invasive as things can get and
provide a user space shared virtual addressing based interface.
What do you think?

>From my side, for what it's worth...

Reviewed-by: Jonathan Cameron 

> ---
>  drivers/crypto/hisilicon/qm.c   | 236 
> +++-
>  drivers/crypto/hisilicon/qm.h   |  11 ++
>  drivers/crypto/hisilicon/zip/zip_main.c |  16 ++-
>  include/uapi/misc/uacce/hisi_qm.h   |  23 
>  4 files changed, 278 insertions(+), 8 deletions(-)
>  create mode 100644 include/uapi/misc/uacce/hisi_qm.h
> 
> diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
> index b57da5e..1e923bc 100644
> --- a/drivers/crypto/hisilicon/qm.c
> +++ b/drivers/crypto/hisilicon/qm.c
> @@ -9,6 +9,9 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
> +#include 
>  #include "qm.h"
>  
>  /* eq/aeq irq enable */
> @@ -465,9 +468,14 @@ static void qm_cq_head_update(struct hisi_qp *qp)
>  
>  static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm)
>  {
> - struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
> + if (qp->event_cb) {
> + qp->event_cb(qp);
> + return;
> + }
>  
>   if (qp->req_cb) {
> + struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
> +
>   while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
>   dma_rmb();
>   qp->req_cb(qp, qp->sqe + qm->sqe_size *
> @@ -1269,7 +1277,7 @@ static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, 
> int pasid)
>   * @qp: The qp we want to start to run.
>   * @arg: Accelerator specific argument.
>   *
> - * After this function, qp can receive request from user. Return qp_id if
> + * After this function, qp can receive request from user. Return 0 if
>   * successful, Return -EBUSY if failed.
>   */
>  int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg)
> @@ -1314,7 +1322,7 @@ int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long 
> arg)
>  
>   dev_dbg(dev, "queue %d started\n", qp_id);
>  
> - return qp_id;
> + return 0;
>  }
>  EXPORT_SYMBOL_GPL(hisi_qm_start_qp);
>  
> @@ -1395,6 +1403,213 @@ static void hisi_qm_cache_wb(struct hisi_qm *qm)
>   }
>  }
>  
> +static void qm_qp_event_notifier(struct hisi_qp *qp)
> +{
> + wake_up_interruptible(&qp->uacce_q->wait);
> +}
> +
> +static int hisi_qm_get_available_instances(struct uacce_device *uacce)
> +{
> + int i, ret;
> + struct hisi_qm *qm = uacce->priv;
> +
> + read_lock(&qm->qps_lock);
> + for (i = 0, ret = 0; i < qm->qp_num; i++)
> + if (!qm->qp_array[i])
> + ret++;
> + read_unlock(&qm->qps_lock);
> +
> + return ret;
> +}
> +
> +static int hisi_qm_uacce_get_queue(struct uacce_device *uacce,
> +unsigned long arg,
> +struct uacce_queue *q)
> +{
> + struct hisi_qm *qm = uacce->priv;
> + struct hisi_qp *qp;
> + u8 alg_type = 0;
> +
> + qp = hisi_qm_create_qp(qm, alg_type);
> + if (IS_ERR(qp))
> + return PTR_ERR(qp);
> +
> + q->priv = qp;
> + q->uacce = uacce;
> + qp->uacce_q = q;
> + qp->event_cb = qm_qp_event_notifier;
> + qp->pasid = arg;
> +
> + return 0;
> +}
> +
> +static void hisi_qm_uacce_put_queue(struct uacce_queue *q)
> +{
> + struct hisi_qp *qp = q->priv;
> +
> + hisi_qm_cache_wb(qp->qm);
> + hisi_qm_release_qp(qp);
> +}
> +
> +/* map sq/cq/doorbell to user space */
> +static int hisi_qm_uacce_mmap(struct uacce_queue *q,
> +   struct vm_area_struct *vma,
> +   struct uacce_qfile_region *qfr)
> +{
> + struct hisi_qp *qp = q->priv;
> + struct hisi_qm *qm = qp->qm;
> + size_t sz = vma->vm_end - vma->vm_start;
> + struct pci_dev *pdev = qm->pdev;
> + struct device *dev = &pdev->dev;
> + unsigned long vm_pgoff;
> + int ret;
> +
> + switch (qfr->type) {
> + case UACCE_QFRT_MMIO:
> + if (qm->ver == QM_HW_V2) {
> + if (sz > PAGE_SIZE * (QM

Re: [PATCH v10 3/4] crypto: hisilicon - Remove module_param uacce_mode

2020-01-09 Thread Jonathan Cameron
On Mon, 16 Dec 2019 11:08:16 +0800
Zhangfei Gao  wrote:

> Remove the module_param uacce_mode, which is not used currently.
> 
> Signed-off-by: Zhangfei Gao 
> Signed-off-by: Zhou Wang 


Reviewed-by: Jonathan Cameron 

> ---
>  drivers/crypto/hisilicon/zip/zip_main.c | 31 ++-
>  1 file changed, 6 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/crypto/hisilicon/zip/zip_main.c 
> b/drivers/crypto/hisilicon/zip/zip_main.c
> index e1bab1a..93345f0 100644
> --- a/drivers/crypto/hisilicon/zip/zip_main.c
> +++ b/drivers/crypto/hisilicon/zip/zip_main.c
> @@ -297,9 +297,6 @@ static u32 pf_q_num = HZIP_PF_DEF_Q_NUM;
>  module_param_cb(pf_q_num, &pf_q_num_ops, &pf_q_num, 0444);
>  MODULE_PARM_DESC(pf_q_num, "Number of queues in PF(v1 1-4096, v2 1-1024)");
>  
> -static int uacce_mode;
> -module_param(uacce_mode, int, 0);
> -
>  static u32 vfs_num;
>  module_param(vfs_num, uint, 0444);
>  MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63)");
> @@ -791,6 +788,7 @@ static int hisi_zip_probe(struct pci_dev *pdev, const 
> struct pci_device_id *id)
>   pci_set_drvdata(pdev, hisi_zip);
>  
>   qm = &hisi_zip->qm;
> + qm->use_dma_api = true;
>   qm->pdev = pdev;
>   qm->ver = rev_id;
>  
> @@ -798,20 +796,6 @@ static int hisi_zip_probe(struct pci_dev *pdev, const 
> struct pci_device_id *id)
>   qm->dev_name = hisi_zip_name;
>   qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ? QM_HW_PF :
>   QM_HW_VF;
> - switch (uacce_mode) {
> - case 0:
> - qm->use_dma_api = true;
> - break;
> - case 1:
> - qm->use_dma_api = false;
> - break;
> - case 2:
> - qm->use_dma_api = true;
> - break;
> - default:
> - return -EINVAL;
> - }
> -
>   ret = hisi_qm_init(qm);
>   if (ret) {
>   dev_err(&pdev->dev, "Failed to init qm!\n");
> @@ -1010,12 +994,10 @@ static int __init hisi_zip_init(void)
>   goto err_pci;
>   }
>  
> - if (uacce_mode == 0 || uacce_mode == 2) {
> - ret = hisi_zip_register_to_crypto();
> - if (ret < 0) {
> - pr_err("Failed to register driver to crypto.\n");
> - goto err_crypto;
> - }
> + ret = hisi_zip_register_to_crypto();
> + if (ret < 0) {
> + pr_err("Failed to register driver to crypto.\n");
> + goto err_crypto;
>   }
>  
>   return 0;
> @@ -1030,8 +1012,7 @@ static int __init hisi_zip_init(void)
>  
>  static void __exit hisi_zip_exit(void)
>  {
> - if (uacce_mode == 0 || uacce_mode == 2)
> - hisi_zip_unregister_from_crypto();
> + hisi_zip_unregister_from_crypto();
>   pci_unregister_driver(&hisi_zip_pci_driver);
>   hisi_zip_unregister_debugfs();
>  }


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v10 2/4] uacce: add uacce driver

2020-01-09 Thread Jonathan Cameron
On Mon, 16 Dec 2019 11:08:15 +0800
Zhangfei Gao  wrote:

> From: Kenneth Lee 
> 
> Uacce (Unified/User-space-access-intended Accelerator Framework) targets to
> provide Shared Virtual Addressing (SVA) between accelerators and processes.
> So accelerator can access any data structure of the main cpu.
> This differs from the data sharing between cpu and io device, which share
> only data content rather than address.
> Since unified address, hardware and user space of process can share the
> same virtual address in the communication.
> 
> Uacce create a chrdev for every registration, the queue is allocated to
> the process when the chrdev is opened. Then the process can access the
> hardware resource by interact with the queue file. By mmap the queue
> file space to user space, the process can directly put requests to the
> hardware without syscall to the kernel space.
> 
> The IOMMU core only tracks mm<->device bonds at the moment, because it
> only needs to handle IOTLB invalidation and PASID table entries. However
> uacce needs a finer granularity since multiple queues from the same
> device can be bound to an mm. When the mm exits, all bound queues must
> be stopped so that the IOMMU can safely clear the PASID table entry and
> reallocate the PASID.
> 
> An intermediate struct uacce_mm links uacce devices and queues.
> Note that an mm may be bound to multiple devices but an uacce_mm
> structure only ever belongs to a single device, because we don't need
> anything more complex (if multiple devices are bound to one mm, then
> we'll create one uacce_mm for each bond).
> 
> uacce_device --+-- uacce_mm --+-- uacce_queue
>|  '-- uacce_queue
>|
>'-- uacce_mm --+-- uacce_queue
>   +-- uacce_queue
>   '-- uacce_queue
> 
> Signed-off-by: Kenneth Lee 
> Signed-off-by: Zaibo Xu 
> Signed-off-by: Zhou Wang 
> Signed-off-by: Jean-Philippe Brucker 
> Signed-off-by: Zhangfei Gao 

Hi,

Two small things I'd missed previously.  Fix those and for
what it's worth

Reviewed-by: Jonathan Cameron 

> ---
>  Documentation/ABI/testing/sysfs-driver-uacce |  37 ++
>  drivers/misc/Kconfig |   1 +
>  drivers/misc/Makefile|   1 +
>  drivers/misc/uacce/Kconfig   |  13 +
>  drivers/misc/uacce/Makefile  |   2 +
>  drivers/misc/uacce/uacce.c   | 628 
> +++
>  include/linux/uacce.h| 161 +++
>  include/uapi/misc/uacce/uacce.h  |  38 ++
>  8 files changed, 881 insertions(+)
>  create mode 100644 Documentation/ABI/testing/sysfs-driver-uacce
>  create mode 100644 drivers/misc/uacce/Kconfig
>  create mode 100644 drivers/misc/uacce/Makefile
>  create mode 100644 drivers/misc/uacce/uacce.c
>  create mode 100644 include/linux/uacce.h
>  create mode 100644 include/uapi/misc/uacce/uacce.h
> 
...
> +
> +What:   /sys/class/uacce//available_instances
> +Date:   Dec 2019
> +KernelVersion:  5.6
> +Contact:linux-accelerat...@lists.ozlabs.org
> +Description:Available instances left of the device
> +Return -ENODEV if uacce_ops get_available_instances is not 
> provided
> +

See below.  It doesn't "return" it prints it currently.

...

> +static int uacce_fops_mmap(struct file *filep, struct vm_area_struct *vma)
> +{
> + struct uacce_queue *q = filep->private_data;
> + struct uacce_device *uacce = q->uacce;
> + struct uacce_qfile_region *qfr;
> + enum uacce_qfrt type = UACCE_MAX_REGION;
> + int ret = 0;
> +
> + if (vma->vm_pgoff < UACCE_MAX_REGION)
> + type = vma->vm_pgoff;
> + else
> + return -EINVAL;
> +
> + qfr = kzalloc(sizeof(*qfr), GFP_KERNEL);
> + if (!qfr)
> + return -ENOMEM;
> +
> + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_WIPEONFORK;
> + vma->vm_ops = _vm_ops;
> + vma->vm_private_data = q;
> + qfr->type = type;
> +
> + mutex_lock(&uacce_mutex);
> +
> + if (q->state != UACCE_Q_INIT && q->state != UACCE_Q_STARTED) {
> + ret = -EINVAL;
> + goto out_with_lock;
> + }
> +
> + if (q->qfrs[type]) {
> + ret = -EEXIST;
> + goto out_with_lock;
> + }
> +
> + switch (type) {
> + case UACCE_QFRT_MMIO:
> + if (!uacce->ops->mmap) {
> + ret = -EINVAL;
> + goto out_with_lock;
> + 

Re: [PATCH v3 00/13] iommu: Add PASID support to Arm SMMUv3

2019-12-13 Thread Jonathan Cameron
On Mon, 9 Dec 2019 19:05:01 +0100
Jean-Philippe Brucker  wrote:

> Add support for Substream ID and PASIDs to the SMMUv3 driver.
> Changes since v2 [1]:
> 
> * Split preparatory work into patches 5, 6, 8 and 9.
> 
> * Added patch 1. Not strictly relevant, but since we're moving the DMA
>   allocations and adding a new one, we might as well clean the flags
>   first.
> 
> * Fixed a double free reported by Jonathan, and other small
>   issues.
> 
> * Added patch 12. Upstream commit c6e9aefbf9db ("PCI/ATS: Remove unused
>   PRI and PASID stubs") removed the unused PASID stubs. Since the SMMU
>   driver can be built without PCI, the stubs are now needed.
> 
> [1] 
> https://lore.kernel.org/linux-iommu/20191108152508.4039168-1-jean-phili...@linaro.org/

Hi Jean-Philippe,

Series looks great to me.  FWIW

Reviewed-by: Jonathan Cameron 
for the patches I didn't comment on in this version as I couldn't find anything
to comment about ;)

Thanks

Jonathan

> 
> Jean-Philippe Brucker (13):
>   iommu/arm-smmu-v3: Drop __GFP_ZERO flag from DMA allocation
>   dt-bindings: document PASID property for IOMMU masters
>   iommu/arm-smmu-v3: Support platform SSID
>   ACPI/IORT: Support PASID for platform devices
>   iommu/arm-smmu-v3: Prepare arm_smmu_s1_cfg for SSID support
>   iommu/arm-smmu-v3: Add context descriptor tables allocators
>   iommu/arm-smmu-v3: Add support for Substream IDs
>   iommu/arm-smmu-v3: Propate ssid_bits
>   iommu/arm-smmu-v3: Handle failure of arm_smmu_write_ctx_desc()
>   iommu/arm-smmu-v3: Add second level of context descriptor table
>   iommu/arm-smmu-v3: Improve add_device() error handling
>   PCI/ATS: Add PASID stubs
>   iommu/arm-smmu-v3: Add support for PCI PASID
> 
>  .../devicetree/bindings/iommu/iommu.txt   |   6 +
>  drivers/acpi/arm64/iort.c |  18 +
>  drivers/iommu/arm-smmu-v3.c   | 462 +++---
>  drivers/iommu/of_iommu.c  |   6 +-
>  include/linux/iommu.h |   2 +
>  include/linux/pci-ats.h   |   3 +
>  6 files changed, 437 insertions(+), 60 deletions(-)
> 


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 10/13] iommu/arm-smmu-v3: Add second level of context descriptor table

2019-12-13 Thread Jonathan Cameron
On Mon, 9 Dec 2019 19:05:11 +0100
Jean-Philippe Brucker  wrote:

> The SMMU can support up to 20 bits of SSID. Add a second level of page
> tables to accommodate this. Devices that support more than 1024 SSIDs now
> have a table of 1024 L1 entries (8kB), pointing to tables of 1024 context
> descriptors (64kB), allocated on demand.
> 
> Signed-off-by: Jean-Philippe Brucker 

One tiny little comment inline.  I really don't mind if you ignore it ;)

Reviewed-by: Jonathan Cameron 

> ---
>  drivers/iommu/arm-smmu-v3.c | 153 +---
>  1 file changed, 143 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index fc5119f34187..52adcdfda58b 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -224,6 +224,7 @@
>  
>  #define STRTAB_STE_0_S1FMT   GENMASK_ULL(5, 4)
>  #define STRTAB_STE_0_S1FMT_LINEAR0
> +#define STRTAB_STE_0_S1FMT_64K_L22
>  #define STRTAB_STE_0_S1CTXPTR_MASK   GENMASK_ULL(51, 6)
>  #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
>  
> @@ -263,7 +264,20 @@
>  
>  #define STRTAB_STE_3_S2TTB_MASK  GENMASK_ULL(51, 4)
>  
> -/* Context descriptor (stage-1 only) */
> +/*
> + * Context descriptors.
> + *
> + * Linear: when less than 1024 SSIDs are supported
> + * 2lvl: at most 1024 L1 entries,
> + *   1024 lazy entries per table.
> + */
> +#define CTXDESC_SPLIT10
> +#define CTXDESC_L2_ENTRIES   (1 << CTXDESC_SPLIT)
> +
> +#define CTXDESC_L1_DESC_DWORDS   1
> +#define CTXDESC_L1_DESC_VALID1
> +#define CTXDESC_L1_DESC_L2PTR_MASK   GENMASK_ULL(51, 12)
> +
>  #define CTXDESC_CD_DWORDS8
>  #define CTXDESC_CD_0_TCR_T0SZGENMASK_ULL(5, 0)
>  #define ARM64_TCR_T0SZ   GENMASK_ULL(5, 0)
> @@ -575,7 +589,10 @@ struct arm_smmu_cd_table {
>  };
>  
>  struct arm_smmu_s1_cfg {
> - struct arm_smmu_cd_tabletable;
> + struct arm_smmu_cd_table*tables;
> + size_t  num_tables;
> + __le64  *l1ptr;
> + dma_addr_t  l1ptr_dma;
>   struct arm_smmu_ctx_desccd;
>   u8  s1fmt;
>   u8  s1cdmax;
> @@ -1521,9 +1538,53 @@ static void arm_smmu_free_cd_leaf_table(struct 
> arm_smmu_device *smmu,
>  {
>   size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
>  
> + if (!table->ptr)
> + return;
>   dmam_free_coherent(smmu->dev, size, table->ptr, table->ptr_dma);
>  }
>  
> +static void arm_smmu_write_cd_l1_desc(__le64 *dst,
> +   struct arm_smmu_cd_table *table)
> +{
> + u64 val = (table->ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
> +   CTXDESC_L1_DESC_VALID;
> +
> + WRITE_ONCE(*dst, cpu_to_le64(val));
> +}
> +
> +static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
> +u32 ssid)
> +{
> + __le64 *l1ptr;
> + unsigned int idx;
> + struct arm_smmu_cd_table *table;
> + struct arm_smmu_device *smmu = smmu_domain->smmu;
> + struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
> +
> + if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) {
> + table = &cfg->tables[0];
> + idx = ssid;
> + } else {
> + idx = ssid >> CTXDESC_SPLIT;
> + if (idx >= cfg->num_tables)
> + return NULL;
> +
> + table = &cfg->tables[idx];
> + if (!table->ptr) {
> + if (arm_smmu_alloc_cd_leaf_table(smmu, table,
> +  CTXDESC_L2_ENTRIES))
> + return NULL;
> +
> + l1ptr = cfg->l1ptr + idx * CTXDESC_L1_DESC_DWORDS;
> + arm_smmu_write_cd_l1_desc(l1ptr, table);
> + /* An invalid L1CD can be cached */
> + arm_smmu_sync_cd(smmu_domain, ssid, false);
> + }
> + idx = ssid & (CTXDESC_L2_ENTRIES - 1);
> + }
> + return table->ptr + idx * CTXDESC_CD_DWORDS;
> +}
> +
>  static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
>  {
>   u64 val = 0;
> @@ -1556,8 +1617,10 @@ static int arm_smmu_write_ctx_desc(struct 
> arm_smmu_domain *smmu_domain,
>   u64 val;
>   bool cd_live;
>   struct arm_smmu_device *smmu = smmu_domain->smmu;
> - __le64 *cdptr = smmu_domain->s1_cfg.table.ptr + ssid *
> -   

Re: [PATCH v3 01/13] iommu/arm-smmu-v3: Drop __GFP_ZERO flag from DMA allocation

2019-12-13 Thread Jonathan Cameron
On Mon, 9 Dec 2019 19:05:02 +0100
Jean-Philippe Brucker  wrote:

> Since commit 518a2f1925c3 ("dma-mapping: zero memory returned from
> dma_alloc_*"), dma_alloc_* always initializes memory to zero, so there
> is no need to use dma_zalloc_* or pass the __GFP_ZERO flag anymore.
> 
> The flag was introduced by commit 04fa26c71be5 ("iommu/arm-smmu: Convert
> DMA buffer allocations to the managed API"), since the managed API
> didn't provide a dmam_zalloc_coherent() function.
> 
> Signed-off-by: Jean-Philippe Brucker 
Reviewed-by: Jonathan Cameron 

Good to tidy these up whilst we are here.

Jonathan

> ---
>  drivers/iommu/arm-smmu-v3.c | 9 -
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index effe72eb89e7..d4e8b7f8d9f4 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -1675,7 +1675,7 @@ static int arm_smmu_init_l2_strtab(struct 
> arm_smmu_device *smmu, u32 sid)
>  
>   desc->span = STRTAB_SPLIT + 1;
>   desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
> -   GFP_KERNEL | __GFP_ZERO);
> +   GFP_KERNEL);
>   if (!desc->l2ptr) {
>   dev_err(smmu->dev,
>   "failed to allocate l2 stream table for SID %u\n",
> @@ -2161,8 +2161,7 @@ static int arm_smmu_domain_finalise_s1(struct 
> arm_smmu_domain *smmu_domain,
>   return asid;
>  
>   cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
> -  &cfg->cdptr_dma,
> -  GFP_KERNEL | __GFP_ZERO);
> +  &cfg->cdptr_dma, GFP_KERNEL);
>   if (!cfg->cdptr) {
>   dev_warn(smmu->dev, "failed to allocate context descriptor\n");
>   ret = -ENOMEM;
> @@ -2883,7 +2882,7 @@ static int arm_smmu_init_strtab_2lvl(struct 
> arm_smmu_device *smmu)
>  
>   l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
>   strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
> -  GFP_KERNEL | __GFP_ZERO);
> +  GFP_KERNEL);
>   if (!strtab) {
>   dev_err(smmu->dev,
>   "failed to allocate l1 stream table (%u bytes)\n",
> @@ -2910,7 +2909,7 @@ static int arm_smmu_init_strtab_linear(struct 
> arm_smmu_device *smmu)
>  
>   size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
>   strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
> -  GFP_KERNEL | __GFP_ZERO);
> +  GFP_KERNEL);
>   if (!strtab) {
>   dev_err(smmu->dev,
>   "failed to allocate linear stream table (%u bytes)\n",


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 8/8] iommu/arm-smmu-v3: Add support for PCI PASID

2019-11-11 Thread Jonathan Cameron
On Fri, 8 Nov 2019 16:25:08 +0100
Jean-Philippe Brucker  wrote:

> Enable PASID for PCI devices that support it. Since the SSID tables are
> allocated by arm_smmu_attach_dev(), PASID has to be enabled early enough.
> arm_smmu_dev_feature_enable() would be too late, since by that time the
> main DMA domain has already been attached. Do it in add_device() instead.
> 
> Signed-off-by: Jean-Philippe Brucker 
Seems straightforward.

Reviewed-by: Jonathan Cameron 

Thanks for working on this stuff.  I hope we an move to get the rest of the
SVA elements lined up behind it so everything moves quickly in the next
cycle (or two).

Jonathan

> ---
>  drivers/iommu/arm-smmu-v3.c | 51 -
>  1 file changed, 50 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 88ec0bf33492..3ee313c08325 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -2633,6 +2633,49 @@ static void arm_smmu_disable_ats(struct 
> arm_smmu_master *master)
>   atomic_dec(&smmu_domain->nr_ats_masters);
>  }
>  
> +static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
> +{
> + int ret;
> + int features;
> + int num_pasids;
> + struct pci_dev *pdev;
> +
> + if (!dev_is_pci(master->dev))
> + return -ENOSYS;
> +
> + pdev = to_pci_dev(master->dev);
> +
> + features = pci_pasid_features(pdev);
> + if (features < 0)
> + return -ENOSYS;
> +
> + num_pasids = pci_max_pasids(pdev);
> + if (num_pasids <= 0)
> + return -ENOSYS;
> +
> + ret = pci_enable_pasid(pdev, features);
> + if (!ret)
> + master->ssid_bits = min_t(u8, ilog2(num_pasids),
> +   master->smmu->ssid_bits);
> + return ret;
> +}
> +
> +static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
> +{
> + struct pci_dev *pdev;
> +
> + if (!dev_is_pci(master->dev))
> + return;
> +
> + pdev = to_pci_dev(master->dev);
> +
> + if (!pdev->pasid_enabled)
> + return;
> +
> + master->ssid_bits = 0;
> + pci_disable_pasid(pdev);
> +}
> +
>  static void arm_smmu_detach_dev(struct arm_smmu_master *master)
>  {
>   unsigned long flags;
> @@ -2841,13 +2884,16 @@ static int arm_smmu_add_device(struct device *dev)
>  
>   master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
>  
> + /* Note that PASID must be enabled before, and disabled after ATS */
> + arm_smmu_enable_pasid(master);
> +
>   if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
>   master->ssid_bits = min_t(u8, master->ssid_bits,
> CTXDESC_LINEAR_CDMAX);
>  
>   ret = iommu_device_link(&smmu->iommu, dev);
>   if (ret)
> - goto err_free_master;
> + goto err_disable_pasid;
>  
>   group = iommu_group_get_for_dev(dev);
>   if (IS_ERR(group)) {
> @@ -2860,6 +2906,8 @@ static int arm_smmu_add_device(struct device *dev)
>  
>  err_unlink:
>   iommu_device_unlink(&smmu->iommu, dev);
> +err_disable_pasid:
> + arm_smmu_disable_pasid(master);
>  err_free_master:
>   kfree(master);
>   fwspec->iommu_priv = NULL;
> @@ -2880,6 +2928,7 @@ static void arm_smmu_remove_device(struct device *dev)
>   arm_smmu_detach_dev(master);
>   iommu_group_remove_device(dev);
>   iommu_device_unlink(&smmu->iommu, dev);
> + arm_smmu_disable_pasid(master);
>   kfree(master);
>   iommu_fwspec_free(dev);
>  }


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 7/8] iommu/arm-smmu-v3: Improve add_device() error handling

2019-11-11 Thread Jonathan Cameron
On Fri, 8 Nov 2019 16:25:07 +0100
Jean-Philippe Brucker  wrote:

> Let add_device() clean up after itself. The iommu_bus_init() function
> does call remove_device() on error, but other sites (e.g. of_iommu) do
> not.
> 
> Don't free level-2 stream tables because we'd have to track if we
> allocated each of them or if they are used by other endpoints. It's not
> worth the hassle since they are managed resources.
> 
> Reviewed-by: Eric Auger 
> Signed-off-by: Jean-Philippe Brucker 

Potentially some fun around reordering of last few actions, but
doesn't seem there is any real connection between them so should be
fine.

Reviewed-by: Jonathan Cameron 

> ---
>  drivers/iommu/arm-smmu-v3.c | 28 +---
>  1 file changed, 21 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 82eac89ee187..88ec0bf33492 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -2826,14 +2826,16 @@ static int arm_smmu_add_device(struct device *dev)
>   for (i = 0; i < master->num_sids; i++) {
>   u32 sid = master->sids[i];
>  
> - if (!arm_smmu_sid_in_range(smmu, sid))
> - return -ERANGE;
> + if (!arm_smmu_sid_in_range(smmu, sid)) {
> + ret = -ERANGE;
> + goto err_free_master;
> + }
>  
>   /* Ensure l2 strtab is initialised */
>   if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
>   ret = arm_smmu_init_l2_strtab(smmu, sid);
>   if (ret)
> - return ret;
> + goto err_free_master;
>   }
>   }
>  
> @@ -2843,13 +2845,25 @@ static int arm_smmu_add_device(struct device *dev)
>   master->ssid_bits = min_t(u8, master->ssid_bits,
> CTXDESC_LINEAR_CDMAX);
>  
> + ret = iommu_device_link(&smmu->iommu, dev);
> + if (ret)
> + goto err_free_master;
> +
>   group = iommu_group_get_for_dev(dev);
> - if (!IS_ERR(group)) {
> - iommu_group_put(group);
> - iommu_device_link(&smmu->iommu, dev);
> + if (IS_ERR(group)) {
> + ret = PTR_ERR(group);
> + goto err_unlink;
>   }
>  
> - return PTR_ERR_OR_ZERO(group);
> + iommu_group_put(group);
> + return 0;
> +
> +err_unlink:
> + iommu_device_unlink(&smmu->iommu, dev);
> +err_free_master:
> + kfree(master);
> + fwspec->iommu_priv = NULL;
> + return ret;
>  }
>  
>  static void arm_smmu_remove_device(struct device *dev)


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 6/8] iommu/arm-smmu-v3: Add second level of context descriptor table

2019-11-11 Thread Jonathan Cameron
On Fri, 8 Nov 2019 16:25:06 +0100
Jean-Philippe Brucker  wrote:

> The SMMU can support up to 20 bits of SSID. Add a second level of page
> tables to accommodate this. Devices that support more than 1024 SSIDs now
> have a table of 1024 L1 entries (8kB), pointing to tables of 1024 context
> descriptors (64kB), allocated on demand.
> 
> Signed-off-by: Jean-Philippe Brucker 
Hi Jean-Philippe,

There seems to be a disconnect in here between clearing by hand
device managed entities, which normally implies we'll reallocate
them later, and clearing pointers that are used in the control
flow of allocation.  I'm looking at this a bit in isolation so
I'm not quite sure on how they are used.

> ---
>  drivers/iommu/arm-smmu-v3.c | 137 +---
>  1 file changed, 126 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index df7d45503c65..82eac89ee187 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -224,6 +224,7 @@
>  
>  #define STRTAB_STE_0_S1FMT   GENMASK_ULL(5, 4)
>  #define STRTAB_STE_0_S1FMT_LINEAR0
> +#define STRTAB_STE_0_S1FMT_64K_L22
>  #define STRTAB_STE_0_S1CTXPTR_MASK   GENMASK_ULL(51, 6)
>  #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
>  
> @@ -263,7 +264,20 @@
>  
>  #define STRTAB_STE_3_S2TTB_MASK  GENMASK_ULL(51, 4)
>  
> -/* Context descriptor (stage-1 only) */
> +/*
> + * Context descriptors.
> + *
> + * Linear: when less than 1024 SSIDs are supported
> + * 2lvl: at most 1024 L1 entries,
> + *   1024 lazy entries per table.
> + */
> +#define CTXDESC_SPLIT10
> +#define CTXDESC_L2_ENTRIES   (1 << CTXDESC_SPLIT)
> +
> +#define CTXDESC_L1_DESC_DWORDS   1
> +#define CTXDESC_L1_DESC_VALID1
> +#define CTXDESC_L1_DESC_L2PTR_MASK   GENMASK_ULL(51, 12)
> +
>  #define CTXDESC_CD_DWORDS8
>  #define CTXDESC_CD_0_TCR_T0SZGENMASK_ULL(5, 0)
>  #define ARM64_TCR_T0SZ   GENMASK_ULL(5, 0)
> @@ -577,7 +591,10 @@ struct arm_smmu_cd_table {
>  struct arm_smmu_s1_cfg {
>   u8  s1fmt;
>   u8  s1cdmax;
> - struct arm_smmu_cd_tabletable;
> + struct arm_smmu_cd_table*tables;
> + size_t  num_tables;
> + __le64  *l1ptr;
> + dma_addr_t  l1ptr_dma;
>   struct arm_smmu_ctx_desccd;
>  };
>  
> @@ -1521,12 +1538,51 @@ static void arm_smmu_free_cd_leaf_table(struct 
> arm_smmu_device *smmu,
>  {
>   size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
>  
> + if (!table->ptr)
> + return;
>   dmam_free_coherent(smmu->dev, size, table->ptr, table->ptr_dma);
>  }
>  
> -static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_s1_cfg *cfg, u32 ssid)
> +static void arm_smmu_write_cd_l1_desc(__le64 *dst,
> +   struct arm_smmu_cd_table *table)
>  {
> - return cfg->table.ptr + ssid * CTXDESC_CD_DWORDS;
> + u64 val = (table->ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
> +   CTXDESC_L1_DESC_VALID;
> +
> + WRITE_ONCE(*dst, cpu_to_le64(val));
> +}
> +
> +static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
> +u32 ssid)
> +{
> + __le64 *l1ptr;
> + unsigned int idx;
> + struct arm_smmu_cd_table *table;
> + struct arm_smmu_device *smmu = smmu_domain->smmu;
> + struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
> +
> + if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) {
> + table = &cfg->tables[0];
> + idx = ssid;
> + } else {
> + idx = ssid >> CTXDESC_SPLIT;
> + if (idx >= cfg->num_tables)
> + return NULL;
> +
> + table = &cfg->tables[idx];
> + if (!table->ptr) {
> + if (arm_smmu_alloc_cd_leaf_table(smmu, table,
> +  CTXDESC_L2_ENTRIES))
> + return NULL;
> +
> + l1ptr = cfg->l1ptr + idx * CTXDESC_L1_DESC_DWORDS;
> + arm_smmu_write_cd_l1_desc(l1ptr, table);
> + /* An invalid L1CD can be cached */
> + arm_smmu_sync_cd(smmu_domain, ssid, false);
> + }
> + idx = ssid & (CTXDESC_L2_ENTRIES - 1);
> + }
> + return table->ptr + idx * CTXDESC_CD_DWORDS;
>  }
>  
>  static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
> @@ -1552,7 +1608,7 @@ static int arm_smmu_write_ctx_desc(struct 
> arm_smmu_domain *smmu_domain,
>   u64 val;
>   bool cd_live;
>   struct arm_smmu_device *smmu = smmu_domain->smmu;
> - __le64 *cdptr = arm_smmu_get_cd_ptr(&smmu_domain->s1_cfg, ssid);
> + __le64 *cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
>  
>   /*
>* This function handles the 

Re: [PATCH v2 5/8] iommu/arm-smmu-v3: Add support for Substream IDs

2019-11-11 Thread Jonathan Cameron
On Fri, 8 Nov 2019 16:25:05 +0100
Jean-Philippe Brucker  wrote:

> At the moment, the SMMUv3 driver implements only one stage-1 or stage-2
> page directory per device. However SMMUv3 allows more than one address
> space for some devices, by providing multiple stage-1 page directories. In
> addition to the Stream ID (SID), that identifies a device, we can now have
> Substream IDs (SSID) identifying an address space. In PCIe, SID is called
> Requester ID (RID) and SSID is called Process Address-Space ID (PASID).
> 
> Prepare the driver for SSID support, by adding context descriptor tables
> in STEs (previously a single static context descriptor). A complete
> stage-1 walk is now performed like this by the SMMU:
> 
>   Stream tables  Ctx. tables  Page tables
> ++   ,--->+---+   ,--->+---+
> ::   |:   :   |:   :
> ++   |+---+   |+---+
>SID->|  STE   |---'  SSID->|  CD   |---'  IOVA->|  PTE  |--> IPA
> +++---++---+
> :::   ::   :
> +++---++---+
> 
> Implement a single level of context descriptor table for now, but as with
> stream and page tables, an SSID can be split to index multiple levels of
> tables.
> 
> Signed-off-by: Jean-Philippe Brucker 
Looks good to me.

Reviewed-by: Jonathan Cameron 

> ---
>  drivers/iommu/arm-smmu-v3.c | 132 ++--
>  1 file changed, 111 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 122bed0168a3..df7d45503c65 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -227,6 +227,11 @@
>  #define STRTAB_STE_0_S1CTXPTR_MASK   GENMASK_ULL(51, 6)
>  #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
>  
> +#define STRTAB_STE_1_S1DSS   GENMASK_ULL(1, 0)
> +#define STRTAB_STE_1_S1DSS_TERMINATE 0x0
> +#define STRTAB_STE_1_S1DSS_BYPASS0x1
> +#define STRTAB_STE_1_S1DSS_SSID0 0x2
> +
>  #define STRTAB_STE_1_S1C_CACHE_NC0UL
>  #define STRTAB_STE_1_S1C_CACHE_WBRA  1UL
>  #define STRTAB_STE_1_S1C_CACHE_WT2UL
> @@ -329,6 +334,7 @@
>  #define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0)
>  #define CMDQ_PREFETCH_1_ADDR_MASKGENMASK_ULL(63, 12)
>  
> +#define CMDQ_CFGI_0_SSID GENMASK_ULL(31, 12)
>  #define CMDQ_CFGI_0_SID  GENMASK_ULL(63, 32)
>  #define CMDQ_CFGI_1_LEAF (1UL << 0)
>  #define CMDQ_CFGI_1_RANGEGENMASK_ULL(4, 0)
> @@ -446,8 +452,11 @@ struct arm_smmu_cmdq_ent {
>  
>   #define CMDQ_OP_CFGI_STE0x3
>   #define CMDQ_OP_CFGI_ALL0x4
> + #define CMDQ_OP_CFGI_CD 0x5
> + #define CMDQ_OP_CFGI_CD_ALL 0x6
>   struct {
>   u32 sid;
> + u32 ssid;
>   union {
>   boolleaf;
>   u8  span;
> @@ -566,6 +575,7 @@ struct arm_smmu_cd_table {
>  };
>  
>  struct arm_smmu_s1_cfg {
> + u8  s1fmt;
>   u8  s1cdmax;
>   struct arm_smmu_cd_tabletable;
>   struct arm_smmu_ctx_desccd;
> @@ -860,10 +870,16 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct 
> arm_smmu_cmdq_ent *ent)
>   cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
>   cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
>   break;
> + case CMDQ_OP_CFGI_CD:
> + cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
> + /* Fallthrough */
>   case CMDQ_OP_CFGI_STE:
>   cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
>   cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
>   break;
> + case CMDQ_OP_CFGI_CD_ALL:
> + cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
> + break;
>   case CMDQ_OP_CFGI_ALL:
>   /* Cover the entire SID range */
>   cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
> @@ -1456,6 +1472,33 @@ static int arm_smmu_cmdq_issue_sync(struct 
> arm_smmu_device *smmu)
>  }
>  
>  /* Context descriptor manipulation functions */
> +static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
> +  int ssid, bool leaf)

Re: [PATCH v2 4/8] iommu/arm-smmu-v3: Prepare for SSID support

2019-11-11 Thread Jonathan Cameron
On Fri, 8 Nov 2019 16:25:04 +0100
Jean-Philippe Brucker  wrote:

> When a master supports substream ID, allocate a table with multiple
> context descriptors for its stage-1 domain. For the moment S1CDMax is
> still 0 in the STE, so the additional context descriptors are ignored.
> 
> Context descriptor tables are allocated once for the first master attached
> to a domain. Therefore attaching multiple devices with different SSID
> sizes is tricky, and we currently don't support it.
> 
> As a future improvement it would be nice to at least support attaching a
> SSID-capable device to a domain that isn't using SSID, by reallocating the
> SSID table. This would allow supporting a SSID-capable device that is in
> the same IOMMU group as a bridge, for example. Varying SSID size is less
> of a concern, since the PCIe specification "highly recommends" that
> devices supporting PASID implement all 20 bits of it.
> 
> Signed-off-by: Jean-Philippe Brucker 

Hmm. There are several different refactors in here alongside a few new
bits.  Would be nice to break it up more to make life even easier for
reviewers.   It's not 'so' complex that it's really a problem though
so could leave it as is if you really want to.

One carry over inline on zeroing a coherent allocation...



> ---
>  drivers/iommu/arm-smmu-v3.c | 117 ++--
>  1 file changed, 85 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 33488da8f742..122bed0168a3 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -553,16 +553,22 @@ struct arm_smmu_strtab_l1_desc {
>   dma_addr_t  l2ptr_dma;
>  };
>  
> +struct arm_smmu_ctx_desc {
> + u16 asid;
> + u64 ttbr;
> + u64 tcr;
> + u64 mair;
> +};
> +
> +struct arm_smmu_cd_table {
> + __le64  *ptr;
> + dma_addr_t  ptr_dma;
> +};
> +
>  struct arm_smmu_s1_cfg {
> - __le64  *cdptr;
> - dma_addr_t  cdptr_dma;
> -
> - struct arm_smmu_ctx_desc {
> - u16 asid;
> - u64 ttbr;
> - u64 tcr;
> - u64 mair;
> - }   cd;
> + u8  s1cdmax;
> + struct arm_smmu_cd_tabletable;
> + struct arm_smmu_ctx_desccd;

It might have been a tiny bit nicer to have a precursor patch
that did the change to a pair of structs. Then only functional
changes would be in here.

>  };
>  
>  struct arm_smmu_s2_cfg {
> @@ -1450,6 +1456,31 @@ static int arm_smmu_cmdq_issue_sync(struct 
> arm_smmu_device *smmu)
>  }
>  
>  /* Context descriptor manipulation functions */
> +static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
> + struct arm_smmu_cd_table *table,
> + size_t num_entries)
> +{
> + size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
> +
> + table->ptr = dmam_alloc_coherent(smmu->dev, size, &table->ptr_dma,
> +  GFP_KERNEL | __GFP_ZERO);

We dropped dma_zalloc_coherent because we now zero in dma_alloc_coherent
anyway.  Hence I'm fairly sure that __GFP_ZERO should have no effect.

https://lore.kernel.org/patchwork/patch/1031536/

Am I missing some special corner case here?

> + if (!table->ptr) {
> + dev_warn(smmu->dev,
> +  "failed to allocate context descriptor table\n");
> + return -ENOMEM;
> + }
> + return 0;
> +}
> +
> +static void arm_smmu_free_cd_leaf_table(struct arm_smmu_device *smmu,
> + struct arm_smmu_cd_table *table,
> + size_t num_entries)
> +{
> + size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
> +
> + dmam_free_coherent(smmu->dev, size, table->ptr, table->ptr_dma);
> +}
> +
>  static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
>  {
>   u64 val = 0;
> @@ -1471,6 +1502,7 @@ static void arm_smmu_write_ctx_desc(struct 
> arm_smmu_device *smmu,
>   struct arm_smmu_s1_cfg *cfg)
>  {
>   u64 val;
> + __le64 *cdptr = cfg->table.ptr;
The changes in here would all be in purely mechanical refactor of the structure
patch.
>  
>   /*
>* We don't need to issue any invalidation here, as we'll invalidate
> @@ -1488,12 +1520,29 @@ static void arm_smmu_write_ctx_desc(struct 
> arm_smmu_device *smmu,
>   if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
>   val |= CTXDESC_CD_0_S;
>  
> - cfg->cdptr[0] = cpu_to_le64(val);
> + cdptr[0] = cpu_to_le64(val);
>  
>   val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
> - cfg->cdptr[1] = cpu_to_le64(val);
> + cdptr[1] = cpu_to_le64(val);
>  
> - 

Re: [PATCH v7 3/3] crypto: hisilicon - register zip engine to uacce

2019-11-11 Thread Jonathan Cameron
On Tue, 5 Nov 2019 16:34:48 +0800
zhangfei  wrote:

> Hi, Jonathan
> 
> On 2019/11/1 上午1:53, Jonathan Cameron wrote:
> > On Tue, 29 Oct 2019 14:40:16 +0800
> > Zhangfei Gao  wrote:
> >  
> >> Register qm to uacce framework for user crypto driver
> >>
> >> Signed-off-by: Zhangfei Gao 
> >> Signed-off-by: Zhou Wang   
> > Hi.
> >
> > This shows there is probably a race during setup that you should close.
> > Userspace interface is exposed before the driver is ready to handle it.
> >
> > Few other bits inline.
> >
> > Thanks,
> >
> > Jonathan
> >  
> >> ---
> >>   drivers/crypto/hisilicon/qm.c   | 253 
> >> ++--
> >>   drivers/crypto/hisilicon/qm.h   |  13 +-
> >>   drivers/crypto/hisilicon/zip/zip_main.c |  39 ++---
> >>   include/uapi/misc/uacce/qm.h|  23 +++
> >>   4 files changed, 292 insertions(+), 36 deletions(-)
> >>   create mode 100644 include/uapi/misc/uacce/qm.h
> >>
> >> diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
> >> index a8ed6990..4b9cced 100644
> >> --- a/drivers/crypto/hisilicon/qm.c
> >> +++ b/drivers/crypto/hisilicon/qm.c
> >> @@ -9,6 +9,9 @@
> >>   #include 
> >>   #include 
> >>   #include 
> >> +#include 
> >> +#include 
> >> +#include 
> >>   #include "qm.h"
> >>   
> >>   /* eq/aeq irq enable */
> >> @@ -465,17 +468,22 @@ static void qm_cq_head_update(struct hisi_qp *qp)
> >>   
> >>   static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm)
> >>   {
> >> -  struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
> >> -
> >> -  if (qp->req_cb) {
> >> -  while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
> >> -  dma_rmb();
> >> -  qp->req_cb(qp, qp->sqe + qm->sqe_size * cqe->sq_head);
> >> -  qm_cq_head_update(qp);
> >> -  cqe = qp->cqe + qp->qp_status.cq_head;
> >> -  qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
> >> -qp->qp_status.cq_head, 0);
> >> -  atomic_dec(>qp_status.used);
> >> +  struct qm_cqe *cqe;
> >> +
> >> +  if (qp->event_cb) {
> >> +  qp->event_cb(qp);
> >> +  } else {
> >> +  cqe = qp->cqe + qp->qp_status.cq_head;
> >> +
> >> +  if (qp->req_cb) {
> >> +  while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
> >> +  dma_rmb();
> >> +  qp->req_cb(qp, qp->sqe + qm->sqe_size *
> >> + cqe->sq_head);
> >> +  qm_cq_head_update(qp);
> >> +  cqe = qp->cqe + qp->qp_status.cq_head;
> >> +  atomic_dec(>qp_status.used);
> >> +  }
> >>}
> >>   
> >>/* set c_flag */
> >> @@ -1397,6 +1405,220 @@ static void hisi_qm_cache_wb(struct hisi_qm *qm)
> >>}
> >>   }
> >>   
> >> +static void qm_qp_event_notifier(struct hisi_qp *qp)
> >> +{
> >> +  wake_up_interruptible(>uacce_q->wait);
> >> +}
> >> +
> >> +static int hisi_qm_get_available_instances(struct uacce_device *uacce)
> >> +{
> >> +  int i, ret;
> >> +  struct hisi_qm *qm = uacce->priv;
> >> +
> >> +  read_lock(>qps_lock);
> >> +  for (i = 0, ret = 0; i < qm->qp_num; i++)
> >> +  if (!qm->qp_array[i])
> >> +  ret++;
> >> +  read_unlock(>qps_lock);
> >> +
> >> +  return ret;
> >> +}
> >> +
> >> +static int hisi_qm_uacce_get_queue(struct uacce_device *uacce,
> >> + unsigned long arg,
> >> + struct uacce_queue *q)
> >> +{
> >> +  struct hisi_qm *qm = uacce->priv;
> >> +  struct hisi_qp *qp;
> >> +  u8 alg_type = 0;
> >> +
> >> +  qp = hisi_qm_create_qp(qm, alg_type);
> >> +  if (IS_ERR(qp))
> >> +  return PTR_ERR(qp);
> >> +
> >> +  q->priv = qp;
> >> +  q->uacce = uacce;
> >> +  qp->uacce_q = q;
>

Re: [PATCH v7 2/3] uacce: add uacce driver

2019-11-11 Thread Jonathan Cameron
On Tue, 5 Nov 2019 15:43:31 +0800
zhangfei  wrote:

> Hi, Jonathan
> 
> Thanks for the suggestions
> 
> On 2019/11/1 上午1:13, Jonathan Cameron wrote:
> > On Tue, 29 Oct 2019 14:40:15 +0800
> > Zhangfei Gao  wrote:
> >  
> >> From: Kenneth Lee 
> >>
> >> Uacce (Unified/User-space-access-intended Accelerator Framework) targets to
> >> provide Shared Virtual Addressing (SVA) between accelerators and processes.
> >> So accelerator can access any data structure of the main cpu.
> >> This differs from the data sharing between cpu and io device, which share
> >> data content rather than address.
> >> Since unified address, hardware and user space of process can share the
> >> same virtual address in the communication.
> >>
> >> Uacce create a chrdev for every registration, the queue is allocated to
> >> the process when the chrdev is opened. Then the process can access the
> >> hardware resource by interact with the queue file. By mmap the queue
> >> file space to user space, the process can directly put requests to the
> >> hardware without syscall to the kernel space.
> >>
> >> Signed-off-by: Kenneth Lee 
> >> Signed-off-by: Zaibo Xu 
> >> Signed-off-by: Zhou Wang 
> >> Signed-off-by: Zhangfei Gao   
> > Great, much more compact.
> >
> > I've not gone through this in detail yet but a few initial comments inline.
> >
> > Thanks,
> >
> > Jonathan
> >  
> >> ---
> >>   Documentation/ABI/testing/sysfs-driver-uacce |  53 +++
> >>   drivers/misc/Kconfig |   1 +
> >>   drivers/misc/Makefile|   1 +
> >>   drivers/misc/uacce/Kconfig   |  13 +
> >>   drivers/misc/uacce/Makefile  |   2 +
> >>   drivers/misc/uacce/uacce.c   | 574 
> >> +++
> >>   include/linux/uacce.h| 163 
> >>   include/uapi/misc/uacce/uacce.h  |  38 ++
> >>   8 files changed, 845 insertions(+)
> >>   create mode 100644 Documentation/ABI/testing/sysfs-driver-uacce
> >>   create mode 100644 drivers/misc/uacce/Kconfig
> >>   create mode 100644 drivers/misc/uacce/Makefile
> >>   create mode 100644 drivers/misc/uacce/uacce.c
> >>   create mode 100644 include/linux/uacce.h
> >>   create mode 100644 include/uapi/misc/uacce/uacce.h
> >>
> >> diff --git a/Documentation/ABI/testing/sysfs-driver-uacce 
> >> b/Documentation/ABI/testing/sysfs-driver-uacce
> >> new file mode 100644
> >> index 000..35699dc
> >> --- /dev/null
> >> +++ b/Documentation/ABI/testing/sysfs-driver-uacce
> >> @@ -0,0 +1,53 @@
> >> +What:   /sys/class/uacce//id
> >> +Date:   Oct 2019
> >> +KernelVersion:  5.5
> >> +Contact:linux-accelerat...@lists.ozlabs.org
> >> +Description:Id of the device.
> >> +
> >> +What:   /sys/class/uacce//api
> >> +Date:   Oct 2019
> >> +KernelVersion:  5.5
> >> +Contact:linux-accelerat...@lists.ozlabs.org
> >> +Description:Api of the device, used by application to match the 
> >> correct driver
> >> +
> >> +What:   /sys/class/uacce//flags
> >> +Date:   Oct 2019
> >> +KernelVersion:  5.5
> >> +Contact:linux-accelerat...@lists.ozlabs.org
> >> +Description:Attributes of the device, see UACCE_DEV_xxx flag defined 
> >> in uacce.h
> >> +
> >> +What:   /sys/class/uacce//available_instances
> >> +Date:   Oct 2019
> >> +KernelVersion:  5.5
> >> +Contact:linux-accelerat...@lists.ozlabs.org
> >> +Description:Available instances left of the device
> >> +
> >> +What:   /sys/class/uacce//algorithms
> >> +Date:   Oct 2019
> >> +KernelVersion:  5.5
> >> +Contact:linux-accelerat...@lists.ozlabs.org
> >> +Description:Algorithms supported by this accelerator  
> > How are they separated?  Userspace code needs to know that.
> > (comma, tab, newline?)  
> Yes, will add "separated by new line"
> >  
> >> +
> >> +What:   /sys/class/uacce//qfrt_mmio_size  
> > qfrt is not the most obvious naming ever.  Do we care beyond its
> > a region for this interface?  region_mmio_size maybe?  
> OK,
> >  
> >> +Date:   Oct 2019
> >> +Ker

Re: [PATCH v7 1/3] uacce: Add documents for uacce

2019-10-31 Thread Jonathan Cameron
On Tue, 29 Oct 2019 14:40:14 +0800
Zhangfei Gao  wrote:

> From: Kenneth Lee 
> 
> Uacce (Unified/User-space-access-intended Accelerator Framework) is
> a kernel module targets to provide Shared Virtual Addressing (SVA)
> between the accelerator and process.
> 
> This patch add document to explain how it works.
> 
> Signed-off-by: Kenneth Lee 
> Signed-off-by: Zaibo Xu 
> Signed-off-by: Zhou Wang 
> Signed-off-by: Zhangfei Gao 
> ---
>  Documentation/misc-devices/uacce.rst | 160 
> +++
>  1 file changed, 160 insertions(+)
>  create mode 100644 Documentation/misc-devices/uacce.rst
> 
> diff --git a/Documentation/misc-devices/uacce.rst 
> b/Documentation/misc-devices/uacce.rst
> new file mode 100644
> index 000..ecd5d8b
> --- /dev/null
> +++ b/Documentation/misc-devices/uacce.rst
> @@ -0,0 +1,160 @@
> +.. SPDX-License-Identifier: GPL-2.0
> +
> +Introduction of Uacce
> +=

Fix the underline length to match the title.

> +
> +Uacce (Unified/User-space-access-intended Accelerator Framework) targets to
> +provide Shared Virtual Addressing (SVA) between accelerators and processes.
> +So accelerator can access any data structure of the main cpu.
> +This differs from the data sharing between cpu and io device, which share
> +data content rather than address.

which share only data content rather than address.

> +Because of the unified address, hardware and user space of process can
> +share the same virtual address in the communication.
> +Uacce takes the hardware accelerator as a heterogeneous processor, while
> +IOMMU share the same CPU page tables and as a result the same translation
> +from va to pa.
> +
> +  __   __
> + |  | |  |
> + |  User application (CPU)  | |   Hardware Accelerator   |
> + |__| |__|
> +
> +  | |
> +  | va  | va
> +  V V
> + ____
> +|  |  |  |
> +|   MMU|  |  IOMMU   |
> +|__|  |__|
> +  | |
> +  | |
> +  V pa  V pa
> +  ___
> + |   |
> + |  Memory   |
> + |___|
> +
> +
> +
> +Architecture
> +
> +
> +Uacce is the kernel module, taking charge of iommu and address sharing.
> +The user drivers and libraries are called WarpDrive.
> +
> +The uacce device, built around the IOMMU SVA API, can access multiple
> +address spaces, including the one without PASID.
> +
> +A virtual concept, queue, is used for the communication. It provides a
> +FIFO-like interface. And it maintains a unified address space between the
> +application and all involved hardware.
> +
> + ___  
> 
> +|   |   user API |   
>  |
> +| WarpDrive library | >  |  user 
> driver   |
> +|___|
> ||
> + ||
> + ||
> + | queue fd   |
> + ||
> + ||
> + v|
> + ___ _|
> +|   |   | |   | 
> mmap memory
> +| Other framework   |   |  uacce  |   | 
> r/w interface
> +| crypto/nic/others |   |_|   |
> +|___| |
> + |   ||
> + | register  | register   |
> + |   ||
> + |   ||
> + |_   __  |
> + | 

Re: [PATCH v7 2/3] uacce: add uacce driver

2019-10-31 Thread Jonathan Cameron
On Tue, 29 Oct 2019 14:40:15 +0800
Zhangfei Gao  wrote:

> From: Kenneth Lee 
> 
> Uacce (Unified/User-space-access-intended Accelerator Framework) targets to
> provide Shared Virtual Addressing (SVA) between accelerators and processes.
> So accelerator can access any data structure of the main cpu.
> This differs from the data sharing between cpu and io device, which share
> data content rather than address.
> Since unified address, hardware and user space of process can share the
> same virtual address in the communication.
> 
> Uacce create a chrdev for every registration, the queue is allocated to
> the process when the chrdev is opened. Then the process can access the
> hardware resource by interact with the queue file. By mmap the queue
> file space to user space, the process can directly put requests to the
> hardware without syscall to the kernel space.
> 
> Signed-off-by: Kenneth Lee 
> Signed-off-by: Zaibo Xu 
> Signed-off-by: Zhou Wang 
> Signed-off-by: Zhangfei Gao 

Great, much more compact.

I've not gone through this in detail yet but a few initial comments inline.

Thanks,

Jonathan

> ---
>  Documentation/ABI/testing/sysfs-driver-uacce |  53 +++
>  drivers/misc/Kconfig |   1 +
>  drivers/misc/Makefile|   1 +
>  drivers/misc/uacce/Kconfig   |  13 +
>  drivers/misc/uacce/Makefile  |   2 +
>  drivers/misc/uacce/uacce.c   | 574 
> +++
>  include/linux/uacce.h| 163 
>  include/uapi/misc/uacce/uacce.h  |  38 ++
>  8 files changed, 845 insertions(+)
>  create mode 100644 Documentation/ABI/testing/sysfs-driver-uacce
>  create mode 100644 drivers/misc/uacce/Kconfig
>  create mode 100644 drivers/misc/uacce/Makefile
>  create mode 100644 drivers/misc/uacce/uacce.c
>  create mode 100644 include/linux/uacce.h
>  create mode 100644 include/uapi/misc/uacce/uacce.h
> 
> diff --git a/Documentation/ABI/testing/sysfs-driver-uacce 
> b/Documentation/ABI/testing/sysfs-driver-uacce
> new file mode 100644
> index 000..35699dc
> --- /dev/null
> +++ b/Documentation/ABI/testing/sysfs-driver-uacce
> @@ -0,0 +1,53 @@
> +What:   /sys/class/uacce//id
> +Date:   Oct 2019
> +KernelVersion:  5.5
> +Contact:linux-accelerat...@lists.ozlabs.org
> +Description:Id of the device.
> +
> +What:   /sys/class/uacce//api
> +Date:   Oct 2019
> +KernelVersion:  5.5
> +Contact:linux-accelerat...@lists.ozlabs.org
> +Description:Api of the device, used by application to match the correct 
> driver
> +
> +What:   /sys/class/uacce//flags
> +Date:   Oct 2019
> +KernelVersion:  5.5
> +Contact:linux-accelerat...@lists.ozlabs.org
> +Description:Attributes of the device, see UACCE_DEV_xxx flag defined in 
> uacce.h
> +
> +What:   /sys/class/uacce//available_instances
> +Date:   Oct 2019
> +KernelVersion:  5.5
> +Contact:linux-accelerat...@lists.ozlabs.org
> +Description:Available instances left of the device
> +
> +What:   /sys/class/uacce//algorithms
> +Date:   Oct 2019
> +KernelVersion:  5.5
> +Contact:linux-accelerat...@lists.ozlabs.org
> +Description:Algorithms supported by this accelerator
How are they separated?  Userspace code needs to know that.
(comma, tab, newline?)

> +
> +What:   /sys/class/uacce//qfrt_mmio_size

qfrt is not the most obvious naming ever.  Do we care beyond it being
a region for this interface?  region_mmio_size maybe?

> +Date:   Oct 2019
> +KernelVersion:  5.5
> +Contact:linux-accelerat...@lists.ozlabs.org
> +Description:Page size of mmio region queue file

Size of page in this region, or number of pages in the region?

> +
> +What:   /sys/class/uacce//qfrt_dus_size
> +Date:   Oct 2019
> +KernelVersion:  5.5
> +Contact:linux-accelerat...@lists.ozlabs.org
> +Description:Page size of dus region queue file
> +
> +What:   /sys/class/uacce//numa_distance
> +Date:   Oct 2019
> +KernelVersion:  5.5
> +Contact:linux-accelerat...@lists.ozlabs.org
> +Description:Distance of device node to cpu node

I wonder if we should be doing this in here. There are other standard
ways of obtaining this for the device.  Follow parent and check node_id
there then use the /sys/bus/node path to find out the distances.

> +
> +What:   /sys/class/uacce//node_id
> +Date:   Oct 2019
> +KernelVersion:  5.5
> +Contact:linux-accelerat...@lists.ozlabs.org
> +Description:Id of the numa node
> diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
> index c55b637..929feb0 100644
> --- a/drivers/misc/Kconfig
> +++ b/drivers/misc/Kconfig
> @@ -481,4 +481,5 @@ source "drivers/misc/cxl/Kconfig"
>  source "drivers/misc/ocxl/Kconfig"
>  source "drivers/misc/cardreader/Kconfig"
>  source "drivers/misc/habanalabs/Kconfig"
> +source 

Re: [PATCH v7 3/3] crypto: hisilicon - register zip engine to uacce

2019-10-31 Thread Jonathan Cameron
On Tue, 29 Oct 2019 14:40:16 +0800
Zhangfei Gao  wrote:

> Register qm to uacce framework for user crypto driver
> 
> Signed-off-by: Zhangfei Gao 
> Signed-off-by: Zhou Wang 
Hi. 

This shows there is probably a race during setup that you should close.
Userspace interface is exposed before the driver is ready to handle it.

Few other bits inline.

Thanks,

Jonathan

> ---
>  drivers/crypto/hisilicon/qm.c   | 253 
> ++--
>  drivers/crypto/hisilicon/qm.h   |  13 +-
>  drivers/crypto/hisilicon/zip/zip_main.c |  39 ++---
>  include/uapi/misc/uacce/qm.h|  23 +++
>  4 files changed, 292 insertions(+), 36 deletions(-)
>  create mode 100644 include/uapi/misc/uacce/qm.h
> 
> diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
> index a8ed6990..4b9cced 100644
> --- a/drivers/crypto/hisilicon/qm.c
> +++ b/drivers/crypto/hisilicon/qm.c
> @@ -9,6 +9,9 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
> +#include 
>  #include "qm.h"
>  
>  /* eq/aeq irq enable */
> @@ -465,17 +468,22 @@ static void qm_cq_head_update(struct hisi_qp *qp)
>  
>  static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm)
>  {
> - struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
> -
> - if (qp->req_cb) {
> - while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
> - dma_rmb();
> - qp->req_cb(qp, qp->sqe + qm->sqe_size * cqe->sq_head);
> - qm_cq_head_update(qp);
> - cqe = qp->cqe + qp->qp_status.cq_head;
> - qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
> -   qp->qp_status.cq_head, 0);
> - atomic_dec(>qp_status.used);
> + struct qm_cqe *cqe;
> +
> + if (qp->event_cb) {
> + qp->event_cb(qp);
> + } else {
> + cqe = qp->cqe + qp->qp_status.cq_head;
> +
> + if (qp->req_cb) {
> + while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
> + dma_rmb();
> + qp->req_cb(qp, qp->sqe + qm->sqe_size *
> +cqe->sq_head);
> + qm_cq_head_update(qp);
> + cqe = qp->cqe + qp->qp_status.cq_head;
> + atomic_dec(>qp_status.used);
> + }
>   }
>  
>   /* set c_flag */
> @@ -1397,6 +1405,220 @@ static void hisi_qm_cache_wb(struct hisi_qm *qm)
>   }
>  }
>  
> +static void qm_qp_event_notifier(struct hisi_qp *qp)
> +{
> + wake_up_interruptible(>uacce_q->wait);
> +}
> +
> +static int hisi_qm_get_available_instances(struct uacce_device *uacce)
> +{
> + int i, ret;
> + struct hisi_qm *qm = uacce->priv;
> +
> + read_lock(>qps_lock);
> + for (i = 0, ret = 0; i < qm->qp_num; i++)
> + if (!qm->qp_array[i])
> + ret++;
> + read_unlock(>qps_lock);
> +
> + return ret;
> +}
> +
> +static int hisi_qm_uacce_get_queue(struct uacce_device *uacce,
> +unsigned long arg,
> +struct uacce_queue *q)
> +{
> + struct hisi_qm *qm = uacce->priv;
> + struct hisi_qp *qp;
> + u8 alg_type = 0;
> +
> + qp = hisi_qm_create_qp(qm, alg_type);
> + if (IS_ERR(qp))
> + return PTR_ERR(qp);
> +
> + q->priv = qp;
> + q->uacce = uacce;
> + qp->uacce_q = q;
> + qp->event_cb = qm_qp_event_notifier;
> + qp->pasid = arg;
> +
> + return 0;
> +}
> +
> +static void hisi_qm_uacce_put_queue(struct uacce_queue *q)
> +{
> + struct hisi_qp *qp = q->priv;
> +
> + /*
> +  * As put_queue is only called in uacce_mode=1, and only one queue can
We got rid of the modes I think so comment needs an update.

> +  * be used in this mode. we flush all sqc cache back in put queue.
> +  */
> + hisi_qm_cache_wb(qp->qm);
> +
> + /* need to stop hardware, but can not support in v1 */
> + hisi_qm_release_qp(qp);

Should we just drop support for the v1 hardware if we can't do this?

> +}
> +
> +/* map sq/cq/doorbell to user space */
> +static int hisi_qm_uacce_mmap(struct uacce_queue *q,
> +   struct vm_area_struct *vma,
> +   struct uacce_qfile_region *qfr)
> +{
> + struct hisi_qp *qp = q->priv;
> + struct hisi_qm *qm = qp->qm;
> + size_t sz = vma->vm_end - vma->vm_start;
> + struct pci_dev *pdev = qm->pdev;
> + struct device *dev = >dev;
> + unsigned long vm_pgoff;
> + int ret;
> +
> + switch (qfr->type) {
> + case UACCE_QFRT_MMIO:
> + if (qm->ver == QM_HW_V2) {
> + if (sz > PAGE_SIZE * (QM_DOORBELL_PAGE_NR +
> + QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE))
> + return -EINVAL;
> + } else {
> + 

Re: [PATCH v4 03/22] iommu: Introduce device fault report API

2019-06-19 Thread Jonathan Cameron
On Sun, 9 Jun 2019 06:44:03 -0700
Jacob Pan  wrote:

> Traditionally, device specific faults are detected and handled within
> their own device drivers. When IOMMU is enabled, faults such as DMA
> related transactions are detected by IOMMU. There is no generic
> reporting mechanism to report faults back to the in-kernel device
> driver or the guest OS in case of assigned devices.
> 
> This patch introduces a registration API for device specific fault
> handlers. This differs from the existing iommu_set_fault_handler/
> report_iommu_fault infrastructures in several ways:
> - it allows to report more sophisticated fault events (both
>   unrecoverable faults and page request faults) due to the nature
>   of the iommu_fault struct
> - it is device specific and not domain specific.
> 
> The current iommu_report_device_fault() implementation only handles
> the "shoot and forget" unrecoverable fault case. Handling of page
> request faults or stalled faults will come later.
> 
> Signed-off-by: Jacob Pan 
> Signed-off-by: Ashok Raj 
> Signed-off-by: Jean-Philippe Brucker 
> Signed-off-by: Eric Auger 

A few nitpicks and minor suggestions inline.

> ---
>  drivers/iommu/iommu.c | 127 
> +-
>  include/linux/iommu.h |  33 -
>  2 files changed, 157 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 67ee662..7955184 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -644,6 +644,13 @@ int iommu_group_add_device(struct iommu_group *group, 
> struct device *dev)
>   goto err_free_name;
>   }
>  
> + dev->iommu_param = kzalloc(sizeof(*dev->iommu_param), GFP_KERNEL);
> + if (!dev->iommu_param) {
> + ret = -ENOMEM;
> + goto err_free_name;
> + }
> + mutex_init(>iommu_param->lock);
> +
>   kobject_get(group->devices_kobj);
>  
>   dev->iommu_group = group;
> @@ -674,6 +681,7 @@ int iommu_group_add_device(struct iommu_group *group, 
> struct device *dev)
>   mutex_unlock(>mutex);
>   dev->iommu_group = NULL;
>   kobject_put(group->devices_kobj);
> + kfree(dev->iommu_param);
>  err_free_name:
>   kfree(device->name);
>  err_remove_link:
> @@ -720,7 +728,7 @@ void iommu_group_remove_device(struct device *dev)
>   sysfs_remove_link(>kobj, "iommu_group");
>  
>   trace_remove_device_from_group(group->id, dev);
> -
> + kfree(dev->iommu_param);
>   kfree(device->name);
>   kfree(device);
>   dev->iommu_group = NULL;
> @@ -855,6 +863,123 @@ int iommu_group_unregister_notifier(struct iommu_group 
> *group,
>  EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier);
>  
>  /**
> + * iommu_register_device_fault_handler() - Register a device fault handler
> + * @dev: the device
> + * @handler: the fault handler
> + * @data: private data passed as argument to the handler
> + *
> + * When an IOMMU fault event is received, this handler gets called with the
> + * fault event and data as argument.
> + *
> + * Return 0 if the fault handler was installed successfully, or an error.
> + */
> +int iommu_register_device_fault_handler(struct device *dev,
> + iommu_dev_fault_handler_t handler,
> + void *data)
> +{
> + struct iommu_param *param = dev->iommu_param;
> + int ret = 0;
> +
> + /*
> +  * Device iommu_param should have been allocated when device is
> +  * added to its iommu_group.
> +  */
> + if (!param)
> + return -EINVAL;
> +
> + mutex_lock(>lock);
> + /* Only allow one fault handler registered for each device */
> + if (param->fault_param) {
> + ret = -EBUSY;
> + goto done_unlock;
> + }
> +
> + get_device(dev);
> + param->fault_param =
> + kzalloc(sizeof(struct iommu_fault_param), GFP_KERNEL);
> + if (!param->fault_param) {
> + put_device(dev);
> + ret = -ENOMEM;
> + goto done_unlock;
> + }
> + param->fault_param->handler = handler;
> + param->fault_param->data = data;
> +
> +done_unlock:
> + mutex_unlock(>lock);
> +
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler);
> +
> +/**
> + * iommu_unregister_device_fault_handler() - Unregister the device fault 
> handler
> + * @dev: the device
> + *
> + * Remove the device fault handler installed with
> + * iommu_register_device_fault_handler().
> + *
> + * Return 0 on success, or an error.
> + */
> +int iommu_unregister_device_fault_handler(struct device *dev)
> +{
> + struct iommu_param *param = dev->iommu_param;
> + int ret = 0;
> +
> + if (!param)
> + return -EINVAL;
> +
> + mutex_lock(>lock);
> +
> + if (!param->fault_param)
> + goto unlock;
> +
> + kfree(param->fault_param);
> + param->fault_param = NULL;
> + put_device(dev);
> +unlock:
> + 

Re: [PATCH v4 12/22] iommu: Add I/O ASID allocator

2019-06-18 Thread Jonathan Cameron
On Sun, 9 Jun 2019 06:44:12 -0700
Jacob Pan  wrote:

> From: Jean-Philippe Brucker 
> 
> Some devices might support multiple DMA address spaces, in particular
> those that have the PCI PASID feature. PASID (Process Address Space ID)
> allows to share process address spaces with devices (SVA), partition a
> device into VM-assignable entities (VFIO mdev) or simply provide
> multiple DMA address space to kernel drivers. Add a global PASID
> allocator usable by different drivers at the same time. Name it I/O ASID
> to avoid confusion with ASIDs allocated by arch code, which are usually
> a separate ID space.
> 
> The IOASID space is global. Each device can have its own PASID space,
> but by convention the IOMMU ended up having a global PASID space, so
> that with SVA, each mm_struct is associated to a single PASID.
> 
> The allocator is primarily used by IOMMU subsystem but in rare occasions
> drivers would like to allocate PASIDs for devices that aren't managed by
> an IOMMU, using the same ID space as IOMMU.
> 
> There are two types of allocators:
> 1. default allocator - Always available, uses an XArray to track
> 2. custom allocators - Can be registered at runtime, take precedence
> over the default allocator.
> 
> Custom allocators have these attributes:
> - provides platform specific alloc()/free() functions with private data.
> - allocation results lookup are not provided by the allocator, lookup
>   request must be done by the IOASID framework by its own XArray.
> - allocators can be unregistered at runtime, either fallback to the next
>   custom allocator or to the default allocator.

What is the use case for having a 'stack' of custom allocators?

> - custom allocators can share the same set of alloc()/free() helpers, in
>   this case they also share the same IOASID space, thus the same XArray.
> - switching between allocators requires all outstanding IOASIDs to be
>   freed unless the two allocators share the same alloc()/free() helpers.
In general this approach has a lot of features where the justification is
missing from this particular patch.  It may be useful to add some
more background to this intro?

> 
> Signed-off-by: Jean-Philippe Brucker 
> Signed-off-by: Jacob Pan 
> Link: https://lkml.org/lkml/2019/4/26/462

Various comments inline.  Given the several cups of coffee this took to
review I may well have misunderstood everything ;)

Jonathan
> ---
>  drivers/iommu/Kconfig  |   8 +
>  drivers/iommu/Makefile |   1 +
>  drivers/iommu/ioasid.c | 427 
> +
>  include/linux/ioasid.h |  74 +
>  4 files changed, 510 insertions(+)
>  create mode 100644 drivers/iommu/ioasid.c
>  create mode 100644 include/linux/ioasid.h
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index 83664db..c40c4b5 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -3,6 +3,13 @@
>  config IOMMU_IOVA
>   tristate
>  
> +# The IOASID allocator may also be used by non-IOMMU_API users
> +config IOASID
> + tristate
> + help
> +   Enable the I/O Address Space ID allocator. A single ID space shared
> +   between different users.
> +
>  # IOMMU_API always gets selected by whoever wants it.
>  config IOMMU_API
>   bool
> @@ -207,6 +214,7 @@ config INTEL_IOMMU_SVM
>   depends on INTEL_IOMMU && X86
>   select PCI_PASID
>   select MMU_NOTIFIER
> + select IOASID
>   help
> Shared Virtual Memory (SVM) provides a facility for devices
> to access DMA resources through process address space by
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> index 8c71a15..0efac6f 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -7,6 +7,7 @@ obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
> +obj-$(CONFIG_IOASID) += ioasid.o
>  obj-$(CONFIG_IOMMU_IOVA) += iova.o
>  obj-$(CONFIG_OF_IOMMU)   += of_iommu.o
>  obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
> diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c
> new file mode 100644
> index 000..0919b70
> --- /dev/null
> +++ b/drivers/iommu/ioasid.c
> @@ -0,0 +1,427 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * I/O Address Space ID allocator. There is one global IOASID space, split 
> into
> + * subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and
> + * free IOASIDs with ioasid_alloc and ioasid_free.
> + */
> +#define pr_fmt(fmt)  KBUILD_MODNAME ": " fmt
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +struct ioasid_data {
> + ioasid_t id;
> + struct ioasid_set *set;
> + void *private;
> + struct rcu_head rcu;
> +};
> +
> +/*
> + * struct ioasid_allocator_data - Internal data structure to hold information
> + * about an allocator. There are two types of allocators:
> + *
> + * - 

Re: [PATCH v4 20/22] iommu/vt-d: Add bind guest PASID support

2019-06-18 Thread Jonathan Cameron
On Sun, 9 Jun 2019 06:44:20 -0700
Jacob Pan  wrote:

> When supporting guest SVA with emulated IOMMU, the guest PASID
> table is shadowed in VMM. Updates to guest vIOMMU PASID table
> will result in PASID cache flush which will be passed down to
> the host as bind guest PASID calls.
> 
> For the SL page tables, it will be harvested from device's
> default domain (request w/o PASID), or aux domain in case of
> mediated device.
> 
> .-.  .---.
> |   vIOMMU|  | Guest process CR3, FL only|
> | |  '---'
> ./
> | PASID Entry |--- PASID cache flush -
> '-'   |
> | |   V
> | |CR3 in GPA
> '-'
> Guest
> --| Shadow |--|
>   vv  v
> Host
> .-.  .--.
> |   pIOMMU|  | Bind FL for GVA-GPA  |
> | |  '--'
> ./  |
> | PASID Entry | V (Nested xlate)
> '\.--.
> | |   |SL for GPA-HPA, default domain|
> | |   '--'
> '-'
> Where:
>  - FL = First level/stage one page tables
>  - SL = Second level/stage two page tables
> 
> Signed-off-by: Jacob Pan 
> Signed-off-by: Liu, Yi L 


A few trivial bits inline.  As far as I can tell looks good but I'm not that
familiar with the hardware.

Jonathan

> ---
>  drivers/iommu/intel-iommu.c |   4 +
>  drivers/iommu/intel-svm.c   | 187 
> 
>  include/linux/intel-iommu.h |  13 ++-
>  include/linux/intel-svm.h   |  17 
>  4 files changed, 219 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 7cfa0eb..3b4d712 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -5782,6 +5782,10 @@ const struct iommu_ops intel_iommu_ops = {
>   .dev_enable_feat= intel_iommu_dev_enable_feat,
>   .dev_disable_feat   = intel_iommu_dev_disable_feat,
>   .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
> +#ifdef CONFIG_INTEL_IOMMU_SVM
> + .sva_bind_gpasid= intel_svm_bind_gpasid,
> + .sva_unbind_gpasid  = intel_svm_unbind_gpasid,
> +#endif
>  };
>  
>  static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
> diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
> index 66d98e1..f06a82f 100644
> --- a/drivers/iommu/intel-svm.c
> +++ b/drivers/iommu/intel-svm.c
> @@ -229,6 +229,193 @@ static LIST_HEAD(global_svm_list);
>   list_for_each_entry(sdev, >devs, list) \
>   if (dev == sdev->dev)   \
>  
> +int intel_svm_bind_gpasid(struct iommu_domain *domain,
> + struct device *dev,
> + struct gpasid_bind_data *data)
> +{
> + struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
> + struct intel_svm_dev *sdev;
> + struct intel_svm *svm = NULL;
I think this is set in all the paths that use it.

> + struct dmar_domain *ddomain;
> + int ret = 0;
> +
> + if (WARN_ON(!iommu) || !data)
> + return -EINVAL;
> +
> + if (data->version != IOMMU_GPASID_BIND_VERSION_1 ||
> + data->format != IOMMU_PASID_FORMAT_INTEL_VTD)
> + return -EINVAL;
> +
> + if (dev_is_pci(dev)) {
> + /* VT-d supports devices with full 20 bit PASIDs only */
> + if (pci_max_pasids(to_pci_dev(dev)) != PASID_MAX)
> + return -EINVAL;
> + }
> +
> + /*
> +  * We only check host PASID range, we have no knowledge to check
> +  * guest PASID range nor do we use the guest PASID.
> +  */
> + if (data->hpasid <= 0 || data->hpasid >= PASID_MAX)
> + return -EINVAL;
> +
> + ddomain = to_dmar_domain(domain);
> + /* REVISIT:
> +  * Sanity check adddress width and paging mode support
> +  * width matching in two dimensions:
> +  * 1. paging mode CPU <= IOMMU
> +  * 2. address width Guest <= Host.
> +  */
> + mutex_lock(_mutex);
> + svm = ioasid_find(NULL, data->hpasid, NULL);
> + if (IS_ERR(svm)) {
> + ret = PTR_ERR(svm);
> + goto out;
> + }
> + if (svm) {
> + /*
> +  * If we found svm for the PASID, there must be at
> +  * least one device bond, otherwise svm should be freed.
> +  */
> + BUG_ON(list_empty(>devs));
> +
> + for_each_svm_dev() {
> + /* In case of multiple sub-devices of the same pdev 
> assigned, we should
> +  * allow multiple bind calls with the same PASID and 
> pdev.
> +  */
> + sdev->users++;
> + 

Re: [PATCH v4 19/22] iommu/vt-d: Clean up for SVM device list

2019-06-18 Thread Jonathan Cameron
On Sun, 9 Jun 2019 06:44:19 -0700
Jacob Pan  wrote:

> Use combined macro for_each_svm_dev() to simplify SVM device iteration.
> 
> Suggested-by: Andy Shevchenko 
> Signed-off-by: Jacob Pan 
> Reviewed-by: Eric Auger 
> ---
>  drivers/iommu/intel-svm.c | 79 
> +++
>  1 file changed, 39 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
> index 9cbcc1f..66d98e1 100644
> --- a/drivers/iommu/intel-svm.c
> +++ b/drivers/iommu/intel-svm.c
> @@ -225,6 +225,9 @@ static const struct mmu_notifier_ops intel_mmuops = {
>  
>  static DEFINE_MUTEX(pasid_mutex);
>  static LIST_HEAD(global_svm_list);
> +#define for_each_svm_dev() \
> + list_for_each_entry(sdev, >devs, list) \
> + if (dev == sdev->dev)   \

Could we make this macro less opaque and have it take the svm and dev as
arguments?

>  
>  int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct 
> svm_dev_ops *ops)
>  {
> @@ -271,15 +274,13 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, 
> int flags, struct svm_dev_
>   goto out;
>   }
>  
> - list_for_each_entry(sdev, >devs, list) {
> - if (dev == sdev->dev) {
> - if (sdev->ops != ops) {
> - ret = -EBUSY;
> - goto out;
> - }
> - sdev->users++;
> - goto success;
> + for_each_svm_dev() {
> + if (sdev->ops != ops) {
> + ret = -EBUSY;
> + goto out;
>   }
> + sdev->users++;
> + goto success;
>   }
>  
>   break;
> @@ -409,40 +410,38 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
>   if (!svm)
>   goto out;
>  
> - list_for_each_entry(sdev, >devs, list) {
> - if (dev == sdev->dev) {
> - ret = 0;
> - sdev->users--;
> - if (!sdev->users) {
> - list_del_rcu(>list);
> - /* Flush the PASID cache and IOTLB for this 
> device.
> -  * Note that we do depend on the hardware *not* 
> using
> -  * the PASID any more. Just as we depend on 
> other
> -  * devices never using PASIDs that they have no 
> right
> -  * to use. We have a *shared* PASID table, 
> because it's
> -  * large and has to be physically contiguous. 
> So it's
> -  * hard to be as defensive as we might like. */
> - intel_pasid_tear_down_entry(iommu, dev, 
> svm->pasid);
> - intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, 
> !svm->mm);
> - kfree_rcu(sdev, rcu);
> -
> - if (list_empty(>devs)) {
> - ioasid_free(svm->pasid);
> - if (svm->mm)
> - 
> mmu_notifier_unregister(>notifier, svm->mm);
> -
> - list_del(>list);
> -
> - /* We mandate that no page faults may 
> be outstanding
> -  * for the PASID when 
> intel_svm_unbind_mm() is called.
> -  * If that is not obeyed, subtle errors 
> will happen.
> -  * Let's make them less subtle... */
> - memset(svm, 0x6b, sizeof(*svm));
> - kfree(svm);
> - }
> + for_each_svm_dev() {
> + ret = 0;
> + sdev->users--;
> + if (!sdev->users) {
> + list_del_rcu(>list);
> + /* Flush the PASID cache and IOTLB for this device.
> +  * Note that we do depend on the hardware *not* using
> +  * the PASID any more. Just as we depend on other
> +  * devices never using PASIDs that they have no right
> +  * to use. We have a *shared* PASID table, because it's
> +  * large and has to be physically contiguous. So it's
> +  * hard to be as defensive as we might like. */
> + intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
> + intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, 

Re: [PATCH v4 17/22] iommu/vt-d: Avoid duplicated code for PASID setup

2019-06-18 Thread Jonathan Cameron
On Sun, 9 Jun 2019 06:44:17 -0700
Jacob Pan  wrote:

> After each setup for PASID entry, related translation caches must be flushed.
> We can combine duplicated code into one function which is less error prone.
> 
> Signed-off-by: Jacob Pan 
Formatting nitpick below ;)

Otherwise it's cut and paste.
> ---
>  drivers/iommu/intel-pasid.c | 48 
> +
>  1 file changed, 18 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
> index 1e25539..1ff2ecc 100644
> --- a/drivers/iommu/intel-pasid.c
> +++ b/drivers/iommu/intel-pasid.c
> @@ -522,6 +522,21 @@ void intel_pasid_tear_down_entry(struct intel_iommu 
> *iommu,
>   devtlb_invalidation_with_pasid(iommu, dev, pasid);
>  }
>  
> +static inline void pasid_flush_caches(struct intel_iommu *iommu,
> + struct pasid_entry *pte,
> + int pasid, u16 did)
> +{
> + if (!ecap_coherent(iommu->ecap))
> + clflush_cache_range(pte, sizeof(*pte));
> +
> + if (cap_caching_mode(iommu->cap)) {
> + pasid_cache_invalidation_with_pasid(iommu, did, pasid);
> + iotlb_invalidation_with_pasid(iommu, did, pasid);
> + } else
> + iommu_flush_write_buffer(iommu);

I have some vague recollection kernel style says use braces around
single lines if other blocks in an if / else stack have multiple lines.

I checked, this case is specifically called out

https://www.kernel.org/doc/html/v5.1/process/coding-style.html
> +
This blank line doesn't add anything either ;)
> +}
> +
>  /*
>   * Set up the scalable mode pasid table entry for first only
>   * translation type.
> @@ -567,16 +582,7 @@ int intel_pasid_setup_first_level(struct intel_iommu 
> *iommu,
>   /* Setup Present and PASID Granular Transfer Type: */
>   pasid_set_translation_type(pte, 1);
>   pasid_set_present(pte);
> -
> - if (!ecap_coherent(iommu->ecap))
> - clflush_cache_range(pte, sizeof(*pte));
> -
> - if (cap_caching_mode(iommu->cap)) {
> - pasid_cache_invalidation_with_pasid(iommu, did, pasid);
> - iotlb_invalidation_with_pasid(iommu, did, pasid);
> - } else {
> - iommu_flush_write_buffer(iommu);
> - }
> + pasid_flush_caches(iommu, pte, pasid, did);
>  
>   return 0;
>  }
> @@ -640,16 +646,7 @@ int intel_pasid_setup_second_level(struct intel_iommu 
> *iommu,
>*/
>   pasid_set_sre(pte);
>   pasid_set_present(pte);
> -
> - if (!ecap_coherent(iommu->ecap))
> - clflush_cache_range(pte, sizeof(*pte));
> -
> - if (cap_caching_mode(iommu->cap)) {
> - pasid_cache_invalidation_with_pasid(iommu, did, pasid);
> - iotlb_invalidation_with_pasid(iommu, did, pasid);
> - } else {
> - iommu_flush_write_buffer(iommu);
> - }
> + pasid_flush_caches(iommu, pte, pasid, did);
>  
>   return 0;
>  }
> @@ -683,16 +680,7 @@ int intel_pasid_setup_pass_through(struct intel_iommu 
> *iommu,
>*/
>   pasid_set_sre(pte);
>   pasid_set_present(pte);
> -
> - if (!ecap_coherent(iommu->ecap))
> - clflush_cache_range(pte, sizeof(*pte));
> -
> - if (cap_caching_mode(iommu->cap)) {
> - pasid_cache_invalidation_with_pasid(iommu, did, pasid);
> - iotlb_invalidation_with_pasid(iommu, did, pasid);
> - } else {
> - iommu_flush_write_buffer(iommu);
> - }
> + pasid_flush_caches(iommu, pte, pasid, did);
>  
>   return 0;
>  }




Re: [PATCH v4 15/22] iommu/vt-d: Replace Intel specific PASID allocator with IOASID

2019-06-18 Thread Jonathan Cameron
On Sun, 9 Jun 2019 06:44:15 -0700
Jacob Pan  wrote:

> Make use of generic IOASID code to manage PASID allocation,
> free, and lookup. Replace Intel specific code.
> 
> Signed-off-by: Jacob Pan 
Hi Jacob,

One question inline.

Jonathan

> ---
>  drivers/iommu/intel-iommu.c | 11 +--
>  drivers/iommu/intel-pasid.c | 36 
>  drivers/iommu/intel-svm.c   | 37 +
>  3 files changed, 26 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 5b84994..39b63fe 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -5167,7 +5167,7 @@ static void auxiliary_unlink_device(struct dmar_domain 
> *domain,
>   domain->auxd_refcnt--;
>  
>   if (!domain->auxd_refcnt && domain->default_pasid > 0)
> - intel_pasid_free_id(domain->default_pasid);
> + ioasid_free(domain->default_pasid);
>  }
>  
>  static int aux_domain_add_dev(struct dmar_domain *domain,
> @@ -5185,10 +5185,9 @@ static int aux_domain_add_dev(struct dmar_domain 
> *domain,
>   if (domain->default_pasid <= 0) {
>   int pasid;
>  
> - pasid = intel_pasid_alloc_id(domain, PASID_MIN,
> -  pci_max_pasids(to_pci_dev(dev)),
> -  GFP_KERNEL);
> - if (pasid <= 0) {
> + pasid = ioasid_alloc(NULL, PASID_MIN, 
> pci_max_pasids(to_pci_dev(dev)) - 1,
> + domain);

Is there any point in passing the domain in as the private pointer here?
I can't immediately see anywhere it is read back?

It is also rather confusing as the same driver stashes two different types of 
data
in the same xarray.

> + if (pasid == INVALID_IOASID) {
>   pr_err("Can't allocate default pasid\n");
>   return -ENODEV;
>   }
> @@ -5224,7 +5223,7 @@ static int aux_domain_add_dev(struct dmar_domain 
> *domain,
>   spin_unlock(>lock);
>   spin_unlock_irqrestore(_domain_lock, flags);
>   if (!domain->auxd_refcnt && domain->default_pasid > 0)
> - intel_pasid_free_id(domain->default_pasid);
> + ioasid_free(domain->default_pasid);
>  
>   return ret;
>  }
> diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
> index 69fddd3..1e25539 100644
> --- a/drivers/iommu/intel-pasid.c
> +++ b/drivers/iommu/intel-pasid.c
> @@ -26,42 +26,6 @@
>   */
>  static DEFINE_SPINLOCK(pasid_lock);
>  u32 intel_pasid_max_id = PASID_MAX;
> -static DEFINE_IDR(pasid_idr);
> -
> -int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp)
> -{
> - int ret, min, max;
> -
> - min = max_t(int, start, PASID_MIN);
> - max = min_t(int, end, intel_pasid_max_id);
> -
> - WARN_ON(in_interrupt());
> - idr_preload(gfp);
> - spin_lock(_lock);
> - ret = idr_alloc(_idr, ptr, min, max, GFP_ATOMIC);
> - spin_unlock(_lock);
> - idr_preload_end();
> -
> - return ret;
> -}
> -
> -void intel_pasid_free_id(int pasid)
> -{
> - spin_lock(_lock);
> - idr_remove(_idr, pasid);
> - spin_unlock(_lock);
> -}
> -
> -void *intel_pasid_lookup_id(int pasid)
> -{
> - void *p;
> -
> - spin_lock(_lock);
> - p = idr_find(_idr, pasid);
> - spin_unlock(_lock);
> -
> - return p;
> -}
>  
>  int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid)
>  {
> diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
> index 8f87304..9cbcc1f 100644
> --- a/drivers/iommu/intel-svm.c
> +++ b/drivers/iommu/intel-svm.c
> @@ -25,6 +25,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include "intel-pasid.h"
> @@ -332,16 +333,15 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, 
> int flags, struct svm_dev_
>   if (pasid_max > intel_pasid_max_id)
>   pasid_max = intel_pasid_max_id;
>  
> - /* Do not use PASID 0 in caching mode (virtualised IOMMU) */
> - ret = intel_pasid_alloc_id(svm,
> -!!cap_caching_mode(iommu->cap),
> -pasid_max - 1, GFP_KERNEL);
> - if (ret < 0) {
> + /* Do not use PASID 0, reserved for RID to PASID */
> + svm->pasid = ioasid_alloc(NULL, PASID_MIN,
> + pasid_max - 1, svm);
> + if (svm->pasid == INVALID_IOASID) {
>   kfree(svm);
>   kfree(sdev);
> + ret = ENOSPC;
>   goto out;
>   }
> - svm->pasid = ret;
>   svm->notifier.ops = _mmuops;
>   svm->mm = mm;
>   svm->flags = flags;
> @@ -351,7 +351,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int 
> flags, struct svm_dev_
>   if (mm) {
>  

Re: [PATCH v4 04/22] iommu: Add recoverable fault reporting

2019-06-18 Thread Jonathan Cameron
On Sun, 9 Jun 2019 06:44:04 -0700
Jacob Pan  wrote:

> From: Jean-Philippe Brucker 
> 
> Some IOMMU hardware features, for example PCI's PRI and Arm SMMU's Stall,
> enable recoverable I/O page faults. Allow IOMMU drivers to report PRI Page
> Requests and Stall events through the new fault reporting API. The
> consumer of the fault can be either an I/O page fault handler in the host,
> or a guest OS.
> 
> Once handled, the fault must be completed by sending a page response back
> to the IOMMU. Add an iommu_page_response() function to complete a page
> fault.
> 
> Signed-off-by: Jacob Pan 
> Signed-off-by: Jean-Philippe Brucker 
One totally trivial ordering of docs comment in here.  Otherwise good.

Jonathan
> ---
>  drivers/iommu/iommu.c | 77 
> ++-
>  include/linux/iommu.h | 51 ++
>  2 files changed, 127 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 7955184..13b301c 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -869,7 +869,14 @@ EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier);
>   * @data: private data passed as argument to the handler
>   *
>   * When an IOMMU fault event is received, this handler gets called with the
> - * fault event and data as argument.
> + * fault event and data as argument. The handler should return 0 on success. 
> If
> + * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the handler should also
> + * complete the fault by calling iommu_page_response() with one of the 
> following
> + * response code:
> + * - IOMMU_PAGE_RESP_SUCCESS: retry the translation
> + * - IOMMU_PAGE_RESP_INVALID: terminate the fault
> + * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting
> + *   page faults if possible.
>   *
>   * Return 0 if the fault handler was installed successfully, or an error.
>   */
> @@ -904,6 +911,8 @@ int iommu_register_device_fault_handler(struct device 
> *dev,
>   }
>   param->fault_param->handler = handler;
>   param->fault_param->data = data;
> + mutex_init(>fault_param->lock);
> + INIT_LIST_HEAD(>fault_param->faults);
>  
>  done_unlock:
>   mutex_unlock(>lock);
> @@ -934,6 +943,12 @@ int iommu_unregister_device_fault_handler(struct device 
> *dev)
>   if (!param->fault_param)
>   goto unlock;
>  
> + /* we cannot unregister handler if there are pending faults */
> + if (!list_empty(>fault_param->faults)) {
> + ret = -EBUSY;
> + goto unlock;
> + }
> +
>   kfree(param->fault_param);
>   param->fault_param = NULL;
>   put_device(dev);
> @@ -958,6 +973,7 @@ EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler);
>  int iommu_report_device_fault(struct device *dev, struct iommu_fault_event 
> *evt)
>  {
>   struct iommu_param *param = dev->iommu_param;
> + struct iommu_fault_event *evt_pending;
>   struct iommu_fault_param *fparam;
>   int ret = 0;
>  
> @@ -972,6 +988,20 @@ int iommu_report_device_fault(struct device *dev, struct 
> iommu_fault_event *evt)
>   ret = -EINVAL;
>   goto done_unlock;
>   }
> +
> + if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
> + (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
> + evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event),
> +   GFP_KERNEL);
> + if (!evt_pending) {
> + ret = -ENOMEM;
> + goto done_unlock;
> + }
> + mutex_lock(>lock);
> + list_add_tail(_pending->list, >faults);
> + mutex_unlock(>lock);
> + }
> +
>   ret = fparam->handler(evt, fparam->data);
>  done_unlock:
>   mutex_unlock(>lock);
> @@ -1513,6 +1543,51 @@ int iommu_attach_device(struct iommu_domain *domain, 
> struct device *dev)
>  }
>  EXPORT_SYMBOL_GPL(iommu_attach_device);
>  
> +int iommu_page_response(struct device *dev,
> + struct page_response_msg *msg)
> +{
> + struct iommu_param *param = dev->iommu_param;
> + int ret = -EINVAL;
> + struct iommu_fault_event *evt;
> + struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> +
> + if (!domain || !domain->ops->page_response)
> + return -ENODEV;
> +
> + /*
> +  * Device iommu_param should have been allocated when device is
> +  * added to its iommu_group.
> +  */
> + if (!param || !param->fault_param)
> + return -EINVAL;
> +
> + /* Only send response if there is a fault report pending */
> + mutex_lock(&param->fault_param->lock);
> + if (list_empty(&param->fault_param->faults)) {
> + pr_warn("no pending PRQ, drop response\n");
> + goto done_unlock;
> + }
> + /*
> +  * Check if we have a matching page request pending to respond,
> +  * otherwise return -EINVAL
> +  */
> + 

Re: [PATCH v4 02/22] iommu: Introduce device fault data

2019-06-18 Thread Jonathan Cameron
On Sun, 9 Jun 2019 06:44:02 -0700
Jacob Pan  wrote:

> Device faults detected by IOMMU can be reported outside the IOMMU
> subsystem for further processing. This patch introduces
> a generic device fault data structure.
> 
> The fault can be either an unrecoverable fault or a page request,
> also referred to as a recoverable fault.
> 
> We only care about non internal faults that are likely to be reported
> to an external subsystem.
> 
> Signed-off-by: Jacob Pan 
> Signed-off-by: Jean-Philippe Brucker 
> Signed-off-by: Liu, Yi L 
> Signed-off-by: Ashok Raj 
> Signed-off-by: Eric Auger 

A few trivial nitpicks in here.

Otherwise looks straight forward and sensible to me.

Jonathan
> ---
>  include/linux/iommu.h  |  44 +
>  include/uapi/linux/iommu.h | 118 
> +
>  2 files changed, 162 insertions(+)
>  create mode 100644 include/uapi/linux/iommu.h
> 
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index a815cf6..7890a92 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -25,6 +25,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #define IOMMU_READ   (1 << 0)
>  #define IOMMU_WRITE  (1 << 1)
> @@ -49,6 +50,7 @@ struct device;
>  struct iommu_domain;
>  struct notifier_block;
>  struct iommu_sva;
> +struct iommu_fault_event;
>  
>  /* iommu fault flags */
>  #define IOMMU_FAULT_READ 0x0
> @@ -58,6 +60,7 @@ typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
>   struct device *, unsigned long, int, void *);
>  typedef int (*iommu_mm_exit_handler_t)(struct device *dev, struct iommu_sva 
> *,
>  void *);
> +typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault_event *, void *);
>  
>  struct iommu_domain_geometry {
>   dma_addr_t aperture_start; /* First address that can be mapped*/
> @@ -301,6 +304,46 @@ struct iommu_device {
>   struct device *dev;
>  };
>  
> +/**
> + * struct iommu_fault_event - Generic fault event
> + *
> + * Can represent recoverable faults such as a page requests or
> + * unrecoverable faults such as DMA or IRQ remapping faults.
> + *
> + * @fault: fault descriptor
> + * @iommu_private: used by the IOMMU driver for storing fault-specific
> + * data. Users should not modify this field before
> + * sending the fault response.
> + */
> +struct iommu_fault_event {
> + struct iommu_fault fault;
> + u64 iommu_private;
> +};
> +
> +/**
> + * struct iommu_fault_param - per-device IOMMU fault data
> + * @dev_fault_handler: Callback function to handle IOMMU faults at device 
> level
> + * @data: handler private data
> + *

No need for this blank line.  Seems inconsistent with other docs in here.

Also, there is a docs fixup in patch 3 which should be pulled back to here.

> + */
> +struct iommu_fault_param {
> + iommu_dev_fault_handler_t handler;
> + void *data;
> +};
> +
> +/**
> + * struct iommu_param - collection of per-device IOMMU data
> + *
> + * @fault_param: IOMMU detected device fault reporting data
> + *
> + * TODO: migrate other per device data pointers under iommu_dev_data, e.g.
> + *   struct iommu_group  *iommu_group;
> + *   struct iommu_fwspec *iommu_fwspec;
> + */
> +struct iommu_param {
> + struct iommu_fault_param *fault_param;
Is it actually worth having the indirection of a pointer here as opposed
to just embedding the actual structure?  The null value is used in places
but can just use the handler being null for the same job I think...

It reduces the code needed in patch 3 a bit.

It gets a bit bigger in patch 4, but is still only about 16 bytes.

> +};
> +
>  int  iommu_device_register(struct iommu_device *iommu);
>  void iommu_device_unregister(struct iommu_device *iommu);
>  int  iommu_device_sysfs_add(struct iommu_device *iommu,
> @@ -504,6 +547,7 @@ struct iommu_ops {};
>  struct iommu_group {};
>  struct iommu_fwspec {};
>  struct iommu_device {};
> +struct iommu_fault_param {};
>  
>  static inline bool iommu_present(struct bus_type *bus)
>  {
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> new file mode 100644
> index 000..aaa3b6a
> --- /dev/null
> +++ b/include/uapi/linux/iommu.h
> @@ -0,0 +1,118 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/*
> + * IOMMU user API definitions
> + */
> +
> +#ifndef _UAPI_IOMMU_H
> +#define _UAPI_IOMMU_H
> +
> +#include 
> +
> +#define IOMMU_FAULT_PERM_READ(1 << 0) /* read */
> +#define IOMMU_FAULT_PERM_WRITE   (1 << 1) /* write */
> +#define IOMMU_FAULT_PERM_EXEC(1 << 2) /* exec */
> +#define IOMMU_FAULT_PERM_PRIV(1 << 3) /* privileged */
> +
> +/* Generic fault types, can be expanded IRQ remapping fault */
> +enum iommu_fault_type {
> + IOMMU_FAULT_DMA_UNRECOV = 1,/* unrecoverable fault */
> + IOMMU_FAULT_PAGE_REQ,   /* page request fault */
> +};
> +
> +enum 

Re: [PATCH v4 08/22] iommu: Introduce attach/detach_pasid_table API

2019-06-18 Thread Jonathan Cameron
On Sun, 9 Jun 2019 06:44:08 -0700
Jacob Pan  wrote:

> In virtualization use case, when a guest is assigned
> a PCI host device, protected by a virtual IOMMU on the guest,
> the physical IOMMU must be programmed to be consistent with
> the guest mappings. If the physical IOMMU supports two
> translation stages it makes sense to program guest mappings
> onto the first stage/level (ARM/Intel terminology) while the host
> owns the stage/level 2.
> 
> In that case, it is mandated to trap on guest configuration
> settings and pass those to the physical iommu driver.
> 
> This patch adds a new API to the iommu subsystem that allows
> to set/unset the pasid table information.
> 
> A generic iommu_pasid_table_config struct is introduced in
> a new iommu.h uapi header. This is going to be used by the VFIO
> user API.

Another case where strictly speaking stuff is introduced that this series
doesn't use.  I don't know what the plans are to merge the various
related series though so this might make sense in general. Right now
it just bloats this series a bit..
> 
> Signed-off-by: Jean-Philippe Brucker 
> Signed-off-by: Liu, Yi L 
> Signed-off-by: Ashok Raj 
> Signed-off-by: Jacob Pan 
> Signed-off-by: Eric Auger 
> Reviewed-by: Jean-Philippe Brucker 
> ---
>  drivers/iommu/iommu.c  | 19 +
>  include/linux/iommu.h  | 18 
>  include/uapi/linux/iommu.h | 52 
> ++
>  3 files changed, 89 insertions(+)
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 166adb8..4496ccd 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1619,6 +1619,25 @@ int iommu_page_response(struct device *dev,
>  }
>  EXPORT_SYMBOL_GPL(iommu_page_response);
>  
> +int iommu_attach_pasid_table(struct iommu_domain *domain,
> +  struct iommu_pasid_table_config *cfg)
> +{
> + if (unlikely(!domain->ops->attach_pasid_table))
> + return -ENODEV;
> +
> + return domain->ops->attach_pasid_table(domain, cfg);
> +}
> +EXPORT_SYMBOL_GPL(iommu_attach_pasid_table);
> +
> +void iommu_detach_pasid_table(struct iommu_domain *domain)
> +{
> + if (unlikely(!domain->ops->detach_pasid_table))
> + return;
> +
> + domain->ops->detach_pasid_table(domain);
> +}
> +EXPORT_SYMBOL_GPL(iommu_detach_pasid_table);
> +
>  static void __iommu_detach_device(struct iommu_domain *domain,
> struct device *dev)
>  {
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 950347b..d3edb10 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -264,6 +264,8 @@ struct page_response_msg {
>   * @sva_unbind: Unbind process address space from device
>   * @sva_get_pasid: Get PASID associated to a SVA handle
>   * @page_response: handle page request response
> + * @attach_pasid_table: attach a pasid table
> + * @detach_pasid_table: detach the pasid table
>   * @pgsize_bitmap: bitmap of all possible supported page sizes
>   */
>  struct iommu_ops {
> @@ -323,6 +325,9 @@ struct iommu_ops {
> void *drvdata);
>   void (*sva_unbind)(struct iommu_sva *handle);
>   int (*sva_get_pasid)(struct iommu_sva *handle);
> + int (*attach_pasid_table)(struct iommu_domain *domain,
> +   struct iommu_pasid_table_config *cfg);
> + void (*detach_pasid_table)(struct iommu_domain *domain);
>  
>   int (*page_response)(struct device *dev, struct page_response_msg *msg);
>  
> @@ -434,6 +439,9 @@ extern int iommu_attach_device(struct iommu_domain 
> *domain,
>  struct device *dev);
>  extern void iommu_detach_device(struct iommu_domain *domain,
>   struct device *dev);
> +extern int iommu_attach_pasid_table(struct iommu_domain *domain,
> + struct iommu_pasid_table_config *cfg);
> +extern void iommu_detach_pasid_table(struct iommu_domain *domain);
>  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
>  extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
>  extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
> @@ -947,6 +955,13 @@ iommu_aux_get_pasid(struct iommu_domain *domain, struct 
> device *dev)
>   return -ENODEV;
>  }
>  
> +static inline
> +int iommu_attach_pasid_table(struct iommu_domain *domain,
> +  struct iommu_pasid_table_config *cfg)
> +{
> + return -ENODEV;
> +}
> +
>  static inline struct iommu_sva *
>  iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void 
> *drvdata)
>  {
> @@ -968,6 +983,9 @@ static inline int iommu_sva_get_pasid(struct iommu_sva 
> *handle)
>   return IOMMU_PASID_INVALID;
>  }
>  
> +static inline
> +void iommu_detach_pasid_table(struct iommu_domain *domain) {}
> +
>  #endif /* CONFIG_IOMMU_API */
>  
>  #ifdef CONFIG_IOMMU_DEBUGFS
> diff --git 

Re: [PATCH v4 09/22] iommu: Introduce cache_invalidate API

2019-06-18 Thread Jonathan Cameron
On Sun, 9 Jun 2019 06:44:09 -0700
Jacob Pan  wrote:

> From: Liu Yi L 
> 
> In any virtualization use case, when the first translation stage
> is "owned" by the guest OS, the host IOMMU driver has no knowledge
> of caching structure updates unless the guest invalidation activities
> are trapped by the virtualizer and passed down to the host.
> 
> Since the invalidation data are obtained from user space and will be
> written into physical IOMMU, we must allow security check at various
> layers. Therefore, generic invalidation data format are proposed here,
> model specific IOMMU drivers need to convert them into their own format.
> 
> Signed-off-by: Liu Yi L 
> Signed-off-by: Jacob Pan 
> Signed-off-by: Ashok Raj 
> Signed-off-by: Eric Auger 
> Signed-off-by: Jean-Philippe Brucker 
Some comment ordering nitpicks.  Nothing important.

Jonathan

> ---
>  drivers/iommu/iommu.c  |  10 +
>  include/linux/iommu.h  |  14 ++
>  include/uapi/linux/iommu.h | 110 
> +
>  3 files changed, 134 insertions(+)
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 4496ccd..1758b57 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1638,6 +1638,16 @@ void iommu_detach_pasid_table(struct iommu_domain 
> *domain)
>  }
>  EXPORT_SYMBOL_GPL(iommu_detach_pasid_table);
>  
> +int iommu_cache_invalidate(struct iommu_domain *domain, struct device *dev,
> +struct iommu_cache_invalidate_info *inv_info)
> +{
> + if (unlikely(!domain->ops->cache_invalidate))
> + return -ENODEV;
> +
> + return domain->ops->cache_invalidate(domain, dev, inv_info);
> +}
> +EXPORT_SYMBOL_GPL(iommu_cache_invalidate);
> +
>  static void __iommu_detach_device(struct iommu_domain *domain,
> struct device *dev)
>  {
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index d3edb10..7a37336 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -266,6 +266,7 @@ struct page_response_msg {
>   * @page_response: handle page request response
>   * @attach_pasid_table: attach a pasid table
>   * @detach_pasid_table: detach the pasid table
> + * @cache_invalidate: invalidate translation caches
>   * @pgsize_bitmap: bitmap of all possible supported page sizes
>   */
>  struct iommu_ops {
> @@ -330,6 +331,8 @@ struct iommu_ops {
>   void (*detach_pasid_table)(struct iommu_domain *domain);
>  
>   int (*page_response)(struct device *dev, struct page_response_msg *msg);
> + int (*cache_invalidate)(struct iommu_domain *domain, struct device *dev,
> + struct iommu_cache_invalidate_info *inv_info);
>  
>   unsigned long pgsize_bitmap;
>  };
> @@ -442,6 +445,9 @@ extern void iommu_detach_device(struct iommu_domain 
> *domain,
>  extern int iommu_attach_pasid_table(struct iommu_domain *domain,
>   struct iommu_pasid_table_config *cfg);
>  extern void iommu_detach_pasid_table(struct iommu_domain *domain);
> +extern int iommu_cache_invalidate(struct iommu_domain *domain,
> +   struct device *dev,
> +   struct iommu_cache_invalidate_info *inv_info);
>  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
>  extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
>  extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
> @@ -986,6 +992,14 @@ static inline int iommu_sva_get_pasid(struct iommu_sva 
> *handle)
>  static inline
>  void iommu_detach_pasid_table(struct iommu_domain *domain) {}
>  
> +static inline int
> +iommu_cache_invalidate(struct iommu_domain *domain,
> +struct device *dev,
> +struct iommu_cache_invalidate_info *inv_info)
> +{
> + return -ENODEV;
> +}
> +
>  #endif /* CONFIG_IOMMU_API */
>  
>  #ifdef CONFIG_IOMMU_DEBUGFS
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> index 3976767..ca4b753 100644
> --- a/include/uapi/linux/iommu.h
> +++ b/include/uapi/linux/iommu.h
> @@ -167,4 +167,114 @@ struct iommu_pasid_table_config {
>   };
>  };
>  
> +/* defines the granularity of the invalidation */
> +enum iommu_inv_granularity {
> + IOMMU_INV_GRANU_DOMAIN, /* domain-selective invalidation */
> + IOMMU_INV_GRANU_PASID,  /* PASID-selective invalidation */
> + IOMMU_INV_GRANU_ADDR,   /* page-selective invalidation */
> + IOMMU_INV_GRANU_NR, /* number of invalidation granularities */
> +};
> +
> +/**
> + * struct iommu_inv_addr_info - Address Selective Invalidation Structure
> + *
> + * @flags: indicates the granularity of the address-selective invalidation
> + * - If the PASID bit is set, the @pasid field is populated and the 
> invalidation
> + *   relates to cache entries tagged with this PASID and matching the address
> + *   range.
> + * - If ARCHID bit is set, @archid is populated and the 

Re: [PATCH v4 10/22] iommu: Fix compile error without IOMMU_API

2019-06-18 Thread Jonathan Cameron
On Sun, 9 Jun 2019 06:44:10 -0700
Jacob Pan  wrote:

> struct page_response_msg needs to be defined outside CONFIG_IOMMU_API.

What was the error? 

If this is a fix for an earlier patch in this series role it in there
(or put it before it). If more general we should add a fixes tag.

Jonathan
> 
> Signed-off-by: Jacob Pan 
> ---
>  include/linux/iommu.h | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 7a37336..8d766a8 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -189,8 +189,6 @@ struct iommu_sva_ops {
>   iommu_mm_exit_handler_t mm_exit;
>  };
>  
> -#ifdef CONFIG_IOMMU_API
> -
>  /**
>   * enum page_response_code - Return status of fault handlers, telling the 
> IOMMU
>   * driver how to proceed with the fault.
> @@ -227,6 +225,7 @@ struct page_response_msg {
>   u64 iommu_data;
>  };
>  
> +#ifdef CONFIG_IOMMU_API
>  /**
>   * struct iommu_ops - iommu ops and capabilities
>   * @capable: check capability




Re: [PATCH 8/8] iommu/arm-smmu-v3: Add support for PCI PASID

2019-06-11 Thread Jonathan Cameron
On Mon, 10 Jun 2019 19:47:14 +0100
Jean-Philippe Brucker  wrote:

> Enable PASID for PCI devices that support it. Since the SSID tables are
> allocated by arm_smmu_attach_dev(), PASID has to be enabled early enough.
> arm_smmu_dev_feature_enable() would be too late, since by that time the
> main DMA domain has already been attached. Do it in add_device() instead.
> 
> Signed-off-by: Jean-Philippe Brucker 
Nitpick in line.

Thanks,

Jonathan
> ---
>  drivers/iommu/arm-smmu-v3.c | 51 -
>  1 file changed, 50 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 972bfb80f964..a8a516d9ff10 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -2197,6 +2197,49 @@ static void arm_smmu_disable_ats(struct 
> arm_smmu_master *master)
>   master->ats_enabled = false;
>  }
>  
> +static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
> +{
> + int ret;
> + int features;
> + int num_pasids;
> + struct pci_dev *pdev;
> +
> + if (!dev_is_pci(master->dev))
> + return -ENOSYS;
> +
> + pdev = to_pci_dev(master->dev);
> +
> + features = pci_pasid_features(pdev);
> + if (features < 0)
> + return -ENOSYS;
> +
> + num_pasids = pci_max_pasids(pdev);
> + if (num_pasids <= 0)
> + return -ENOSYS;
> +
> + ret = pci_enable_pasid(pdev, features);
> + if (!ret)
> + master->ssid_bits = min_t(u8, ilog2(num_pasids),
> +   master->smmu->ssid_bits);
> + return ret;
> +}
> +
> +static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
> +{
> + struct pci_dev *pdev;
> +
> + if (!dev_is_pci(master->dev))
> + return;
> +
> + pdev = to_pci_dev(master->dev);
> +
> + if (!pdev->pasid_enabled)
> + return;
> +
> + pci_disable_pasid(pdev);
> + master->ssid_bits = 0;

If we are being really fussy about ordering, why have this set of
ssid_bits after pci_disable_pasid rather than before (to reverse order
of .._enable_pasid)?

> +}
> +
>  static void arm_smmu_detach_dev(struct arm_smmu_master *master)
>  {
>   unsigned long flags;
> @@ -2413,6 +2456,9 @@ static int arm_smmu_add_device(struct device *dev)
>  
>   master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
>  
> + /* Note that PASID must be enabled before, and disabled after ATS */
> + arm_smmu_enable_pasid(master);
> +
>   /*
>* If the SMMU doesn't support 2-stage CD, limit the linear
>* tables to a reasonable number of contexts, let's say
> @@ -2423,7 +2469,7 @@ static int arm_smmu_add_device(struct device *dev)
>  
>   ret = iommu_device_link(&smmu->iommu, dev);
>   if (ret)
> - goto err_free_master;
> + goto err_disable_pasid;
>  
>   group = iommu_group_get_for_dev(dev);
>   if (IS_ERR(group)) {
> @@ -2436,6 +2482,8 @@ static int arm_smmu_add_device(struct device *dev)
>  
>  err_unlink:
>   iommu_device_unlink(&smmu->iommu, dev);
> +err_disable_pasid:
> + arm_smmu_disable_pasid(master);
>  err_free_master:
>   kfree(master);
>   fwspec->iommu_priv = NULL;
> @@ -2456,6 +2504,7 @@ static void arm_smmu_remove_device(struct device *dev)
>   arm_smmu_detach_dev(master);
>   iommu_group_remove_device(dev);
>   iommu_device_unlink(&smmu->iommu, dev);
> + arm_smmu_disable_pasid(master);
>   kfree(master);
>   iommu_fwspec_free(dev);
>  }




Re: [PATCH 5/8] iommu/arm-smmu-v3: Add second level of context descriptor table

2019-06-11 Thread Jonathan Cameron
On Mon, 10 Jun 2019 19:47:11 +0100
Jean-Philippe Brucker  wrote:

> The SMMU can support up to 20 bits of SSID. Add a second level of page
> tables to accommodate this. Devices that support more than 1024 SSIDs now
> have a table of 1024 L1 entries (8kB), pointing to tables of 1024 context
> descriptors (64kB), allocated on demand.
> 
> Signed-off-by: Jean-Philippe Brucker 
One trivial typo.

Thanks,

Jonathan
> ---
>  drivers/iommu/arm-smmu-v3.c | 136 +---
>  1 file changed, 128 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index d90eb604b65d..326b71793336 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -216,6 +216,8 @@
>  
>  #define STRTAB_STE_0_S1FMT   GENMASK_ULL(5, 4)
>  #define STRTAB_STE_0_S1FMT_LINEAR0
> +#define STRTAB_STE_0_S1FMT_4K_L2 1
> +#define STRTAB_STE_0_S1FMT_64K_L22
>  #define STRTAB_STE_0_S1CTXPTR_MASK   GENMASK_ULL(51, 6)
>  #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
>  
> @@ -255,6 +257,18 @@
>  
>  #define STRTAB_STE_3_S2TTB_MASK  GENMASK_ULL(51, 4)
>  
> +/*
> + * Linear: when less than 1024 SSIDs are supported
> + * 2lvl: at most 1024 L1 entrie,

entries?

> + *  1024 lazy entries per table.
> + */
> +#define CTXDESC_SPLIT10
> +#define CTXDESC_NUM_L2_ENTRIES   (1 << CTXDESC_SPLIT)
> +
> +#define CTXDESC_L1_DESC_DWORD1
> +#define CTXDESC_L1_DESC_VALID1
> +#define CTXDESC_L1_DESC_L2PTR_MASK   GENMASK_ULL(51, 12)
> +
>  /* Context descriptor (stage-1 only) */
>  #define CTXDESC_CD_DWORDS8
>  #define CTXDESC_CD_0_TCR_T0SZGENMASK_ULL(5, 0)
> @@ -530,7 +544,10 @@ struct arm_smmu_ctx_desc {
>  struct arm_smmu_s1_cfg {
>   u8  s1fmt;
>   u8  s1cdmax;
> - struct arm_smmu_cd_tabletable;
> + struct arm_smmu_cd_table*tables;
> + size_t  num_tables;
> + __le64  *l1ptr;
> + dma_addr_t  l1ptr_dma;
>  
>   /* Context descriptor 0, when substreams are disabled or s1dss = 0b10 */
>   struct arm_smmu_ctx_desccd;
> @@ -1118,12 +1135,51 @@ static void arm_smmu_free_cd_leaf_table(struct 
> arm_smmu_device *smmu,
>  {
>   size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
>  
> + if (!table->ptr)
> + return;
>   dmam_free_coherent(smmu->dev, size, table->ptr, table->ptr_dma);
>  }
>  
> -static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_s1_cfg *cfg, u32 ssid)
> +static void arm_smmu_write_cd_l1_desc(__le64 *dst,
> +   struct arm_smmu_cd_table *table)
>  {
> - return cfg->table.ptr + ssid * CTXDESC_CD_DWORDS;
> + u64 val = (table->ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
> +   CTXDESC_L1_DESC_VALID;
> +
> + *dst = cpu_to_le64(val);
> +}
> +
> +static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
> +u32 ssid)
> +{
> + unsigned int idx;
> + struct arm_smmu_cd_table *table;
> + struct arm_smmu_device *smmu = smmu_domain->smmu;
> + struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
> +
> + if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) {
> + table = &cfg->tables[0];
> + idx = ssid;
> + } else {
> + idx = ssid >> CTXDESC_SPLIT;
> + if (idx >= cfg->num_tables)
> + return NULL;
> +
> + table = &cfg->tables[idx];
> + if (!table->ptr) {
> + __le64 *l1ptr = cfg->l1ptr + idx * 
> CTXDESC_L1_DESC_DWORD;
> +
> + if (arm_smmu_alloc_cd_leaf_table(smmu, table,
> +  
> CTXDESC_NUM_L2_ENTRIES))
> + return NULL;
> +
> + arm_smmu_write_cd_l1_desc(l1ptr, table);
> + /* An invalid L1 entry is allowed to be cached */
> + arm_smmu_sync_cd(smmu_domain, ssid, false);
> + }
> + idx = ssid & (CTXDESC_NUM_L2_ENTRIES - 1);
> + }
> + return table->ptr + idx * CTXDESC_CD_DWORDS;
>  }
>  
>  static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
> @@ -1149,7 +1205,7 @@ static int arm_smmu_write_ctx_desc(struct 
> arm_smmu_domain *smmu_domain,
>   u64 val;
>   bool cd_live;
>   struct arm_smmu_device *smmu = smmu_domain->smmu;
> - __le64 *cdptr = arm_smmu_get_cd_ptr(&smmu_domain->s1_cfg, ssid);
> + __le64 *cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
>  
>   /*
>* This function handles the following cases:
> @@ -1213,20 +1269,81 @@ static int arm_smmu_write_ctx_desc(struct 
> arm_smmu_domain *smmu_domain,
>  static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain,
>   struct 

Re: [PATCH 4/8] iommu/arm-smmu-v3: Add support for Substream IDs

2019-06-11 Thread Jonathan Cameron
On Mon, 10 Jun 2019 19:47:10 +0100
Jean-Philippe Brucker  wrote:

> At the moment, the SMMUv3 driver implements only one stage-1 or stage-2
> page directory per device. However SMMUv3 allows more than one address
> space for some devices, by providing multiple stage-1 page directories. In
> addition to the Stream ID (SID), that identifies a device, we can now have
> Substream IDs (SSID) identifying an address space. In PCIe, SID is called
> Requester ID (RID) and SSID is called Process Address-Space ID (PASID).
> 
> Prepare the driver for SSID support, by adding context descriptor tables
> in STEs (previously a single static context descriptor). A complete
> stage-1 walk is now performed like this by the SMMU:
> 
>   Stream tables  Ctx. tables  Page tables
> ++   ,--->+---+   ,--->+---+
> ::   |:   :   |:   :
> ++   |+---+   |+---+
>SID->|  STE   |---'  SSID->|  CD   |---'  IOVA->|  PTE  |--> IPA
> +++---++---+
> :::   ::   :
> +++---++---+
> 
> Implement a single level of context descriptor table for now, but as with
> stream and page tables, an SSID can be split to index multiple levels of
> tables.
> 
> In all stream table entries, we set S1DSS=SSID0 mode, making translations
> without an SSID use context descriptor 0. Although it would be possible by
> setting S1DSS=BYPASS, we don't currently support SSID when user selects
> iommu.passthrough.
> 
> Signed-off-by: Jean-Philippe Brucker 

Hi Jean-Phillipe,

A few trivial comments inline, mostly around wondering if a few bits
of refactoring can get pulled out before this and hopefully stop diff
making such a mess of this patch from a readability point of view!

Thanks,

Jonathan

> ---
>  drivers/iommu/arm-smmu-v3.c | 238 +---
>  1 file changed, 192 insertions(+), 46 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 3254f473e681..d90eb604b65d 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -219,6 +219,11 @@
>  #define STRTAB_STE_0_S1CTXPTR_MASK   GENMASK_ULL(51, 6)
>  #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
>  
> +#define STRTAB_STE_1_S1DSS   GENMASK_ULL(1, 0)
> +#define STRTAB_STE_1_S1DSS_TERMINATE 0x0
> +#define STRTAB_STE_1_S1DSS_BYPASS0x1
> +#define STRTAB_STE_1_S1DSS_SSID0 0x2
> +
>  #define STRTAB_STE_1_S1C_CACHE_NC0UL
>  #define STRTAB_STE_1_S1C_CACHE_WBRA  1UL
>  #define STRTAB_STE_1_S1C_CACHE_WT2UL
> @@ -305,6 +310,7 @@
>  #define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0)
>  #define CMDQ_PREFETCH_1_ADDR_MASKGENMASK_ULL(63, 12)
>  
> +#define CMDQ_CFGI_0_SSID GENMASK_ULL(31, 12)
>  #define CMDQ_CFGI_0_SID  GENMASK_ULL(63, 32)
>  #define CMDQ_CFGI_1_LEAF (1UL << 0)
>  #define CMDQ_CFGI_1_RANGEGENMASK_ULL(4, 0)
> @@ -421,8 +427,11 @@ struct arm_smmu_cmdq_ent {
>  
>   #define CMDQ_OP_CFGI_STE0x3
>   #define CMDQ_OP_CFGI_ALL0x4
> + #define CMDQ_OP_CFGI_CD 0x5
> + #define CMDQ_OP_CFGI_CD_ALL 0x6
>   struct {
>   u32 sid;
> + u32 ssid;
>   union {
>   boolleaf;
>   u8  span;
> @@ -506,16 +515,25 @@ struct arm_smmu_strtab_l1_desc {
>   dma_addr_t  l2ptr_dma;
>  };
>  
> +struct arm_smmu_cd_table {
> + __le64  *ptr;
> + dma_addr_t  ptr_dma;
> +};
> +
> +struct arm_smmu_ctx_desc {
> + u16 asid;
> + u64 ttbr;
> + u64 tcr;
> + u64 mair;
> +};
> +
>  struct arm_smmu_s1_cfg {
> - __le64  *cdptr;
> - dma_addr_t  cdptr_dma;
> -
> - struct arm_smmu_ctx_desc {
> - u16 asid;
> - u64 ttbr;
> - u64 tcr;
> - u64 mair;
> - }   cd;
> + u8  s1fmt;
> + u8  s1cdmax;
> + struct arm_smmu_cd_tabletable;

This new structure is a sensible addition and makes the code more readable,
but it's not directly tied to the main flow of this patch, perhaps pull it out?

> +
> + /* Context descriptor 0, when substreams are disabled or s1dss = 0b10 */
> + struct arm_smmu_ctx_desccd;
I've not checked in detail but feels like you could pull this refactor out as 
well
as a trivial precursor and 

Re: [PATCH 3/8] iommu/arm-smmu-v3: Support platform SSID

2019-06-11 Thread Jonathan Cameron
On Mon, 10 Jun 2019 19:47:09 +0100
Jean-Philippe Brucker  wrote:

> For platform devices that support SubstreamID (SSID), firmware provides
> the number of supported SSID bits. Restrict it to what the SMMU supports
> and cache it into master->ssid_bits.
> 
> Signed-off-by: Jean-Philippe Brucker 

Missing kernel-doc.

Thanks,

Jonathan

> ---
>  drivers/iommu/arm-smmu-v3.c | 11 +++
>  drivers/iommu/of_iommu.c|  6 +-
>  include/linux/iommu.h   |  1 +
>  3 files changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 4d5a694f02c2..3254f473e681 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -604,6 +604,7 @@ struct arm_smmu_master {
>   struct list_headdomain_head;
>   u32 *sids;
>   unsigned intnum_sids;
> + unsigned intssid_bits;
>   boolats_enabled :1;
>  };
>  
> @@ -2097,6 +2098,16 @@ static int arm_smmu_add_device(struct device *dev)
>   }
>   }
>  
> + master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
> +
> + /*
> +  * If the SMMU doesn't support 2-stage CD, limit the linear
> +  * tables to a reasonable number of contexts, let's say
> +  * 64kB / sizeof(ctx_desc) = 1024 = 2^10
> +  */
> + if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
> + master->ssid_bits = min(master->ssid_bits, 10U);
> +
>   group = iommu_group_get_for_dev(dev);
>   if (!IS_ERR(group)) {
>   iommu_group_put(group);
> diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
> index f04a6df65eb8..04f4f6b95d82 100644
> --- a/drivers/iommu/of_iommu.c
> +++ b/drivers/iommu/of_iommu.c
> @@ -206,8 +206,12 @@ const struct iommu_ops *of_iommu_configure(struct device 
> *dev,
>   if (err)
>   break;
>   }
> - }
>  
> + fwspec = dev_iommu_fwspec_get(dev);
> + if (!err && fwspec)
> + of_property_read_u32(master_np, "pasid-num-bits",
> +  &fwspec->num_pasid_bits);
> + }
>  
>   /*
>* Two success conditions can be represented by non-negative err here:
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 519e40fb23ce..b91df613385f 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -536,6 +536,7 @@ struct iommu_fwspec {
>   struct fwnode_handle*iommu_fwnode;
>   void*iommu_priv;
>   u32 flags;
> + u32 num_pasid_bits;

This structure has kernel doc so you need to add something for this.

>   unsigned intnum_ids;
>   u32 ids[1];
>  };


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v5 4/8] iommu/vt-d: Aux-domain specific domain attach/detach

2019-01-15 Thread Jonathan Cameron
On Tue, 15 Jan 2019 10:10:21 +0800
Lu Baolu  wrote:

> Hi,
> 
> On 1/14/19 8:26 PM, Jonathan Cameron wrote:
> > On Thu, 10 Jan 2019 11:00:23 +0800
> > Lu Baolu  wrote:
> >   
> >> When multiple domains per device has been enabled by the
> >> device driver, the device will tag the default PASID for
> >> the domain to all DMA traffics out of the subset of this
> >> device; and the IOMMU should translate the DMA requests
> >> in PASID granularity.
> >>
> >> This adds the intel_iommu_aux_attach/detach_device() ops
> >> to support managing PASID granular translation structures
> >> when the device driver has enabled multiple domains per
> >> device.
> >>
> >> Cc: Ashok Raj 
> >> Cc: Jacob Pan 
> >> Cc: Kevin Tian 
> >> Signed-off-by: Sanjay Kumar 
> >> Signed-off-by: Liu Yi L 
> >> Signed-off-by: Lu Baolu   
> > 
> > The following is probably a rather naive review given I don't know
> > the driver or hardware well at all.  Still, it seems like things
> > are a lot less balanced than I'd expect and isn't totally obvious
> > to me why that is.  
> 
> Thank you!

You are welcome.

...

> >> +/*
> >> + * Check whether a @domain could be attached to the @dev through the
> >> + * aux-domain attach/detach APIs.
> >> + */
> >> +static inline bool
> >> +is_aux_domain(struct device *dev, struct iommu_domain *domain)  
> > 
> > I'm finding the distinction between an aux domain capability on
> > a given device and whether one is actually in use to be obscured
> > slightly in the function naming.
> > 
> > This one for example is actually checking if we have a domain
> > that is capable of being enabled for aux domain use, but not
> > yet actually in that mode?
> > 
> > Mind you I'm not sure I have a better answer for the naming.
> > can_aux_domain_be_enabled?  is_unattached_aux_domain?
> > 
> >   
> 
> device aux mode vs. normal mode
> ===
> 
> When we talk about the auxiliary mode (simply aux-mode), it means "the
> device works in aux-mode or normal mode". "normal mode" means that the
> device (and its corresponding IOMMU) supports only RID (PCI Request ID)
> based DMA translation; while, aux-mode means that the device (and its
> IOMMU) supports fine-grained DMA translation, like PASID based DMA
> translation with Intel VT-d scalable mode.
> 
> We are adding below APIs to switch a device between these two modes:
> 
> int iommu_dev_enable/disable_feature(dev, IOMMU_DEV_FEAT_AUX)
> 
> And this API (still under discussion) to check which mode the device is
> working in:
> 
> bool iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX)
> 
> aux-domain
> ==
> 
> If a device is working in aux-mode and we are going to attach a domain
> to this device, we say "this domain will be attached to the device in
> aux mode", and simply "aux domain". So a domain is "normal" when it is
> going to attach to a device in normal mode; and is "aux-domain" when it
> is going to attach to a device in aux mode.

Hmm.. OK I guess.  It still feels like there is more need to refer to
the docs than there should be.  Still, your code and I may well never
read it again so I don't mind :)

> 
> >   
> >> +{

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v5 3/8] iommu/vt-d: Move common code out of iommu_attch_device()

2019-01-14 Thread Jonathan Cameron
On Thu, 10 Jan 2019 11:00:22 +0800
Lu Baolu  wrote:

> This part of code could be used by both normal and aux
> domain specific attach entries. Hence move them into a
> common function to avoid duplication.
> 
> Cc: Ashok Raj 
> Cc: Jacob Pan 
> Cc: Kevin Tian 
> Signed-off-by: Lu Baolu 
Another trivial one (it's going to be one of those days).
Typo in the patch title.

> ---
>  drivers/iommu/intel-iommu.c | 60 ++---
>  1 file changed, 36 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index ee8832d26f7e..e9119d45a29d 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -5058,35 +5058,14 @@ static void intel_iommu_domain_free(struct 
> iommu_domain *domain)
>   domain_exit(to_dmar_domain(domain));
>  }
>  
> -static int intel_iommu_attach_device(struct iommu_domain *domain,
> -  struct device *dev)
> +static int prepare_domain_attach_device(struct iommu_domain *domain,
> + struct device *dev)
>  {
>   struct dmar_domain *dmar_domain = to_dmar_domain(domain);
>   struct intel_iommu *iommu;
>   int addr_width;
>   u8 bus, devfn;
>  
> - if (device_is_rmrr_locked(dev)) {
> - dev_warn(dev, "Device is ineligible for IOMMU domain attach due 
> to platform RMRR requirement.  Contact your platform vendor.\n");
> - return -EPERM;
> - }
> -
> - /* normally dev is not mapped */
> - if (unlikely(domain_context_mapped(dev))) {
> - struct dmar_domain *old_domain;
> -
> - old_domain = find_domain(dev);
> - if (old_domain) {
> - rcu_read_lock();
> - dmar_remove_one_dev_info(old_domain, dev);
> - rcu_read_unlock();
> -
> - if (!domain_type_is_vm_or_si(old_domain) &&
> -  list_empty(_domain->devices))
> - domain_exit(old_domain);
> - }
> - }
> -
>   iommu = device_to_iommu(dev, , );
>   if (!iommu)
>   return -ENODEV;
> @@ -5119,7 +5098,40 @@ static int intel_iommu_attach_device(struct 
> iommu_domain *domain,
>   dmar_domain->agaw--;
>   }
>  
> - return domain_add_dev_info(dmar_domain, dev);
> + return 0;
> +}
> +
> +static int intel_iommu_attach_device(struct iommu_domain *domain,
> +  struct device *dev)
> +{
> + int ret;
> +
> + if (device_is_rmrr_locked(dev)) {
> + dev_warn(dev, "Device is ineligible for IOMMU domain attach due 
> to platform RMRR requirement.  Contact your platform vendor.\n");
> + return -EPERM;
> + }
> +
> + /* normally dev is not mapped */
> + if (unlikely(domain_context_mapped(dev))) {
> + struct dmar_domain *old_domain;
> +
> + old_domain = find_domain(dev);
> + if (old_domain) {
> + rcu_read_lock();
> + dmar_remove_one_dev_info(old_domain, dev);
> + rcu_read_unlock();
> +
> + if (!domain_type_is_vm_or_si(old_domain) &&
> + list_empty(_domain->devices))
> + domain_exit(old_domain);
> + }
> + }
> +
> + ret = prepare_domain_attach_device(domain, dev);
> + if (ret)
> + return ret;
> +
> + return domain_add_dev_info(to_dmar_domain(domain), dev);
>  }
>  
>  static void intel_iommu_detach_device(struct iommu_domain *domain,


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v5 1/8] iommu: Add APIs for multiple domains per device

2019-01-14 Thread Jonathan Cameron
On Thu, 10 Jan 2019 11:00:20 +0800
Lu Baolu  wrote:

> Sharing a physical PCI device in a finer-granularity way
> is becoming a consensus in the industry. IOMMU vendors
> are also engaging efforts to support such sharing as well
> as possible. Among the efforts, the capability of support
> finer-granularity DMA isolation is a common requirement
> due to the security consideration. With finer-granularity
> DMA isolation, all DMA requests out of or to a subset of
> a physical PCI device can be protected by the IOMMU. As a
> result, there is a request in software to attach multiple
> domains to a physical PCI device. One example of such use
> model is the Intel Scalable IOV [1] [2]. The Intel vt-d
> 3.0 spec [3] introduces the scalable mode which enables
> PASID granularity DMA isolation.
> 
> This adds the APIs to support multiple domains per device.
> In order to ease the discussions, we call it 'a domain in
> auxiliary mode' or simply 'auxiliary domain' when multiple
> domains are attached to a physical device.
> 
> The APIs include:
> 
> * iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX)
>   - Check whether both IOMMU and device support IOMMU aux
> domain feature. Below aux-domain specific interfaces
> are available only after this returns true.
> 
> * iommu_dev_enable/disable_feature(dev, IOMMU_DEV_FEAT_AUX)
>   - Enable/disable device specific aux-domain feature.
> 
> * iommu_aux_attach_device(domain, dev)
>   - Attaches @domain to @dev in the auxiliary mode. Multiple
> domains could be attached to a single device in the
> auxiliary mode with each domain representing an isolated
> address space for an assignable subset of the device.
> 
> * iommu_aux_detach_device(domain, dev)
>   - Detach @domain which has been attached to @dev in the
> auxiliary mode.
> 
> * iommu_aux_get_pasid(domain, dev)
>   - Return ID used for finer-granularity DMA translation.
> For the Intel Scalable IOV usage model, this will be
> a PASID. The device which supports Scalable IOV needs
> to write this ID to the device register so that DMA
> requests could be tagged with a right PASID prefix.
> 
> This has been updated with the latest proposal from Joerg
> posted here [5].
> 
> Many people involved in discussions of this design.
> 
> Kevin Tian 
> Liu Yi L 
> Ashok Raj 
> Sanjay Kumar 
> Jacob Pan 
> Alex Williamson 
> Jean-Philippe Brucker 
> Joerg Roedel 
> 
> and some discussions can be found here [4] [5].
> 
> [1] 
> https://software.intel.com/en-us/download/intel-scalable-io-virtualization-technical-specification
> [2] https://schd.ws/hosted_files/lc32018/00/LC3-SIOV-final.pdf
> [3] 
> https://software.intel.com/en-us/download/intel-virtualization-technology-for-directed-io-architecture-specification
> [4] https://lkml.org/lkml/2018/7/26/4
> [5] https://www.spinics.net/lists/iommu/msg31874.html
> 
> Cc: Ashok Raj 
> Cc: Jacob Pan 
> Cc: Kevin Tian 
> Cc: Liu Yi L 
> Suggested-by: Kevin Tian 
> Suggested-by: Jean-Philippe Brucker 
> Suggested-by: Joerg Roedel 
> Signed-off-by: Lu Baolu 

One trivial comment inline.

> ---
>  drivers/iommu/iommu.c | 80 +++
>  include/linux/iommu.h | 61 +
>  2 files changed, 141 insertions(+)
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 3ed4db334341..9166b6145409 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -2033,3 +2033,83 @@ int iommu_fwspec_add_ids(struct device *dev, u32 *ids, 
> int num_ids)
>   return 0;
>  }
>  EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
> +
> +/*
> + * Per device IOMMU features.
> + */
> +bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat)
> +{
> + const struct iommu_ops *ops = dev->bus->iommu_ops;
> +
> + if (ops && ops->dev_has_feat)
> + return ops->dev_has_feat(dev, feat);
> +
> + return false;
> +}
> +EXPORT_SYMBOL_GPL(iommu_dev_has_feature);
> +
> +int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features 
> feat)
> +{
> + const struct iommu_ops *ops = dev->bus->iommu_ops;
> +
> + if (ops && ops->dev_enable_feat)
> + return ops->dev_enable_feat(dev, feat);
> +
> + return -ENODEV;
> +}
> +EXPORT_SYMBOL_GPL(iommu_dev_enable_feature);
> +
> +int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features 
> feat)
> +{
> + const struct iommu_ops *ops = dev->bus->iommu_ops;
> +
> + if (ops && ops->dev_disable_feat)
> + return ops->dev_disable_feat(dev, feat);
> +
> + return -ENODEV;
> +}
> +EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
> +
> +/*
> + * Aux-domain specific attach/detach.
> + *
> + * Only works if iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX) returns true.
> + * Also, as long as domains are attached to a device through this interface,
> + * any tries to call iommu_attach_device() should fail (iommu_detach_device()
> + * can't fail, so we fail on the trying to re-attach). 

Re: [PATCH v2 03/40] iommu/sva: Manage process address spaces

2018-05-25 Thread Jonathan Cameron
+CC Kenneth Lee

On Fri, 25 May 2018 09:33:11 +0300
Ilias Apalodimas  wrote:

> On Thu, May 24, 2018 at 04:04:39PM +0100, Jean-Philippe Brucker wrote:
> > On 24/05/18 12:50, Ilias Apalodimas wrote:  
> > >> Interesting, I hadn't thought about this use-case before. At first I
> > >> thought you were talking about mdev devices assigned to VMs, but I think
> > >> you're referring to mdevs assigned to userspace drivers instead? Out of
> > >> curiosity, is it only theoretical or does someone actually need this?  
> > > 
> > > There has been some non upstreamed efforts to have mdev and produce 
> > > userspace
> > > drivers. Huawei is using it on what they call "wrapdrive" for crypto 
> > > devices and
> > > we did a proof of concept for ethernet interfaces. At the time we choose 
> > > not to
> > > involve the IOMMU for the reason you mentioned, but having it there would 
> > > be
> > > good.  
> > 
> > I'm guessing there were good reasons to do it that way but I wonder, is
> > it not simpler to just have the kernel driver create a /dev/foo, with a
> > standard ioctl/mmap/poll interface? Here VFIO adds a layer of
> > indirection, and since the mediating driver has to implement these
> > operations already, what is gained?  
> The best reason i can come up with is "common code". You already have one API
> doing that for you so we replicate it in a /dev file?
> The mdev approach still needs extentions to support what we tried to do (i.e
> mdev bus might need yo have access on iommu_ops), but as far as i undestand 
> it's
> a possible case.
> > 
> > Thanks,
> > Jean  


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 05/40] iommu/sva: Track mm changes with an MMU notifier

2018-05-17 Thread Jonathan Cameron
On Fri, 11 May 2018 20:06:06 +0100
Jean-Philippe Brucker  wrote:

> When creating an io_mm structure, register an MMU notifier that informs
> us when the virtual address space changes and disappears.
> 
> Add a new operation to the IOMMU driver, mm_invalidate, called when a
> range of addresses is unmapped to let the IOMMU driver send ATC
> invalidations. mm_invalidate cannot sleep.
> 
> Adding the notifier complicates io_mm release. In one case device
> drivers free the io_mm explicitly by calling unbind (or detaching the
> device from its domain). In the other case the process could crash
> before unbind, in which case the release notifier has to do all the
> work.
> 
> Allowing the device driver's mm_exit() handler to sleep adds another
> complication, but it will greatly simplify things for users. For example
> VFIO can take the IOMMU mutex and remove any trace of io_mm, instead of
> introducing complex synchronization to delicately handle this race. But
> relaxing the user side does force unbind() to sleep and wait for all
> pending mm_exit() calls to finish.
> 
> Signed-off-by: Jean-Philippe Brucker 
> 
> ---
> v1->v2:
> * Unbind() waits for mm_exit to finish
> * mm_exit can sleep
> ---
>  drivers/iommu/Kconfig |   1 +
>  drivers/iommu/iommu-sva.c | 248 +++---
>  include/linux/iommu.h |  10 ++
>  3 files changed, 244 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index cca8e06903c7..38434899e283 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -77,6 +77,7 @@ config IOMMU_DMA
>  config IOMMU_SVA
>   bool
>   select IOMMU_API
> + select MMU_NOTIFIER
>  
>  config FSL_PAMU
>   bool "Freescale IOMMU support"
> diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
> index 0700893c679d..e9afae2537a2 100644
> --- a/drivers/iommu/iommu-sva.c
> +++ b/drivers/iommu/iommu-sva.c
> @@ -7,6 +7,7 @@
>  
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -106,6 +107,9 @@ struct iommu_bond {
>   struct list_headmm_head;
>   struct list_headdev_head;
>   struct list_headdomain_head;
> + refcount_t  refs;
> + struct wait_queue_head  mm_exit_wq;
> + boolmm_exit_active;
>  
>   void*drvdata;
>  };
> @@ -124,6 +128,8 @@ static DEFINE_IDR(iommu_pasid_idr);
>   */
>  static DEFINE_SPINLOCK(iommu_sva_lock);
>  
> +static struct mmu_notifier_ops iommu_mmu_notifier;
> +
>  static struct io_mm *
>  io_mm_alloc(struct iommu_domain *domain, struct device *dev,
>   struct mm_struct *mm, unsigned long flags)
> @@ -151,6 +157,7 @@ io_mm_alloc(struct iommu_domain *domain, struct device 
> *dev,
>  
>   io_mm->flags= flags;
>   io_mm->mm   = mm;
> + io_mm->notifier.ops = _mmu_notifier;
>   io_mm->release  = domain->ops->mm_free;
>   INIT_LIST_HEAD(_mm->devices);
>  
> @@ -167,8 +174,29 @@ io_mm_alloc(struct iommu_domain *domain, struct device 
> *dev,
>   goto err_free_mm;
>   }
>  
> - /* TODO: keep track of mm. For the moment, abort. */
> - ret = -ENOSYS;
> + ret = mmu_notifier_register(_mm->notifier, mm);
> + if (ret)
> + goto err_free_pasid;
> +
> + /*
> +  * Now that the MMU notifier is valid, we can allow users to grab this
> +  * io_mm by setting a valid refcount. Before that it was accessible in
> +  * the IDR but invalid.
> +  *
> +  * The following barrier ensures that users, who obtain the io_mm with
> +  * kref_get_unless_zero, don't read uninitialized fields in the
> +  * structure.
> +  */
> + smp_wmb();
> + kref_init(_mm->kref);
> +
> + return io_mm;
> +
> +err_free_pasid:
> + /*
> +  * Even if the io_mm is accessible from the IDR at this point, kref is
> +  * 0 so no user could get a reference to it. Free it manually.
> +  */
>   spin_lock(_sva_lock);
>   idr_remove(_pasid_idr, io_mm->pasid);
>   spin_unlock(_sva_lock);
> @@ -180,9 +208,13 @@ io_mm_alloc(struct iommu_domain *domain, struct device 
> *dev,
>   return ERR_PTR(ret);
>  }
>  
> -static void io_mm_free(struct io_mm *io_mm)
> +static void io_mm_free(struct rcu_head *rcu)
>  {
> - struct mm_struct *mm = io_mm->mm;
> + struct io_mm *io_mm;
> + struct mm_struct *mm;
> +
> + io_mm = container_of(rcu, struct io_mm, rcu);
> + mm = io_mm->mm;
>  
>   io_mm->release(io_mm);
>   mmdrop(mm);
> @@ -197,7 +229,22 @@ static void io_mm_release(struct kref *kref)
>  
>   idr_remove(_pasid_idr, io_mm->pasid);
>  
> - io_mm_free(io_mm);
> + /*
> +  * If we're being released from mm exit, the notifier callback ->release
> +  * has already been called. Otherwise we don't need ->release, the io_mm
> +  * 

Re: [PATCH v2 03/40] iommu/sva: Manage process address spaces

2018-05-17 Thread Jonathan Cameron
On Fri, 11 May 2018 20:06:04 +0100
Jean-Philippe Brucker  wrote:

> Allocate IOMMU mm structures and binding them to devices. Four operations
> are added to IOMMU drivers:
> 
> * mm_alloc(): to create an io_mm structure and perform architecture-
>   specific operations required to grab the process (for instance on ARM,
>   pin down the CPU ASID so that the process doesn't get assigned a new
>   ASID on rollover).
> 
>   There is a single valid io_mm structure per Linux mm. Future extensions
>   may also use io_mm for kernel-managed address spaces, populated with
>   map()/unmap() calls instead of bound to process address spaces. This
>   patch focuses on "shared" io_mm.
> 
> * mm_attach(): attach an mm to a device. The IOMMU driver checks that the
>   device is capable of sharing an address space, and writes the PASID
>   table entry to install the pgd.
> 
>   Some IOMMU drivers will have a single PASID table per domain, for
>   convenience. Other can implement it differently but to help these
>   drivers, mm_attach and mm_detach take 'attach_domain' and
>   'detach_domain' parameters, that tell whether they need to set and clear
>   the PASID entry or only send the required TLB invalidations.
> 
> * mm_detach(): detach an mm from a device. The IOMMU driver removes the
>   PASID table entry and invalidates the IOTLBs.
> 
> * mm_free(): free a structure allocated by mm_alloc(), and let arch
>   release the process.
> 
> mm_attach and mm_detach operations are serialized with a spinlock. When
> trying to optimize this code, we should at least prevent concurrent
> attach()/detach() on the same domain (so multi-level PASID table code can
> allocate tables lazily). mm_alloc() can sleep, but mm_free must not
> (because we'll have to call it from call_srcu later on).
> 
> At the moment we use an IDR for allocating PASIDs and retrieving contexts.
> We also use a single spinlock. These can be refined and optimized later (a
> custom allocator will be needed for top-down PASID allocation).
> 
> Keeping track of address spaces requires the use of MMU notifiers.
> Handling process exit with regard to unbind() is tricky, so it is left for
> another patch and we explicitly fail mm_alloc() for the moment.
> 
> Signed-off-by: Jean-Philippe Brucker 

A few minor bits and bobs inline.  Looks good in general + nice diags!

Thanks,

Jonathan

> 
> ---
> v1->v2: sanity-check of flags
> ---
>  drivers/iommu/iommu-sva.c | 380 +-
>  drivers/iommu/iommu.c |   1 +
>  include/linux/iommu.h |  28 +++
>  3 files changed, 406 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
> index 8d98f9c09864..6ac679c48f3c 100644
> --- a/drivers/iommu/iommu-sva.c
> +++ b/drivers/iommu/iommu-sva.c
> @@ -5,8 +5,298 @@
>   * Copyright (C) 2018 ARM Ltd.
>   */
>  
> +#include 
>  #include 
> +#include 
>  #include 
> +#include 
> +
> +/**
> + * DOC: io_mm model
> + *
> + * The io_mm keeps track of process address spaces shared between CPU and 
> IOMMU.
> + * The following example illustrates the relation between structures
> + * iommu_domain, io_mm and iommu_bond. An iommu_bond is a link between io_mm 
> and
> + * device. A device can have multiple io_mm and an io_mm may be bound to
> + * multiple devices.
> + *  ___
> + * |  IOMMU domain A   |
> + * |   |
> + * | |  IOMMU group   |+--- io_pgtables
> + * | |||
> + * | |   dev 00:00.0 +--- bond --- io_mm X
> + * | ||   \|
> + * |   '- bond ---.
> + * |___|   \
> + *  ___ \
> + * |  IOMMU domain B   |   io_mm Y
> + * |   |   / /
> + * | |  IOMMU group   ||  / /
> + * | ||| / /
> + * | |   dev 00:01.0  bond -' /
> + * | |   dev 00:01.1  bond --'
> + * | |||
> + * |   +--- io_pgtables
> + * |___|
> + *
> + * In this example, device 00:00.0 is in domain A, devices 00:01.* are in 
> domain
> + * B. All devices within the same domain access the same address spaces. 
> Device
> + * 00:00.0 accesses address spaces X and Y, each corresponding to an 
> mm_struct.
> + * Devices 00:01.* only access address space Y. In addition each
> + * IOMMU_DOMAIN_DMA domain has a private address space, io_pgtable, that is
> + * managed with iommu_map()/iommu_unmap(), and isn't shared with the CPU MMU.
> + *
> + * To obtain the above configuration, 

Re: [PATCH v2 02/40] iommu/sva: Bind process address spaces to devices

2018-05-17 Thread Jonathan Cameron
On Fri, 11 May 2018 20:06:03 +0100
Jean-Philippe Brucker  wrote:

> Add bind() and unbind() operations to the IOMMU API. Bind() returns a
> PASID that drivers can program in hardware, to let their devices access an
> mm. This patch only adds skeletons for the device driver API, most of the
> implementation is still missing.
> 
> IOMMU groups with more than one device aren't supported for SVA at the
> moment. There may be P2P traffic between devices within a group, which
> cannot be seen by an IOMMU (note that supporting PASID doesn't add any
> form of isolation with regard to P2P). Supporting groups would require
> calling bind() for all bound processes every time a device is added to a
> group, to perform sanity checks (e.g. ensure that new devices support
> PASIDs at least as big as those already allocated in the group).

Is it worth adding an explicit comment on this reasoning (or a minimal subset
of it) at the check for the number of devices in the group?
It's well laid out here, but might not be so obvious if someone is reading
the code in the future.

>It also
> means making sure that reserved ranges (IOMMU_RESV_*) of all devices are
> carved out of processes. This is already tricky with single devices, but
> becomes very difficult with groups. Since SVA-capable devices are expected
> to be cleanly isolated, and since we don't have any way to test groups or
> hot-plug, we only allow singular groups for now.
> 
> Signed-off-by: Jean-Philippe Brucker 

Otherwise, looks good to me.

> 
> ---
> v1->v2: remove iommu_sva_bind/unbind_group
> ---
>  drivers/iommu/iommu-sva.c | 27 +
>  drivers/iommu/iommu.c | 83 +++
>  include/linux/iommu.h | 37 +
>  3 files changed, 147 insertions(+)
> 
> diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
> index 8b4afb7c63ae..8d98f9c09864 100644
> --- a/drivers/iommu/iommu-sva.c
> +++ b/drivers/iommu/iommu-sva.c
> @@ -93,6 +93,8 @@ int iommu_sva_device_shutdown(struct device *dev)
>   if (!domain)
>   return -ENODEV;
>  
> + __iommu_sva_unbind_dev_all(dev);
> +
>   mutex_lock(>iommu_param->lock);
>   param = dev->iommu_param->sva_param;
>   dev->iommu_param->sva_param = NULL;
> @@ -108,3 +110,28 @@ int iommu_sva_device_shutdown(struct device *dev)
>   return 0;
>  }
>  EXPORT_SYMBOL_GPL(iommu_sva_device_shutdown);
> +
> +int __iommu_sva_bind_device(struct device *dev, struct mm_struct *mm,
> + int *pasid, unsigned long flags, void *drvdata)
> +{
> + return -ENOSYS; /* TODO */
> +}
> +EXPORT_SYMBOL_GPL(__iommu_sva_bind_device);
> +
> +int __iommu_sva_unbind_device(struct device *dev, int pasid)
> +{
> + return -ENOSYS; /* TODO */
> +}
> +EXPORT_SYMBOL_GPL(__iommu_sva_unbind_device);
> +
> +/**
> + * __iommu_sva_unbind_dev_all() - Detach all address spaces from this device
> + * @dev: the device
> + *
> + * When detaching @device from a domain, IOMMU drivers should use this 
> helper.
> + */
> +void __iommu_sva_unbind_dev_all(struct device *dev)
> +{
> + /* TODO */
> +}
> +EXPORT_SYMBOL_GPL(__iommu_sva_unbind_dev_all);
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 9e28d88c8074..bd2819deae5b 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -2261,3 +2261,86 @@ int iommu_fwspec_add_ids(struct device *dev, u32 *ids, 
> int num_ids)
>   return 0;
>  }
>  EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
> +
> +/**
> + * iommu_sva_bind_device() - Bind a process address space to a device
> + * @dev: the device
> + * @mm: the mm to bind, caller must hold a reference to it
> + * @pasid: valid address where the PASID will be stored
> + * @flags: bond properties
> + * @drvdata: private data passed to the mm exit handler
> + *
> + * Create a bond between device and task, allowing the device to access the 
> mm
> + * using the returned PASID. If unbind() isn't called first, a subsequent 
> bind()
> + * for the same device and mm fails with -EEXIST.
> + *
> + * iommu_sva_device_init() must be called first, to initialize the required 
> SVA
> + * features. @flags is a subset of these features.
> + *
> + * The caller must pin down using get_user_pages*() all mappings shared with 
> the
> + * device. mlock() isn't sufficient, as it doesn't prevent minor page faults
> + * (e.g. copy-on-write).
> + *
> + * On success, 0 is returned and @pasid contains a valid ID. Otherwise, an 
> error
> + * is returned.
> + */
> +int iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, int 
> *pasid,
> +   unsigned long flags, void *drvdata)
> +{
> + int ret = -EINVAL;
> + struct iommu_group *group;
> +
> + if (!pasid)
> + return -EINVAL;
> +
> + group = iommu_group_get(dev);
> + if (!group)
> + return -ENODEV;
> +
> + /* Ensure device count and domain don't change while 

Re: [PATCH v2 17/40] iommu/arm-smmu-v3: Link domains and devices

2018-05-17 Thread Jonathan Cameron
On Fri, 11 May 2018 20:06:18 +0100
Jean-Philippe Brucker  wrote:

> When removing a mapping from a domain, we need to send an invalidation to
> all devices that might have stored it in their Address Translation Cache
> (ATC). In addition when updating the context descriptor of a live domain,
> we'll need to send invalidations for all devices attached to it.
> 
> Maintain a list of devices in each domain, protected by a spinlock. It is
> updated every time we attach or detach devices to and from domains.
> 
> It needs to be a spinlock because we'll invalidate ATC entries from
> within hardirq-safe contexts, but it may be possible to relax the read
> side with RCU later.
> 
> Signed-off-by: Jean-Philippe Brucker 

Trivial naming suggestion...

> ---
>  drivers/iommu/arm-smmu-v3.c | 28 
>  1 file changed, 28 insertions(+)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 1d647104bccc..c892f012fb43 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -595,6 +595,11 @@ struct arm_smmu_device {
>  struct arm_smmu_master_data {
>   struct arm_smmu_device  *smmu;
>   struct arm_smmu_strtab_ent  ste;
> +
> + struct arm_smmu_domain  *domain;
> + struct list_headlist; /* domain->devices */

More meaningful name perhaps to avoid the need for the comment?

> +
> + struct device   *dev;
>  };
>  
>  /* SMMU private data for an IOMMU domain */
> @@ -618,6 +623,9 @@ struct arm_smmu_domain {
>   };
>  
>   struct iommu_domain domain;
> +
> + struct list_headdevices;
> + spinlock_t  devices_lock;
>  };
>  
>  struct arm_smmu_option_prop {
> @@ -1470,6 +1478,9 @@ static struct iommu_domain 
> *arm_smmu_domain_alloc(unsigned type)
>   }
>  
>   mutex_init(_domain->init_mutex);
> + INIT_LIST_HEAD(_domain->devices);
> + spin_lock_init(_domain->devices_lock);
> +
>   return _domain->domain;
>  }
>  
> @@ -1685,7 +1696,17 @@ static void arm_smmu_install_ste_for_dev(struct 
> iommu_fwspec *fwspec)
>  
>  static void arm_smmu_detach_dev(struct device *dev)
>  {
> + unsigned long flags;
>   struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
> + struct arm_smmu_domain *smmu_domain = master->domain;
> +
> + if (smmu_domain) {
> + spin_lock_irqsave(_domain->devices_lock, flags);
> + list_del(>list);
> + spin_unlock_irqrestore(_domain->devices_lock, flags);
> +
> + master->domain = NULL;
> + }
>  
>   master->ste.assigned = false;
>   arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
> @@ -1694,6 +1715,7 @@ static void arm_smmu_detach_dev(struct device *dev)
>  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device 
> *dev)
>  {
>   int ret = 0;
> + unsigned long flags;
>   struct arm_smmu_device *smmu;
>   struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>   struct arm_smmu_master_data *master;
> @@ -1729,6 +1751,11 @@ static int arm_smmu_attach_dev(struct iommu_domain 
> *domain, struct device *dev)
>   }
>  
>   ste->assigned = true;
> + master->domain = smmu_domain;
> +
> + spin_lock_irqsave(_domain->devices_lock, flags);
> + list_add(>list, _domain->devices);
> + spin_unlock_irqrestore(_domain->devices_lock, flags);
>  
>   if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) {
>   ste->s1_cfg = NULL;
> @@ -1847,6 +1874,7 @@ static int arm_smmu_add_device(struct device *dev)
>   return -ENOMEM;
>  
>   master->smmu = smmu;
> + master->dev = dev;
>   fwspec->iommu_priv = master;
>   }
>  

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 13/40] vfio: Add support for Shared Virtual Addressing

2018-05-17 Thread Jonathan Cameron
On Fri, 11 May 2018 20:06:14 +0100
Jean-Philippe Brucker  wrote:

> Add two new ioctls for VFIO containers. VFIO_IOMMU_BIND_PROCESS creates a
> bond between a container and a process address space, identified by a
> Process Address Space ID (PASID). Devices in the container append this
> PASID to DMA transactions in order to access the process' address space.
> The process page tables are shared with the IOMMU, and mechanisms such as
> PCI ATS/PRI are used to handle faults. VFIO_IOMMU_UNBIND_PROCESS removes a
> bond created with VFIO_IOMMU_BIND_PROCESS.
> 
> Signed-off-by: Jean-Philippe Brucker 

A couple of small comments inline..

Jonathan

> 
> ---
> v1->v2:
> * Simplify mm_exit
> * Can be built as module again
> ---
>  drivers/vfio/vfio_iommu_type1.c | 449 ++--
>  include/uapi/linux/vfio.h   |  76 ++
>  2 files changed, 504 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 5c212bf29640..2902774062b8 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -30,6 +30,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -60,6 +61,7 @@ MODULE_PARM_DESC(disable_hugepages,
>  
>  struct vfio_iommu {
>   struct list_headdomain_list;
> + struct list_headmm_list;
>   struct vfio_domain  *external_domain; /* domain for external user */
>   struct mutexlock;
>   struct rb_root  dma_list;
> @@ -90,6 +92,14 @@ struct vfio_dma {
>  struct vfio_group {
>   struct iommu_group  *iommu_group;
>   struct list_headnext;
> + boolsva_enabled;
> +};
> +
> +struct vfio_mm {
> +#define VFIO_PASID_INVALID   (-1)
> + int pasid;
> + struct mm_struct*mm;
> + struct list_headnext;
>  };
>  
>  /*
> @@ -1238,6 +1248,164 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
>   return 0;
>  }
>  
> +static int vfio_iommu_mm_exit(struct device *dev, int pasid, void *data)
> +{
> + struct vfio_mm *vfio_mm;
> + struct vfio_iommu *iommu = data;
> +
> + mutex_lock(>lock);
> + list_for_each_entry(vfio_mm, >mm_list, next) {
> + if (vfio_mm->pasid == pasid) {
> + list_del(_mm->next);
> + kfree(vfio_mm);
> + break;
> + }
> + }
> + mutex_unlock(>lock);
> +
> + return 0;
> +}
> +
> +static int vfio_iommu_sva_init(struct device *dev, void *data)
> +{
> + return iommu_sva_device_init(dev, IOMMU_SVA_FEAT_IOPF, 0,
> +  vfio_iommu_mm_exit);
> +}
> +
> +static int vfio_iommu_sva_shutdown(struct device *dev, void *data)
> +{
> + iommu_sva_device_shutdown(dev);
> +
> + return 0;
> +}
> +
> +struct vfio_iommu_sva_bind_data {
> + struct vfio_mm *vfio_mm;
> + struct vfio_iommu *iommu;
> + int count;
> +};
> +
> +static int vfio_iommu_sva_bind_dev(struct device *dev, void *data)
> +{
> + struct vfio_iommu_sva_bind_data *bind_data = data;
> +
> + /* Multi-device groups aren't supported for SVA */
> + if (bind_data->count++)
> + return -EINVAL;
> +
> + return __iommu_sva_bind_device(dev, bind_data->vfio_mm->mm,
> +_data->vfio_mm->pasid,
> +IOMMU_SVA_FEAT_IOPF, bind_data->iommu);
> +}
> +
> +static int vfio_iommu_sva_unbind_dev(struct device *dev, void *data)
> +{
> + struct vfio_mm *vfio_mm = data;
> +
> + return __iommu_sva_unbind_device(dev, vfio_mm->pasid);
> +}
> +
> +static int vfio_iommu_bind_group(struct vfio_iommu *iommu,
> +  struct vfio_group *group,
> +  struct vfio_mm *vfio_mm)
> +{
> + int ret;
> + bool enabled_sva = false;
> + struct vfio_iommu_sva_bind_data data = {
> + .vfio_mm= vfio_mm,
> + .iommu  = iommu,
> + .count  = 0,
> + };
> +
> + if (!group->sva_enabled) {
> + ret = iommu_group_for_each_dev(group->iommu_group, NULL,
> +vfio_iommu_sva_init);
> + if (ret)
> + return ret;
> +
> + group->sva_enabled = enabled_sva = true;
> + }
> +
> + ret = iommu_group_for_each_dev(group->iommu_group, &data,
> +vfio_iommu_sva_bind_dev);
> + if (ret && data.count > 1)

Are we safe to run this even if data.count == 1?  I assume that at
that point we would always not have an iommu domain associated with the
device so the initial check would error out.

Just be nice to get rid of the special casing in this error path as then
could just do it all under if (ret) and make it visually clearer these
are different 

Re: [PATCH v2 07/40] iommu: Add a page fault handler

2018-05-17 Thread Jonathan Cameron
On Fri, 11 May 2018 20:06:08 +0100
Jean-Philippe Brucker  wrote:

> Some systems allow devices to handle I/O Page Faults in the core mm. For
> example systems implementing the PCI PRI extension or Arm SMMU stall
> model. Infrastructure for reporting these recoverable page faults was
> recently added to the IOMMU core for SVA virtualisation. Add a page fault
> handler for host SVA.
> 
> IOMMU driver can now instantiate several fault workqueues and link them to
> IOPF-capable devices. Drivers can choose between a single global
> workqueue, one per IOMMU device, one per low-level fault queue, one per
> domain, etc.
> 
> When it receives a fault event, supposedly in an IRQ handler, the IOMMU
> driver reports the fault using iommu_report_device_fault(), which calls
> the registered handler. The page fault handler then calls the mm fault
> handler, and reports either success or failure with iommu_page_response().
> When the handler succeeded, the IOMMU retries the access.
> 
> The iopf_param pointer could be embedded into iommu_fault_param. But
> putting iopf_param into the iommu_param structure allows us not to care
> about ordering between calls to iopf_queue_add_device() and
> iommu_register_device_fault_handler().
> 
> Signed-off-by: Jean-Philippe Brucker 

Hi Jean-Phillipe,

One question below on how we would end up with partial faults left when
doing the queue remove. Code looks fine, but I'm not seeing how that
would happen without buggy hardware... + we presumably have to rely on
the hardware timing out on that request or it's dead anyway...

Jonathan

> 
> ---
> v1->v2: multiple queues registered by IOMMU driver
> ---
>  drivers/iommu/Kconfig  |   4 +
>  drivers/iommu/Makefile |   1 +
>  drivers/iommu/io-pgfault.c | 363 +
>  include/linux/iommu.h  |  58 ++
>  4 files changed, 426 insertions(+)
>  create mode 100644 drivers/iommu/io-pgfault.c
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index 38434899e283..09f13a7c4b60 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -79,6 +79,10 @@ config IOMMU_SVA
>   select IOMMU_API
>   select MMU_NOTIFIER
>  
> +config IOMMU_PAGE_FAULT
> + bool
> + select IOMMU_API
> +
>  config FSL_PAMU
>   bool "Freescale IOMMU support"
>   depends on PCI
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> index 1dbcc89ebe4c..4b744e399a1b 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -4,6 +4,7 @@ obj-$(CONFIG_IOMMU_API) += iommu-traces.o
>  obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
>  obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
>  obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o
> +obj-$(CONFIG_IOMMU_PAGE_FAULT) += io-pgfault.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
> diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
> new file mode 100644
> index ..321c00dd3a3d
> --- /dev/null
> +++ b/drivers/iommu/io-pgfault.c
> @@ -0,0 +1,363 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Handle device page faults
> + *
> + * Copyright (C) 2018 ARM Ltd.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +/**
> + * struct iopf_queue - IO Page Fault queue
> + * @wq: the fault workqueue
> + * @flush: low-level flush callback
> + * @flush_arg: flush() argument
> + * @refs: references to this structure taken by producers
> + */
> +struct iopf_queue {
> + struct workqueue_struct *wq;
> + iopf_queue_flush_t  flush;
> + void*flush_arg;
> + refcount_t  refs;
> +};
> +
> +/**
> + * struct iopf_device_param - IO Page Fault data attached to a device
> + * @queue: IOPF queue
> + * @partial: faults that are part of a Page Request Group for which the last
> + *   request hasn't been submitted yet.
> + */
> +struct iopf_device_param {
> + struct iopf_queue   *queue;
> + struct list_headpartial;
> +};
> +
> +struct iopf_context {
> + struct device   *dev;
> + struct iommu_fault_eventevt;
> + struct list_headhead;
> +};
> +
> +struct iopf_group {
> + struct iopf_context last_fault;
> + struct list_headfaults;
> + struct work_struct  work;
> +};
> +
> +static int iopf_complete(struct device *dev, struct iommu_fault_event *evt,
> +  enum page_response_code status)
> +{
> + struct page_response_msg resp = {
> + .addr   = evt->addr,
> + .pasid  = evt->pasid,
> + .pasid_present  = evt->pasid_valid,
> + .page_req_group_id  = evt->page_req_group_id,
> + .private_data 

Re: [PATCH v2 08/21] iio: adc: Remove depends on HAS_DMA in case of platform dependency

2018-03-17 Thread Jonathan Cameron
On Fri, 16 Mar 2018 14:51:41 +0100
Geert Uytterhoeven <ge...@linux-m68k.org> wrote:

> Remove dependencies on HAS_DMA where a Kconfig symbol depends on another
> symbol that implies HAS_DMA, and, optionally, on "|| COMPILE_TEST".
> In most cases this other symbol is an architecture or platform specific
> symbol, or PCI.
> 
> Generic symbols and drivers without platform dependencies keep their
> dependencies on HAS_DMA, to prevent compiling subsystems or drivers that
> cannot work anyway.
> 
> This simplifies the dependencies, and allows to improve compile-testing.
> 
> Signed-off-by: Geert Uytterhoeven <ge...@linux-m68k.org>
> Reviewed-by: Mark Brown <broo...@kernel.org>
> Acked-by: Robin Murphy <robin.mur...@arm.com>
Great.
Acked-by: Jonathan Cameron <jonathan.came...@huawei.com>

Thanks for doing this - this has been annoying for a long time :)

> ---
> v2:
>   - Add Reviewed-by, Acked-by,
>   - Drop RFC state,
>   - Split per subsystem.
> ---
>  drivers/iio/adc/Kconfig | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
> index 72bc2b71765ae2ff..57f46e88f5c2536e 100644
> --- a/drivers/iio/adc/Kconfig
> +++ b/drivers/iio/adc/Kconfig
> @@ -158,7 +158,6 @@ config AT91_SAMA5D2_ADC
>   tristate "Atmel AT91 SAMA5D2 ADC"
>   depends on ARCH_AT91 || COMPILE_TEST
>   depends on HAS_IOMEM
> - depends on HAS_DMA
>   select IIO_TRIGGERED_BUFFER
>   help
> Say yes here to build support for Atmel SAMA5D2 ADC which is
> @@ -647,7 +646,6 @@ config SD_ADC_MODULATOR
>  config STM32_ADC_CORE
>   tristate "STMicroelectronics STM32 adc core"
>   depends on ARCH_STM32 || COMPILE_TEST
> - depends on HAS_DMA
>   depends on OF
>   depends on REGULATOR
>   select IIO_BUFFER

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 17/37] iommu/arm-smmu-v3: Move context descriptor code

2018-03-09 Thread Jonathan Cameron
On Mon, 12 Feb 2018 18:33:32 +
Jean-Philippe Brucker  wrote:

> In order to add support for substream ID, move the context descriptor code
> into a separate library. At the moment it only manages context descriptor
> 0, which is used for non-PASID translations.
> 
> One important behavior change is the ASID allocator, which is now global
> instead of per-SMMU. If we end up needing per-SMMU ASIDs after all, it
> would be relatively simple to move back to per-device allocator instead
> of a global one. Sharing ASIDs will require an IDR, so implement the
> ASID allocator with an IDA instead of porting the bitmap, to ease the
> transition.
> 
> Signed-off-by: Jean-Philippe Brucker 
Hi Jean-Philippe,

This would have been easier to review if split into a 'move' and additional
patches actually making the changes described.

Superficially it looks like there may be more going on in here than the
above description suggests.  I'm unsure why we are gaining 
the CFGI_CD_ALL and similar in this patch as there is just to much going on.

Thanks,

Jonathan
> ---
>  MAINTAINERS |   2 +-
>  drivers/iommu/Kconfig   |  11 ++
>  drivers/iommu/Makefile  |   1 +
>  drivers/iommu/arm-smmu-v3-context.c | 289 
> 
>  drivers/iommu/arm-smmu-v3.c | 265 +++--
>  drivers/iommu/iommu-pasid.c |   1 +
>  drivers/iommu/iommu-pasid.h |  27 
>  7 files changed, 451 insertions(+), 145 deletions(-)
>  create mode 100644 drivers/iommu/arm-smmu-v3-context.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 9cb8ced8322a..93507bfe03a6 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1104,7 +1104,7 @@ R:  Robin Murphy 
>  L:   linux-arm-ker...@lists.infradead.org (moderated for non-subscribers)
>  S:   Maintained
>  F:   drivers/iommu/arm-smmu.c
> -F:   drivers/iommu/arm-smmu-v3.c
> +F:   drivers/iommu/arm-smmu-v3*
>  F:   drivers/iommu/io-pgtable-arm.c
>  F:   drivers/iommu/io-pgtable-arm.h
>  F:   drivers/iommu/io-pgtable-arm-v7s.c
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index 8add90ba9b75..4b272925ee78 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -66,6 +66,16 @@ menu "Generic PASID table support"
>  config IOMMU_PASID_TABLE
>   bool
>  
> +config ARM_SMMU_V3_CONTEXT
> + bool "ARM SMMU v3 Context Descriptor tables"
> + select IOMMU_PASID_TABLE
> + depends on ARM64
> + help
> + Enable support for ARM SMMU v3 Context Descriptor tables, used for DMA
> + and PASID support.
> +
> + If unsure, say N here.
> +
>  endmenu
>  
>  config IOMMU_IOVA
> @@ -344,6 +354,7 @@ config ARM_SMMU_V3
>   depends on ARM64
>   select IOMMU_API
>   select IOMMU_IO_PGTABLE_LPAE
> + select ARM_SMMU_V3_CONTEXT
>   select GENERIC_MSI_IRQ_DOMAIN
>   help
> Support for implementations of the ARM System MMU architecture
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> index 338e59c93131..22758960ed02 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -9,6 +9,7 @@ obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
>  obj-$(CONFIG_IOMMU_PASID_TABLE) += iommu-pasid.o
> +obj-$(CONFIG_ARM_SMMU_V3_CONTEXT) += arm-smmu-v3-context.o
>  obj-$(CONFIG_IOMMU_IOVA) += iova.o
>  obj-$(CONFIG_OF_IOMMU)   += of_iommu.o
>  obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
> diff --git a/drivers/iommu/arm-smmu-v3-context.c 
> b/drivers/iommu/arm-smmu-v3-context.c
> new file mode 100644
> index ..e910cb356f45
> --- /dev/null
> +++ b/drivers/iommu/arm-smmu-v3-context.c
> @@ -0,0 +1,289 @@
> +/*
> + * Context descriptor table driver for SMMUv3
> + *
> + * Copyright (C) 2018 ARM Ltd.
> + *
> + * SPDX-License-Identifier: GPL-2.0
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include "iommu-pasid.h"
> +
> +#define CTXDESC_CD_DWORDS8
> +#define CTXDESC_CD_0_TCR_T0SZ_SHIFT  0
> +#define ARM64_TCR_T0SZ_SHIFT 0
> +#define ARM64_TCR_T0SZ_MASK  0x1fUL
> +#define CTXDESC_CD_0_TCR_TG0_SHIFT   6
> +#define ARM64_TCR_TG0_SHIFT  14
> +#define ARM64_TCR_TG0_MASK   0x3UL
> +#define CTXDESC_CD_0_TCR_IRGN0_SHIFT 8
> +#define ARM64_TCR_IRGN0_SHIFT8
> +#define ARM64_TCR_IRGN0_MASK 0x3UL
> +#define CTXDESC_CD_0_TCR_ORGN0_SHIFT 10
> +#define ARM64_TCR_ORGN0_SHIFT10
> +#define ARM64_TCR_ORGN0_MASK 0x3UL
> +#define CTXDESC_CD_0_TCR_SH0_SHIFT   12
> +#define ARM64_TCR_SH0_SHIFT  12
> +#define ARM64_TCR_SH0_MASK   0x3UL
> +#define CTXDESC_CD_0_TCR_EPD0_SHIFT  14
> +#define ARM64_TCR_EPD0_SHIFT 7
> +#define ARM64_TCR_EPD0_MASK  0x1UL
> +#define 

Re: [PATCH 27/37] iommu/arm-smmu-v3: Register fault workqueue

2018-03-08 Thread Jonathan Cameron
On Mon, 12 Feb 2018 18:33:42 +
Jean-Philippe Brucker  wrote:

> When using PRI or Stall, the PRI or event handler enqueues faults into the
> core fault queue. Register it based on the SMMU features.
> 
> When the core stops using a PASID, it notifies the SMMU to flush all
> instances of this PASID from the PRI queue. Add a way to flush the PRI and
> event queue. PRI and event thread now take a spinlock while processing the
> queue. The flush handler takes this lock to inspect the queue state.
> We avoid livelock, where the SMMU adds fault to the queue faster than we
> can consume them, by incrementing a 'batch' number on every cycle so the
> flush handler only has to wait a complete cycle (two batch increments.)
> 
> Signed-off-by: Jean-Philippe Brucker 
I think you have a potential incorrect free issue... See inline.

Jonathan
> ---
>  drivers/iommu/Kconfig   |   1 +
>  drivers/iommu/arm-smmu-v3.c | 103 
> +++-
>  2 files changed, 103 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index d434f7085dc2..d79c68754bb9 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -354,6 +354,7 @@ config ARM_SMMU_V3
>   depends on ARM64
>   select IOMMU_API
>   select IOMMU_SVA
> + select IOMMU_FAULT
>   select IOMMU_IO_PGTABLE_LPAE
>   select ARM_SMMU_V3_CONTEXT
>   select GENERIC_MSI_IRQ_DOMAIN
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 8528704627b5..c5b3a43becaf 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -494,6 +494,10 @@ struct arm_smmu_queue {
>  
>   u32 __iomem *prod_reg;
>   u32 __iomem *cons_reg;
> +
> + /* Event and PRI */
> + u64 batch;
> + wait_queue_head_t   wq;
>  };
>  
>  struct arm_smmu_cmdq {
> @@ -610,6 +614,9 @@ struct arm_smmu_device {
>  
>   /* IOMMU core code handle */
>   struct iommu_device iommu;
> +
> + /* Notifier for the fault queue */
> + struct notifier_block   faultq_nb;
>  };
>  
>  /* SMMU private data for each master */
> @@ -1247,14 +1254,23 @@ static int arm_smmu_init_l2_strtab(struct 
> arm_smmu_device *smmu, u32 sid)
>  static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
>  {
>   int i;
> + int num_handled = 0;
>   struct arm_smmu_device *smmu = dev;
>   struct arm_smmu_queue *q = >evtq.q;
> + size_t queue_size = 1 << q->max_n_shift;
>   u64 evt[EVTQ_ENT_DWORDS];
>  
> + spin_lock(>wq.lock);
>   do {
>   while (!queue_remove_raw(q, evt)) {
>   u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
>  
> + if (++num_handled == queue_size) {
> + q->batch++;
> + wake_up_locked(>wq);
> + num_handled = 0;
> + }
> +
>   dev_info(smmu->dev, "event 0x%02x received:\n", id);
>   for (i = 0; i < ARRAY_SIZE(evt); ++i)
>   dev_info(smmu->dev, "\t0x%016llx\n",
> @@ -1272,6 +1288,11 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void 
> *dev)
>  
>   /* Sync our overflow flag, as we believe we're up to speed */
>   q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
> +
> + q->batch++;
> + wake_up_locked(>wq);
> + spin_unlock(>wq.lock);
> +
>   return IRQ_HANDLED;
>  }
>  
> @@ -1315,13 +1336,24 @@ static void arm_smmu_handle_ppr(struct 
> arm_smmu_device *smmu, u64 *evt)
>  
>  static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
>  {
> + int num_handled = 0;
>   struct arm_smmu_device *smmu = dev;
>   struct arm_smmu_queue *q = >priq.q;
> + size_t queue_size = 1 << q->max_n_shift;
>   u64 evt[PRIQ_ENT_DWORDS];
>  
> + spin_lock(>wq.lock);
>   do {
> - while (!queue_remove_raw(q, evt))
> + while (!queue_remove_raw(q, evt)) {
> + spin_unlock(>wq.lock);
>   arm_smmu_handle_ppr(smmu, evt);
> + spin_lock(>wq.lock);
> + if (++num_handled == queue_size) {
> + q->batch++;
> + wake_up_locked(>wq);
> + num_handled = 0;
> + }
> + }
>  
>   if (queue_sync_prod(q) == -EOVERFLOW)
>   dev_err(smmu->dev, "PRIQ overflow detected -- requests 
> lost\n");
> @@ -1329,9 +1361,65 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void 
> *dev)
>  
>   /* Sync our overflow flag, as we believe we're up to speed */
>   q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
> +
> + q->batch++;
> + 

Re: [PATCH 28/37] iommu/arm-smmu-v3: Maintain a SID->device structure

2018-03-08 Thread Jonathan Cameron
On Mon, 12 Feb 2018 18:33:43 +
Jean-Philippe Brucker  wrote:

> When handling faults from the event or PRI queue, we need to find the
> struct device associated to a SID. Add a rb_tree to keep track of SIDs.
> 
> Signed-off-by: Jean-Philippe Brucker 
nitpick inline.


> ---
>  drivers/iommu/arm-smmu-v3.c | 105 
> 
>  1 file changed, 105 insertions(+)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index c5b3a43becaf..2430b2140f8d 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -615,10 +615,19 @@ struct arm_smmu_device {
>   /* IOMMU core code handle */
>   struct iommu_device iommu;
>  
> + struct rb_root  streams;
> + struct mutexstreams_mutex;
> +
>   /* Notifier for the fault queue */
>   struct notifier_block   faultq_nb;
>  };
>  
> +struct arm_smmu_stream {
> + u32 id;
> + struct arm_smmu_master_data *master;
> + struct rb_node  node;
> +};
> +
>  /* SMMU private data for each master */
>  struct arm_smmu_master_data {
>   struct arm_smmu_device  *smmu;
> @@ -626,6 +635,7 @@ struct arm_smmu_master_data {
>  
>   struct arm_smmu_domain  *domain;
>   struct list_headlist; /* domain->devices */
> + struct arm_smmu_stream  *streams;
>  
>   struct device   *dev;
>  
> @@ -1250,6 +1260,31 @@ static int arm_smmu_init_l2_strtab(struct 
> arm_smmu_device *smmu, u32 sid)
>   return 0;
>  }
>  
> +static struct arm_smmu_master_data *
> +arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
> +{
> + struct rb_node *node;
> + struct arm_smmu_stream *stream;
> + struct arm_smmu_master_data *master = NULL;
> +
> + mutex_lock(&smmu->streams_mutex);
> + node = smmu->streams.rb_node;
> + while (node) {
> + stream = rb_entry(node, struct arm_smmu_stream, node);
> + if (stream->id < sid) {
> + node = node->rb_right;
> + } else if (stream->id > sid) {
> + node = node->rb_left;
> + } else {
> + master = stream->master;
> + break;
> + }
> + }
> + mutex_unlock(&smmu->streams_mutex);
> +
> + return master;
> +}
> +
>  /* IRQ and event handlers */
>  static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
>  {
> @@ -2146,6 +2181,71 @@ static bool arm_smmu_sid_in_range(struct 
> arm_smmu_device *smmu, u32 sid)
>   return sid < limit;
>  }
>  
> +static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
> +   struct arm_smmu_master_data *master)
> +{
> + int i;
> + int ret = 0;
> + struct arm_smmu_stream *new_stream, *cur_stream;
> + struct rb_node **new_node, *parent_node = NULL;
> + struct iommu_fwspec *fwspec = master->dev->iommu_fwspec;
> +
> + master->streams = kcalloc(fwspec->num_ids,
> +   sizeof(struct arm_smmu_stream), GFP_KERNEL);
> + if (!master->streams)
> + return -ENOMEM;
> +
> + mutex_lock(>streams_mutex);
> + for (i = 0; i < fwspec->num_ids && !ret; i++) {
> + new_stream = >streams[i];
> + new_stream->id = fwspec->ids[i];
> + new_stream->master = master;
> +
> + new_node = &(smmu->streams.rb_node);
> + while (*new_node) {
> + cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
> +   node);
> + parent_node = *new_node;
> + if (cur_stream->id > new_stream->id) {
> + new_node = &((*new_node)->rb_left);
> + } else if (cur_stream->id < new_stream->id) {
> + new_node = &((*new_node)->rb_right);
> + } else {
> + dev_warn(master->dev,
> +  "stream %u already in tree\n",
> +  cur_stream->id);
> + ret = -EINVAL;
> + break;
> + }
> + }
> +
> + if (!ret) {
> + rb_link_node(&new_stream->node, parent_node, new_node);
> + rb_insert_color(&new_stream->node, &smmu->streams);
> + }
> + }
> + mutex_unlock(>streams_mutex);
> +
> + return ret;
> +}
> +
> +static void arm_smmu_remove_master(struct arm_smmu_device *smmu,
> +struct arm_smmu_master_data *master)
> +{
> + int i;
> + struct iommu_fwspec *fwspec = master->dev->iommu_fwspec;
> +
> + if (!master->streams)
> + return;
> +
> + 

Re: [PATCH 31/37] iommu/arm-smmu-v3: Add support for PCI ATS

2018-03-08 Thread Jonathan Cameron
On Mon, 12 Feb 2018 18:33:46 +
Jean-Philippe Brucker  wrote:

> PCIe devices can implement their own TLB, named Address Translation Cache
> (ATC). Enable Address Translation Service (ATS) for devices that support
> it and send them invalidation requests whenever we invalidate the IOTLBs.
> 
>   Range calculation
>   -
> 
> The invalidation packet itself is a bit awkward: range must be naturally
> aligned, which means that the start address is a multiple of the range
> size. In addition, the size must be a power of two number of 4k pages. We
> have a few options to enforce this constraint:
> 
> (1) Find the smallest naturally aligned region that covers the requested
> range. This is simple to compute and only takes one ATC_INV, but it
> will spill on lots of neighbouring ATC entries.
> 
> (2) Align the start address to the region size (rounded up to a power of
> two), and send a second invalidation for the next range of the same
> size. Still not great, but reduces spilling.
> 
> (3) Cover the range exactly with the smallest number of naturally aligned
> regions. This would be interesting to implement but as for (2),
> requires multiple ATC_INV.
> 
> As I suspect ATC invalidation packets will be a very scarce resource, I'll
> go with option (1) for now, and only send one big invalidation. We can
> move to (2), which is both easier to read and more gentle with the ATC,
> once we've observed on real systems that we can send multiple smaller
> Invalidation Requests for roughly the same price as a single big one.
> 
> Note that with io-pgtable, the unmap function is called for each page, so
> this doesn't matter. The problem shows up when sharing page tables with
> the MMU.
> 
>   Timeout
>   ---
> 
> ATC invalidation is allowed to take up to 90 seconds, according to the
> PCIe spec, so it is possible to hit the SMMU command queue timeout during
> normal operations.
> 
> Some SMMU implementations will raise a CERROR_ATC_INV_SYNC when a CMD_SYNC
> fails because of an ATC invalidation. Some will just abort the CMD_SYNC.
> Others might let CMD_SYNC complete and have an asynchronous IMPDEF
> mechanism to record the error. When we receive a CERROR_ATC_INV_SYNC, we
> could retry sending all ATC_INV since last successful CMD_SYNC. When a
> CMD_SYNC fails without CERROR_ATC_INV_SYNC, we could retry sending *all*
> commands since last successful CMD_SYNC.
> 
> We cannot afford to wait 90 seconds in iommu_unmap, let alone MMU
> notifiers. So we'd have to introduce a more clever system if this timeout
> becomes a problem, like keeping hold of mappings and invalidating in the
> background. Implementing safe delayed invalidations is a very complex
> problem and deserves a series of its own. We'll assess whether more work
> is needed to properly handle ATC invalidation timeouts once this code runs
> on real hardware.
> 
>   Misc
>   
> 
> I didn't put ATC and TLB invalidations in the same functions for three
> reasons:
> 
> * TLB invalidation by range is batched and committed with a single sync.
>   Batching ATC invalidation is inconvenient, endpoints limit the number of
>   inflight invalidations. We'd have to count the number of invalidations
>   queued and send a sync periodically. In addition, I suspect we always
>   need a sync between TLB and ATC invalidation for the same page.
> 
> * Doing ATC invalidation outside tlb_inv_range also allows to send less
>   requests, since TLB invalidations are done per page or block, while ATC
>   invalidations target IOVA ranges.
> 
> * TLB invalidation by context is performed when freeing the domain, at
>   which point there isn't any device attached anymore.
> 
> Signed-off-by: Jean-Philippe Brucker 
Few minor error path related comments inline..

> ---
>  drivers/iommu/arm-smmu-v3.c | 236 
> ++--
>  1 file changed, 226 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 8b9f5dd06be0..76513135310f 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -37,6 +37,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  
> @@ -109,6 +110,7 @@
>  #define IDR5_OAS_48_BIT  (5 << IDR5_OAS_SHIFT)
>  
>  #define ARM_SMMU_CR0 0x20
> +#define CR0_ATSCHK   (1 << 4)
>  #define CR0_CMDQEN   (1 << 3)
>  #define CR0_EVTQEN   (1 << 2)
>  #define CR0_PRIQEN   (1 << 1)
> @@ -304,6 +306,7 @@
>  #define CMDQ_ERR_CERROR_NONE_IDX 0
>  #define CMDQ_ERR_CERROR_ILL_IDX  1
>  #define CMDQ_ERR_CERROR_ABT_IDX  2
> +#define CMDQ_ERR_CERROR_ATC_INV_IDX  3
>  
>  #define CMDQ_0_OP_SHIFT  0
>  #define CMDQ_0_OP_MASK   0xffUL
> @@ -327,6 +330,15 @@
>  #define CMDQ_TLBI_1_VA_MASK  

Re: [PATCH 35/37] iommu/arm-smmu-v3: Add support for PRI

2018-03-08 Thread Jonathan Cameron
On Mon, 12 Feb 2018 18:33:50 +
Jean-Philippe Brucker  wrote:

> For PCI devices that support it, enable the PRI capability and handle
> PRI Page Requests with the generic fault handler.
> 
> Signed-off-by: Jean-Philippe Brucker 
A couple of nitpicks.

> ---
>  drivers/iommu/arm-smmu-v3.c | 174 
> ++--
>  1 file changed, 119 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 8d09615fab35..ace2f995b0c0 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -271,6 +271,7 @@
>  #define STRTAB_STE_1_S1COR_SHIFT 4
>  #define STRTAB_STE_1_S1CSH_SHIFT 6
>  
> +#define STRTAB_STE_1_PPAR(1UL << 18)
>  #define STRTAB_STE_1_S1STALLD(1UL << 27)
>  
>  #define STRTAB_STE_1_EATS_ABT0UL
> @@ -346,9 +347,9 @@
>  #define CMDQ_PRI_1_GRPID_SHIFT   0
>  #define CMDQ_PRI_1_GRPID_MASK0x1ffUL
>  #define CMDQ_PRI_1_RESP_SHIFT12
> -#define CMDQ_PRI_1_RESP_DENY (0UL << CMDQ_PRI_1_RESP_SHIFT)
> -#define CMDQ_PRI_1_RESP_FAIL (1UL << CMDQ_PRI_1_RESP_SHIFT)
> -#define CMDQ_PRI_1_RESP_SUCC (2UL << CMDQ_PRI_1_RESP_SHIFT)
> +#define CMDQ_PRI_1_RESP_FAILURE  (0UL << CMDQ_PRI_1_RESP_SHIFT)
> +#define CMDQ_PRI_1_RESP_INVALID  (1UL << CMDQ_PRI_1_RESP_SHIFT)
> +#define CMDQ_PRI_1_RESP_SUCCESS  (2UL << CMDQ_PRI_1_RESP_SHIFT)
Mixing fixing up this naming with the rest of the patch does make things a
little harder to read than they would have been if done as separate patches.
Worth splitting?

>  
>  #define CMDQ_RESUME_0_SID_SHIFT  32
>  #define CMDQ_RESUME_0_SID_MASK   0xUL
> @@ -442,12 +443,6 @@ module_param_named(disable_ats_check, disable_ats_check, 
> bool, S_IRUGO);
>  MODULE_PARM_DESC(disable_ats_check,
>   "By default, the SMMU checks whether each incoming transaction marked 
> as translated is allowed by the stream configuration. This option disables 
> the check.");
>  
> -enum pri_resp {
> - PRI_RESP_DENY,
> - PRI_RESP_FAIL,
> - PRI_RESP_SUCC,
> -};
> -
>  enum arm_smmu_msi_index {
>   EVTQ_MSI_INDEX,
>   GERROR_MSI_INDEX,
> @@ -530,7 +525,7 @@ struct arm_smmu_cmdq_ent {
>   u32 sid;
>   u32 ssid;
>   u16 grpid;
> - enum pri_resp   resp;
> + enum page_response_code resp;
>   } pri;
>  
>   #define CMDQ_OP_RESUME  0x44
> @@ -615,6 +610,7 @@ struct arm_smmu_strtab_ent {
>   struct arm_smmu_s2_cfg  *s2_cfg;
>  
>   boolcan_stall;
> + boolprg_resp_needs_ssid;
>  };
>  
>  struct arm_smmu_strtab_cfg {
> @@ -969,14 +965,14 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct 
> arm_smmu_cmdq_ent *ent)
>   cmd[0] |= (u64)ent->pri.sid << CMDQ_PRI_0_SID_SHIFT;
>   cmd[1] |= ent->pri.grpid << CMDQ_PRI_1_GRPID_SHIFT;
>   switch (ent->pri.resp) {
> - case PRI_RESP_DENY:
> - cmd[1] |= CMDQ_PRI_1_RESP_DENY;
> + case IOMMU_PAGE_RESP_FAILURE:
> + cmd[1] |= CMDQ_PRI_1_RESP_FAILURE;
>   break;
> - case PRI_RESP_FAIL:
> - cmd[1] |= CMDQ_PRI_1_RESP_FAIL;
> + case IOMMU_PAGE_RESP_INVALID:
> + cmd[1] |= CMDQ_PRI_1_RESP_INVALID;
>   break;
> - case PRI_RESP_SUCC:
> - cmd[1] |= CMDQ_PRI_1_RESP_SUCC;
> + case IOMMU_PAGE_RESP_SUCCESS:
> + cmd[1] |= CMDQ_PRI_1_RESP_SUCCESS;
>   break;
>   default:
>   return -EINVAL;
> @@ -1180,9 +1176,16 @@ static int arm_smmu_page_response(struct iommu_domain 
> *domain,
>   cmd.resume.sid  = sid;
>   cmd.resume.stag = resp->page_req_group_id;
>   cmd.resume.resp = resp->resp_code;
> + } else if (master->can_fault) {
> + cmd.opcode  = CMDQ_OP_PRI_RESP;
> + cmd.substream_valid = resp->pasid_present &&
> +   master->ste.prg_resp_needs_ssid;
> + cmd.pri.sid = sid;
> + cmd.pri.ssid= resp->pasid;
> + cmd.pri.grpid   = resp->page_req_group_id;
> + cmd.pri.resp= resp->resp_code;
>   } else {
> - /* TODO: put PRI response here */
> - return -EINVAL;
> + return -ENODEV;
>   }
>  
>   arm_smmu_cmdq_issue_cmd(master->smmu, );
> @@ -1309,6 +1312,9 @@ static void 

Re: [PATCH 07/37] iommu: Add a page fault handler

2018-03-08 Thread Jonathan Cameron
On Mon, 12 Feb 2018 18:33:22 +
Jean-Philippe Brucker  wrote:

> Some systems allow devices to handle IOMMU translation faults in the core
> mm. For example systems supporting the PCI PRI extension or Arm SMMU stall
> model. Infrastructure for reporting such recoverable page faults was
> recently added to the IOMMU core, for SVA virtualization. Extend
> iommu_report_device_fault() to handle host page faults as well.
> 
> * IOMMU drivers instantiate a fault workqueue, using
>   iommu_fault_queue_init() and iommu_fault_queue_destroy().
> 
> * When it receives a fault event, supposedly in an IRQ handler, the IOMMU
>   driver reports the fault using iommu_report_device_fault()
> 
> * If the device driver registered a handler (e.g. VFIO), pass down the
>   fault event. Otherwise submit it to the fault queue, to be handled in a
>   thread.
> 
> * When the fault corresponds to an io_mm, call the mm fault handler on it
>   (in next patch).
> 
> * Once the fault is handled, the mm wrapper or the device driver reports
> +   success or failure with iommu_page_response(). The translation is either
>   retried or aborted, depending on the response code.
> 
> Signed-off-by: Jean-Philippe Brucker 
A few really minor points inline...  Basically looks good to me.

> ---
>  drivers/iommu/Kconfig  |  10 ++
>  drivers/iommu/Makefile |   1 +
>  drivers/iommu/io-pgfault.c | 282 
> +
>  drivers/iommu/iommu-sva.c  |   3 -
>  drivers/iommu/iommu.c  |  31 ++---
>  include/linux/iommu.h  |  34 +-
>  6 files changed, 339 insertions(+), 22 deletions(-)
>  create mode 100644 drivers/iommu/io-pgfault.c
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index 146eebe9a4bb..e751bb9958ba 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -85,6 +85,15 @@ config IOMMU_SVA
>  
> If unsure, say N here.
>  
> +config IOMMU_FAULT
> + bool "Fault handler for the IOMMU API"
> + select IOMMU_API
> + help
> +   Enable the generic fault handler for the IOMMU API, that handles
> +   recoverable page faults or inject them into guests.
> +
> +   If unsure, say N here.
> +
>  config FSL_PAMU
>   bool "Freescale IOMMU support"
>   depends on PCI
> @@ -156,6 +165,7 @@ config INTEL_IOMMU
>   select IOMMU_API
>   select IOMMU_IOVA
>   select DMAR_TABLE
> + select IOMMU_FAULT
>   help
> DMA remapping (DMAR) devices support enables independent address
> translations for Direct Memory Access (DMA) from devices.
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> index 1dbcc89ebe4c..f4324e29035e 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -4,6 +4,7 @@ obj-$(CONFIG_IOMMU_API) += iommu-traces.o
>  obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
>  obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
>  obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o
> +obj-$(CONFIG_IOMMU_FAULT) += io-pgfault.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
> diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
> new file mode 100644
> index ..33309ed316d2
> --- /dev/null
> +++ b/drivers/iommu/io-pgfault.c
> @@ -0,0 +1,282 @@
> +/*
> + * Handle device page faults
> + *
> + * Copyright (C) 2018 ARM Ltd.
> + * Author: Jean-Philippe Brucker 
> + *
> + * SPDX-License-Identifier: GPL-2.0
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +static struct workqueue_struct *iommu_fault_queue;
> +static DECLARE_RWSEM(iommu_fault_queue_sem);
> +static refcount_t iommu_fault_queue_refs = REFCOUNT_INIT(0);
> +static BLOCKING_NOTIFIER_HEAD(iommu_fault_queue_flush_notifiers);
> +
> +/* Used to store incomplete fault groups */
> +static LIST_HEAD(iommu_partial_faults);
> +static DEFINE_SPINLOCK(iommu_partial_faults_lock);
> +
> +struct iommu_fault_context {
> + struct device   *dev;
> + struct iommu_fault_eventevt;
> + struct list_headhead;
> +};
> +
> +struct iommu_fault_group {
> + struct iommu_domain *domain;
> + struct iommu_fault_context  last_fault;
> + struct list_headfaults;
> + struct work_struct  work;
> +};
> +
> +/*
> + * iommu_fault_complete() - Finish handling a fault
> + *
> + * Send a response if necessary and pass on the sanitized status code
> + */
> +static int iommu_fault_complete(struct iommu_domain *domain, struct device 
> *dev,
> + struct iommu_fault_event *evt, int status)
> +{
> + struct page_response_msg resp = {
> + .addr   = evt->addr,
> + .pasid  = evt->pasid,
> + .pasid_present  = evt->pasid_valid,
> + 

Re: Preferred method to detect if a device is behind an enabled iommu.

2018-02-01 Thread Jonathan Cameron
On Thu, 1 Feb 2018 12:49:24 +
Robin Murphy <robin.mur...@arm.com> wrote:

> On 01/02/18 10:18, Jonathan Cameron wrote:
> > Hi All,
> > 
> > We have a crypto accelerator which needs to have a few different settings
> > depending on whether or not the SMMUv3 is enabled and translating addresses.
> > 
> > 
> > https://marc.info/?l=linux-crypto-vger&m=151732626428206&w=2
> > 
> > 1) A quirk of the hardware revision means we need to turn some elements
> > off if the iommu is enabled.
> > 2) The device has certain cache related settings that means it needs to know
> > if it is dealing with VAs or PAs.
> > 
> > Current approach is to see if the iommu_group is set in struct device.
> > 
> > We could find one instance of another driver doing this and copied that,
> > (drivers/dma/rcar-dmac.c)
> > but the precedent is weak enough that confirmation would be good.
> > So whilst it 'works' the question is whether it is safe in general
> > and whether there is a better way.  
> 
> The presence of a group alone is not sufficient, as it only tells you 
> that the device is associated with an IOMMU in some way (including VFIO 
> no-iommu mode where said IOMMU isn't even real).
> 
> To detect whether translation is active, I think the best way right now 
> would be to first call iommu_get_domain_for_dev() to see whether the 
> device is actually attached to a domain, then if so check the domain 
> type for the __IOMMU_DOMAIN_PAGING flag to confirm if it represents a 
> translation context rather than a bypass one.

Thanks - that works great.

> 
> It might be reasonable to propose wrapping that up in an IOMMU API (or 
> possibly DMA API, as appropriate) helper, as there are certainly other 
> drivers doing various degrees of this sort of thing for various reasons 
> (to the point where we currently have to accommodate rather nonsensical 
> iova_to_phys() calls on identity domains).

Sounds like a good plan but the fun question as ever is what to call it..

iommu_domain_can_map or iommu_domain_is_translating perhaps?

For now I'll just put the check in the driver so we can move forward
in parallel.

Thanks,

Jonathan
> 
> Robin.

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Preferred method to detect if a device is behind an enabled iommu.

2018-02-01 Thread Jonathan Cameron
Hi All,

We have a crypto accelerator which needs to have a few different settings
depending on whether or not the SMMUv3 is enabled and translating addresses.

https://marc.info/?l=linux-crypto-vger&m=151732626428206&w=2

1) A quirk of the hardware revision means we need to turn some elements
   off if the iommu is enabled.
2) The device has certain cache related settings that means it needs to know
   if it is dealing with VAs or PAs.

Current approach is to see if the iommu_group is set in struct device.

We could find one instance of another driver doing this and copied that,
(drivers/dma/rcar-dmac.c)
but the precedent is weak enough that confirmation would be good.
So whilst it 'works' the question is whether it is safe in general
and whether there is a better way.

Thanks,

--
Jonathan Cameron
Huawei

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 2/2] iommu/arm-smmu: Add system PM support

2017-08-11 Thread Jonathan Cameron
On Tue, 8 Aug 2017 13:14:18 +0100
Robin Murphy  wrote:

> On 08/08/17 12:18, Will Deacon wrote:
> > On Tue, Jul 18, 2017 at 01:44:42PM +0100, Robin Murphy wrote:  
> >> With all our hardware state tracked in such a way that we can naturally
> >> restore it as part of the necessary reset, resuming is trivial, and
> >> there's nothing to do on suspend at all.
> >>
> >> Signed-off-by: Robin Murphy 
> >> ---
> >>  drivers/iommu/arm-smmu.c | 12 
> >>  1 file changed, 12 insertions(+)
> >>
> >> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> >> index 86897b7b81d8..0f5f06e9abfa 100644
> >> --- a/drivers/iommu/arm-smmu.c
> >> +++ b/drivers/iommu/arm-smmu.c
> >> @@ -2356,10 +2356,22 @@ static int arm_smmu_device_remove(struct 
> >> platform_device *pdev)
> >>return 0;
> >>  }
> >>  
> >> +static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
> >> +{  
> > 
> > Did you actually get a warning here without the __maybe_unused annotation?
> > It looks like some other drivers just guard the thing with CONFIG_PM_SLEEP. 
> >  
> 
> I'm under the impression that the annotation is preferred over #ifdefs
> for new code (for the sake of coverage, I guess).
https://patchwork.kernel.org/patch/9734367/

Is a good thread discussing this.  Both coverage and to avoid common
pitfalls of the ifdef fun.

Jonathan
> 
> >> +  struct arm_smmu_device *smmu = dev_get_drvdata(dev);
> >> +
> >> +  arm_smmu_device_reset(smmu);
> >> +  return 0;
> >> +}
> >> +
> >> +
> >> +static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
> >> +
> >>  static struct platform_driver arm_smmu_driver = {
> >>.driver = {
> >>.name   = "arm-smmu",
> >>.of_match_table = of_match_ptr(arm_smmu_of_match),
> >> +  .pm = _smmu_pm_ops,  
> > 
> > Cosmetic: can you tab-align this assignment please?  
> 
> Oops, I missed that - will do.
> 
> Robin.
> 
> ___
> linux-arm-kernel mailing list
> linux-arm-ker...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/5] iommu/arm-smmu-v3: put off the execution of TLBI* to reduce lock confliction

2017-07-18 Thread Jonathan Cameron
On Mon, 17 Jul 2017 13:28:47 -0400
Nate Watterson <nwatt...@codeaurora.org> wrote:

> Hi Jonathan,
> 
> On 7/17/2017 10:23 AM, Jonathan Cameron wrote:
> > On Mon, 17 Jul 2017 14:06:42 +0100
> > John Garry <john.ga...@huawei.com> wrote:
> >   
> >> +
> >>
> >> On 29/06/2017 03:08, Leizhen (ThunderTown) wrote:  
> >>>
> >>>
> >>> On 2017/6/28 17:32, Will Deacon wrote:  
> >>>> Hi Zhen Lei,
> >>>>
> >>>> Nate (CC'd), Robin and I have been working on something very similar to
> >>>> this series, but this patch is different to what we had planned. More 
> >>>> below.
> >>>>
> >>>> On Mon, Jun 26, 2017 at 09:38:46PM +0800, Zhen Lei wrote:  
> >>>>> Because all TLBI commands should be followed by a SYNC command, to make
> >>>>> sure that it has been completely finished. So we can just add the TLBI
> >>>>> commands into the queue, and put off the execution until meet SYNC or
> >>>>> other commands. To prevent the followed SYNC command waiting for a long
> >>>>> time because of too many commands have been delayed, restrict the max
> >>>>> delayed number.
> >>>>>
> >>>>> According to my test, I got the same performance data as I replaced 
> >>>>> writel
> >>>>> with writel_relaxed in queue_inc_prod.
> >>>>>
> >>>>> Signed-off-by: Zhen Lei <thunder.leiz...@huawei.com>
> >>>>> ---
> >>>>>   drivers/iommu/arm-smmu-v3.c | 42 
> >>>>> +-
> >>>>>   1 file changed, 37 insertions(+), 5 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> >>>>> index 291da5f..4481123 100644
> >>>>> --- a/drivers/iommu/arm-smmu-v3.c
> >>>>> +++ b/drivers/iommu/arm-smmu-v3.c
> >>>>> @@ -337,6 +337,7 @@
> >>>>>   /* Command queue */
> >>>>>   #define CMDQ_ENT_DWORDS   2
> >>>>>   #define CMDQ_MAX_SZ_SHIFT 8
> >>>>> +#define CMDQ_MAX_DELAYED   32
> >>>>>
> >>>>>   #define CMDQ_ERR_SHIFT24
> >>>>>   #define CMDQ_ERR_MASK 0x7f
> >>>>> @@ -472,6 +473,7 @@ struct arm_smmu_cmdq_ent {
> >>>>> };
> >>>>> } cfgi;
> >>>>>
> >>>>> +   #define CMDQ_OP_TLBI_NH_ALL 0x10
> >>>>> #define CMDQ_OP_TLBI_NH_ASID0x11
> >>>>> #define CMDQ_OP_TLBI_NH_VA  0x12
> >>>>> #define CMDQ_OP_TLBI_EL2_ALL0x20
> >>>>> @@ -499,6 +501,7 @@ struct arm_smmu_cmdq_ent {
> >>>>>
> >>>>>   struct arm_smmu_queue {
> >>>>> int irq; /* Wired interrupt */
> >>>>> +   u32 nr_delay;
> >>>>>
> >>>>> __le64  *base;
> >>>>> dma_addr_t  base_dma;
> >>>>> @@ -722,11 +725,16 @@ static int queue_sync_prod(struct arm_smmu_queue 
> >>>>> *q)
> >>>>> return ret;
> >>>>>   }
> >>>>>
> >>>>> -static void queue_inc_prod(struct arm_smmu_queue *q)
> >>>>> +static void queue_inc_swprod(struct arm_smmu_queue *q)
> >>>>>   {
> >>>>> -   u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
> >>>>> +   u32 prod = q->prod + 1;
> >>>>>
> >>>>> q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
> >>>>> +}
> >>>>> +
> >>>>> +static void queue_inc_prod(struct arm_smmu_queue *q)
> >>>>> +{
> >>>>> +   queue_inc_swprod(q);
> >>>>> writel(q->prod, q->prod_reg);
> >>>>>   }
> >>>>>
> >>>>> @@ -761,13 +769,24 @@ static void queue_write(__le64 *dst, u64 *src, 
> >>>>> size_t n_dwords)
> >>>>> *dst++ = cpu_to_le64(*sr

  1   2   >