Re: [RFC PATCH V3 4/5] platform: mtk-isp: Add Mediatek DIP driver

2019-09-11 Thread Tomasz Figa
On Thu, Sep 12, 2019 at 2:41 AM Frederic Chen
 wrote:
>
> Hi Tomasz,
>
> I appreciate your helpful comments.
>
>
> On Tue, 2019-09-10 at 13:04 +0900, Tomasz Figa wrote:
> > Hi Frederic,
> >
> > On Tue, Sep 10, 2019 at 4:23 AM  wrote:
> > >
> > > From: Frederic Chen 
> > >
> > > This patch adds the driver of Digital Image Processing (DIP)
> > > unit in Mediatek ISP system, providing image format
> > > conversion, resizing, and rotation features.
> > >
> > > The mtk-isp directory will contain drivers for multiple IP
> > > blocks found in Mediatek ISP system. It will include ISP
> > > Pass 1 driver(CAM), sensor interface driver, DIP driver and
> > > face detection driver.
> > >
> > > Signed-off-by: Frederic Chen 
> > > ---
> > >  drivers/media/platform/mtk-isp/Makefile   |7 +
> > >  .../media/platform/mtk-isp/isp_50/Makefile|7 +
> > >  .../platform/mtk-isp/isp_50/dip/Makefile  |   18 +
> > >  .../platform/mtk-isp/isp_50/dip/mtk_dip-dev.c |  650 +
> > >  .../platform/mtk-isp/isp_50/dip/mtk_dip-dev.h |  566 +
> > >  .../platform/mtk-isp/isp_50/dip/mtk_dip-hw.h  |  156 ++
> > >  .../platform/mtk-isp/isp_50/dip/mtk_dip-sys.c |  521 
> > >  .../mtk-isp/isp_50/dip/mtk_dip-v4l2.c | 2255 +
> > >  8 files changed, 4180 insertions(+)
> > >  create mode 100644 drivers/media/platform/mtk-isp/Makefile
> > >  create mode 100644 drivers/media/platform/mtk-isp/isp_50/Makefile
> > >  create mode 100644 drivers/media/platform/mtk-isp/isp_50/dip/Makefile
> > >  create mode 100644 
> > > drivers/media/platform/mtk-isp/isp_50/dip/mtk_dip-dev.c
> > >  create mode 100644 
> > > drivers/media/platform/mtk-isp/isp_50/dip/mtk_dip-dev.h
> > >  create mode 100644 drivers/media/platform/mtk-isp/isp_50/dip/mtk_dip-hw.h
> > >  create mode 100644 
> > > drivers/media/platform/mtk-isp/isp_50/dip/mtk_dip-sys.c
> > >  create mode 100644 
> > > drivers/media/platform/mtk-isp/isp_50/dip/mtk_dip-v4l2.c
> > >
> >
> > Thanks for sending v3!
> >
> > I'm going to do a full review a bit later, but please check one
> > comment about power handling below.
> >
> > Other than that one comment, from a quick look, I think we only have a
> > number of style issues left. Thanks for the hard work!
> >
> > [snip]
> > > +static void dip_runner_func(struct work_struct *work)
> > > +{
> > > +   struct mtk_dip_request *req = mtk_dip_hw_mdp_work_to_req(work);
> > > +   struct mtk_dip_dev *dip_dev = req->dip_pipe->dip_dev;
> > > +   struct img_config *config_data =
> > > +   (struct img_config *)req->working_buf->config_data.vaddr;
> > > +
> > > +   /*
> > > +* Call MDP/GCE API to do HW excecution
> > > +* Pass the framejob to MDP driver
> > > +*/
> > > +   pm_runtime_get_sync(dip_dev->dev);
> > > +   mdp_cmdq_sendtask(dip_dev->mdp_pdev, config_data,
> > > + &req->img_fparam.frameparam, NULL, false,
> > > + dip_mdp_cb_func, req);
> > > +}
> > [snip]
> > > +static void dip_composer_workfunc(struct work_struct *work)
> > > +{
> > > +   struct mtk_dip_request *req = mtk_dip_hw_fw_work_to_req(work);
> > > +   struct mtk_dip_dev *dip_dev = req->dip_pipe->dip_dev;
> > > +   struct img_ipi_param ipi_param;
> > > +   struct mtk_dip_hw_subframe *buf;
> > > +   int ret;
> > > +
> > > +   down(&dip_dev->sem);
> > > +
> > > +   buf = mtk_dip_hw_working_buf_alloc(req->dip_pipe->dip_dev);
> > > +   if (!buf) {
> > > +   dev_err(req->dip_pipe->dip_dev->dev,
> > > +   "%s:%s:req(%p): no free working buffer 
> > > available\n",
> > > +   __func__, req->dip_pipe->desc->name, req);
> > > +   }
> > > +
> > > +   req->working_buf = buf;
> > > +   
> > > mtk_dip_wbuf_to_ipi_img_addr(&req->img_fparam.frameparam.subfrm_data,
> > > +&buf->buffer);
> > > +   memset(buf->buffer.vaddr, 0, DIP_SUB_FRM_SZ);
> > > +   
> > > mtk_dip_wbuf_to_ipi_img_sw_addr(&req->img_fparam.frameparam.config_data,
> > > +   &buf->config_data);
> > > +   memset(buf->config_data.vaddr, 0, DIP_COMP_SZ);
> > > +
> > > +   if (!req->img_fparam.frameparam.tuning_data.present) {
> > > +   /*
> > > +* When user enqueued without tuning buffer,
> > > +* it would use driver internal buffer.
> > > +*/
> > > +   dev_dbg(dip_dev->dev,
> > > +   "%s: frame_no(%d) has no tuning_data\n",
> > > +   __func__, req->img_fparam.frameparam.frame_no);
> > > +
> > > +   mtk_dip_wbuf_to_ipi_tuning_addr
> > > +   (&req->img_fparam.frameparam.tuning_data,
> > > +&buf->tuning_buf);
> > > +   memset(buf->tuning_buf.vaddr, 0, DIP_TUNING_SZ);
> > > +   }
> > > +
> > > +   
> > > mtk_dip_wbuf_to_ipi_img_sw

Re: [RFC PATCH V3 4/5] platform: mtk-isp: Add Mediatek DIP driver

2019-09-11 Thread Frederic Chen
Hi Tomasz,

I appreciate your helpful comments.


On Tue, 2019-09-10 at 13:04 +0900, Tomasz Figa wrote:
> Hi Frederic,
> 
> On Tue, Sep 10, 2019 at 4:23 AM  wrote:
> >
> > From: Frederic Chen 
> >
> > This patch adds the driver of Digital Image Processing (DIP)
> > unit in Mediatek ISP system, providing image format
> > conversion, resizing, and rotation features.
> >
> > The mtk-isp directory will contain drivers for multiple IP
> > blocks found in Mediatek ISP system. It will include ISP
> > Pass 1 driver(CAM), sensor interface driver, DIP driver and
> > face detection driver.
> >
> > Signed-off-by: Frederic Chen 
> > ---
> >  drivers/media/platform/mtk-isp/Makefile   |7 +
> >  .../media/platform/mtk-isp/isp_50/Makefile|7 +
> >  .../platform/mtk-isp/isp_50/dip/Makefile  |   18 +
> >  .../platform/mtk-isp/isp_50/dip/mtk_dip-dev.c |  650 +
> >  .../platform/mtk-isp/isp_50/dip/mtk_dip-dev.h |  566 +
> >  .../platform/mtk-isp/isp_50/dip/mtk_dip-hw.h  |  156 ++
> >  .../platform/mtk-isp/isp_50/dip/mtk_dip-sys.c |  521 
> >  .../mtk-isp/isp_50/dip/mtk_dip-v4l2.c | 2255 +
> >  8 files changed, 4180 insertions(+)
> >  create mode 100644 drivers/media/platform/mtk-isp/Makefile
> >  create mode 100644 drivers/media/platform/mtk-isp/isp_50/Makefile
> >  create mode 100644 drivers/media/platform/mtk-isp/isp_50/dip/Makefile
> >  create mode 100644 drivers/media/platform/mtk-isp/isp_50/dip/mtk_dip-dev.c
> >  create mode 100644 drivers/media/platform/mtk-isp/isp_50/dip/mtk_dip-dev.h
> >  create mode 100644 drivers/media/platform/mtk-isp/isp_50/dip/mtk_dip-hw.h
> >  create mode 100644 drivers/media/platform/mtk-isp/isp_50/dip/mtk_dip-sys.c
> >  create mode 100644 drivers/media/platform/mtk-isp/isp_50/dip/mtk_dip-v4l2.c
> >
> 
> Thanks for sending v3!
> 
> I'm going to do a full review a bit later, but please check one
> comment about power handling below.
> 
> Other than that one comment, from a quick look, I think we only have a
> number of style issues left. Thanks for the hard work!
> 
> [snip]
> > +static void dip_runner_func(struct work_struct *work)
> > +{
> > +   struct mtk_dip_request *req = mtk_dip_hw_mdp_work_to_req(work);
> > +   struct mtk_dip_dev *dip_dev = req->dip_pipe->dip_dev;
> > +   struct img_config *config_data =
> > +   (struct img_config *)req->working_buf->config_data.vaddr;
> > +
> > +   /*
> > +* Call MDP/GCE API to do HW excecution
> > +* Pass the framejob to MDP driver
> > +*/
> > +   pm_runtime_get_sync(dip_dev->dev);
> > +   mdp_cmdq_sendtask(dip_dev->mdp_pdev, config_data,
> > + &req->img_fparam.frameparam, NULL, false,
> > + dip_mdp_cb_func, req);
> > +}
> [snip]
> > +static void dip_composer_workfunc(struct work_struct *work)
> > +{
> > +   struct mtk_dip_request *req = mtk_dip_hw_fw_work_to_req(work);
> > +   struct mtk_dip_dev *dip_dev = req->dip_pipe->dip_dev;
> > +   struct img_ipi_param ipi_param;
> > +   struct mtk_dip_hw_subframe *buf;
> > +   int ret;
> > +
> > +   down(&dip_dev->sem);
> > +
> > +   buf = mtk_dip_hw_working_buf_alloc(req->dip_pipe->dip_dev);
> > +   if (!buf) {
> > +   dev_err(req->dip_pipe->dip_dev->dev,
> > +   "%s:%s:req(%p): no free working buffer available\n",
> > +   __func__, req->dip_pipe->desc->name, req);
> > +   }
> > +
> > +   req->working_buf = buf;
> > +   
> > mtk_dip_wbuf_to_ipi_img_addr(&req->img_fparam.frameparam.subfrm_data,
> > +&buf->buffer);
> > +   memset(buf->buffer.vaddr, 0, DIP_SUB_FRM_SZ);
> > +   
> > mtk_dip_wbuf_to_ipi_img_sw_addr(&req->img_fparam.frameparam.config_data,
> > +   &buf->config_data);
> > +   memset(buf->config_data.vaddr, 0, DIP_COMP_SZ);
> > +
> > +   if (!req->img_fparam.frameparam.tuning_data.present) {
> > +   /*
> > +* When user enqueued without tuning buffer,
> > +* it would use driver internal buffer.
> > +*/
> > +   dev_dbg(dip_dev->dev,
> > +   "%s: frame_no(%d) has no tuning_data\n",
> > +   __func__, req->img_fparam.frameparam.frame_no);
> > +
> > +   mtk_dip_wbuf_to_ipi_tuning_addr
> > +   (&req->img_fparam.frameparam.tuning_data,
> > +&buf->tuning_buf);
> > +   memset(buf->tuning_buf.vaddr, 0, DIP_TUNING_SZ);
> > +   }
> > +
> > +   
> > mtk_dip_wbuf_to_ipi_img_sw_addr(&req->img_fparam.frameparam.self_data,
> > +   &buf->frameparam);
> > +   memcpy(buf->frameparam.vaddr, &req->img_fparam.frameparam,
> > +  sizeof(req->img_fparam.frameparam));
> > +   ipi_param.usage = IMG_IPI_FRAME;
> > +   ipi_param.frm_p

Re: [PATCH 0/3] iommu/io-pgtable-arm: Mali LPAE improvements

2019-09-11 Thread Robin Murphy

On 2019-09-11 5:20 pm, Will Deacon wrote:

On Wed, Sep 11, 2019 at 06:19:04PM +0200, Neil Armstrong wrote:

On 11/09/2019 16:42, Robin Murphy wrote:

Here's the eagerly-awaited fix to unblock T720/T820, plus a couple of
other bits that I've collected so far. I'm not considering this as
5.3 fixes material, but it would be nice if there's any chance still
to sneak it into 5.4.

Robin.


Robin Murphy (3):
   iommu/io-pgtable-arm: Correct Mali attributes
   iommu/io-pgtable-arm: Support more Mali configurations
   iommu/io-pgtable-arm: Allow coherent walks for Mali

  drivers/iommu/io-pgtable-arm.c | 61 ++
  1 file changed, 48 insertions(+), 13 deletions(-)



Tested-by: Neil Armstrong 

On Khadas VIM2 (Amlogic S912) with T820 Mali GPU

I hope this will be part of v5.4 so we can run panfrost on vanilla v5.4 !


Not a chance -- the merge window opens on Monday and -next isn't being
rolled out at the moment due to LPC. Let's shoot for 5.5 and get this
queued up in a few weeks.


Fair enough, that was certainly more extreme optimism than realistic 
expectation on my part :)


There is some argument for taking #1 and #2 as 5.4 fixes, though - the 
upcoming Mesa 19.2 release will enable T820 support on the userspace 
side - so let's pick that discussion up again in a few weeks.


Robin.

(And at worst, I guess we could carry the "cfg.ias = 48" workaround in 
the DRM driver for the 5.4 cycle if need be)

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 0/3] iommu/io-pgtable-arm: Mali LPAE improvements

2019-09-11 Thread Will Deacon
On Wed, Sep 11, 2019 at 06:19:04PM +0200, Neil Armstrong wrote:
> On 11/09/2019 16:42, Robin Murphy wrote:
> > Here's the eagerly-awaited fix to unblock T720/T820, plus a couple of
> > other bits that I've collected so far. I'm not considering this as
> > 5.3 fixes material, but it would be nice if there's any chance still
> > to sneak it into 5.4.
> > 
> > Robin.
> > 
> > 
> > Robin Murphy (3):
> >   iommu/io-pgtable-arm: Correct Mali attributes
> >   iommu/io-pgtable-arm: Support more Mali configurations
> >   iommu/io-pgtable-arm: Allow coherent walks for Mali
> > 
> >  drivers/iommu/io-pgtable-arm.c | 61 ++
> >  1 file changed, 48 insertions(+), 13 deletions(-)
> > 
> 
> Tested-by: Neil Armstrong 
> 
> On Khadas VIM2 (Amlogic S912) with T820 Mali GPU
> 
> I hope this will be part of v5.4 so we can run panfrost on vanilla v5.4 !

Not a chance -- the merge window opens on Monday and -next isn't being
rolled out at the moment due to LPC. Let's shoot for 5.5 and get this
queued up in a few weeks.

Will
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 0/3] iommu/io-pgtable-arm: Mali LPAE improvements

2019-09-11 Thread Neil Armstrong
Hi,

On 11/09/2019 16:42, Robin Murphy wrote:
> Hi all,
> 
> Here's the eagerly-awaited fix to unblock T720/T820, plus a couple of
> other bits that I've collected so far. I'm not considering this as
> 5.3 fixes material, but it would be nice if there's any chance still
> to sneak it into 5.4.
> 
> Robin.
> 
> 
> Robin Murphy (3):
>   iommu/io-pgtable-arm: Correct Mali attributes
>   iommu/io-pgtable-arm: Support more Mali configurations
>   iommu/io-pgtable-arm: Allow coherent walks for Mali
> 
>  drivers/iommu/io-pgtable-arm.c | 61 ++
>  1 file changed, 48 insertions(+), 13 deletions(-)
> 

Tested-by: Neil Armstrong 

On Khadas VIM2 (Amlogic S912) with T820 Mali GPU

I hope this will be part of v5.4 so we can run panfrost on vanilla v5.4 !

Thanks for pushing this,
Neil
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 3/3] iommu/io-pgtable-arm: Allow coherent walks for Mali

2019-09-11 Thread Robin Murphy
Midgard GPUs have ACE-Lite master interfaces which allows systems to
integrate them in an I/O-coherent manner. It seems that from the GPU's
viewpoint, the rest of the system is its outer shareable domain, and it
will only emit snoop signals for outer shareable accesses. As such,
setting the TTBR_SHARE_OUTER bit does indeed get coherent pagetable
walks working nicely.

Making data accesses coherent seems to be more of a challenge...

Signed-off-by: Robin Murphy 
---
 drivers/iommu/io-pgtable-arm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 77f41c9dd9be..2794d4661339 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -1061,6 +1061,9 @@ arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, 
void *cookie)
cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) |
  ARM_MALI_LPAE_TTBR_READ_INNER |
  ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
+   if (cfg->coherent_walk)
+   cfg->arm_mali_lpae_cfg.transtab |= 
ARM_MALI_LPAE_TTBR_SHARE_OUTER;
+
return &data->iop;
 
 out_free_data:
-- 
2.21.0.dirty

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/3] iommu/io-pgtable-arm: Correct Mali attributes

2019-09-11 Thread Robin Murphy
Whilst Midgard's MEMATTR follows a similar principle to the VMSA MAIR,
the actual attribute values differ, so although it currently appears to
work to some degree, we probably shouldn't be using our standard stage 1
MAIR for that. Instead, generate a reasonable MEMATTR with attribute
values borrowed from the kbase driver; at this point we'll be overriding
or ignoring pretty much all of the LPAE config, so just implement these
Mali details in a dedicated allocator instead of pretending to subclass
the standard VMSA format.

Signed-off-by: Robin Murphy 
---
 drivers/iommu/io-pgtable-arm.c | 53 +-
 1 file changed, 40 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 161a7d56264d..9e35cd991f06 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -167,6 +167,9 @@
 #define ARM_MALI_LPAE_TTBR_READ_INNER  BIT(2)
 #define ARM_MALI_LPAE_TTBR_SHARE_OUTER BIT(4)
 
+#define ARM_MALI_LPAE_MEMATTR_IMP_DEF  0x88ULL
+#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL
+
 /* IOPTE accessors */
 #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
 
@@ -1013,27 +1016,51 @@ arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg 
*cfg, void *cookie)
 static struct io_pgtable *
 arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
 {
-   struct io_pgtable *iop;
+   struct arm_lpae_io_pgtable *data;
+
+   /* No quirks for Mali (hopefully) */
+   if (cfg->quirks)
+   return NULL;
 
if (cfg->ias != 48 || cfg->oas > 40)
return NULL;
 
cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
-   iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie);
-   if (iop) {
-   u64 mair, ttbr;
 
-   /* Copy values as union fields overlap */
-   mair = cfg->arm_lpae_s1_cfg.mair[0];
-   ttbr = cfg->arm_lpae_s1_cfg.ttbr[0];
+   data = arm_lpae_alloc_pgtable(cfg);
+   if (!data)
+   return NULL;
 
-   cfg->arm_mali_lpae_cfg.memattr = mair;
-   cfg->arm_mali_lpae_cfg.transtab = ttbr |
-   ARM_MALI_LPAE_TTBR_READ_INNER |
-   ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
-   }
+   /*
+* MEMATTR: Mali has no actual notion of a non-cacheable type, so the
+* best we can do is mimic the out-of-tree driver and hope that the
+* "implementation-defined caching policy" is good enough. Similarly,
+* we'll use it for the sake of a valid attribute for our 'device'
+* index, although callers should never request that in practice.
+*/
+   cfg->arm_mali_lpae_cfg.memattr =
+   (ARM_MALI_LPAE_MEMATTR_IMP_DEF
+<< ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
+   (ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC
+<< ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
+   (ARM_MALI_LPAE_MEMATTR_IMP_DEF
+<< ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));
 
-   return iop;
+   data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
+   if (!data->pgd)
+   goto out_free_data;
+
+   /* Ensure the empty pgd is visible before TRANSTAB can be written */
+   wmb();
+
+   cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) |
+ ARM_MALI_LPAE_TTBR_READ_INNER |
+ ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
+   return &data->iop;
+
+out_free_data:
+   kfree(data);
+   return NULL;
 }
 
 struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
-- 
2.21.0.dirty

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 0/3] iommu/io-pgtable-arm: Mali LPAE improvements

2019-09-11 Thread Robin Murphy
Hi all,

Here's the eagerly-awaited fix to unblock T720/T820, plus a couple of
other bits that I've collected so far. I'm not considering this as
5.3 fixes material, but it would be nice if there's any chance still
to sneak it into 5.4.

Robin.


Robin Murphy (3):
  iommu/io-pgtable-arm: Correct Mali attributes
  iommu/io-pgtable-arm: Support more Mali configurations
  iommu/io-pgtable-arm: Allow coherent walks for Mali

 drivers/iommu/io-pgtable-arm.c | 61 ++
 1 file changed, 48 insertions(+), 13 deletions(-)

-- 
2.21.0.dirty

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/3] iommu/io-pgtable-arm: Support more Mali configurations

2019-09-11 Thread Robin Murphy
In principle, Midgard GPUs supporting smaller VA sizes should only
require 3-level pagetables, since the address bits resolved at level 0
(47:40) will never change. However, the kbase driver does not appear to
have any notion of a variable start level, and empirically T720 and T820
rapidly blow up with translation faults unless given a full 4-level
table, despite only supporting a 33-bit VA size.

The 'real' IAS value is still valuable in terms of validating addresses
on map/unmap, so tweak the allocator to allow smaller values while still
forcing the resultant tables to the full 4 levels.

Signed-off-by: Robin Murphy 
---
 drivers/iommu/io-pgtable-arm.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 9e35cd991f06..77f41c9dd9be 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -1022,7 +1022,7 @@ arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, 
void *cookie)
if (cfg->quirks)
return NULL;
 
-   if (cfg->ias != 48 || cfg->oas > 40)
+   if (cfg->ias > 48 || cfg->oas > 40)
return NULL;
 
cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
@@ -1031,6 +1031,11 @@ arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, 
void *cookie)
if (!data)
return NULL;
 
+   /* Mali seems to need a full 4-level table regardless of IAS */
+   if (data->levels < ARM_LPAE_MAX_LEVELS) {
+   data->levels = ARM_LPAE_MAX_LEVELS;
+   data->pgd_size = sizeof(arm_lpae_iopte);
+   }
/*
 * MEMATTR: Mali has no actual notion of a non-cacheable type, so the
 * best we can do is mimic the out-of-tree driver and hope that the
-- 
2.21.0.dirty

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [V2, 2/2] media: i2c: Add more sensor modes for ov8856 camera sensor

2019-09-11 Thread Sakari Ailus
Hi Tomasz,

On Wed, Sep 11, 2019 at 07:12:02PM +0900, Tomasz Figa wrote:
> Hi Sakari,
> 
> On Tue, Sep 10, 2019 at 10:05 PM  wrote:
> >
> > From: Dongchun Zhu 
> >
> > This patch mainly adds two more sensor modes for OV8856 CMOS image sensor.
> > That is, the resolution of 1632*1224 and 3264*2448, corresponding to the 
> > bayer order of BGGR.
> > The sensor revision also differs in some OTP register.
> >
> > Signed-off-by: Dongchun Zhu 
> > ---
> >  drivers/media/i2c/ov8856.c | 654 
> > +++--
> >  1 file changed, 639 insertions(+), 15 deletions(-)
> >
> 
> What do you think about the approach taken by this patch?
> 
> My understanding is that the register arrays being added by it can be
> only used with 24MHz input clock, while the existing ones are for
> 19.2MHz. That means that this patch makes the driver expose completely
> different modes (resolutions, mbus formats) depending on the input
> clock. Are we okay with this?

These register list based drivers only support a tiny subset of
configurations a sensor can support, and the number of those configurations
may be amended over time.

I don't see a problem in choosing a different set of available
configurations based on the external clock frequency; that may, after all,
cause that some of the configurations, at a particular frame rate, are not
even achievable --- albeit this is perhaps unlikely in this case.

In practice, it's often the case that the sensor vendor provides these
configurations and the vendor may provide different configurations
(including output resolutions etc.) to different parties. So it may well be
the submitter of the patch would also not have access to similar
configurations (output size, cropping etc.) that now exist in the driver.

I'll review the patch itself soonish.

-- 
Regards,

Sakari Ailus
sakari.ai...@linux.intel.com


Re: iommu/amd: Flushing and locking fixes

2019-09-11 Thread Joerg Roedel
Hi Filippo,

On Tue, Sep 10, 2019 at 07:49:20PM +0200, Filippo Sironi wrote:
> This patch series introduce patches to take the domain lock whenever we call
> functions that end up calling __domain_flush_pages.  Holding the domain lock 
> is
> necessary since __domain_flush_pages traverses the device list, which is
> protected by the domain lock.
> 
> The first patch in the series adds a completion right after an IOTLB flush in
> attach_device.

Thanks for pointing out these locking issues and your fixes. I have been
looking into it a bit and it seems there is more problems to take care
of.

The first problem is the racy access to domain->updated, which is best
fixed by moving that info onto the stack don't keep it in the domain
structure.

Other than that, I think your patches are kind of the big hammer
approach to fix it. As they are, they destroy the scalability of the
dma-api path. So we need something more fine-grained, also if we keep in
mind that the actual cases where we need to flush something in the
dma-api path are very rare. The default should be to not take any lock
in that path.

How does the attached patch look to you? It is completly untested but
should give an idea of a better way to fix these locking issues.

Regards,

Joerg

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 61de81965c44..bb93a2bbb73d 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1435,9 +1435,10 @@ static void free_pagetable(struct protection_domain 
*domain)
  * another level increases the size of the address space by 9 bits to a size up
  * to 64 bits.
  */
-static void increase_address_space(struct protection_domain *domain,
+static bool increase_address_space(struct protection_domain *domain,
   gfp_t gfp)
 {
+   bool updated = false;
unsigned long flags;
u64 *pte;
 
@@ -1455,27 +1456,30 @@ static void increase_address_space(struct 
protection_domain *domain,
iommu_virt_to_phys(domain->pt_root));
domain->pt_root  = pte;
domain->mode+= 1;
-   domain->updated  = true;
+   updated  = true;
 
 out:
spin_unlock_irqrestore(&domain->lock, flags);
 
-   return;
+   return updated;
 }
 
 static u64 *alloc_pte(struct protection_domain *domain,
  unsigned long address,
  unsigned long page_size,
  u64 **pte_page,
- gfp_t gfp)
+ gfp_t gfp,
+ bool *updated)
 {
int level, end_lvl;
u64 *pte, *page;
 
BUG_ON(!is_power_of_2(page_size));
 
+   *updated = false;
+
while (address > PM_LEVEL_SIZE(domain->mode))
-   increase_address_space(domain, gfp);
+   *updated = increase_address_space(domain, gfp) || *updated;
 
level   = domain->mode - 1;
pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
@@ -1501,7 +1505,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
if (cmpxchg64(pte, __pte, __npte) != __pte)
free_page((unsigned long)page);
else if (pte_level == PAGE_MODE_7_LEVEL)
-   domain->updated = true;
+   *updated = true;
 
continue;
}
@@ -1617,6 +1621,7 @@ static int iommu_map_page(struct protection_domain *dom,
struct page *freelist = NULL;
u64 __pte, *pte;
int i, count;
+   bool updated;
 
BUG_ON(!IS_ALIGNED(bus_addr, page_size));
BUG_ON(!IS_ALIGNED(phys_addr, page_size));
@@ -1625,7 +1630,7 @@ static int iommu_map_page(struct protection_domain *dom,
return -EINVAL;
 
count = PAGE_SIZE_PTE_COUNT(page_size);
-   pte   = alloc_pte(dom, bus_addr, page_size, NULL, gfp);
+   pte   = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated);
 
if (!pte)
return -ENOMEM;
@@ -1634,7 +1639,7 @@ static int iommu_map_page(struct protection_domain *dom,
freelist = free_clear_pte(&pte[i], pte[i], freelist);
 
if (freelist != NULL)
-   dom->updated = true;
+   updated = true;
 
if (count > 1) {
__pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
@@ -1650,7 +1655,8 @@ static int iommu_map_page(struct protection_domain *dom,
for (i = 0; i < count; ++i)
pte[i] = __pte;
 
-   update_domain(dom);
+   if (updated)
+   update_domain(dom);
 
/* Everything flushed out, free pages now */
free_page_list(freelist);
@@ -2041,6 +2047,13 @@ static int __attach_device(struct iommu_dev_data 
*dev_data,
/* Attach alias group root */
do_attach(dev_data, domain);
 
+   /*
+* We might boot into a crash-kernel here. The crashed ke

Re: swiotlb-xen cleanups v4

2019-09-11 Thread Christoph Hellwig
Applied to the dma-mapping tree.


Re: [PATCH] iommu/vt-d: Add Scalable Mode fault information

2019-09-11 Thread j...@8bytes.org
On Tue, Sep 10, 2019 at 05:30:09PM +, Mehta, Sohil wrote:
> On Tue, 2019-09-10 at 10:08 +0200, Joerg Roedel wrote:
> > > + "Unknown", "Unknown", "Unknown", "Unknown", "Unknown",
> > "Unknown", "Unknown", /* 0x49-0x4F */
> > 
> > Maybe add the number (0x49-0x4f) to the respecting "Unknown" fields?
> > If
> > we can't give a reason we should give the number for easier debugging
> > in
> > the future. Same for the "Unknown" fields below.
> 
> I believe a fault number is always printed in dmar_fault_do_one() even
> if the reason is unknown.
> 
> DMAR: [DMA Write] Request device [00:02.0] fault addr 108a000 [fault
> reason 23] Unknown

Right, applied the patch, thanks.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 1/5] swiotlb: Split size parameter to map/unmap APIs

2019-09-11 Thread Joerg Roedel
On Wed, Sep 11, 2019 at 02:16:07PM +0800, Lu Baolu wrote:
> How about this change?
> 
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 89066efa3840..22a7848caca3 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -466,8 +466,11 @@ phys_addr_t swiotlb_tbl_map_single(struct device
> *hwdev,
> pr_warn_once("%s is active and system is using DMA bounce
> buffers\n",
>  sme_active() ? "SME" : "SEV");
> 
> -   if (WARN_ON(mapping_size > alloc_size))
> +   if (mapping_size > alloc_size) {
> +   dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes,
> alloc: %zd bytes)",
> + mapping_size, alloc_size);
> return (phys_addr_t)DMA_MAPPING_ERROR;
> +   }
> 
> mask = dma_get_seg_boundary(hwdev);
> 
> @@ -584,9 +587,6 @@ void swiotlb_tbl_unmap_single(struct device *hwdev,
> phys_addr_t tlb_addr,
> int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
> phys_addr_t orig_addr = io_tlb_orig_addr[index];
> 
> -   if (WARN_ON(mapping_size > alloc_size))
> -   return;
> -
> /*
>  * First, sync the memory before unmapping the entry
>  */

Folded that into the commit, thanks Lu Baolu.


Re: [V2, 2/2] media: i2c: Add more sensor modes for ov8856 camera sensor

2019-09-11 Thread Tomasz Figa
Hi Sakari,

On Tue, Sep 10, 2019 at 10:05 PM  wrote:
>
> From: Dongchun Zhu 
>
> This patch mainly adds two more sensor modes for OV8856 CMOS image sensor.
> That is, the resolution of 1632*1224 and 3264*2448, corresponding to the 
> bayer order of BGGR.
> The sensor revision also differs in some OTP register.
>
> Signed-off-by: Dongchun Zhu 
> ---
>  drivers/media/i2c/ov8856.c | 654 
> +++--
>  1 file changed, 639 insertions(+), 15 deletions(-)
>

What do you think about the approach taken by this patch?

My understanding is that the register arrays being added by it can be
only used with 24MHz input clock, while the existing ones are for
19.2MHz. That means that this patch makes the driver expose completely
different modes (resolutions, mbus formats) depending on the input
clock. Are we okay with this?

Best regards,
Tomasz

> diff --git a/drivers/media/i2c/ov8856.c b/drivers/media/i2c/ov8856.c
> index cd347d6..9ad0b73 100644
> --- a/drivers/media/i2c/ov8856.c
> +++ b/drivers/media/i2c/ov8856.c
> @@ -1,12 +1,15 @@
>  // SPDX-License-Identifier: GPL-2.0
>  // Copyright (c) 2019 Intel Corporation.
>
> +#include 
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -18,10 +21,15 @@
>  #define OV8856_LINK_FREQ_360MHZ36000ULL
>  #define OV8856_LINK_FREQ_180MHZ18000ULL
>  #define OV8856_SCLK14400ULL
> -#define OV8856_MCLK1920
> +#define OV8856_XVCLK   1920
> +#define OV8856_XVCLK_TYP   2400
>  #define OV8856_DATA_LANES  4
>  #define OV8856_RGB_DEPTH   10
>
> +#define REG_X_ADDR_START   0x3808
> +#define X_OUTPUT_FULL_SIZE 0x0cc0
> +#define X_OUTPUT_BINNING_SIZE  0x0660
> +
>  #define OV8856_REG_CHIP_ID 0x300a
>  #define OV8856_CHIP_ID 0x00885a
>
> @@ -29,6 +37,22 @@
>  #define OV8856_MODE_STANDBY0x00
>  #define OV8856_MODE_STREAMING  0x01
>
> +/* define 1B module revision */
> +#define OV8856_1B_MODULE   0x02
> +
> +/* the OTP read-out buffer is at 0x7000 and 0xf is the offset
> + * of the byte in the OTP that means the module revision
> + */
> +#define OV8856_MODULE_REVISION 0x700f
> +#define OV8856_OTP_MODE_CTRL   0x3d84
> +#define OV8856_OTP_LOAD_CTRL   0x3d81
> +#define OV8856_OTP_MODE_AUTO   0x00
> +#define OV8856_OTP_LOAD_CTRL_ENABLEBIT(0)
> +
> +/* Analog control register that decided by module revision */
> +#define OV8856_ANAL_MODE_CTRL  0x3614
> +#define OV8856_ANAL_1B_VAL 0x20
> +
>  /* vertical-timings from sensor */
>  #define OV8856_REG_VTS 0x380e
>  #define OV8856_VTS_MAX 0x7fff
> @@ -64,6 +88,14 @@
>
>  #define to_ov8856(_sd) container_of(_sd, struct ov8856, sd)
>
> +static const char * const ov8856_supply_names[] = {
> +   "dovdd",/* Digital I/O power */
> +   "avdd", /* Analog power */
> +   "dvdd", /* Digital core power */
> +};
> +
> +#define OV8856_NUM_SUPPLIES ARRAY_SIZE(ov8856_supply_names)
> +
>  enum {
> OV8856_LINK_FREQ_720MBPS,
> OV8856_LINK_FREQ_360MBPS,
> @@ -195,11 +227,11 @@ static const struct ov8856_reg mode_3280x2464_regs[] = {
> {0x3800, 0x00},
> {0x3801, 0x00},
> {0x3802, 0x00},
> -   {0x3803, 0x06},
> +   {0x3803, 0x07},
> {0x3804, 0x0c},
> {0x3805, 0xdf},
> {0x3806, 0x09},
> -   {0x3807, 0xa7},
> +   {0x3807, 0xa6},
> {0x3808, 0x0c},
> {0x3809, 0xd0},
> {0x380a, 0x09},
> @@ -211,7 +243,7 @@ static const struct ov8856_reg mode_3280x2464_regs[] = {
> {0x3810, 0x00},
> {0x3811, 0x00},
> {0x3812, 0x00},
> -   {0x3813, 0x01},
> +   {0x3813, 0x00},
> {0x3814, 0x01},
> {0x3815, 0x01},
> {0x3816, 0x00},
> @@ -316,6 +348,209 @@ static const struct ov8856_reg mode_3280x2464_regs[] = {
> {0x5e00, 0x00}
>  };
>
> +static const struct ov8856_reg mode_3264x2448_regs[] = {
> +   {0x0103, 0x01},
> +   {0x0302, 0x3c},
> +   {0x0303, 0x01},
> +   {0x031e, 0x0c},
> +   {0x3000, 0x20},
> +   {0x3003, 0x08},
> +   {0x300e, 0x20},
> +   {0x3010, 0x00},
> +   {0x3015, 0x84},
> +   {0x3018, 0x72},
> +   {0x3021, 0x23},
> +   {0x3033, 0x24},
> +   {0x3500, 0x00},
> +   {0x3501, 0x9a},
> +   {0x3502, 0x20},
> +   {0x3503, 0x08},
> +   {0x3505, 0x83},
> +   {0x3508, 0x01},
> +   {0x3509, 0x80},
> +   {0x350c, 0x00},
> +   {0x350d, 0x80},
> +   {0x350e, 0x04},
> +   {0x350f, 0x00},
> +   {0x3510, 0x00},
> +   {0x3511, 0x02},
> +   {0x3512, 0x00},
> +   {0x3600, 0x72},
> +   {0x3601, 0x40},
> +   {0x3602, 0x30},
> +