Re: [PATCH v9 4/8] media: vsp1: Convert display lists to use new body pool

2018-05-16 Thread Laurent Pinchart
Hi Kieran,

Thank you for the patch.

On Thursday, 3 May 2018 16:35:43 EEST Kieran Bingham wrote:
> Adapt the dl->body0 object to use an object from the body pool. This
> greatly reduces the pressure on the TLB for IPMMU use cases, as all of
> the lists use a single allocation for the main body.
> 
> The CLU and LUT objects pre-allocate a pool containing three bodies,
> allowing a userspace update before the hardware has committed a previous
> set of tables.
> 
> Bodies are no longer 'freed' in interrupt context, but instead released
> back to their respective pools. This allows us to remove the garbage
> collector in the DLM.
> 
> Signed-off-by: Kieran Bingham 

Reviewed-by: Laurent Pinchart 

> ---
> v9:
>  - Remove redundant reference to gc_bodies
> 
> v8:
>  - Don't pass dlm->pool through vsp1_dl_list_alloc()  as it's already in the
> dlm. - Fix up comments
> 
> v4-v7:
>  - No changes (except rebases)
> 
> v3:
>  - 's/fragment/body', 's/fragments/bodies/'
>  - CLU/LUT now allocate 3 bodies
>  - vsp1_dl_list_fragments_free -> vsp1_dl_list_bodies_put
> 
> v2:
>  - Use dl->body0->max_entries to determine header offset, instead of the
>global constant VSP1_DL_NUM_ENTRIES which is incorrect.
>  - squash updates for LUT, CLU, and fragment cleanup into single patch.
>(Not fully bisectable when separated)
> ---
>  drivers/media/platform/vsp1/vsp1_clu.c |  27 ++-
>  drivers/media/platform/vsp1/vsp1_clu.h |   1 +-
>  drivers/media/platform/vsp1/vsp1_dl.c  | 221 ++
>  drivers/media/platform/vsp1/vsp1_dl.h  |   3 +-
>  drivers/media/platform/vsp1/vsp1_lut.c |  27 ++-
>  drivers/media/platform/vsp1/vsp1_lut.h |   1 +-
>  6 files changed, 100 insertions(+), 180 deletions(-)
> 
> diff --git a/drivers/media/platform/vsp1/vsp1_clu.c
> b/drivers/media/platform/vsp1/vsp1_clu.c index ebfbb915dcdc..8efa12f5e53f
> 100644
> --- a/drivers/media/platform/vsp1/vsp1_clu.c
> +++ b/drivers/media/platform/vsp1/vsp1_clu.c
> @@ -19,6 +19,8 @@
>  #define CLU_MIN_SIZE 4U
>  #define CLU_MAX_SIZE 8190U
> 
> +#define CLU_SIZE (17 * 17 * 17)
> +
>  /*
> ---
> -- * Device Access
>   */
> @@ -43,19 +45,19 @@ static int clu_set_table(struct vsp1_clu *clu, struct
> v4l2_ctrl *ctrl) struct vsp1_dl_body *dlb;
>   unsigned int i;
> 
> - dlb = vsp1_dl_body_alloc(clu->entity.vsp1, 1 + 17 * 17 * 17);
> + dlb = vsp1_dl_body_get(clu->pool);
>   if (!dlb)
>   return -ENOMEM;
> 
>   vsp1_dl_body_write(dlb, VI6_CLU_ADDR, 0);
> - for (i = 0; i < 17 * 17 * 17; ++i)
> + for (i = 0; i < CLU_SIZE; ++i)
>   vsp1_dl_body_write(dlb, VI6_CLU_DATA, ctrl->p_new.p_u32[i]);
> 
>   spin_lock_irq(>lock);
>   swap(clu->clu, dlb);
>   spin_unlock_irq(>lock);
> 
> - vsp1_dl_body_free(dlb);
> + vsp1_dl_body_put(dlb);
>   return 0;
>  }
> 
> @@ -216,8 +218,16 @@ static void clu_configure(struct vsp1_entity *entity,
>   }
>  }
> 
> +static void clu_destroy(struct vsp1_entity *entity)
> +{
> + struct vsp1_clu *clu = to_clu(>subdev);
> +
> + vsp1_dl_body_pool_destroy(clu->pool);
> +}
> +
>  static const struct vsp1_entity_operations clu_entity_ops = {
>   .configure = clu_configure,
> + .destroy = clu_destroy,
>  };
> 
>  /*
> ---
> -- @@ -243,6 +253,17 @@ struct vsp1_clu *vsp1_clu_create(struct vsp1_device
> *vsp1) if (ret < 0)
>   return ERR_PTR(ret);
> 
> + /*
> +  * Pre-allocate a body pool, with 3 bodies allowing a userspace update
> +  * before the hardware has committed a previous set of tables, handling
> +  * both the queued and pending dl entries. One extra entry is added to
> +  * the CLU_SIZE to allow for the VI6_CLU_ADDR header.
> +  */
> + clu->pool = vsp1_dl_body_pool_create(clu->entity.vsp1, 3, CLU_SIZE + 1,
> +  0);
> + if (!clu->pool)
> + return ERR_PTR(-ENOMEM);
> +
>   /* Initialize the control handler. */
>   v4l2_ctrl_handler_init(>ctrls, 2);
>   v4l2_ctrl_new_custom(>ctrls, _table_control, NULL);
> diff --git a/drivers/media/platform/vsp1/vsp1_clu.h
> b/drivers/media/platform/vsp1/vsp1_clu.h index c45e6e707592..cef2f44481ba
> 100644
> --- a/drivers/media/platform/vsp1/vsp1_clu.h
> +++ b/drivers/media/platform/vsp1/vsp1_clu.h
> @@ -32,6 +32,7 @@ struct vsp1_clu {
>   spinlock_t lock;
>   unsigned int mode;
>   struct vsp1_dl_body *clu;
> + struct vsp1_dl_body_pool *pool;
>  };
> 
>  static inline struct vsp1_clu *to_clu(struct v4l2_subdev *subdev)
> diff --git a/drivers/media/platform/vsp1/vsp1_dl.c
> b/drivers/media/platform/vsp1/vsp1_dl.c index 41ace89a585b..617c46a03dec
> 100644
> --- 

[PATCH v9 4/8] media: vsp1: Convert display lists to use new body pool

2018-05-03 Thread Kieran Bingham
Adapt the dl->body0 object to use an object from the body pool. This
greatly reduces the pressure on the TLB for IPMMU use cases, as all of
the lists use a single allocation for the main body.

The CLU and LUT objects pre-allocate a pool containing three bodies,
allowing a userspace update before the hardware has committed a previous
set of tables.

Bodies are no longer 'freed' in interrupt context, but instead released
back to their respective pools. This allows us to remove the garbage
collector in the DLM.

Signed-off-by: Kieran Bingham 

---
v9:
 - Remove redundant reference to gc_bodies

v8:
 - Don't pass dlm->pool through vsp1_dl_list_alloc()  as it's already in the 
dlm.
 - Fix up comments

v4-v7:
 - No changes (except rebases)

v3:
 - 's/fragment/body', 's/fragments/bodies/'
 - CLU/LUT now allocate 3 bodies
 - vsp1_dl_list_fragments_free -> vsp1_dl_list_bodies_put

v2:
 - Use dl->body0->max_entries to determine header offset, instead of the
   global constant VSP1_DL_NUM_ENTRIES which is incorrect.
 - squash updates for LUT, CLU, and fragment cleanup into single patch.
   (Not fully bisectable when separated)
---
 drivers/media/platform/vsp1/vsp1_clu.c |  27 ++-
 drivers/media/platform/vsp1/vsp1_clu.h |   1 +-
 drivers/media/platform/vsp1/vsp1_dl.c  | 221 ++
 drivers/media/platform/vsp1/vsp1_dl.h  |   3 +-
 drivers/media/platform/vsp1/vsp1_lut.c |  27 ++-
 drivers/media/platform/vsp1/vsp1_lut.h |   1 +-
 6 files changed, 100 insertions(+), 180 deletions(-)

diff --git a/drivers/media/platform/vsp1/vsp1_clu.c 
b/drivers/media/platform/vsp1/vsp1_clu.c
index ebfbb915dcdc..8efa12f5e53f 100644
--- a/drivers/media/platform/vsp1/vsp1_clu.c
+++ b/drivers/media/platform/vsp1/vsp1_clu.c
@@ -19,6 +19,8 @@
 #define CLU_MIN_SIZE   4U
 #define CLU_MAX_SIZE   8190U
 
+#define CLU_SIZE   (17 * 17 * 17)
+
 /* 
-
  * Device Access
  */
@@ -43,19 +45,19 @@ static int clu_set_table(struct vsp1_clu *clu, struct 
v4l2_ctrl *ctrl)
struct vsp1_dl_body *dlb;
unsigned int i;
 
-   dlb = vsp1_dl_body_alloc(clu->entity.vsp1, 1 + 17 * 17 * 17);
+   dlb = vsp1_dl_body_get(clu->pool);
if (!dlb)
return -ENOMEM;
 
vsp1_dl_body_write(dlb, VI6_CLU_ADDR, 0);
-   for (i = 0; i < 17 * 17 * 17; ++i)
+   for (i = 0; i < CLU_SIZE; ++i)
vsp1_dl_body_write(dlb, VI6_CLU_DATA, ctrl->p_new.p_u32[i]);
 
spin_lock_irq(>lock);
swap(clu->clu, dlb);
spin_unlock_irq(>lock);
 
-   vsp1_dl_body_free(dlb);
+   vsp1_dl_body_put(dlb);
return 0;
 }
 
@@ -216,8 +218,16 @@ static void clu_configure(struct vsp1_entity *entity,
}
 }
 
+static void clu_destroy(struct vsp1_entity *entity)
+{
+   struct vsp1_clu *clu = to_clu(>subdev);
+
+   vsp1_dl_body_pool_destroy(clu->pool);
+}
+
 static const struct vsp1_entity_operations clu_entity_ops = {
.configure = clu_configure,
+   .destroy = clu_destroy,
 };
 
 /* 
-
@@ -243,6 +253,17 @@ struct vsp1_clu *vsp1_clu_create(struct vsp1_device *vsp1)
if (ret < 0)
return ERR_PTR(ret);
 
+   /*
+* Pre-allocate a body pool, with 3 bodies allowing a userspace update
+* before the hardware has committed a previous set of tables, handling
+* both the queued and pending dl entries. One extra entry is added to
+* the CLU_SIZE to allow for the VI6_CLU_ADDR header.
+*/
+   clu->pool = vsp1_dl_body_pool_create(clu->entity.vsp1, 3, CLU_SIZE + 1,
+0);
+   if (!clu->pool)
+   return ERR_PTR(-ENOMEM);
+
/* Initialize the control handler. */
v4l2_ctrl_handler_init(>ctrls, 2);
v4l2_ctrl_new_custom(>ctrls, _table_control, NULL);
diff --git a/drivers/media/platform/vsp1/vsp1_clu.h 
b/drivers/media/platform/vsp1/vsp1_clu.h
index c45e6e707592..cef2f44481ba 100644
--- a/drivers/media/platform/vsp1/vsp1_clu.h
+++ b/drivers/media/platform/vsp1/vsp1_clu.h
@@ -32,6 +32,7 @@ struct vsp1_clu {
spinlock_t lock;
unsigned int mode;
struct vsp1_dl_body *clu;
+   struct vsp1_dl_body_pool *pool;
 };
 
 static inline struct vsp1_clu *to_clu(struct v4l2_subdev *subdev)
diff --git a/drivers/media/platform/vsp1/vsp1_dl.c 
b/drivers/media/platform/vsp1/vsp1_dl.c
index 41ace89a585b..617c46a03dec 100644
--- a/drivers/media/platform/vsp1/vsp1_dl.c
+++ b/drivers/media/platform/vsp1/vsp1_dl.c
@@ -108,7 +108,7 @@ struct vsp1_dl_list {
struct vsp1_dl_header *header;
dma_addr_t dma;
 
-   struct vsp1_dl_body body0;
+   struct vsp1_dl_body *body0;
struct list_head bodies;
 
bool has_chain;
@@ -128,14 +128,12 @@