Re: [PATCH v7 4/8] media: vsp1: Convert display lists to use new body pool

2018-04-30 Thread Kieran Bingham
Hi Laurent,

On 06/04/18 23:55, Laurent Pinchart wrote:
> Hi Kieran,
> 
> Thank you for the patch.
> 
> On Thursday, 8 March 2018 02:05:27 EEST Kieran Bingham wrote:
>> Adapt the dl->body0 object to use an object from the body pool. This
>> greatly reduces the pressure on the TLB for IPMMU use cases, as all of
>> the lists use a single allocation for the main body.
>>
>> The CLU and LUT objects pre-allocate a pool containing three bodies,
>> allowing a userspace update before the hardware has committed a previous
>> set of tables.
>>
>> Bodies are no longer 'freed' in interrupt context, but instead released
>> back to their respective pools. This allows us to remove the garbage
>> collector in the DLM.
>>
>> Signed-off-by: Kieran Bingham 
>>
>> ---
>> v3:
>>  - 's/fragment/body', 's/fragments/bodies/'
>>  - CLU/LUT now allocate 3 bodies
>>  - vsp1_dl_list_fragments_free -> vsp1_dl_list_bodies_put
>>
>> v2:
>>  - Use dl->body0->max_entries to determine header offset, instead of the
>>global constant VSP1_DL_NUM_ENTRIES which is incorrect.
>>  - squash updates for LUT, CLU, and fragment cleanup into single patch.
>>(Not fully bisectable when separated)
>>
>>  drivers/media/platform/vsp1/vsp1_clu.c |  27 ++-
>>  drivers/media/platform/vsp1/vsp1_clu.h |   1 +-
>>  drivers/media/platform/vsp1/vsp1_dl.c  | 223 ++
>>  drivers/media/platform/vsp1/vsp1_dl.h  |   3 +-
>>  drivers/media/platform/vsp1/vsp1_lut.c |  27 ++-
>>  drivers/media/platform/vsp1/vsp1_lut.h |   1 +-
>>  6 files changed, 101 insertions(+), 181 deletions(-)
> 
> Still a nice diffstart :-)
> 
> [snip]
> 
>> diff --git a/drivers/media/platform/vsp1/vsp1_dl.c
>> b/drivers/media/platform/vsp1/vsp1_dl.c index 0208e72cb356..74476726451c
>> 100644
>> --- a/drivers/media/platform/vsp1/vsp1_dl.c
>> +++ b/drivers/media/platform/vsp1/vsp1_dl.c
> 
> [snip]
> 
>> @@ -399,11 +311,10 @@ void vsp1_dl_body_write(struct vsp1_dl_body *dlb, u32
>> reg, u32 data) * Display List Transaction Management
>>   */
>>
>> -static struct vsp1_dl_list *vsp1_dl_list_alloc(struct vsp1_dl_manager *dlm)
>> +static struct vsp1_dl_list *vsp1_dl_list_alloc(struct vsp1_dl_manager
>> *dlm,
>> +   struct vsp1_dl_body_pool *pool)
> 
> Given that the only caller of this function passes dlm->pool as the second 
> argument, can't you remove the second argument ?

Hrm ... I thought there was going to be a use case where the pool will be
separated. But perhaps not.

So yes - Removing.

> 
>>  {
>>  struct vsp1_dl_list *dl;
>> -size_t header_size;
>> -int ret;
>>
>>  dl = kzalloc(sizeof(*dl), GFP_KERNEL);
>>  if (!dl)
>> @@ -412,41 +323,39 @@ static struct vsp1_dl_list *vsp1_dl_list_alloc(struct
>> vsp1_dl_manager *dlm) INIT_LIST_HEAD(&dl->bodies);
>>  dl->dlm = dlm;
>>
>> -/*
>> - * Initialize the display list body and allocate DMA memory for the body
>> - * and the optional header. Both are allocated together to avoid memory
>> - * fragmentation, with the header located right after the body in
>> - * memory.
>> - */
>> -header_size = dlm->mode == VSP1_DL_MODE_HEADER
>> -? ALIGN(sizeof(struct vsp1_dl_header), 8)
>> -: 0;
>> -
>> -ret = vsp1_dl_body_init(dlm->vsp1, &dl->body0, VSP1_DL_NUM_ENTRIES,
>> -header_size);
>> -if (ret < 0) {
>> -kfree(dl);
>> +/* Retrieve a body from our DLM body pool */
> 
> s/body pool/body pool./
> 
> (And I would have said "Get a body" but that's up to you)

I think that's evident by the function name "vsp1_dl_body_get()", thus I've
adapted this comment to be a bit more meaningful:
/* Get a default body for our list. */

But I'm not opposed to dropping the comment. Also at somepoint, I think there's
scope to remove the dl->body0 so it may not matter.

> 
>> +dl->body0 = vsp1_dl_body_get(pool);
>> +if (!dl->body0)
>>  return NULL;
>> -}
>> -
>>  if (dlm->mode == VSP1_DL_MODE_HEADER) {
>> -size_t header_offset = VSP1_DL_NUM_ENTRIES
>> - * sizeof(*dl->body0.entries);
>> +size_t header_offset = dl->body0->max_entries
>> + * sizeof(*dl->body0->entries);
>>
>> -dl->header = ((void *)dl->body0.entries) + header_offset;
>> -dl->dma = dl->body0.dma + header_offset;
>> +dl->header = ((void *)dl->body0->entries) + header_offset;
>> +dl->dma = dl->body0->dma + header_offset;
>>
>>  memset(dl->header, 0, sizeof(*dl->header));
>> -dl->header->lists[0].addr = dl->body0.dma;
>> +dl->header->lists[0].addr = dl->body0->dma;
>>  }
>>
>>  return dl;
>>  }
>>
>> +static void vsp1_dl_list_bodies_put(struct vsp1_dl_list *dl)
>> +{
>> +struct vsp1_dl_body *dlb, *tmp;
>> +
>> +list_for_each_entry_safe(dlb, tmp, &dl->bodies, list) {
>> +list_del(&dlb->li

Re: [PATCH v7 4/8] media: vsp1: Convert display lists to use new body pool

2018-04-06 Thread Laurent Pinchart
Hi Kieran,

Thank you for the patch.

On Thursday, 8 March 2018 02:05:27 EEST Kieran Bingham wrote:
> Adapt the dl->body0 object to use an object from the body pool. This
> greatly reduces the pressure on the TLB for IPMMU use cases, as all of
> the lists use a single allocation for the main body.
> 
> The CLU and LUT objects pre-allocate a pool containing three bodies,
> allowing a userspace update before the hardware has committed a previous
> set of tables.
> 
> Bodies are no longer 'freed' in interrupt context, but instead released
> back to their respective pools. This allows us to remove the garbage
> collector in the DLM.
> 
> Signed-off-by: Kieran Bingham 
> 
> ---
> v3:
>  - 's/fragment/body', 's/fragments/bodies/'
>  - CLU/LUT now allocate 3 bodies
>  - vsp1_dl_list_fragments_free -> vsp1_dl_list_bodies_put
> 
> v2:
>  - Use dl->body0->max_entries to determine header offset, instead of the
>global constant VSP1_DL_NUM_ENTRIES which is incorrect.
>  - squash updates for LUT, CLU, and fragment cleanup into single patch.
>(Not fully bisectable when separated)
> 
>  drivers/media/platform/vsp1/vsp1_clu.c |  27 ++-
>  drivers/media/platform/vsp1/vsp1_clu.h |   1 +-
>  drivers/media/platform/vsp1/vsp1_dl.c  | 223 ++
>  drivers/media/platform/vsp1/vsp1_dl.h  |   3 +-
>  drivers/media/platform/vsp1/vsp1_lut.c |  27 ++-
>  drivers/media/platform/vsp1/vsp1_lut.h |   1 +-
>  6 files changed, 101 insertions(+), 181 deletions(-)

Still a nice diffstart :-)

[snip]

> diff --git a/drivers/media/platform/vsp1/vsp1_dl.c
> b/drivers/media/platform/vsp1/vsp1_dl.c index 0208e72cb356..74476726451c
> 100644
> --- a/drivers/media/platform/vsp1/vsp1_dl.c
> +++ b/drivers/media/platform/vsp1/vsp1_dl.c

[snip]

> @@ -399,11 +311,10 @@ void vsp1_dl_body_write(struct vsp1_dl_body *dlb, u32
> reg, u32 data) * Display List Transaction Management
>   */
> 
> -static struct vsp1_dl_list *vsp1_dl_list_alloc(struct vsp1_dl_manager *dlm)
> +static struct vsp1_dl_list *vsp1_dl_list_alloc(struct vsp1_dl_manager
> *dlm,
> +struct vsp1_dl_body_pool *pool)

Given that the only caller of this function passes dlm->pool as the second 
argument, can't you remove the second argument ?

>  {
>   struct vsp1_dl_list *dl;
> - size_t header_size;
> - int ret;
> 
>   dl = kzalloc(sizeof(*dl), GFP_KERNEL);
>   if (!dl)
> @@ -412,41 +323,39 @@ static struct vsp1_dl_list *vsp1_dl_list_alloc(struct
> vsp1_dl_manager *dlm) INIT_LIST_HEAD(&dl->bodies);
>   dl->dlm = dlm;
> 
> - /*
> -  * Initialize the display list body and allocate DMA memory for the body
> -  * and the optional header. Both are allocated together to avoid memory
> -  * fragmentation, with the header located right after the body in
> -  * memory.
> -  */
> - header_size = dlm->mode == VSP1_DL_MODE_HEADER
> - ? ALIGN(sizeof(struct vsp1_dl_header), 8)
> - : 0;
> -
> - ret = vsp1_dl_body_init(dlm->vsp1, &dl->body0, VSP1_DL_NUM_ENTRIES,
> - header_size);
> - if (ret < 0) {
> - kfree(dl);
> + /* Retrieve a body from our DLM body pool */

s/body pool/body pool./

(And I would have said "Get a body" but that's up to you)

> + dl->body0 = vsp1_dl_body_get(pool);
> + if (!dl->body0)
>   return NULL;
> - }
> -
>   if (dlm->mode == VSP1_DL_MODE_HEADER) {
> - size_t header_offset = VSP1_DL_NUM_ENTRIES
> -  * sizeof(*dl->body0.entries);
> + size_t header_offset = dl->body0->max_entries
> +  * sizeof(*dl->body0->entries);
> 
> - dl->header = ((void *)dl->body0.entries) + header_offset;
> - dl->dma = dl->body0.dma + header_offset;
> + dl->header = ((void *)dl->body0->entries) + header_offset;
> + dl->dma = dl->body0->dma + header_offset;
> 
>   memset(dl->header, 0, sizeof(*dl->header));
> - dl->header->lists[0].addr = dl->body0.dma;
> + dl->header->lists[0].addr = dl->body0->dma;
>   }
> 
>   return dl;
>  }
> 
> +static void vsp1_dl_list_bodies_put(struct vsp1_dl_list *dl)
> +{
> + struct vsp1_dl_body *dlb, *tmp;
> +
> + list_for_each_entry_safe(dlb, tmp, &dl->bodies, list) {
> + list_del(&dlb->list);
> + vsp1_dl_body_put(dlb);
> + }
> +}
> +
>  static void vsp1_dl_list_free(struct vsp1_dl_list *dl)
>  {
> - vsp1_dl_body_cleanup(&dl->body0);
> - list_splice_init(&dl->bodies, &dl->dlm->gc_bodies);
> + vsp1_dl_body_put(dl->body0);
> + vsp1_dl_list_bodies_put(dl);

Too bad we can't keep the list splice, it's more efficient than iterating over 
the list, but I suppose it's unavoidable if we want to reset the number of 
used entries to 0 for each body. Beside, we should have a small number of 
bodies only, so hopefully it won't be a big d

[PATCH v7 4/8] media: vsp1: Convert display lists to use new body pool

2018-03-07 Thread Kieran Bingham
Adapt the dl->body0 object to use an object from the body pool. This
greatly reduces the pressure on the TLB for IPMMU use cases, as all of
the lists use a single allocation for the main body.

The CLU and LUT objects pre-allocate a pool containing three bodies,
allowing a userspace update before the hardware has committed a previous
set of tables.

Bodies are no longer 'freed' in interrupt context, but instead released
back to their respective pools. This allows us to remove the garbage
collector in the DLM.

Signed-off-by: Kieran Bingham 

---
v3:
 - 's/fragment/body', 's/fragments/bodies/'
 - CLU/LUT now allocate 3 bodies
 - vsp1_dl_list_fragments_free -> vsp1_dl_list_bodies_put

v2:
 - Use dl->body0->max_entries to determine header offset, instead of the
   global constant VSP1_DL_NUM_ENTRIES which is incorrect.
 - squash updates for LUT, CLU, and fragment cleanup into single patch.
   (Not fully bisectable when separated)

 drivers/media/platform/vsp1/vsp1_clu.c |  27 ++-
 drivers/media/platform/vsp1/vsp1_clu.h |   1 +-
 drivers/media/platform/vsp1/vsp1_dl.c  | 223 ++
 drivers/media/platform/vsp1/vsp1_dl.h  |   3 +-
 drivers/media/platform/vsp1/vsp1_lut.c |  27 ++-
 drivers/media/platform/vsp1/vsp1_lut.h |   1 +-
 6 files changed, 101 insertions(+), 181 deletions(-)

diff --git a/drivers/media/platform/vsp1/vsp1_clu.c 
b/drivers/media/platform/vsp1/vsp1_clu.c
index 9621afa3658c..2018144470c5 100644
--- a/drivers/media/platform/vsp1/vsp1_clu.c
+++ b/drivers/media/platform/vsp1/vsp1_clu.c
@@ -23,6 +23,8 @@
 #define CLU_MIN_SIZE   4U
 #define CLU_MAX_SIZE   8190U
 
+#define CLU_SIZE   (17 * 17 * 17)
+
 /* 
-
  * Device Access
  */
@@ -47,19 +49,19 @@ static int clu_set_table(struct vsp1_clu *clu, struct 
v4l2_ctrl *ctrl)
struct vsp1_dl_body *dlb;
unsigned int i;
 
-   dlb = vsp1_dl_body_alloc(clu->entity.vsp1, 1 + 17 * 17 * 17);
+   dlb = vsp1_dl_body_get(clu->pool);
if (!dlb)
return -ENOMEM;
 
vsp1_dl_body_write(dlb, VI6_CLU_ADDR, 0);
-   for (i = 0; i < 17 * 17 * 17; ++i)
+   for (i = 0; i < CLU_SIZE; ++i)
vsp1_dl_body_write(dlb, VI6_CLU_DATA, ctrl->p_new.p_u32[i]);
 
spin_lock_irq(&clu->lock);
swap(clu->clu, dlb);
spin_unlock_irq(&clu->lock);
 
-   vsp1_dl_body_free(dlb);
+   vsp1_dl_body_put(dlb);
return 0;
 }
 
@@ -261,8 +263,16 @@ static void clu_configure(struct vsp1_entity *entity,
}
 }
 
+static void clu_destroy(struct vsp1_entity *entity)
+{
+   struct vsp1_clu *clu = to_clu(&entity->subdev);
+
+   vsp1_dl_body_pool_destroy(clu->pool);
+}
+
 static const struct vsp1_entity_operations clu_entity_ops = {
.configure = clu_configure,
+   .destroy = clu_destroy,
 };
 
 /* 
-
@@ -288,6 +298,17 @@ struct vsp1_clu *vsp1_clu_create(struct vsp1_device *vsp1)
if (ret < 0)
return ERR_PTR(ret);
 
+   /*
+* Pre-allocate a body pool, with 3 bodies allowing a userspace update
+* before the hardware has committed a previous set of tables, handling
+* both the queued and pending dl entries. One extra entry is added to
+* the CLU_SIZE to allow for the VI6_CLU_ADDR header.
+*/
+   clu->pool = vsp1_dl_body_pool_create(clu->entity.vsp1, 3, CLU_SIZE + 1,
+0);
+   if (!clu->pool)
+   return ERR_PTR(-ENOMEM);
+
/* Initialize the control handler. */
v4l2_ctrl_handler_init(&clu->ctrls, 2);
v4l2_ctrl_new_custom(&clu->ctrls, &clu_table_control, NULL);
diff --git a/drivers/media/platform/vsp1/vsp1_clu.h 
b/drivers/media/platform/vsp1/vsp1_clu.h
index 036e0a2f1a42..fa3fe856725b 100644
--- a/drivers/media/platform/vsp1/vsp1_clu.h
+++ b/drivers/media/platform/vsp1/vsp1_clu.h
@@ -36,6 +36,7 @@ struct vsp1_clu {
spinlock_t lock;
unsigned int mode;
struct vsp1_dl_body *clu;
+   struct vsp1_dl_body_pool *pool;
 };
 
 static inline struct vsp1_clu *to_clu(struct v4l2_subdev *subdev)
diff --git a/drivers/media/platform/vsp1/vsp1_dl.c 
b/drivers/media/platform/vsp1/vsp1_dl.c
index 0208e72cb356..74476726451c 100644
--- a/drivers/media/platform/vsp1/vsp1_dl.c
+++ b/drivers/media/platform/vsp1/vsp1_dl.c
@@ -111,7 +111,7 @@ struct vsp1_dl_list {
struct vsp1_dl_header *header;
dma_addr_t dma;
 
-   struct vsp1_dl_body body0;
+   struct vsp1_dl_body *body0;
struct list_head bodies;
 
bool has_chain;
@@ -135,8 +135,6 @@ enum vsp1_dl_mode {
  * @queued: list queued to the hardware (written to the DL registers)
  * @pending: list waiting to be queued to the hardware
  * @pool: body pool for the display list bodies
- * @gc_work: bo