Re: [PATCH v2 2/8] v4l: vsp1: Provide a fragment pool

2017-09-12 Thread Laurent Pinchart
Hi Kieran,

On Monday, 11 September 2017 23:30:25 EEST Kieran Bingham wrote:
> On 17/08/17 13:13, Laurent Pinchart wrote:
> > On Monday 14 Aug 2017 16:13:25 Kieran Bingham wrote:
> >> Each display list allocates a body to store register values in a
> >> DMA-accessible buffer from a dma_alloc_wc() allocation. Each of these
> >> results in an entry in the TLB, and a large number of display list
> >> allocations adds pressure to this resource.
> >> 
> >> Reduce TLB pressure on the IPMMUs by allocating multiple display list
> >> bodies in a single allocation, and providing these to the display list
> >> through a 'fragment pool'. A pool can be allocated by the display list
> >> manager or entities which require their own body allocations.
> >> 
> >> Signed-off-by: Kieran Bingham 
> >> 
> >> ---
> >> 
> >> v2:
> >>  - assign dlb->dma correctly
> >> 
> >> ---
> >> 
> >>  drivers/media/platform/vsp1/vsp1_dl.c | 129 +++-
> >>  drivers/media/platform/vsp1/vsp1_dl.h |   8 ++-
> >>  2 files changed, 137 insertions(+)
> >> 
> >> diff --git a/drivers/media/platform/vsp1/vsp1_dl.c
> >> b/drivers/media/platform/vsp1/vsp1_dl.c
> >> index cb4625ae13c2..aab9dd6ec0eb 100644
> >> --- a/drivers/media/platform/vsp1/vsp1_dl.c
> >> +++ b/drivers/media/platform/vsp1/vsp1_dl.c

[snip]

> >>  /*
> >> + * Fragment pools reduce the pressure on the IOMMU TLB by allocating a
> >> + * single large area of DMA memory and allocating it as a pool of
> >> + * fragment bodies.
> >> + */
> > 
> > Could you document non-static functions using kerneldoc? Parameters to
> > this function would benefit from some documentation. I'd also like to see
> > the fragment get/put functions documented, as you remove the existing
> > kerneldoc for the alloc/free functions in patch 3/8.
> 
> Ah yes of course.
> 
> >> +struct vsp1_dl_fragment_pool *
> >> +vsp1_dl_fragment_pool_alloc(struct vsp1_device *vsp1, unsigned int qty,
> > 
> > I think I would name this function vsp1_dl_fragment_pool_create(), as it
> > does more than just allocating memory. Similarly I'd call the free
> > function vsp1_dl_fragment_pool_destroy().
> 
> That sounds reasonable. Done.
> 
> > qty is a bit vague, I'd rename it to num_fragments.
> 
> Ok with me.
> 
> >> +  unsigned int num_entries, size_t extra_size)
> >> +{
> >> +  struct vsp1_dl_fragment_pool *pool;
> >> +  size_t dlb_size;
> >> +  unsigned int i;
> >> +
> >> +  pool = kzalloc(sizeof(*pool), GFP_KERNEL);
> >> +  if (!pool)
> >> +  return NULL;
> >> +
> >> +  pool->vsp1 = vsp1;
> >> +
> >> +  dlb_size = num_entries * sizeof(struct vsp1_dl_entry) + extra_size;
> > 
> > extra_size is only used by vsp1_dlm_create(), to allocate extra memory for
> > the display list header. We need one header per display list, not per
> > display list body.
> 
> Good catch, that will take a little bit of reworking.

I didn't propose a fix for this as I wasn't sure how to fix it properly. I 
thus won't complain too loudly if you can't fix it either and waste a bit of 
memory :-) But in that case please add a comment to explain what's going on.
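
Something along these lines would do (an illustrative sketch only, not the
final wording):

	/*
	 * TODO: extra_size is allocated for each body in the pool, while
	 * the display list header it accounts for is only needed once per
	 * display list. This wastes a small amount of memory per body
	 * until the header allocation is separated from the body pool.
	 */
	dlb_size = num_entries * sizeof(struct vsp1_dl_entry) + extra_size;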

> >> +  pool->size = dlb_size * qty;
> >> +
> >> +  pool->bodies = kcalloc(qty, sizeof(*pool->bodies), GFP_KERNEL);
> >> +  if (!pool->bodies) {
> >> +  kfree(pool);
> >> +  return NULL;
> >> +  }
> >> +
> >> +  pool->mem = dma_alloc_wc(vsp1->bus_master, pool->size, &pool->dma,
> >> +  GFP_KERNEL);
> > 
> > This is a weird indentation.
> 
> I know! - Not sure how that slipped by :)
> 
> >> +  if (!pool->mem) {
> >> +  kfree(pool->bodies);
> >> +  kfree(pool);
> >> +  return NULL;
> >> +  }
> >> +
> >> +  spin_lock_init(&pool->lock);
> >> +  INIT_LIST_HEAD(&pool->free);
> >> +
> >> +  for (i = 0; i < qty; ++i) {
> >> +  struct vsp1_dl_body *dlb = &pool->bodies[i];
> >> +
> >> +  dlb->pool = pool;
> >> +  dlb->max_entries = num_entries;
> >> +
> >> +  dlb->dma = pool->dma + i * dlb_size;
> >> +  dlb->entries = pool->mem + i * dlb_size;
> >> +
> >> +  list_add_tail(&dlb->free, &pool->free);
> >> +  }
> >> +
> >> +  return pool;
> >> +}
> >> +
> >> +void vsp1_dl_fragment_pool_free(struct vsp1_dl_fragment_pool *pool)
> >> +{
> >> +  if (!pool)
> >> +  return;
> > 
> > Can this happen ?
> 
> I was mirroring 'kfree()' support here ... such that error paths can be
> simple.
> 
> Would you prefer that it's required to be a valid (non-null) pointer?
> 
> Actually - I think it is better to leave this for now - as we now call this
> function from the .destroy() entity functions ...

It was a genuine question :-) We have more control over the 
vsp1_dl_fragment_pool_free() callers as the function is internal to the 
driver. If we have real use cases for pool being NULL then let's keep the 
check.
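
For instance, cleanup paths can then free unconditionally (a hypothetical
caller, assuming an entity that owns its own pool):

	void vsp1_entity_destroy(struct vsp1_entity *entity)
	{
		/* A no-op when the pool was never allocated. */
		vsp1_dl_fragment_pool_free(entity->pool);
	}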

> >> +
> >> +  if (pool->mem)
> >> +  dma_free_wc(pool->vsp1->bus_master, pool->size, pool->mem,
> >> +  pool->dma);

Re: [PATCH v2 2/8] v4l: vsp1: Provide a fragment pool

2017-09-11 Thread Kieran Bingham
Hi Laurent,

Thanks for your review,

On 17/08/17 13:13, Laurent Pinchart wrote:
> Hi Kieran,
> 
> Thank you for the patch.
> 
> On Monday 14 Aug 2017 16:13:25 Kieran Bingham wrote:
>> Each display list allocates a body to store register values in a
>> DMA-accessible buffer from a dma_alloc_wc() allocation. Each of these
>> results in an entry in the TLB, and a large number of display list
>> allocations adds pressure to this resource.
>>
>> Reduce TLB pressure on the IPMMUs by allocating multiple display list
>> bodies in a single allocation, and providing these to the display list
>> through a 'fragment pool'. A pool can be allocated by the display list
>> manager or entities which require their own body allocations.
>>
>> Signed-off-by: Kieran Bingham 
>>
>> ---
>> v2:
>>  - assign dlb->dma correctly
>> ---
>>  drivers/media/platform/vsp1/vsp1_dl.c | 129 +++-
>>  drivers/media/platform/vsp1/vsp1_dl.h |   8 ++-
>>  2 files changed, 137 insertions(+)
>>
>> diff --git a/drivers/media/platform/vsp1/vsp1_dl.c
>> b/drivers/media/platform/vsp1/vsp1_dl.c
>> index cb4625ae13c2..aab9dd6ec0eb 100644
>> --- a/drivers/media/platform/vsp1/vsp1_dl.c
>> +++ b/drivers/media/platform/vsp1/vsp1_dl.c
>> @@ -45,6 +45,8 @@ struct vsp1_dl_entry {
>>  /**
>>   * struct vsp1_dl_body - Display list body
>>   * @list: entry in the display list list of bodies
>> + * @free: entry in the pool free body list
>> + * @pool: pool to which this body belongs
>>   * @vsp1: the VSP1 device
>>   * @entries: array of entries
>>   * @dma: DMA address of the entries
>> @@ -54,6 +56,9 @@ struct vsp1_dl_entry {
>>   */
>>  struct vsp1_dl_body {
>>  	struct list_head list;
>> +	struct list_head free;
>> +
>> +	struct vsp1_dl_fragment_pool *pool;
>>  	struct vsp1_device *vsp1;
>>
>>  	struct vsp1_dl_entry *entries;
>> @@ -65,6 +70,30 @@ struct vsp1_dl_body {
>>  };
>>
>>  /**
>> + * struct vsp1_dl_fragment_pool - display list body/fragment pool
>> + * @dma: DMA address of the entries
>> + * @size: size of the full DMA memory pool in bytes
>> + * @mem: CPU memory pointer for the pool
>> + * @bodies: Array of DLB structures for the pool
>> + * @free: List of free DLB entries
>> + * @lock: Protects the pool and free list
>> + * @vsp1: the VSP1 device
>> + */
>> +struct vsp1_dl_fragment_pool {
>> +	/* DMA allocation */
>> +	dma_addr_t dma;
>> +	size_t size;
>> +	void *mem;
>> +
>> +	/* Body management */
>> +	struct vsp1_dl_body *bodies;
>> +	struct list_head free;
>> +	spinlock_t lock;
>> +
>> +	struct vsp1_device *vsp1;
>> +};
>> +
>> +/**
>>   * struct vsp1_dl_list - Display list
>>   * @list: entry in the display list manager lists
>>   * @dlm: the display list manager
>> @@ -104,6 +133,7 @@ enum vsp1_dl_mode {
>>   * @active: list currently being processed (loaded) by hardware
>>   * @queued: list queued to the hardware (written to the DL registers)
>>   * @pending: list waiting to be queued to the hardware
>> + * @pool: fragment pool for the display list bodies
>>   * @gc_work: fragments garbage collector work struct
>>   * @gc_fragments: array of display list fragments waiting to be freed
>>   */
>> @@ -119,6 +149,8 @@ struct vsp1_dl_manager {
>>  	struct vsp1_dl_list *queued;
>>  	struct vsp1_dl_list *pending;
>>
>> +	struct vsp1_dl_fragment_pool *pool;
>> +
>>  	struct work_struct gc_work;
>>  	struct list_head gc_fragments;
>>  };
>> @@ -128,6 +160,103 @@ struct vsp1_dl_manager {
>>   */
>>
>>  /*
>> + * Fragment pools reduce the pressure on the IOMMU TLB by allocating a
>> + * single large area of DMA memory and allocating it as a pool of
>> + * fragment bodies.
>> + */
> 
> Could you document non-static functions using kerneldoc? Parameters to this
> function would benefit from some documentation. I'd also like to see the
> fragment get/put functions documented, as you remove the existing kerneldoc
> for the alloc/free functions in patch 3/8.

Ah yes of course.
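
Perhaps something along these lines for the pool constructor (a first
sketch only, using the renames you suggest below):

/**
 * vsp1_dl_fragment_pool_create - Create a pool of fragment bodies
 * @vsp1: The VSP1 device
 * @num_fragments: The number of bodies to allocate in the pool
 * @num_entries: The maximum number of entries per body
 * @extra_size: Extra allocation, in bytes, for each body
 *
 * Allocate a pool of display list bodies, each with enough DMA memory to
 * store num_entries register entries plus extra_size bytes.
 *
 * Return: A pointer to the new pool, or NULL if allocation failed
 */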

>> +struct vsp1_dl_fragment_pool *
>> +vsp1_dl_fragment_pool_alloc(struct vsp1_device *vsp1, unsigned int qty,
> 
> I think I would name this function vsp1_dl_fragment_pool_create(), as it does 
> more than just allocating memory. Similarly I'd call the free function 
> vsp1_dl_fragment_pool_destroy().

That sounds reasonable. Done.

> qty is a bit vague, I'd rename it to num_fragments.

Ok with me.

> 
>> +			    unsigned int num_entries, size_t extra_size)
>> +{
>> +	struct vsp1_dl_fragment_pool *pool;
>> +	size_t dlb_size;
>> +	unsigned int i;
>> +
>> +	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
>> +	if (!pool)
>> +		return NULL;
>> +
>> +	pool->vsp1 = vsp1;
>> +
>> +	dlb_size = num_entries * sizeof(struct vsp1_dl_entry) + extra_size;
> 
> extra_size is only used by vsp1_dlm_create(), to allocate extra memory for
> the display list header. We need one header per display list, not per
> display list body.

Re: [PATCH v2 2/8] v4l: vsp1: Provide a fragment pool

2017-08-17 Thread Laurent Pinchart
Hi Kieran,

Thank you for the patch.

On Monday 14 Aug 2017 16:13:25 Kieran Bingham wrote:
> Each display list allocates a body to store register values in a
> DMA-accessible buffer from a dma_alloc_wc() allocation. Each of these
> results in an entry in the TLB, and a large number of display list
> allocations adds pressure to this resource.
> 
> Reduce TLB pressure on the IPMMUs by allocating multiple display list
> bodies in a single allocation, and providing these to the display list
> through a 'fragment pool'. A pool can be allocated by the display list
> manager or entities which require their own body allocations.
> 
> Signed-off-by: Kieran Bingham 
> 
> ---
> v2:
>  - assign dlb->dma correctly
> ---
>  drivers/media/platform/vsp1/vsp1_dl.c | 129 +++-
>  drivers/media/platform/vsp1/vsp1_dl.h |   8 ++-
>  2 files changed, 137 insertions(+)
> 
> diff --git a/drivers/media/platform/vsp1/vsp1_dl.c
> b/drivers/media/platform/vsp1/vsp1_dl.c
> index cb4625ae13c2..aab9dd6ec0eb 100644
> --- a/drivers/media/platform/vsp1/vsp1_dl.c
> +++ b/drivers/media/platform/vsp1/vsp1_dl.c
> @@ -45,6 +45,8 @@ struct vsp1_dl_entry {
>  /**
>   * struct vsp1_dl_body - Display list body
>   * @list: entry in the display list list of bodies
> + * @free: entry in the pool free body list
> + * @pool: pool to which this body belongs
>   * @vsp1: the VSP1 device
>   * @entries: array of entries
>   * @dma: DMA address of the entries
> @@ -54,6 +56,9 @@ struct vsp1_dl_entry {
>   */
>  struct vsp1_dl_body {
>   struct list_head list;
> + struct list_head free;
> +
> + struct vsp1_dl_fragment_pool *pool;
>   struct vsp1_device *vsp1;
> 
>   struct vsp1_dl_entry *entries;
> @@ -65,6 +70,30 @@ struct vsp1_dl_body {
>  };
> 
>  /**
> + * struct vsp1_dl_fragment_pool - display list body/fragment pool
> + * @dma: DMA address of the entries
> + * @size: size of the full DMA memory pool in bytes
> + * @mem: CPU memory pointer for the pool
> + * @bodies: Array of DLB structures for the pool
> + * @free: List of free DLB entries
> + * @lock: Protects the pool and free list
> + * @vsp1: the VSP1 device
> + */
> +struct vsp1_dl_fragment_pool {
> + /* DMA allocation */
> + dma_addr_t dma;
> + size_t size;
> + void *mem;
> +
> + /* Body management */
> + struct vsp1_dl_body *bodies;
> + struct list_head free;
> + spinlock_t lock;
> +
> + struct vsp1_device *vsp1;
> +};
> +
> +/**
>   * struct vsp1_dl_list - Display list
>   * @list: entry in the display list manager lists
>   * @dlm: the display list manager
> @@ -104,6 +133,7 @@ enum vsp1_dl_mode {
>   * @active: list currently being processed (loaded) by hardware
>   * @queued: list queued to the hardware (written to the DL registers)
>   * @pending: list waiting to be queued to the hardware
> + * @pool: fragment pool for the display list bodies
>   * @gc_work: fragments garbage collector work struct
>   * @gc_fragments: array of display list fragments waiting to be freed
>   */
> @@ -119,6 +149,8 @@ struct vsp1_dl_manager {
>   struct vsp1_dl_list *queued;
>   struct vsp1_dl_list *pending;
> 
> + struct vsp1_dl_fragment_pool *pool;
> +
>   struct work_struct gc_work;
>   struct list_head gc_fragments;
>  };
> @@ -128,6 +160,103 @@ struct vsp1_dl_manager {
>   */
> 
>  /*
> + * Fragment pools reduce the pressure on the IOMMU TLB by allocating a
> + * single large area of DMA memory and allocating it as a pool of
> + * fragment bodies.
> + */

Could you document non-static functions using kerneldoc? Parameters to this
function would benefit from some documentation. I'd also like to see the
fragment get/put functions documented, as you remove the existing kerneldoc
for the alloc/free functions in patch 3/8.
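
For the get/put pair in particular, my understanding is that they are
essentially free-list accessors along these lines (a sketch inferred from
this patch, not the actual implementation):

	struct vsp1_dl_body *
	vsp1_dl_fragment_get(struct vsp1_dl_fragment_pool *pool)
	{
		struct vsp1_dl_body *dlb = NULL;
		unsigned long flags;

		spin_lock_irqsave(&pool->lock, flags);

		if (!list_empty(&pool->free)) {
			/* Hand out the first free body in the pool. */
			dlb = list_first_entry(&pool->free,
					       struct vsp1_dl_body, free);
			list_del(&dlb->free);
		}

		spin_unlock_irqrestore(&pool->lock, flags);

		return dlb;
	}

	void vsp1_dl_fragment_put(struct vsp1_dl_body *dlb)
	{
		unsigned long flags;

		if (!dlb)
			return;

		/* Return the body to its pool's free list. */
		spin_lock_irqsave(&dlb->pool->lock, flags);
		list_add_tail(&dlb->free, &dlb->pool->free);
		spin_unlock_irqrestore(&dlb->pool->lock, flags);
	}

Kerneldoc stating that contract explicitly would help future readers.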

> +struct vsp1_dl_fragment_pool *
> +vsp1_dl_fragment_pool_alloc(struct vsp1_device *vsp1, unsigned int qty,

I think I would name this function vsp1_dl_fragment_pool_create(), as it does 
more than just allocating memory. Similarly I'd call the free function 
vsp1_dl_fragment_pool_destroy().

qty is a bit vague, I'd rename it to num_fragments.

> + unsigned int num_entries, size_t extra_size)
> +{
> + struct vsp1_dl_fragment_pool *pool;
> + size_t dlb_size;
> + unsigned int i;
> +
> + pool = kzalloc(sizeof(*pool), GFP_KERNEL);
> + if (!pool)
> + return NULL;
> +
> + pool->vsp1 = vsp1;
> +
> + dlb_size = num_entries * sizeof(struct vsp1_dl_entry) + extra_size;

extra_size is only used by vsp1_dlm_create(), to allocate extra memory for the 
display list header. We need one header per display list, not per display list 
body.

> + pool->size = dlb_size * qty;
> +
> + pool->bodies = kcalloc(qty, sizeof(*pool->bodies), GFP_KERNEL);
> + if (!pool->bodies) {
> + kfree(pool);
> + return NULL;
> + }
> +
> + pool->mem = dma_alloc_wc(vsp1->bus_master, pool->size, &pool->dma,
> + GFP_KERNEL);

[PATCH v2 2/8] v4l: vsp1: Provide a fragment pool

2017-08-14 Thread Kieran Bingham
Each display list allocates a body to store register values in a
DMA-accessible buffer from a dma_alloc_wc() allocation. Each of these
results in an entry in the TLB, and a large number of display list
allocations adds pressure to this resource.

Reduce TLB pressure on the IPMMUs by allocating multiple display list
bodies in a single allocation, and providing these to the display list
through a 'fragment pool'. A pool can be allocated by the display list
manager or entities which require their own body allocations.

Signed-off-by: Kieran Bingham 

---
v2:
 - assign dlb->dma correctly
---
 drivers/media/platform/vsp1/vsp1_dl.c | 129 +++-
 drivers/media/platform/vsp1/vsp1_dl.h |   8 ++-
 2 files changed, 137 insertions(+)

diff --git a/drivers/media/platform/vsp1/vsp1_dl.c 
b/drivers/media/platform/vsp1/vsp1_dl.c
index cb4625ae13c2..aab9dd6ec0eb 100644
--- a/drivers/media/platform/vsp1/vsp1_dl.c
+++ b/drivers/media/platform/vsp1/vsp1_dl.c
@@ -45,6 +45,8 @@ struct vsp1_dl_entry {
 /**
  * struct vsp1_dl_body - Display list body
  * @list: entry in the display list list of bodies
+ * @free: entry in the pool free body list
+ * @pool: pool to which this body belongs
  * @vsp1: the VSP1 device
  * @entries: array of entries
  * @dma: DMA address of the entries
@@ -54,6 +56,9 @@ struct vsp1_dl_entry {
  */
 struct vsp1_dl_body {
struct list_head list;
+   struct list_head free;
+
+   struct vsp1_dl_fragment_pool *pool;
struct vsp1_device *vsp1;
 
struct vsp1_dl_entry *entries;
@@ -65,6 +70,30 @@ struct vsp1_dl_body {
 };
 
 /**
+ * struct vsp1_dl_fragment_pool - display list body/fragment pool
+ * @dma: DMA address of the entries
+ * @size: size of the full DMA memory pool in bytes
+ * @mem: CPU memory pointer for the pool
+ * @bodies: Array of DLB structures for the pool
+ * @free: List of free DLB entries
+ * @lock: Protects the pool and free list
+ * @vsp1: the VSP1 device
+ */
+struct vsp1_dl_fragment_pool {
+   /* DMA allocation */
+   dma_addr_t dma;
+   size_t size;
+   void *mem;
+
+   /* Body management */
+   struct vsp1_dl_body *bodies;
+   struct list_head free;
+   spinlock_t lock;
+
+   struct vsp1_device *vsp1;
+};
+
+/**
  * struct vsp1_dl_list - Display list
  * @list: entry in the display list manager lists
  * @dlm: the display list manager
@@ -104,6 +133,7 @@ enum vsp1_dl_mode {
  * @active: list currently being processed (loaded) by hardware
  * @queued: list queued to the hardware (written to the DL registers)
  * @pending: list waiting to be queued to the hardware
+ * @pool: fragment pool for the display list bodies
  * @gc_work: fragments garbage collector work struct
  * @gc_fragments: array of display list fragments waiting to be freed
  */
@@ -119,6 +149,8 @@ struct vsp1_dl_manager {
struct vsp1_dl_list *queued;
struct vsp1_dl_list *pending;
 
+   struct vsp1_dl_fragment_pool *pool;
+
struct work_struct gc_work;
struct list_head gc_fragments;
 };
@@ -128,6 +160,103 @@ struct vsp1_dl_manager {
  */
 
 /*
+ * Fragment pools reduce the pressure on the IOMMU TLB by allocating a single
+ * large area of DMA memory and allocating it as a pool of fragment bodies.
+ */
+struct vsp1_dl_fragment_pool *
+vsp1_dl_fragment_pool_alloc(struct vsp1_device *vsp1, unsigned int qty,
+   unsigned int num_entries, size_t extra_size)
+{
+   struct vsp1_dl_fragment_pool *pool;
+   size_t dlb_size;
+   unsigned int i;
+
+   pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+   if (!pool)
+   return NULL;
+
+   pool->vsp1 = vsp1;
+
+   dlb_size = num_entries * sizeof(struct vsp1_dl_entry) + extra_size;
+   pool->size = dlb_size * qty;
+
+   pool->bodies = kcalloc(qty, sizeof(*pool->bodies), GFP_KERNEL);
+   if (!pool->bodies) {
+   kfree(pool);
+   return NULL;
+   }
+
+   pool->mem = dma_alloc_wc(vsp1->bus_master, pool->size, &pool->dma,
+   GFP_KERNEL);
+   if (!pool->mem) {
+   kfree(pool->bodies);
+   kfree(pool);
+   return NULL;
+   }
+
+   spin_lock_init(&pool->lock);
+   INIT_LIST_HEAD(&pool->free);
+
+   for (i = 0; i < qty; ++i) {
+   struct vsp1_dl_body *dlb = &pool->bodies[i];
+
+   dlb->pool = pool;
+   dlb->max_entries = num_entries;
+
+   dlb->dma = pool->dma + i * dlb_size;
+   dlb->entries = pool->mem + i * dlb_size;
+
+   list_add_tail(&dlb->free, &pool->free);
+   }
+
+   return pool;
+}
+
+void vsp1_dl_fragment_pool_free(struct vsp1_dl_fragment_pool *pool)
+{
+   if (!pool)
+   return;
+
+   if (pool->mem)
+   dma_free_wc(pool->vsp1->bus_master, pool->size, pool->mem,
+   pool->dma);
+
+   kfree(pool->bodies);
+