On Fri, May 20, 2016 at 01:31:29AM +0100, Dave Gordon wrote:
> The existing for_each_sg_page() iterator is somewhat heavyweight, and is
> limiting i915 driver performance in a few benchmarks. So here we
> introduce somewhat lighter-weight iterators, primarily for use with GEM
> objects or other cases where we need only deal with whole aligned pages.
> 
> Unlike the old iterator, the new iterators use an internal state
> structure which is not intended to be accessed by the caller; instead
> each takes as a parameter an output variable which is set before each
> iteration. This makes them particularly simple to use :)
> 
> One of the new iterators provides the caller with the DMA address of
> each page in turn; the other provides the 'struct page' pointer required
> by many memory management operations.
> 
> Various uses of for_each_sg_page() are then converted to the new macros.
> 
> Signed-off-by: Dave Gordon <[email protected]>
> Cc: Chris Wilson <[email protected]>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         | 62 +++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_gem.c         | 20 ++++-----
>  drivers/gpu/drm/i915/i915_gem_fence.c   | 14 +++---
>  drivers/gpu/drm/i915/i915_gem_gtt.c     | 76 ++++++++++++++++-----------------
>  drivers/gpu/drm/i915/i915_gem_userptr.c |  7 ++-
>  5 files changed, 116 insertions(+), 63 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 6894d8e..01cde0f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2120,6 +2120,10 @@ struct drm_i915_gem_object_ops {
>  #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \
>       (0xff << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
>  
> +void i915_gem_track_fb(struct drm_i915_gem_object *old,
> +                    struct drm_i915_gem_object *new,
> +                    unsigned frontbuffer_bits);
> +

That's not a much better spot, since this is now before the object it
acts upon. One day we'll get our types separated from their actors.

>  struct drm_i915_gem_object {
>       struct drm_gem_object base;
>  
> @@ -2251,9 +2255,61 @@ struct drm_i915_gem_object {
>  };
>  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
>  
> -void i915_gem_track_fb(struct drm_i915_gem_object *old,
> -                    struct drm_i915_gem_object *new,
> -                    unsigned frontbuffer_bits);
> +/*
> + * New optimised SGL iterator for GEM objects
> + */
> +struct sgt_iter {
> +     struct scatterlist *sgp;
> +     union {
> +             unsigned long pfn;
> +             unsigned long dma;
> +     } ix;

An anonymous union here would be slightly more pleasant. And we should
be using the correct types.

> +     unsigned int curr;
> +     unsigned int max;
> +};
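
Something along these lines, i.e. drop the 'ix' name and use the usual kernel
types (dma_addr_t for the bus address, unsigned long for the pfn); sketch only,
untested:

struct sgt_iter {
        struct scatterlist *sgp;
        union {
                unsigned long pfn;      /* page-frame number, for the page walk */
                dma_addr_t dma;         /* bus address, for the DMA walk */
        };
        unsigned int curr;
        unsigned int max;
};

With the anonymous union the accesses below become s.pfn / (__iter).dma
rather than s.ix.pfn / (__iter).ix.dma.
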
> +
> +/* Constructor for new iterator */
> +static inline struct sgt_iter
> +__sgt_iter(struct scatterlist *sgl, bool dma)
> +{
> +     struct sgt_iter s = { .sgp = sgl };
> +
> +     if (s.sgp) {

Not acceptable just to GPF out if passed in a NULL? It's a BUG for all
our use cases. And would save a conditional every chain step.

> +             s.max = s.curr = s.sgp->offset;
> +             s.max += s.sgp->length;
> +             if (dma)
> +                     s.ix.dma = sg_dma_address(s.sgp);
> +             else
> +                     s.ix.pfn = page_to_pfn(sg_page(s.sgp));
> +     }
> +
> +     return s;
> +}
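
i.e. something like this (untested). Note that the chain step in the macros
below still passes in sg_next()'s NULL at the end of the table and relies on
the zero-filled iterator to terminate, so killing the branch here would also
mean moving the end-of-table check into the macros:

static inline struct sgt_iter
__sgt_iter(struct scatterlist *sgl, bool dma)
{
        struct sgt_iter s = { .sgp = sgl };

        /* sgl must not be NULL; the caller guarantees a valid first entry */
        s.max = s.curr = s.sgp->offset;
        s.max += s.sgp->length;
        if (dma)
                s.ix.dma = sg_dma_address(s.sgp);
        else
                s.ix.pfn = page_to_pfn(sg_page(s.sgp));

        return s;
}
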
> +
> +/**
> + * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
> + * @__dmap:  DMA address (output)
> + * @__iter:  'struct sgt_iter' (iterator state, internal)
> + * @__sgt:   sg_table to iterate over (input)
> + */
> +#define for_each_sgt_dma(__dmap, __iter, __sgt)                      \
> +     for ((__iter) = __sgt_iter((__sgt)->sgl, true);                 \
> +          ((__dmap) = (__iter).ix.dma + (__iter).curr);              \

This prevents us using dma_addr_t 0, which is the error value iirc, so ok.

> +          (((__iter).curr += PAGE_SIZE) < (__iter).max) ||           \
> +             ((__iter) = __sgt_iter(sg_next((__iter).sgp), true), 0))
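
FWIW the usage then is pleasantly simple; for example (count_dma_pages() is
made up, but obj->pages is the object's sg_table as used throughout the
driver):

static u32 count_dma_pages(struct drm_i915_gem_object *obj)
{
        struct sgt_iter sgt_iter;
        dma_addr_t addr;
        u32 count = 0;

        for_each_sgt_dma(addr, sgt_iter, obj->pages) {
                WARN_ON(addr & ~PAGE_MASK);     /* whole, aligned pages only */
                count++;
        }

        return count;
}
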
> +
> +/**
> + * for_each_sgt_page - iterate over the pages of the given sg_table
> + * @__pp:    page pointer (output)
> + * @__iter:  'struct sgt_iter' (iterator state, internal)
> + * @__sgt:   sg_table to iterate over (input)
> + */
> +#define for_each_sgt_page(__pp, __iter, __sgt)                       \
> +     for ((__iter) = __sgt_iter((__sgt)->sgl, false);                \
> +          ((__pp) = (__iter).ix.pfn == 0 ? NULL :                    \
> +                pfn_to_page((__iter).ix.pfn + ((__iter).curr >> PAGE_SHIFT)));\

If we did the shift in __sgt_iter() we could use simpler instructions inside
the loop. And the compiler won't know that pfn_to_page() never returns NULL,
so maybe a pfn_to_page(), 1 here will help? (Sketched below the quoted macro.)

> +          (((__iter).curr += PAGE_SIZE) < (__iter).max) ||           \
> +             ((__iter) = __sgt_iter(sg_next((__iter).sgp), false), 0))
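
Concretely, something like this for the condition (untested; one behavioural
difference is that __pp is no longer reset to NULL when the walk ends):

#define for_each_sgt_page(__pp, __iter, __sgt)                         \
        for ((__iter) = __sgt_iter((__sgt)->sgl, false);               \
             (__iter).ix.pfn ?                                         \
                ((__pp) = pfn_to_page((__iter).ix.pfn +                \
                                ((__iter).curr >> PAGE_SHIFT)), 1) : 0; \
             (((__iter).curr += PAGE_SIZE) < (__iter).max) ||          \
                ((__iter) = __sgt_iter(sg_next((__iter).sgp), false), 0))

Doing the shift in __sgt_iter() instead presumably means keeping curr in page
units for this walk, which the shared constructor would then have to
special-case against the byte-based DMA walk.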

-- 
Chris Wilson, Intel Open Source Technology Centre