On Thu, 2016-06-09 at 12:29 +0100, Chris Wilson wrote:
> With the introduction of the reloc page cache, we are just one step
> away from refactoring the relocation write functions into one. Not only
> does it tidy the code (slightly), but it greatly simplifies the control
> logic much to gcc's satisfaction.
> 
> Signed-off-by: Chris Wilson <[email protected]>
> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 289 +++++++++++++++--------------
>  1 file changed, 150 insertions(+), 139 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 318c71b663f4..bda8ec8b02e6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -34,6 +34,8 @@
>  #include 
>  #include 
>  
> +#define DBG_USE_CPU_RELOC 0 /* force relocations to use the CPU write path */

Better to connect this to the new debug Kconfig menu you introduced?
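
Something like this, perhaps (the option name below is just my guess,
use whatever the debug menu entry ended up being called):

	/* Hypothetical hookup to the debug Kconfig menu: */
	#define DBG_USE_CPU_RELOC IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)

That way the knob flips together with the rest of the debug options
instead of needing a source edit and rebuild.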

> +
>  #define  __EXEC_OBJECT_HAS_PIN (1U<<31)
>  #define  __EXEC_OBJECT_HAS_FENCE (1U<<30)
>  #define  __EXEC_OBJECT_NEEDS_MAP (1U<<29)
> @@ -53,6 +55,7 @@ struct i915_execbuffer_params {
>  };
>  
>  struct eb_vmas {
> +     struct drm_i915_private *i915;
>       struct list_head vmas;
>       int and;
>       union {
> @@ -62,7 +65,8 @@ struct eb_vmas {
>  };
>  
>  static struct eb_vmas *
> -eb_create(struct drm_i915_gem_execbuffer2 *args)
> +eb_create(struct drm_i915_private *i915,
> +       struct drm_i915_gem_execbuffer2 *args)
>  {
>       struct eb_vmas *eb = NULL;
>  
> @@ -89,6 +93,7 @@ eb_create(struct drm_i915_gem_execbuffer2 *args)
>       } else
>               eb->and = -args->buffer_count;
>  
> +     eb->i915 = i915;
>       INIT_LIST_HEAD(&eb->vmas);
>       return eb;
>  }
> @@ -272,7 +277,8 @@ static void eb_destroy(struct eb_vmas *eb)
>  
>  static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
>  {
> -     return (HAS_LLC(obj->base.dev) ||
> +     return (DBG_USE_CPU_RELOC ||
> +             HAS_LLC(obj->base.dev) ||
>               obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
>               obj->cache_level != I915_CACHE_NONE);
>  }
> @@ -296,32 +302,58 @@ static inline uint64_t gen8_noncanonical_addr(uint64_t address)
>  }
>  
>  static inline uint64_t
> -relocation_target(struct drm_i915_gem_relocation_entry *reloc,
> +relocation_target(const struct drm_i915_gem_relocation_entry *reloc,

These const changes could go in as one bunch instead of being sprinkled
around. Unless we want to smuggle them in through the resistance.

>                 uint64_t target_offset)
>  {
>       return gen8_canonical_addr((int)reloc->delta + target_offset);
>  }
>  
>  struct reloc_cache {
> -     void *vaddr;
> +     struct drm_i915_private *i915;
> +     unsigned long vaddr;
>       unsigned page;
> -     enum { KMAP, IOMAP } type;
> +     struct drm_mm_node node;
> +     bool use_64bit_reloc;
>  };
>  
> -static void reloc_cache_init(struct reloc_cache *cache)
> +static void reloc_cache_init(struct reloc_cache *cache,
> +                          struct drm_i915_private *i915)
>  {
>       cache->page = -1;
> -     cache->vaddr = NULL;
> +     cache->vaddr = 0;
> +     cache->i915 = i915;
> +     cache->use_64bit_reloc = INTEL_GEN(cache->i915) >= 8;
> +}
> +
> +static inline void *unmask_page(unsigned long p)
> +{
> +     return (void *)(uintptr_t)(p & PAGE_MASK);
>  }
>  
> +static inline unsigned unmask_flags(unsigned long p)
> +{
> +     return p & ~PAGE_MASK;
> +}
> +
> +#define KMAP 0x4
> +
>  static void reloc_cache_fini(struct reloc_cache *cache)
>  {
> -     if (cache->vaddr == NULL)
> +     void *vaddr;
> +
> +     if (cache->vaddr == 0)
>               return;
>  
> -     switch (cache->type) {
> -     case KMAP: kunmap_atomic(cache->vaddr); break;
> -     case IOMAP: io_mapping_unmap_atomic(cache->vaddr); break;
> +     vaddr = unmask_page(cache->vaddr);
> +     if (cache->vaddr & KMAP) {
> +             if (cache->vaddr & CLFLUSH_AFTER)
> +                     mb();
> +
> +             kunmap_atomic(vaddr);
> +             i915_gem_object_unpin_pages((struct drm_i915_gem_object *)cache->node.mm);

Rather long line.
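
A local variable would keep it under the limit, e.g. (untested sketch):

	struct drm_i915_gem_object *obj =
		(struct drm_i915_gem_object *)cache->node.mm;

	kunmap_atomic(vaddr);
	i915_gem_object_unpin_pages(obj);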

> +     } else {
> +             io_mapping_unmap_atomic(vaddr);
> +             i915_vma_unpin((struct i915_vma *)cache->node.mm);
>       }
>  }
>  
> @@ -329,148 +361,138 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
>                       struct reloc_cache *cache,
>                       int page)
>  {
> -     if (cache->page == page)
> -             return cache->vaddr;
> +     void *vaddr;
>  
> -     if (cache->vaddr)
> -             kunmap_atomic(cache->vaddr);
> +     if (cache->vaddr) {
> +             kunmap_atomic(unmask_page(cache->vaddr));
> +     } else {
> +             unsigned flushes;
> +             int ret;
> +
> +             ret = i915_gem_obj_prepare_shmem_write(obj, &flushes);

Yeah, needs_clflush is such a bad name earlier in the series that even
you don't use it here. "need_clflushes" or anything else would be better.

> +             if (ret)
> +                     return ERR_PTR(ret);
>  
> +             cache->vaddr = flushes | KMAP;
> +             cache->node.mm = (void *)obj;
> +             if (flushes)
> +                     mb();
> +     }
> +
> +     vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
> +     cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
>       cache->page = page;
> -     cache->vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
> -     cache->type = KMAP;
>  
> -     return cache->vaddr;
> +     return vaddr;
>  }
>  
> -static int
> -relocate_entry_cpu(struct drm_i915_gem_object *obj,
> -                struct drm_i915_gem_relocation_entry *reloc,
> -                struct reloc_cache *cache,
> -                uint64_t target_offset)
> +static void *reloc_iomap(struct drm_i915_gem_object *obj,
> +                      struct reloc_cache *cache,
> +                      int page)
>  {
> -     struct drm_device *dev = obj->base.dev;
> -     uint32_t page_offset = offset_in_page(reloc->offset);
> -     uint64_t delta = relocation_target(reloc, target_offset);
> -     char *vaddr;
> -     int ret;
> +     void *vaddr;
>  
> -     ret = i915_gem_object_set_to_cpu_domain(obj, true);
> -     if (ret)
> -             return ret;
> +     if (cache->vaddr) {
> +             io_mapping_unmap_atomic(unmask_page(cache->vaddr));
> +     } else {
> +             struct i915_vma *vma;
> +             int ret;
>  
> -     vaddr = reloc_kmap(obj, cache, reloc->offset >> PAGE_SHIFT);
> -     *(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
> +             if (use_cpu_reloc(obj))
> +                     return NULL;
>  
> -     if (INTEL_GEN(dev) >= 8) {
> -             page_offset += sizeof(uint32_t);
> -             if (page_offset == PAGE_SIZE) {
> -                     vaddr = reloc_kmap(obj, cache, cache->page + 1);
> -                     page_offset = 0;
> -             }
> -             *(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
> -     }
> +             vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
> +                                            PIN_MAPPABLE | PIN_NONBLOCK);
> +             if (IS_ERR(vma))
> +                     return NULL;
>  
> -     return 0;
> -}

Ugh, did you simultaneously move and rename functions? That makes the
diff rather hard to follow from this point on...

Regards, Joonas

> +             ret = i915_gem_object_set_to_gtt_domain(obj, true);
> +             if (ret)
> +                     return ERR_PTR(ret);
>  
> -static void *reloc_iomap(struct drm_i915_private *i915,
> -                      struct reloc_cache *cache,
> -                      uint64_t offset)
> -{
> -     if (cache->page == offset >> PAGE_SHIFT)
> -             return cache->vaddr;
> +             ret = i915_gem_object_put_fence(obj);
> +             if (ret)
> +                     return ERR_PTR(ret);
>  
> -     if (cache->vaddr)
> -             io_mapping_unmap_atomic(cache->vaddr);
> +             cache->node.start = vma->node.start;
> +             cache->node.mm = (void *)vma;
> +     }
>  
> -     cache->page = offset >> PAGE_SHIFT;
> -     cache->vaddr =
> -             io_mapping_map_atomic_wc(i915->ggtt.mappable,
> -                                      offset & PAGE_MASK);
> -     cache->type = IOMAP;
> +     vaddr = io_mapping_map_atomic_wc(cache->i915->ggtt.mappable,
> +                                      cache->node.start + (page << PAGE_SHIFT));
> +     cache->page = page;
> +     cache->vaddr = (unsigned long)vaddr;
>  
> -     return cache->vaddr;
> +     return vaddr;
>  }
>  
> -static int
> -relocate_entry_gtt(struct drm_i915_gem_object *obj,
> -                struct drm_i915_gem_relocation_entry *reloc,
> -                struct reloc_cache *cache,
> -                uint64_t target_offset)
> +static void *reloc_vaddr(struct drm_i915_gem_object *obj,
> +                      struct reloc_cache *cache,
> +                      int page)
>  {
> -     struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
> -     struct i915_vma *vma;
> -     uint64_t delta = relocation_target(reloc, target_offset);
> -     uint64_t offset;
> -     void __iomem *reloc_page;
> -     int ret;
> -
> -     vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
> -     if (IS_ERR(vma))
> -             return PTR_ERR(vma);
> -
> -     ret = i915_gem_object_set_to_gtt_domain(obj, true);
> -     if (ret)
> -             goto unpin;
> -
> -     ret = i915_gem_object_put_fence(obj);
> -     if (ret)
> -             goto unpin;
> -
> -     /* Map the page containing the relocation we're going to perform.  */
> -     offset = vma->node.start;
> -     offset += reloc->offset;
> -     reloc_page = reloc_iomap(dev_priv, cache, offset);
> -     iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
> +     void *vaddr;
>  
> -     if (INTEL_GEN(dev_priv) >= 8) {
> -             offset += sizeof(uint32_t);
> -             if (offset_in_page(offset) == 0)
> -                     reloc_page = reloc_iomap(dev_priv, cache, offset);
> -             iowrite32(upper_32_bits(delta),
> -                       reloc_page + offset_in_page(offset));
> +     if (cache->page == page) {
> +             vaddr = unmask_page(cache->vaddr);
> +     } else {
> +             vaddr = NULL;
> +             if ((cache->vaddr & KMAP) == 0)
> +                     vaddr = reloc_iomap(obj, cache, page);
> +             if (vaddr == NULL)
> +                     vaddr = reloc_kmap(obj, cache, page);
>       }
>  
> -unpin:
> -     __i915_vma_unpin(vma);
> -     return ret;
> +     return vaddr;
>  }
>  
>  static void
> -clflush_write32(void *addr, uint32_t value)
> +clflush_write32(uint32_t *addr, uint32_t value, unsigned flushes)
>  {
> -     /* This is not a fast path, so KISS. */
> -     drm_clflush_virt_range(addr, sizeof(uint32_t));
> -     *(uint32_t *)addr = value;
> -     drm_clflush_virt_range(addr, sizeof(uint32_t));
> +     if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
> +             if (flushes & CLFLUSH_BEFORE) {
> +                     clflushopt(addr);
> +                     mb();
> +             }
> +
> +             *addr = value;
> +
> +             /* Writes to the same cacheline are serialised by the CPU
> +              * (including clflush). On the write path, we only require
> +              * that it hits memory in an orderly fashion and place
> +              * mb barriers at the start and end of the relocation phase
> +              * to ensure ordering of clflush wrt to the system.
> +              */
> +             if (flushes & CLFLUSH_AFTER)
> +                     clflushopt(addr);
> +     } else
> +             *addr = value;
>  }
>  
>  static int
> -relocate_entry_clflush(struct drm_i915_gem_object *obj,
> -                    struct drm_i915_gem_relocation_entry *reloc,
> -                    struct reloc_cache *cache,
> -                    uint64_t target_offset)
> +relocate_entry(struct drm_i915_gem_object *obj,
> +            const struct drm_i915_gem_relocation_entry *reloc,
> +            struct reloc_cache *cache,
> +            uint64_t target_offset)
>  {
> -     struct drm_device *dev = obj->base.dev;
> -     uint32_t page_offset = offset_in_page(reloc->offset);
> -     uint64_t delta = relocation_target(reloc, target_offset);
> -     char *vaddr;
> -     int ret;
> +     uint64_t offset = reloc->offset;
> +     bool wide = cache->use_64bit_reloc;
> +     void *vaddr;
>  
> -     ret = i915_gem_object_set_to_gtt_domain(obj, true);
> -     if (ret)
> -             return ret;
> +     target_offset = relocation_target(reloc, target_offset);
> +repeat:
> +     vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT);
> +     if (IS_ERR(vaddr))
> +             return PTR_ERR(vaddr);
>  
> -     vaddr = reloc_kmap(obj, cache, reloc->offset >> PAGE_SHIFT);
> -     clflush_write32(vaddr + page_offset, lower_32_bits(delta));
> +     clflush_write32(vaddr + offset_in_page(offset),
> +                     lower_32_bits(target_offset),
> +                     cache->vaddr);
>  
> -     if (INTEL_GEN(dev) >= 8) {
> -             page_offset += sizeof(uint32_t);
> -             if (page_offset == PAGE_SIZE) {
> -                     vaddr = reloc_kmap(obj, cache, cache->page + 1);
> -                     page_offset = 0;
> -             }
> -             clflush_write32(vaddr + page_offset, upper_32_bits(delta));
> +     if (wide) {
> +             offset += sizeof(uint32_t);
> +             target_offset >>= 32;
> +             wide = false;
> +             goto repeat;
>       }
>  
>       return 0;
> @@ -557,7 +579,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
>  
>       /* Check that the relocation address is valid... */
>       if (unlikely(reloc->offset >
> -             obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
> +                  obj->base.size - (cache->use_64bit_reloc ? 8 : 4))) {
>               DRM_DEBUG("Relocation beyond object bounds: "
>                         "obj %p target %d offset %d size %d.\n",
>                         obj, reloc->target_handle,
> @@ -577,23 +599,12 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
>       if (pagefault_disabled() && !object_is_idle(obj))
>               return -EFAULT;
>  
> -     if (use_cpu_reloc(obj))
> -             ret = relocate_entry_cpu(obj, reloc, cache, target_offset);
> -     else if (obj->map_and_fenceable)
> -             ret = relocate_entry_gtt(obj, reloc, cache, target_offset);
> -     else if (static_cpu_has(X86_FEATURE_CLFLUSH))
> -             ret = relocate_entry_clflush(obj, reloc, cache, target_offset);
> -     else {
> -             WARN_ONCE(1, "Impossible case in relocation handling\n");
> -             ret = -ENODEV;
> -     }
> -
> +     ret = relocate_entry(obj, reloc, cache, target_offset);
>       if (ret)
>               return ret;
>  
>       /* and update the user's relocation entry */
>       reloc->presumed_offset = target_offset;
> -
>       return 0;
>  }
>  
> @@ -609,7 +620,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
>       int remain, ret = 0;
>  
>       user_relocs = u64_to_user_ptr(entry->relocs_ptr);
> -     reloc_cache_init(&cache);
> +     reloc_cache_init(&cache, eb->i915);
>  
>       remain = entry->relocation_count;
>       while (remain) {
> @@ -658,7 +669,7 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
>       struct reloc_cache cache;
>       int i, ret = 0;
>  
> -     reloc_cache_init(&cache);
> +     reloc_cache_init(&cache, eb->i915);
>       for (i = 0; i < entry->relocation_count; i++) {
>               ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
>               if (ret)
> @@ -1073,8 +1084,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>       if (flush_chipset)
>               i915_gem_chipset_flush(req->engine->i915);
>  
> -     if (flush_domains & I915_GEM_DOMAIN_GTT)
> -             wmb();
> +     /* Make sure (untracked) CPU relocs/parsing are flushed */
> +     wmb();
>  
>       /* Unconditionally invalidate gpu caches and TLBs. */
>       return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
> @@ -1606,7 +1617,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  
>       memset(&params_master, 0x00, sizeof(params_master));
>  
> -     eb = eb_create(args);
> +     eb = eb_create(dev_priv, args);
>       if (eb == NULL) {
>               i915_gem_context_put(ctx);
>               mutex_unlock(&dev->struct_mutex);
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation