On Thu, Oct 01, 2015 at 12:57:10PM +0100, Chris Wilson wrote:
> With a little complexity to handle cmds straddling page boundaries, we
> can completely avoiding having to vmap the batch and the shadow batch
> objects whilst running the command parser.
> 
> On ivb i7-3720MQ:
> 
> x11perf -dot before 54.3M, after 53.2M (max 203M)
> glxgears before 7110 fps, after 7300 fps (max 7860 fps)
> 
> Before:
> Time to blt 16384 bytes x      1:      12.400µs, 1.2GiB/s
> Time to blt 16384 bytes x   4096:       3.055µs, 5.0GiB/s
> 
> After:
> Time to blt 16384 bytes x      1:       8.600µs, 1.8GiB/s
> Time to blt 16384 bytes x   4096:       2.456µs, 6.2GiB/s
> 
> Removing the vmap is mostly a win, except we lose in a few cases where
> the batch size is greater than a page due to the extra complexity (loss
> of a simple cache efficient large copy, and boundary handling).
> 
> v2: Reorder so that we do check oacontrol remaining set at end-of-batch
> 
> Signed-off-by: Chris Wilson <[email protected]>
> ---
>  drivers/gpu/drm/i915/i915_cmd_parser.c | 298 
> ++++++++++++++++-----------------
>  1 file changed, 146 insertions(+), 152 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
> b/drivers/gpu/drm/i915/i915_cmd_parser.c
> index 237ff6884a22..91e4baa0f2b8 100644
> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> @@ -852,100 +852,6 @@ find_reg(const struct drm_i915_reg_descriptor *table,
>       return NULL;
>  }
>  
> -static u32 *vmap_batch(struct drm_i915_gem_object *obj,
> -                    unsigned start, unsigned len)
> -{
> -     int i;
> -     void *addr = NULL;
> -     struct sg_page_iter sg_iter;
> -     int first_page = start >> PAGE_SHIFT;
> -     int last_page = (len + start + 4095) >> PAGE_SHIFT;
> -     int npages = last_page - first_page;
> -     struct page **pages;
> -
> -     pages = drm_malloc_ab(npages, sizeof(*pages));
> -     if (pages == NULL) {
> -             DRM_DEBUG_DRIVER("Failed to get space for pages\n");
> -             goto finish;
> -     }
> -
> -     i = 0;
> -     for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 
> first_page) {
> -             pages[i++] = sg_page_iter_page(&sg_iter);
> -             if (i == npages)
> -                     break;
> -     }
> -
> -     addr = vmap(pages, i, 0, PAGE_KERNEL);
> -     if (addr == NULL) {
> -             DRM_DEBUG_DRIVER("Failed to vmap pages\n");
> -             goto finish;
> -     }
> -
> -finish:
> -     if (pages)
> -             drm_free_large(pages);
> -     return (u32*)addr;
> -}
> -
> -/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
> -static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
> -                    struct drm_i915_gem_object *src_obj,
> -                    u32 batch_start_offset,
> -                    u32 batch_len)
> -{
> -     int needs_clflush = 0;
> -     void *src_base, *src;
> -     void *dst = NULL;
> -     int ret;
> -
> -     if (batch_len > dest_obj->base.size ||
> -         batch_len + batch_start_offset > src_obj->base.size)
> -             return ERR_PTR(-E2BIG);
> -
> -     if (WARN_ON(dest_obj->pages_pin_count == 0))
> -             return ERR_PTR(-ENODEV);
> -
> -     ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
> -     if (ret) {
> -             DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n");
> -             return ERR_PTR(ret);
> -     }
> -
> -     src_base = vmap_batch(src_obj, batch_start_offset, batch_len);
> -     if (!src_base) {
> -             DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
> -             ret = -ENOMEM;
> -             goto unpin_src;
> -     }
> -
> -     ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
> -     if (ret) {
> -             DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n");
> -             goto unmap_src;
> -     }
> -
> -     dst = vmap_batch(dest_obj, 0, batch_len);
> -     if (!dst) {
> -             DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
> -             ret = -ENOMEM;
> -             goto unmap_src;
> -     }
> -
> -     src = src_base + offset_in_page(batch_start_offset);
> -     if (needs_clflush)
> -             drm_clflush_virt_range(src, batch_len);
> -
> -     memcpy(dst, src, batch_len);
> -
> -unmap_src:
> -     vunmap(src_base);
> -unpin_src:
> -     i915_gem_object_unpin_pages(src_obj);
> -
> -     return ret ? ERR_PTR(ret) : dst;
> -}
> -
>  /**
>   * i915_needs_cmd_parser() - should a given ring use software command 
> parsing?
>   * @ring: the ring in question
> @@ -1112,16 +1018,35 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
>                   u32 batch_len,
>                   bool is_master)
>  {
> -     u32 *cmd, *batch_base, *batch_end;
> +     u32 tmp[128];
> +     const struct drm_i915_cmd_descriptor *desc;
> +     unsigned dst_iter, src_iter;
> +     int needs_clflush = 0;
> +     struct get_page rewind;
> +     void *src, *dst;
> +     unsigned in, out;
> +     u32 *buf, partial = 0, length;
>       struct drm_i915_cmd_descriptor default_desc = { 0 };
>       bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
>       int ret = 0;
>  
> -     batch_base = copy_batch(shadow_batch_obj, batch_obj,
> -                             batch_start_offset, batch_len);
> -     if (IS_ERR(batch_base)) {
> -             DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
> -             return PTR_ERR(batch_base);
> +     if (batch_len > shadow_batch_obj->base.size ||
> +         batch_len + batch_start_offset > batch_obj->base.size)
> +             return -E2BIG;
> +
> +     if (WARN_ON(shadow_batch_obj->pages_pin_count == 0))
> +             return -ENODEV;
> +
> +     ret = i915_gem_obj_prepare_shmem_read(batch_obj, &needs_clflush);
> +     if (ret) {
> +             DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n");
> +             return ret;
> +     }
> +
> +     ret = i915_gem_object_set_to_cpu_domain(shadow_batch_obj, true);
> +     if (ret) {
> +             DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n");
> +             goto unpin;
>       }
>  
>       /*
> @@ -1129,69 +1054,138 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
>        * large or larger and copy_batch() will write MI_NOPs to the extra
>        * space. Parsing should be faster in some cases this way.
>        */
> -     batch_end = batch_base + (batch_len / sizeof(*batch_end));
> +     ret = -EINVAL;
> +     rewind = batch_obj->get_page;
> +
> +     dst_iter = 0;
> +     dst = kmap_atomic(i915_gem_object_get_page(shadow_batch_obj, dst_iter));
> +
> +     in = offset_in_page(batch_start_offset);
> +     out = 0;
> +     for (src_iter = batch_start_offset >> PAGE_SHIFT;
> +          src_iter < batch_obj->base.size >> PAGE_SHIFT;
> +          src_iter++) {
> +             u32 this, i, j, k;
> +             u32 *cmd, *page_end, *batch_end;
> +
> +             this = batch_len;
> +             if (this > PAGE_SIZE - in)
> +                     this = PAGE_SIZE - in;
> +
> +             src = kmap_atomic(i915_gem_object_get_page(batch_obj, 
> src_iter));
> +             if (needs_clflush)
> +                     drm_clflush_virt_range(src + in, this);
> +
> +             i = this;
> +             j = in;
> +             do {
> +                     /* We keep dst around so that we do not blow
> +                      * the CPU caches immediately after the copy (due
> +                      * to the kunmap_atomic(dst) doing a TLB flush.
> +                      */
> +                     if (out == PAGE_SIZE) {
> +                             kunmap_atomic(dst);
> +                             dst = 
> kmap_atomic(i915_gem_object_get_page(shadow_batch_obj, ++dst_iter));
> +                             out = 0;
> +                     }
>  
> -     cmd = batch_base;
> -     while (cmd < batch_end) {
> -             const struct drm_i915_cmd_descriptor *desc;
> -             u32 length;
> +                     k = i;
> +                     if (k > PAGE_SIZE - out)
> +                             k = PAGE_SIZE - out;
> +                     if (k == PAGE_SIZE)
> +                             copy_page(dst, src);
> +                     else
> +                             memcpy(dst + out, src + j, k);
> +
> +                     out += k;
> +                     j += k;
> +                     i -= k;
> +             } while (i);
> +
> +             cmd = src + in;

So you're now checking the src batch? What prevents userspace from
overwriting it with eg. NOPS between the time you copied it and the
time you check it?

> +             page_end = (void *)cmd + this;
> +             batch_end = (void *)cmd + batch_len;
> +
> +             if (partial) {
> +                     memcpy(tmp + partial, cmd, (length - partial)*4);
> +                     cmd -= partial;
> +                     partial = 0;
> +                     buf = tmp;
> +                     goto check;
> +             }
>  
> -             if (*cmd == MI_BATCH_BUFFER_END)
> -                     break;
> +             do {
> +                     if (*cmd == MI_BATCH_BUFFER_END) {
> +                             if (oacontrol_set) {
> +                                     DRM_DEBUG_DRIVER("CMD: batch set 
> OACONTROL but did not clear it\n");
> +                                     ret = -EINVAL;
> +                             } else
> +                                     ret = 0;
> +                             goto unmap;
> +                     }
>  
> -             desc = find_cmd(ring, *cmd, &default_desc);
> -             if (!desc) {
> -                     DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
> -                                      *cmd);
> -                     ret = -EINVAL;
> -                     break;
> -             }
> +                     desc = find_cmd(ring, *cmd, &default_desc);
> +                     if (!desc) {
> +                             DRM_DEBUG_DRIVER("CMD: Unrecognized command: 
> 0x%08X\n",
> +                                              *cmd);
> +                             goto unmap;
> +                     }
>  
> -             /*
> -              * If the batch buffer contains a chained batch, return an
> -              * error that tells the caller to abort and dispatch the
> -              * workload as a non-secure batch.
> -              */
> -             if (desc->cmd.value == MI_BATCH_BUFFER_START) {
> -                     ret = -EACCES;
> -                     break;
> -             }
> +                     /*
> +                      * If the batch buffer contains a chained batch, return 
> an
> +                      * error that tells the caller to abort and dispatch the
> +                      * workload as a non-secure batch.
> +                      */
> +                     if (desc->cmd.value == MI_BATCH_BUFFER_START) {
> +                             ret = -EACCES;
> +                             goto unmap;
> +                     }
>  
> -             if (desc->flags & CMD_DESC_FIXED)
> -                     length = desc->length.fixed;
> -             else
> -                     length = ((*cmd & desc->length.mask) + LENGTH_BIAS);
> -
> -             if ((batch_end - cmd) < length) {
> -                     DRM_DEBUG_DRIVER("CMD: Command length exceeds batch 
> length: 0x%08X length=%u batchlen=%td\n",
> -                                      *cmd,
> -                                      length,
> -                                      batch_end - cmd);
> -                     ret = -EINVAL;
> -                     break;
> -             }
> +                     if (desc->flags & CMD_DESC_FIXED)
> +                             length = desc->length.fixed;
> +                     else
> +                             length = ((*cmd & desc->length.mask) + 
> LENGTH_BIAS);
>  
> -             if (!check_cmd(ring, desc, cmd, length, is_master,
> -                            &oacontrol_set)) {
> -                     ret = -EINVAL;
> -                     break;
> -             }
> +                     if (cmd + length > page_end) {
> +                             if (length + cmd > batch_end) {
> +                                     DRM_DEBUG_DRIVER("CMD: Command length 
> exceeds batch length: 0x%08X length=%u batchlen=%td\n",
> +                                                      *cmd, length, 
> batch_end - cmd);
> +                                     goto unmap;
> +                             }
>  
> -             cmd += length;
> -     }
> +                             if (WARN_ON(length > sizeof(tmp)/4)) {
> +                                     ret = -ENODEV;
> +                                     goto unmap;
> +                             }
>  
> -     if (oacontrol_set) {
> -             DRM_DEBUG_DRIVER("CMD: batch set OACONTROL but did not clear 
> it\n");
> -             ret = -EINVAL;
> -     }
> +                             partial = page_end - cmd;
> +                             memcpy(tmp, cmd, partial*4);
> +                             break;
> +                     }
>  
> -     if (cmd >= batch_end) {
> -             DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE 
> cmd!\n");
> -             ret = -EINVAL;
> -     }
> +                     buf = cmd;
> +check:
> +                     if (!check_cmd(ring, desc, buf, length, is_master,
> +                                    &oacontrol_set))
> +                             goto unmap;
>  
> -     vunmap(batch_base);
> +                     cmd += length;
> +             } while (cmd < page_end);
> +
> +             batch_len -= this;
> +             if (batch_len == 0)
> +                     break;
> +
> +             kunmap_atomic(src);
> +             in = 0;
> +     }
>  
> +unmap:
> +     kunmap_atomic(src);
> +     kunmap_atomic(dst);
> +     batch_obj->get_page = rewind;
> +unpin:
> +     i915_gem_object_unpin_pages(batch_obj);
>       return ret;
>  }
>  
> -- 
> 2.6.0
> 
> _______________________________________________
> Intel-gfx mailing list
> [email protected]
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to