Re: [Intel-gfx] [PATCH 06/38] drm/i915: Pad GTT views of exec objects up to user specified size

2016-06-08 Thread Chris Wilson
On Wed, Jun 08, 2016 at 11:41:01AM +0200, Daniel Vetter wrote:
> On Fri, Jun 03, 2016 at 05:55:21PM +0100, Chris Wilson wrote:
> > Our GPUs impose certain requirements upon buffers that depend upon how
> > exactly they are used. Typically this is expressed as that they require
> > a larger surface than would be naively computed by pitch * height.
> > Normally such requirements are hidden away in the userspace driver, but
> > when we accept pointers from strangers and later impose extra conditions
> > on them, the original client allocator has no idea about the
> > monstrosities in the GPU and we require the userspace driver to inform
> > the kernel how many padding pages are required beyond the client
> > allocation.
> > 
> > v2: Long time, no see
> > v3: Try an anonymous union for uapi struct compatability
> > 
> > Signed-off-by: Chris Wilson 
> > Cc: Tvrtko Ursulin 
> > Reviewed-by: Tvrtko Ursulin 
> 
> Hm, where's the userspace for this? Commit message should elaborate imo a
> bit more on what's going on here ...

ddx, igt both posted.

For dri3 the client passes us a buffer with one size that may not match
all uses (but is sufficient for its intended). At the moment we reject
it, but I could allow it through and pad the missing pages in the GTT
instead (ala lazy fencing). 

The earliest motivation for this was for OpenCL wrapping blobs of
userspace and trying to manage the similar problem that the client
memory may not match the actual requirements of the GPU.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 06/38] drm/i915: Pad GTT views of exec objects up to user specified size

2016-06-08 Thread Daniel Vetter
On Fri, Jun 03, 2016 at 05:55:21PM +0100, Chris Wilson wrote:
> Our GPUs impose certain requirements upon buffers that depend upon how
> exactly they are used. Typically this is expressed as that they require
> a larger surface than would be naively computed by pitch * height.
> Normally such requirements are hidden away in the userspace driver, but
> when we accept pointers from strangers and later impose extra conditions
> on them, the original client allocator has no idea about the
> monstrosities in the GPU and we require the userspace driver to inform
> the kernel how many padding pages are required beyond the client
> allocation.
> 
> v2: Long time, no see
> v3: Try an anonymous union for uapi struct compatability
> 
> Signed-off-by: Chris Wilson 
> Cc: Tvrtko Ursulin 
> Reviewed-by: Tvrtko Ursulin 

Hm, where's the userspace for this? Commit message should elaborate imo a
bit more on what's going on here ...
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_drv.h|  6 ++-
>  drivers/gpu/drm/i915/i915_gem.c| 82 
> +++---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +-
>  include/uapi/drm/i915_drm.h|  8 ++-
>  4 files changed, 65 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a065325580d8..9520adba33f6 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2945,11 +2945,13 @@ void i915_gem_free_object(struct drm_gem_object *obj);
>  int __must_check
>  i915_gem_object_pin(struct drm_i915_gem_object *obj,
>   struct i915_address_space *vm,
> + uint64_t size,
>   uint32_t alignment,
>   uint64_t flags);
>  int __must_check
>  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>const struct i915_ggtt_view *view,
> +  uint64_t size,
>uint32_t alignment,
>uint64_t flags);
>  
> @@ -3209,8 +3211,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
>   struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>   struct i915_ggtt *ggtt = _priv->ggtt;
>  
> - return i915_gem_object_pin(obj, >base,
> -alignment, flags | PIN_GLOBAL);
> + return i915_gem_object_pin(obj, >base, 0, alignment,
> +flags | PIN_GLOBAL);
>  }
>  
>  void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 19b8d2ea7698..0f0101300b2b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1438,7 +1438,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct 
> vm_fault *vmf)
>   }
>  
>   /* Now pin it into the GTT if needed */
> - ret = i915_gem_object_ggtt_pin(obj, , 0, PIN_MAPPABLE);
> + ret = i915_gem_object_ggtt_pin(obj, , 0, 0, PIN_MAPPABLE);
>   if (ret)
>   goto unlock;
>  
> @@ -2678,21 +2678,20 @@ static struct i915_vma *
>  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  struct i915_address_space *vm,
>  const struct i915_ggtt_view *ggtt_view,
> +uint64_t size,
>  unsigned alignment,
>  uint64_t flags)
>  {
>   struct drm_device *dev = obj->base.dev;
>   struct drm_i915_private *dev_priv = to_i915(dev);
> - struct i915_ggtt *ggtt = _priv->ggtt;
> - u32 fence_alignment, unfenced_alignment;
> - u32 search_flag, alloc_flag;
>   u64 start, end;
> - u64 size, fence_size;
> + u32 search_flag, alloc_flag;
>   struct i915_vma *vma;
>   int ret;
>  
>   if (i915_is_ggtt(vm)) {
> - u32 view_size;
> + u32 fence_size, fence_alignment, unfenced_alignment;
> + u64 view_size;
>  
>   if (WARN_ON(!ggtt_view))
>   return ERR_PTR(-EINVAL);
> @@ -2710,48 +2709,39 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
> *obj,
>   view_size,
>   
> obj->tiling_mode,
>   false);
> - size = flags & PIN_MAPPABLE ? fence_size : view_size;
> + size = max(size, view_size);
> + if (flags & PIN_MAPPABLE)
> + size = max_t(u64, size, fence_size);
> +
> + if (alignment == 0)
> + alignment = flags & PIN_MAPPABLE ? fence_alignment :
> + unfenced_alignment;
> + if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
> +   

[Intel-gfx] [PATCH 06/38] drm/i915: Pad GTT views of exec objects up to user specified size

2016-06-03 Thread Chris Wilson
Our GPUs impose certain requirements upon buffers that depend upon how
exactly they are used. Typically this is expressed as that they require
a larger surface than would be naively computed by pitch * height.
Normally such requirements are hidden away in the userspace driver, but
when we accept pointers from strangers and later impose extra conditions
on them, the original client allocator has no idea about the
monstrosities in the GPU and we require the userspace driver to inform
the kernel how many padding pages are required beyond the client
allocation.

v2: Long time, no see
v3: Try an anonymous union for uapi struct compatability

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
Reviewed-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_drv.h|  6 ++-
 drivers/gpu/drm/i915/i915_gem.c| 82 +++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +-
 include/uapi/drm/i915_drm.h|  8 ++-
 4 files changed, 65 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a065325580d8..9520adba33f6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2945,11 +2945,13 @@ void i915_gem_free_object(struct drm_gem_object *obj);
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
+   uint64_t size,
uint32_t alignment,
uint64_t flags);
 int __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 const struct i915_ggtt_view *view,
+uint64_t size,
 uint32_t alignment,
 uint64_t flags);
 
@@ -3209,8 +3211,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_ggtt *ggtt = _priv->ggtt;
 
-   return i915_gem_object_pin(obj, >base,
-  alignment, flags | PIN_GLOBAL);
+   return i915_gem_object_pin(obj, >base, 0, alignment,
+  flags | PIN_GLOBAL);
 }
 
 void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 19b8d2ea7698..0f0101300b2b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1438,7 +1438,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct 
vm_fault *vmf)
}
 
/* Now pin it into the GTT if needed */
-   ret = i915_gem_object_ggtt_pin(obj, , 0, PIN_MAPPABLE);
+   ret = i915_gem_object_ggtt_pin(obj, , 0, 0, PIN_MAPPABLE);
if (ret)
goto unlock;
 
@@ -2678,21 +2678,20 @@ static struct i915_vma *
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
   struct i915_address_space *vm,
   const struct i915_ggtt_view *ggtt_view,
+  uint64_t size,
   unsigned alignment,
   uint64_t flags)
 {
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
-   struct i915_ggtt *ggtt = _priv->ggtt;
-   u32 fence_alignment, unfenced_alignment;
-   u32 search_flag, alloc_flag;
u64 start, end;
-   u64 size, fence_size;
+   u32 search_flag, alloc_flag;
struct i915_vma *vma;
int ret;
 
if (i915_is_ggtt(vm)) {
-   u32 view_size;
+   u32 fence_size, fence_alignment, unfenced_alignment;
+   u64 view_size;
 
if (WARN_ON(!ggtt_view))
return ERR_PTR(-EINVAL);
@@ -2710,48 +2709,39 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
view_size,

obj->tiling_mode,
false);
-   size = flags & PIN_MAPPABLE ? fence_size : view_size;
+   size = max(size, view_size);
+   if (flags & PIN_MAPPABLE)
+   size = max_t(u64, size, fence_size);
+
+   if (alignment == 0)
+   alignment = flags & PIN_MAPPABLE ? fence_alignment :
+   unfenced_alignment;
+   if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
+   DRM_DEBUG("Invalid object (view type=%u) alignment 
requested %u\n",
+ ggtt_view ? ggtt_view->type : 0,
+ alignment);
+   return ERR_PTR(-EINVAL);
+   }
} else {
-