[PATCH] drm: Prevent NULL deref in drm_name_info()

2016-06-20 Thread Chris Wilson
If a driver does not have a parent, or never sets the unique name for
itself, then we may proceed to chase a NULL dereference through
debugfs/.../name.

Testcase: igt/vgem_basic/debugfs
Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/drm_info.c | 21 ++---
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c
index 0090d5987801..e2d2543d5bd0 100644
--- a/drivers/gpu/drm/drm_info.c
+++ b/drivers/gpu/drm/drm_info.c
@@ -51,17 +51,16 @@ int drm_name_info(struct seq_file *m, void *data)
struct drm_minor *minor = node->minor;
struct drm_device *dev = minor->dev;
struct drm_master *master = minor->master;
-   if (!master)
-   return 0;
-
-   if (master->unique) {
-   seq_printf(m, "%s %s %s\n",
-  dev->driver->name,
-  dev_name(dev->dev), master->unique);
-   } else {
-   seq_printf(m, "%s %s\n",
-  dev->driver->name, dev_name(dev->dev));
-   }
+
+   seq_printf(m, "%s", dev->driver->name);
+   if (dev->dev)
+   seq_printf(m, " dev=%s", dev_name(dev->dev));
+   if (master && master->unique)
+   seq_printf(m, " master=%s", master->unique);
+   if (dev->unique)
+   seq_printf(m, " unique=%s", dev->unique);
+   seq_printf(m, "\n");
+
return 0;
 }

-- 
2.8.1



[PATCH v3 1/3] dmabuf

2016-06-20 Thread Chris Wilson
---
 drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 49e7ff9840bd..c3f177231f6a 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -332,6 +332,8 @@ struct drm_gem_object *i915_gem_prime_import(struct 
drm_device *dev,
drm_gem_private_object_init(dev, >base, dma_buf->size);
i915_gem_object_init(obj, _gem_object_dmabuf_ops);
obj->base.import_attach = attach;
+   obj->base.read_domains = I915_GEM_DOMAIN_GTT;
+   obj->base.write_domain = 0;

return >base;

-- 
2.8.1



[PATCH v3 2/3] drm: Prevent NULL deref in drm_name_info()

2016-06-20 Thread Chris Wilson
If a driver does not have a parent, or never sets the unique name for
itself, then we may proceed to chase a NULL dereference through
debugfs/.../name.

Testcase: igt/vgem_basic/debugfs
Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/drm_info.c | 21 ++---
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c
index 0090d5987801..e2d2543d5bd0 100644
--- a/drivers/gpu/drm/drm_info.c
+++ b/drivers/gpu/drm/drm_info.c
@@ -51,17 +51,16 @@ int drm_name_info(struct seq_file *m, void *data)
struct drm_minor *minor = node->minor;
struct drm_device *dev = minor->dev;
struct drm_master *master = minor->master;
-   if (!master)
-   return 0;
-
-   if (master->unique) {
-   seq_printf(m, "%s %s %s\n",
-  dev->driver->name,
-  dev_name(dev->dev), master->unique);
-   } else {
-   seq_printf(m, "%s %s\n",
-  dev->driver->name, dev_name(dev->dev));
-   }
+
+   seq_printf(m, "%s", dev->driver->name);
+   if (dev->dev)
+   seq_printf(m, " dev=%s", dev_name(dev->dev));
+   if (master && master->unique)
+   seq_printf(m, " master=%s", master->unique);
+   if (dev->unique)
+   seq_printf(m, " unique=%s", dev->unique);
+   seq_printf(m, "\n");
+
return 0;
 }

-- 
2.8.1



[PATCH v3 3/3] meh

2016-06-20 Thread Chris Wilson
---
 drivers/gpu/drm/i915/i915_debugfs.c| 162 +---
 drivers/gpu/drm/i915/i915_drv.h|  84 ++---
 drivers/gpu/drm/i915/i915_gem.c| 144 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   3 +-
 drivers/gpu/drm/i915/i915_gem_request.c| 287 +++--
 drivers/gpu/drm/i915/i915_gem_request.h|   5 +
 drivers/gpu/drm/i915/i915_gpu_error.c  |  60 +-
 drivers/gpu/drm/i915/intel_lrc.c   |   4 +-
 drivers/gpu/drm/i915/intel_pm.c|   3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c|   4 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h|  14 +-
 11 files changed, 251 insertions(+), 519 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 6454e61f8ac3..3fe4f73916b5 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -366,28 +366,6 @@ static int per_file_stats(int id, void *ptr, void *data)
   stats.unbound); \
 } while (0)

-static void print_batch_pool_stats(struct seq_file *m,
-  struct drm_i915_private *dev_priv)
-{
-   struct drm_i915_gem_object *obj;
-   struct file_stats stats;
-   struct intel_engine_cs *engine;
-   int j;
-
-   memset(, 0, sizeof(stats));
-
-   for_each_engine(engine, dev_priv) {
-   for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) 
{
-   list_for_each_entry(obj,
-   >batch_pool.cache_list[j],
-   batch_pool_link)
-   per_file_stats(0, obj, );
-   }
-   }
-
-   print_file_stats(m, "[k]batch pool", stats);
-}
-
 static int per_file_ctx_stats(int id, void *ptr, void *data)
 {
struct i915_gem_context *ctx = ptr;
@@ -545,7 +523,6 @@ static int i915_gem_object_info(struct seq_file *m, void* 
data)
   ggtt->base.total, ggtt->mappable_end - ggtt->base.start);

seq_putc(m, '\n');
-   print_batch_pool_stats(m, dev_priv);
mutex_unlock(>struct_mutex);

mutex_lock(>filelist_mutex);
@@ -655,10 +632,9 @@ static int i915_gem_pageflip_info(struct seq_file *m, void 
*data)
if (work->flip_queued_req) {
struct intel_engine_cs *engine = 
i915_gem_request_get_engine(work->flip_queued_req);

-   seq_printf(m, "Flip queued on %s at seqno %x, 
next seqno %x [current breadcrumb %x], completed? %d\n",
+   seq_printf(m, "Flip queued on %s at seqno %x, 
current breadcrumb %x, completed? %d\n",
   engine->name,
   
i915_gem_request_get_seqno(work->flip_queued_req),
-  dev_priv->next_seqno,
   intel_engine_get_seqno(engine),
   
i915_gem_request_completed(work->flip_queued_req));
} else
@@ -688,99 +664,6 @@ static int i915_gem_pageflip_info(struct seq_file *m, void 
*data)
return 0;
 }

-static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
-{
-   struct drm_info_node *node = m->private;
-   struct drm_device *dev = node->minor->dev;
-   struct drm_i915_private *dev_priv = dev->dev_private;
-   struct drm_i915_gem_object *obj;
-   struct intel_engine_cs *engine;
-   int total = 0;
-   int ret, j;
-
-   ret = mutex_lock_interruptible(>struct_mutex);
-   if (ret)
-   return ret;
-
-   for_each_engine(engine, dev_priv) {
-   for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) 
{
-   int count;
-
-   count = 0;
-   list_for_each_entry(obj,
-   >batch_pool.cache_list[j],
-   batch_pool_link)
-   count++;
-   seq_printf(m, "%s cache[%d]: %d objects\n",
-  engine->name, j, count);
-
-   list_for_each_entry(obj,
-   >batch_pool.cache_list[j],
-   batch_pool_link) {
-   seq_puts(m, "   ");
-   describe_obj(m, obj);
-   seq_putc(m, '\n');
-   }
-
-   total += count;
-   }
-   }
-
-   seq_printf(m, "total: %d\n", total);
-
-   mutex_unlock(>struct_mutex);
-
-   return 0;
-}
-
-static int i915_gem_request_info(struct seq_file *m, void *data)
-{
-   struct drm_info_node *node = m->private;
-   struct drm_device *dev = 

[PATCH v3 1/3] dmabuf

2016-06-20 Thread Chris Wilson
On Mon, Jun 20, 2016 at 09:04:32PM +0100, Chris Wilson wrote:

Eek, apologies. This was meant to git-send-email -v3 but mangled into
-3 instead.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH v3] drm/vgem: Enable dmabuf interface for export

2016-06-20 Thread Chris Wilson
Enable the standard GEM dma-buf interface provided by the DRM core, but
only for exporting the VGEM object. This allows passing around the VGEM
objects created from the dumb interface and using them as sources
elsewhere. Creating a VGEM object for a foreign handle is not supported.

v2: With additional completeness.
v3: Need to clear the CPU cache upon exporting the dma-addresses.

Testcase: igt/vgem_basic/dmabuf-*
Testcase: igt/prime_vgem
Signed-off-by: Chris Wilson 
Cc: Sean Paul 
Cc: Zach Reizner 
---
 drivers/gpu/drm/vgem/vgem_drv.c | 112 +++-
 1 file changed, 111 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index e1a697d0662f..db48e837992d 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -193,14 +193,124 @@ static const struct file_operations vgem_driver_fops = {
.release= drm_release,
 };

+static void __put_pages(struct page **pages, long n_pages)
+{
+   while (n_pages--)
+   put_page(pages[n_pages]);
+   drm_free_large(pages);
+}
+
+static int vgem_prime_pin(struct drm_gem_object *obj)
+{
+   long n_pages = obj->size >> PAGE_SHIFT;
+   struct page **pages;
+
+   /* Flush the object from the CPU cache so that importers
+* can rely on coherent indirect access via access the
+* exported dma-address.
+*/
+   pages = drm_gem_get_pages(obj);
+   if (IS_ERR(pages))
+   return PTR_ERR(pages);
+
+   drm_clflush_pages(pages, n_pages);
+   __put_pages(pages, n_pages);
+
+   return 0;
+}
+
+static struct sg_table *vgem_prime_get_sg_table(struct drm_gem_object *obj)
+{
+   long n_pages = obj->size >> PAGE_SHIFT;
+   struct sg_table *st;
+   struct page **pages;
+   int ret;
+
+   st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+   if (st == NULL)
+   return ERR_PTR(-ENOMEM);
+
+   pages = drm_gem_get_pages(obj);
+   if (IS_ERR(pages)) {
+   ret = PTR_ERR(pages);
+   goto err;
+   }
+
+   ret = sg_alloc_table_from_pages(st, pages, n_pages,
+   0, obj->size, GFP_KERNEL);
+   __put_pages(pages, n_pages);
+   if (ret)
+   goto err;
+
+   return st;
+
+err:
+   kfree(st);
+   return ERR_PTR(ret);
+}
+
+static void *vgem_prime_vmap(struct drm_gem_object *obj)
+{
+   long n_pages = obj->size >> PAGE_SHIFT;
+   struct page **pages;
+   void *addr;
+
+   pages = drm_gem_get_pages(obj);
+   if (IS_ERR(pages))
+   return NULL;
+
+   addr = vmap(pages, n_pages, 0, pgprot_writecombine(PAGE_KERNEL_IO));
+   __put_pages(pages, n_pages);
+
+   return addr;
+}
+
+static void vgem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
+{
+   vunmap(vaddr);
+}
+
+static int vgem_prime_mmap(struct drm_gem_object *obj,
+  struct vm_area_struct *vma)
+{
+   int ret;
+
+   if (obj->size < vma->vm_end - vma->vm_start)
+   return -EINVAL;
+
+   if (!obj->filp)
+   return -ENODEV;
+
+   ret = obj->filp->f_op->mmap(obj->filp, vma);
+   if (ret)
+   return ret;
+
+   fput(vma->vm_file);
+   vma->vm_file = get_file(obj->filp);
+   vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+   vma->vm_page_prot = 
pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+
+   return 0;
+}
+
 static struct drm_driver vgem_driver = {
-   .driver_features= DRIVER_GEM,
+   .driver_features= DRIVER_GEM | DRIVER_PRIME,
.gem_free_object_unlocked   = vgem_gem_free_object,
.gem_vm_ops = _gem_vm_ops,
.ioctls = vgem_ioctls,
.fops   = _driver_fops,
+
.dumb_create= vgem_gem_dumb_create,
.dumb_map_offset= vgem_gem_dumb_map,
+
+   .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+   .gem_prime_pin = vgem_prime_pin,
+   .gem_prime_export = drm_gem_prime_export,
+   .gem_prime_get_sg_table = vgem_prime_get_sg_table,
+   .gem_prime_vmap = vgem_prime_vmap,
+   .gem_prime_vunmap = vgem_prime_vunmap,
+   .gem_prime_mmap = vgem_prime_mmap,
+
.name   = DRIVER_NAME,
.desc   = DRIVER_DESC,
.date   = DRIVER_DATE,
-- 
2.8.1



[PATCH] dma-buf: Wait on the reservation object when sync'ing before CPU access

2016-06-21 Thread Chris Wilson
Rendering operations to the dma-buf are tracked implicitly via the
reservation_object (dmabuf->resv). This is used to allow poll() to
wait upon outstanding rendering (or just query the current status of
rendering). The dma-buf sync ioctl allows userspace to prepare the
dma-buf for CPU access, which should include waiting upon rendering.
(Some drivers may need to do more work to ensure that the dma-buf mmap
is coherent as well as complete.)

Signed-off-by: Chris Wilson 
Cc: Sumit Semwal 
Cc: Daniel Vetter 
Cc: linux-media at vger.kernel.org
Cc: dri-devel at lists.freedesktop.org
Cc: linaro-mm-sig at lists.linaro.org
Cc: linux-kernel at vger.kernel.org
---

I'm wondering whether it makes sense just to always do the wait first.
It is one of the first operations every driver has to make. A driver
that wants to implement it differently (e.g. they can special case
native waits) will still require a wait on the reservation object to
finish external rendering.
-Chris

---
 drivers/dma-buf/dma-buf.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index ddaee60ae52a..123f14b8e882 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -586,6 +586,22 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment 
*attach,
 }
 EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);

+static int __dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
+ enum dma_data_direction direction)
+{
+   bool write = (direction == DMA_BIDIRECTIONAL ||
+ direction == DMA_TO_DEVICE);
+   struct reservation_object *resv = dma_buf->resv;
+   long ret;
+
+   /* Wait on any implicit rendering fences */
+   ret = reservation_object_wait_timeout_rcu(resv, write, true,
+ MAX_SCHEDULE_TIMEOUT);
+   if (ret < 0)
+   return ret;
+
+   return 0;
+}

 /**
  * dma_buf_begin_cpu_access - Must be called before accessing a dma_buf from 
the
@@ -607,6 +623,8 @@ int dma_buf_begin_cpu_access(struct dma_buf *dmabuf,

if (dmabuf->ops->begin_cpu_access)
ret = dmabuf->ops->begin_cpu_access(dmabuf, direction);
+   else
+   ret = __dma_buf_begin_cpu_access(dmabuf, direction);

return ret;
 }
-- 
2.8.1



drm_connector_register_all() stragglers

2016-06-21 Thread Chris Wilson
A couple of drivers open-coded drm_connector_register_all() and so were
missed in my last pass. Also Emil pointed out that atmel-hlcdc also had
a redundant drm_connector_unregister_all() we could remove.
-Chris



[PATCH 1/3] drm/atmel-hlcdc: Remove redundant call to drm_connector_unregister_all()

2016-06-21 Thread Chris Wilson
drm_connector_unregister_all() is not automatically called by
drm_dev_unregister() so we can drop the local call.

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Cc: Boris Brezillon 
Cc: David Airlie 
Cc: dri-devel at lists.freedesktop.org
---
 drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c 
b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
index 99c4af697c8a..d4a3d61b7b06 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
@@ -691,13 +691,6 @@ static void atmel_hlcdc_dc_unload(struct drm_device *dev)
destroy_workqueue(dc->wq);
 }

-static void atmel_hlcdc_dc_connector_unplug_all(struct drm_device *dev)
-{
-   mutex_lock(>mode_config.mutex);
-   drm_connector_unregister_all(dev);
-   mutex_unlock(>mode_config.mutex);
-}
-
 static void atmel_hlcdc_dc_lastclose(struct drm_device *dev)
 {
struct atmel_hlcdc_dc *dc = dev->dev_private;
@@ -830,7 +823,6 @@ static int atmel_hlcdc_dc_drm_remove(struct platform_device 
*pdev)
 {
struct drm_device *ddev = platform_get_drvdata(pdev);

-   atmel_hlcdc_dc_connector_unplug_all(ddev);
drm_dev_unregister(ddev);
atmel_hlcdc_dc_unload(ddev);
drm_dev_unref(ddev);
-- 
2.8.1



[PATCH 2/3] drm/vc4: Remove open-coded drm_connector_register_all()

2016-06-21 Thread Chris Wilson
drm_dev_register() will now register all known connectors, so we no
longer have to do so manually.

Signed-off-by: Chris Wilson 
Cc: Eric Anholt 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: dri-devel at lists.freedesktop.org
---
 drivers/gpu/drm/vc4/vc4_drv.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 98cf2c4d622e..1cd6b7b36241 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -211,22 +211,10 @@ static int vc4_drm_bind(struct device *dev)
if (ret < 0)
goto unbind_all;

-   /* Connector registration has to occur after DRM device
-* registration, because it creates sysfs entries based on the
-* DRM device.
-*/
-   list_for_each_entry(connector, >mode_config.connector_list, head) {
-   ret = drm_connector_register(connector);
-   if (ret)
-   goto unregister;
-   }
-
vc4_kms_load(drm);

return 0;

-unregister:
-   drm_dev_unregister(drm);
 unbind_all:
component_unbind_all(dev, drm);
 gem_destroy:
-- 
2.8.1



[PATCH 3/3] drm/sun4i: Remove open-coded drm_connector_register_all()

2016-06-21 Thread Chris Wilson
drm_dev_register() will now register all known connectors, so we no
longer have to do so manually.

Signed-off-by: Chris Wilson 
Cc: Maxime Ripard 
Cc: David Airlie 
Cc: Chen-Yu Tsai 
Cc: Daniel Vetter 
Cc: dri-devel at lists.freedesktop.org
Cc: linux-arm-kernel at lists.infradead.org
---
 drivers/gpu/drm/sun4i/sun4i_drv.c | 34 --
 1 file changed, 34 deletions(-)

diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c 
b/drivers/gpu/drm/sun4i/sun4i_drv.c
index 68e9d85085fb..59cd8b27ee02 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -24,34 +24,6 @@
 #include "sun4i_layer.h"
 #include "sun4i_tcon.h"

-static int sun4i_drv_connector_plug_all(struct drm_device *drm)
-{
-   struct drm_connector *connector, *failed;
-   int ret;
-
-   mutex_lock(>mode_config.mutex);
-   list_for_each_entry(connector, >mode_config.connector_list, head) {
-   ret = drm_connector_register(connector);
-   if (ret) {
-   failed = connector;
-   goto err;
-   }
-   }
-   mutex_unlock(>mode_config.mutex);
-   return 0;
-
-err:
-   list_for_each_entry(connector, >mode_config.connector_list, head) {
-   if (failed == connector)
-   break;
-
-   drm_connector_unregister(connector);
-   }
-   mutex_unlock(>mode_config.mutex);
-
-   return ret;
-}
-
 static int sun4i_drv_enable_vblank(struct drm_device *drm, unsigned int pipe)
 {
struct sun4i_drv *drv = drm->dev_private;
@@ -187,14 +159,8 @@ static int sun4i_drv_bind(struct device *dev)
if (ret)
goto free_drm;

-   ret = sun4i_drv_connector_plug_all(drm);
-   if (ret)
-   goto unregister_drm;
-
return 0;

-unregister_drm:
-   drm_dev_unregister(drm);
 free_drm:
drm_dev_unref(drm);
return ret;
-- 
2.8.1



[Intel-gfx] [PATCH 09/10] drm: Drop mode_config.mutex from get_resources ioctl

2016-06-21 Thread Chris Wilson
On Tue, Jun 21, 2016 at 11:10:34AM +0200, Daniel Vetter wrote:
> The only thing this protected is the connector_list, which is
> now protected differently.
> 
> Signed-off-by: Daniel Vetter 
> ---
>  drivers/gpu/drm/drm_crtc.c | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
> index d104717cab6b..0a678cfd9920 100644
> --- a/drivers/gpu/drm/drm_crtc.c
> +++ b/drivers/gpu/drm/drm_crtc.c
> @@ -1906,7 +1906,6 @@ int drm_mode_getresources(struct drm_device *dev, void 
> *data,
>  
>   /* mode_config.mutex protects the connector list against e.g. DP MST
>* connector hot-adding. CRTC/Plane lists are invariant. */
> - mutex_lock(>mode_config.mutex);

And the comment needs removal.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH 02/10] drm: Don't compute obj counts expensively in get_resources

2016-06-21 Thread Chris Wilson
On Tue, Jun 21, 2016 at 11:10:27AM +0200, Daniel Vetter wrote:
> And to avoid surprising userspace, make sure we don't copy more
> connectors than planned, and report the actual number of connectors
> copied. That way any racing hot-add/remove will be handled.

> @@ -1938,6 +1929,7 @@ int drm_mode_getresources(struct drm_device *dev, void 
> *data,
>   }
>   copied++;
>   }
> + connector_count = copied;

You forgot to actually make sure we don't copy more connectors than
planned.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH v2 1/3] drm: Add callbacks for late registering

2016-06-21 Thread Chris Wilson
On Tue, Jun 21, 2016 at 11:31:34AM +0200, Benjamin Gaignard wrote:
> Like what has been done for connectors add callbacks on encoder,
> crtc and plane to let driver do actions after drm device registration.
> 
> Correspondingly, add callbacks called before unregister drm device.
> 
> version 2:
> add drm_modeset_register_all() and drm_modeset_unregister_all()
> to centralize all calls
> 
> Signed-off-by: Benjamin Gaignard 
> ---
>  drivers/gpu/drm/drm_crtc.c | 122 
> +
>  drivers/gpu/drm/drm_drv.c  |   4 +-
>  include/drm/drm_crtc.h |  79 +
>  3 files changed, 203 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
> index e7c862b..b393feb 100644
> --- a/drivers/gpu/drm/drm_crtc.c
> +++ b/drivers/gpu/drm/drm_crtc.c
> @@ -608,6 +608,31 @@ static unsigned int drm_num_crtcs(struct drm_device *dev)
>   return num;
>  }
>  
> +static int drm_crtc_register_all(struct drm_device *dev)
> +{
> + struct drm_crtc *crtc;
> + int ret;
> +
> + drm_for_each_crtc(crtc, dev) {
> + if (crtc->funcs->late_register)
> + ret = crtc->funcs->late_register(crtc);
> + if (ret)

ret is uninitialised.

It is a good question (probably for another series) on what exactly to
do on registration failure? At the very least we need to unwind on the
error path, or we just ignore errors (other than a DRM_ERROR to
userspace explaining why one object is not available, but otherwise let
the driver load).

> +int drm_modeset_register_all(struct drm_device *dev)
> +{
> + int ret;
> +
> + ret = drm_plane_register_all(dev);
> + if (ret)
> + return ret;
> +
> + ret = drm_crtc_register_all(dev);
> + if  (ret)
> + return ret;
> +
> + ret = drm_encoder_register_all(dev);
> + if (ret)
> + return ret;
> +
> + ret = drm_connector_register_all(dev);

Might as well continue on with 

ret = <>
if (ret)
return ret;

for a consistent style. Along with the comment about how should we be
handling errors? If we report an error, everything up to that point
should be unwound.

> + return ret;
> +}
> +EXPORT_SYMBOL(drm_modeset_register_all);
> +
> +/**
> + * drm_modeset_unregister_all - do early unregistration
> + * @dev: drm device
> + *
> + * This function call early_unregister callbakc for all planes,
> + * crtcs, encoders and connectors
> + */
> +void drm_modeset_unregister_all(struct drm_device *dev)
> +{
> + drm_plane_unregister_all(dev);
> + drm_crtc_unregister_all(dev);
> + drm_encoder_unregister_all(dev);
> + drm_connector_unregister_all(dev);

Unregister should be in the opposite order.

> +}
> +EXPORT_SYMBOL(drm_modeset_unregister_all);

I think the plan is not to export these symbols,
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH] drm: Don't compute obj counts expensively in get_resources

2016-06-21 Thread Chris Wilson
On Tue, Jun 21, 2016 at 02:29:48PM +0200, Daniel Vetter wrote:
> Looping when we keep track of this is silly. Only thing we have to
> be careful is with sampling the connector count. To avoid inconsisten
> results due to gcc re-computing this, use READ_ONCE.
> 
> And to avoid surprising userspace, make sure we don't copy more
> connectors than planned, and report the actual number of connectors
> copied. That way any racing hot-add/remove will be handled.
> 
> v2: Actually try to not blow up, somehow I lost the hunk that checks
> we don't copy too much. Noticed by Chris.
> 
> Cc: Chris Wilson 
> Signed-off-by: Daniel Vetter 

The defacto API is call once with count_connectors=0 then with again
with the previous value of count_connectors. On the second pass, the
user is expecting no more than count_connectors to be copied, and is not
expecting to be told about new ones. The expected ABI looks unchanged.

> diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
> index 28c109ff7330..59c5261a309c 100644
> --- a/drivers/gpu/drm/drm_crtc.c
> +++ b/drivers/gpu/drm/drm_crtc.c
> @@ -1842,10 +1842,10 @@ int drm_mode_getresources(struct drm_device *dev, 
> void *data,
>   struct drm_crtc *crtc;
>   struct drm_encoder *encoder;
>   int ret = 0;
> - int connector_count = 0;
> - int crtc_count = 0;
> + int connector_count = READ_ONCE(dev->mode_config.num_connector);

Ok, since in the future num_connector is not guarded by mode_conf.mutex,
moving the read underneath that lock doesn't obviate the need for
READ_ONCE. Still it reduces the window and how far one has to look back
if it were set just before the connector loop.

Reviewed-by: Chris Wilson 
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH v3 1/3] drm: Add callbacks for late registering

2016-06-21 Thread Chris Wilson
On Tue, Jun 21, 2016 at 03:09:38PM +0200, Benjamin Gaignard wrote:
> Like what has been done for connectors add callbacks on encoder,
> crtc and plane to let driver do actions after drm device registration.
> 
> Correspondingly, add callbacks called before unregister drm device.
> 
> version 2:
> add drm_modeset_register_all() and drm_modeset_unregister_all()
> to centralize all calls
> 
> version 3:
> in error case unwind registers in drm_modeset_register_all
> fix uninitialed return value
> inverse order of unregistration in drm_modeset_unregister_all
> 
> Signed-off-by: Benjamin Gaignard 
> ---
>  drivers/gpu/drm/drm_crtc.c | 132 
> +
>  drivers/gpu/drm/drm_drv.c  |   4 +-
>  include/drm/drm_crtc.h |  79 +++
>  3 files changed, 213 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
> index e7c862b..c078bc4 100644
> --- a/drivers/gpu/drm/drm_crtc.c
> +++ b/drivers/gpu/drm/drm_crtc.c
> @@ -608,6 +608,31 @@ static unsigned int drm_num_crtcs(struct drm_device *dev)
>   return num;
>  }
>  
> +static int drm_crtc_register_all(struct drm_device *dev)
> +{
> + struct drm_crtc *crtc;
> + int ret = 0;
> +
> + drm_for_each_crtc(crtc, dev) {
> + if (crtc->funcs->late_register)
> + ret = crtc->funcs->late_register(crtc);
> + if (ret)

What happened to the unwind here?

> + return ret;
> + }
> +
> + return 0;
> +}
> +/**
> + * drm_modeset_register_all - do late registration
> + * @dev: drm device
> + *
> + * This function call late_register callback for all planes,
> + * crcts, encoders and connectors
> + *
> + * Returns:
> + * Zero on success, erro code on failure
> + */
> +int drm_modeset_register_all(struct drm_device *dev)
> +{
> + int ret;
> +
> + ret = drm_plane_register_all(dev);
> + if (ret)
> + goto err_plane;
> +
> + ret = drm_crtc_register_all(dev);
> + if  (ret)
> + goto err_crtc;
> +
> + ret = drm_encoder_register_all(dev);
> + if (ret)
> + goto err_encoder;
> +
> + ret = drm_connector_register_all(dev);
> + if (ret)
> + goto err_connector;
> +
> + return 0;
> +
> +err_connector:
> + drm_encoder_unregister_all(dev);

The name here should be what we are about to free. That just makes it a
bit easier to change the sequence later (if adding new stages).

Looks good enough though,
Reviewed-by: Chris Wilson 
-Chris


-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH v4 1/3] drm: Add callbacks for late registering

2016-06-21 Thread Chris Wilson
On Tue, Jun 21, 2016 at 04:37:09PM +0200, Benjamin Gaignard wrote:
> Like what has been done for connectors add callbacks on encoder,
> crtc and plane to let driver do actions after drm device registration.
> 
> Correspondingly, add callbacks called before unregister drm device.
> 
> version 2:
> add drm_modeset_register_all() and drm_modeset_unregister_all()
> to centralize all calls
> 
> version 3:
> in error case unwind registers in drm_modeset_register_all
> fix uninitialed return value
> inverse order of unregistration in drm_modeset_unregister_all
> 
> version 4:
> move function definitions in drm_crtc_internal.h
> remove not needed documentation
> 
> Signed-off-by: Benjamin Gaignard 
Reviewed-by: Chris Wilson 
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH 2/3] drm/vgem: Fix mmaping

2016-06-21 Thread Chris Wilson
On Sat, Jun 18, 2016 at 04:20:48PM +0100, Chris Wilson wrote:
> The vGEM mmap code has bitrotted slightly and now immediately BUGs.
> Since vGEM was last updated, there are new core GEM facilities to
> provide more common functions, so let's use those here.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96603
> Testcase: igt/vgem_basic/mmap
> Signed-off-by: Chris Wilson 
Tested-by:  Humberto Israel Perez Rodriguez 
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH] drm/vgem: Attach sw fences to exported vGEM dma-buf (ioctl)

2016-06-22 Thread Chris Wilson
vGEM buffers are useful for passing data between software clients and
hardware renders. By allowing the user to create and attach fences to
the exported vGEM buffers (on the dma-buf), the user can implement a
deferred renderer and queue hardware operations like flipping and then
signal the buffer readiness (i.e. this allows the user to schedule
operations out-of-order, but have them complete in-order).

This also makes it much easier to write tightly controlled testcases for
dma-buf fencing and signaling between hardware drivers.

Testcase: igt/vgem_basic/dmabuf-fence
Signed-off-by: Chris Wilson 
Cc: Sean Paul 
Cc: Zach Reizner 
---
 drivers/gpu/drm/vgem/Makefile |   2 +-
 drivers/gpu/drm/vgem/vgem_drv.c   |  34 ++
 drivers/gpu/drm/vgem/vgem_drv.h   |  18 
 drivers/gpu/drm/vgem/vgem_fence.c | 217 ++
 include/uapi/drm/vgem_drm.h   |  62 +++
 5 files changed, 332 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/vgem/vgem_fence.c
 create mode 100644 include/uapi/drm/vgem_drm.h

diff --git a/drivers/gpu/drm/vgem/Makefile b/drivers/gpu/drm/vgem/Makefile
index 3f4c7b842028..bfcdea1330e6 100644
--- a/drivers/gpu/drm/vgem/Makefile
+++ b/drivers/gpu/drm/vgem/Makefile
@@ -1,4 +1,4 @@
 ccflags-y := -Iinclude/drm
-vgem-y := vgem_drv.o
+vgem-y := vgem_drv.o vgem_fence.o

 obj-$(CONFIG_DRM_VGEM) += vgem.o
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index d7f962068ad0..c16e9c47c4da 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -84,6 +84,34 @@ static const struct vm_operations_struct vgem_gem_vm_ops = {
.close = drm_gem_vm_close,
 };

+static int vgem_open(struct drm_device *dev, struct drm_file *file)
+{
+   struct vgem_file *vfile;
+   int ret;
+
+   vfile = kzalloc(sizeof(*vfile), GFP_KERNEL);
+   if (!vfile)
+   return -ENOMEM;
+
+   file->driver_priv = vfile;
+
+   ret = vgem_fence_open(vfile);
+   if (ret) {
+   kfree(vfile);
+   return ret;
+   }
+
+   return 0;
+}
+
+static void vgem_preclose(struct drm_device *dev, struct drm_file *file)
+{
+   struct vgem_file *vfile = file->driver_priv;
+
+   vgem_fence_close(vfile);
+   kfree(vfile);
+}
+
 /* ioctls */

 static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
@@ -165,6 +193,8 @@ unref:
 }

 static struct drm_ioctl_desc vgem_ioctls[] = {
+   DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
 };

 static int vgem_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -287,9 +317,12 @@ static int vgem_prime_mmap(struct drm_gem_object *obj,

 static struct drm_driver vgem_driver = {
.driver_features= DRIVER_GEM | DRIVER_PRIME,
+   .open   = vgem_open,
+   .preclose   = vgem_preclose,
.gem_free_object_unlocked   = vgem_gem_free_object,
.gem_vm_ops = _gem_vm_ops,
.ioctls = vgem_ioctls,
+   .num_ioctls = ARRAY_SIZE(vgem_ioctls),
.fops   = _driver_fops,

.dumb_create= vgem_gem_dumb_create,
@@ -344,5 +377,6 @@ module_init(vgem_init);
 module_exit(vgem_exit);

 MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/vgem/vgem_drv.h b/drivers/gpu/drm/vgem/vgem_drv.h
index 988cbaae7588..88ce21010e28 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.h
+++ b/drivers/gpu/drm/vgem/vgem_drv.h
@@ -32,9 +32,27 @@
 #include 
 #include 

+#include 
+
+struct vgem_file {
+   struct idr fence_idr;
+   struct mutex fence_mutex;
+   u64 fence_context;
+   atomic_t fence_seqno;
+};
+
 #define to_vgem_bo(x) container_of(x, struct drm_vgem_gem_object, base)
 struct drm_vgem_gem_object {
struct drm_gem_object base;
 };

+int vgem_fence_open(struct vgem_file *file);
+int vgem_fence_attach_ioctl(struct drm_device *dev,
+   void *data,
+   struct drm_file *file);
+int vgem_fence_signal_ioctl(struct drm_device *dev,
+   void *data,
+   struct drm_file *file);
+void vgem_fence_close(struct vgem_file *file);
+
 #endif
diff --git a/drivers/gpu/drm/vgem/vgem_fence.c 
b/drivers/gpu/drm/vgem/vgem_fence.c
new file mode 100644
index ..46130e4a3506
--- /dev/null
+++ b/drivers/gpu/drm/vgem/vgem_fence.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentatio

[PATCH] drm/fb-helper: Reduce READ_ONCE(master) to lockless_dereference

2016-06-22 Thread Chris Wilson
We are only documenting that the read is outside of the lock, and do not
require strict ordering on the operation. In this case the more relaxed
lockless_dereference() will suffice.

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Cc: Julia Lawall 
Cc: Chris Wilson 
Cc: Emil Velikov 
---
 drivers/gpu/drm/drm_fb_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 0a06f9120b5a..ce54e985d91b 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -464,7 +464,7 @@ static bool drm_fb_helper_is_bound(struct drm_fb_helper 
*fb_helper)

/* Sometimes user space wants everything disabled, so don't steal the
 * display if there's a master. */
-   if (READ_ONCE(dev->master))
+   if (lockless_dereference(dev->master))
return false;

drm_for_each_crtc(crtc, dev) {
-- 
2.8.1



[PATCH 1/3] drm/vgem: Fix mmaping

2016-06-23 Thread Chris Wilson
The vGEM mmap code has bitrotted slightly and now immediately BUGs.
Since vGEM was last updated, there are new core GEM facilities to
provide more common functions, so let's use those here.

v2: drm_gem_free_mmap_offset() is performed from
drm_gem_object_release() so we can remove the redundant call.

Testcase: igt/vgem_basic/mmap
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96603
Signed-off-by: Chris Wilson 
Cc: Sean Paul 
Cc: Zach Reizner 
Cc: Matthew Auld 
Tested-by: Humberto Israel Perez Rodriguez 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/vgem/vgem_drv.c | 164 +++-
 drivers/gpu/drm/vgem/vgem_drv.h |   6 --
 2 files changed, 61 insertions(+), 109 deletions(-)

diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 35ea5d02a827..c161b6d7e427 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -42,81 +42,38 @@
 #define DRIVER_MAJOR   1
 #define DRIVER_MINOR   0

-void vgem_gem_put_pages(struct drm_vgem_gem_object *obj)
-{
-   drm_gem_put_pages(&obj->base, obj->pages, false, false);
-   obj->pages = NULL;
-}
-
 static void vgem_gem_free_object(struct drm_gem_object *obj)
 {
struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj);

-   drm_gem_free_mmap_offset(obj);
-
-   if (vgem_obj->use_dma_buf && obj->dma_buf) {
-   dma_buf_put(obj->dma_buf);
-   obj->dma_buf = NULL;
-   }
-
drm_gem_object_release(obj);
-
-   if (vgem_obj->pages)
-   vgem_gem_put_pages(vgem_obj);
-
-   vgem_obj->pages = NULL;
-
kfree(vgem_obj);
 }

-int vgem_gem_get_pages(struct drm_vgem_gem_object *obj)
-{
-   struct page **pages;
-
-   if (obj->pages || obj->use_dma_buf)
-   return 0;
-
-   pages = drm_gem_get_pages(>base);
-   if (IS_ERR(pages)) {
-   return PTR_ERR(pages);
-   }
-
-   obj->pages = pages;
-
-   return 0;
-}
-
 static int vgem_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
struct drm_vgem_gem_object *obj = vma->vm_private_data;
-   loff_t num_pages;
-   pgoff_t page_offset;
-   int ret;
-
/* We don't use vmf->pgoff since that has the fake offset */
-   page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
-   PAGE_SHIFT;
-
-   num_pages = DIV_ROUND_UP(obj->base.size, PAGE_SIZE);
-
-   if (page_offset > num_pages)
-   return VM_FAULT_SIGBUS;
-
-   ret = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
-obj->pages[page_offset]);
-   switch (ret) {
-   case 0:
-   return VM_FAULT_NOPAGE;
-   case -ENOMEM:
-   return VM_FAULT_OOM;
-   case -EBUSY:
-   return VM_FAULT_RETRY;
-   case -EFAULT:
-   case -EINVAL:
-   return VM_FAULT_SIGBUS;
-   default:
-   WARN_ON(1);
-   return VM_FAULT_SIGBUS;
+   unsigned long vaddr = (unsigned long)vmf->virtual_address;
+   struct page *page;
+
+   page = shmem_read_mapping_page(file_inode(obj->base.filp)->i_mapping,
+  (vaddr - vma->vm_start) >> PAGE_SHIFT);
+   if (!IS_ERR(page)) {
+   vmf->page = page;
+   return 0;
+   } else switch (PTR_ERR(page)) {
+   case -ENOSPC:
+   case -ENOMEM:
+   return VM_FAULT_OOM;
+   case -EBUSY:
+   return VM_FAULT_RETRY;
+   case -EFAULT:
+   case -EINVAL:
+   return VM_FAULT_SIGBUS;
+   default:
+   WARN_ON_ONCE(PTR_ERR(page));
+   return VM_FAULT_SIGBUS;
}
 }

@@ -134,57 +91,43 @@ static struct drm_gem_object *vgem_gem_create(struct 
drm_device *dev,
  unsigned long size)
 {
struct drm_vgem_gem_object *obj;
-   struct drm_gem_object *gem_object;
-   int err;
-
-   size = roundup(size, PAGE_SIZE);
+   int ret;

obj = kzalloc(sizeof(*obj), GFP_KERNEL);
if (!obj)
return ERR_PTR(-ENOMEM);

-   gem_object = &obj->base;
-
-   err = drm_gem_object_init(dev, gem_object, size);
-   if (err)
-   goto out;
-
-   err = vgem_gem_get_pages(obj);
-   if (err)
-   goto out;
-
-   err = drm_gem_handle_create(file, gem_object, handle);
-   if (err)
-   goto handle_out;
+   ret = drm_gem_object_init(dev, >base, roundup(size, PAGE_SIZE));
+   if (ret)
+   goto err_free;

-   drm_gem_object_unreference_unlocked(gem_object);
+   ret = drm_gem_handle_create(file, >base, handle);
+   drm_gem_object_unreference_unlocked(&obj->base);

[PATCH 2/3] drm/vgem: Enable dmabuf interface for export

2016-06-23 Thread Chris Wilson
Enable the standard GEM dma-buf interface provided by the DRM core, but
only for exporting the VGEM object. This allows passing around the VGEM
objects created from the dumb interface and using them as sources
elsewhere. Creating a VGEM object for a foriegn handle is not supported.

v2: With additional completeness.
v3: Need to clear the CPU cache upon exporting the dma-addresses.
v4: Use drm_gem_put_pages() as well.

Testcase: igt/vgem_basic/dmabuf-*
Testcase: igt/prime_vgem
Signed-off-by: Chris Wilson 
Cc: Sean Paul 
Cc: Zach Reizner 
---
 drivers/gpu/drm/vgem/vgem_drv.c | 104 +++-
 1 file changed, 103 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index c161b6d7e427..69468b5f3d82 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -192,14 +192,116 @@ static const struct file_operations vgem_driver_fops = {
.release= drm_release,
 };

+static int vgem_prime_pin(struct drm_gem_object *obj)
+{
+   long n_pages = obj->size >> PAGE_SHIFT;
+   struct page **pages;
+
+   /* Flush the object from the CPU cache so that importers can rely
+* on coherent indirect access via the exported dma-address.
+*/
+   pages = drm_gem_get_pages(obj);
+   if (IS_ERR(pages))
+   return PTR_ERR(pages);
+
+   drm_clflush_pages(pages, n_pages);
+   drm_gem_put_pages(obj, pages, true, false);
+
+   return 0;
+}
+
+static struct sg_table *vgem_prime_get_sg_table(struct drm_gem_object *obj)
+{
+   long n_pages = obj->size >> PAGE_SHIFT;
+   struct sg_table *st;
+   struct page **pages;
+   int ret;
+
+   st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+   if (st == NULL)
+   return ERR_PTR(-ENOMEM);
+
+   pages = drm_gem_get_pages(obj);
+   if (IS_ERR(pages)) {
+   ret = PTR_ERR(pages);
+   goto err;
+   }
+
+   ret = sg_alloc_table_from_pages(st, pages, n_pages,
+   0, obj->size, GFP_KERNEL);
+   drm_gem_put_pages(obj, pages, false, false);
+   if (ret)
+   goto err;
+
+   return st;
+
+err:
+   kfree(st);
+   return ERR_PTR(ret);
+}
+
+static void *vgem_prime_vmap(struct drm_gem_object *obj)
+{
+   long n_pages = obj->size >> PAGE_SHIFT;
+   struct page **pages;
+   void *addr;
+
+   pages = drm_gem_get_pages(obj);
+   if (IS_ERR(pages))
+   return NULL;
+
+   addr = vmap(pages, n_pages, 0, pgprot_writecombine(PAGE_KERNEL_IO));
+   drm_gem_put_pages(obj, pages, false, false);
+
+   return addr;
+}
+
+static void vgem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
+{
+   vunmap(vaddr);
+}
+
+static int vgem_prime_mmap(struct drm_gem_object *obj,
+  struct vm_area_struct *vma)
+{
+   int ret;
+
+   if (obj->size < vma->vm_end - vma->vm_start)
+   return -EINVAL;
+
+   if (!obj->filp)
+   return -ENODEV;
+
+   ret = obj->filp->f_op->mmap(obj->filp, vma);
+   if (ret)
+   return ret;
+
+   fput(vma->vm_file);
+   vma->vm_file = get_file(obj->filp);
+   vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+   vma->vm_page_prot = 
pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+
+   return 0;
+}
+
 static struct drm_driver vgem_driver = {
-   .driver_features= DRIVER_GEM,
+   .driver_features= DRIVER_GEM | DRIVER_PRIME,
.gem_free_object_unlocked   = vgem_gem_free_object,
.gem_vm_ops = &vgem_gem_vm_ops,
.ioctls = vgem_ioctls,
.fops   = &vgem_driver_fops,
+
.dumb_create= vgem_gem_dumb_create,
.dumb_map_offset= vgem_gem_dumb_map,
+
+   .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+   .gem_prime_pin = vgem_prime_pin,
+   .gem_prime_export = drm_gem_prime_export,
+   .gem_prime_get_sg_table = vgem_prime_get_sg_table,
+   .gem_prime_vmap = vgem_prime_vmap,
+   .gem_prime_vunmap = vgem_prime_vunmap,
+   .gem_prime_mmap = vgem_prime_mmap,
+
.name   = DRIVER_NAME,
.desc   = DRIVER_DESC,
.date   = DRIVER_DATE,
-- 
2.8.1



[PATCH 3/3] drm/vgem: Attach sw fences to exported vGEM dma-buf (ioctl)

2016-06-23 Thread Chris Wilson
vGEM buffers are useful for passing data between software clients and
hardware renders. By allowing the user to create and attach fences to
the exported vGEM buffers (on the dma-buf), the user can implement a
deferred renderer and queue hardware operations like flipping and then
signal the buffer readiness (i.e. this allows the user to schedule
operations out-of-order, but have them complete in-order).

This also makes it much easier to write tightly controlled testcases for
dma-buf fencing and signaling between hardware drivers.

Testcase: igt/vgem_basic/dmabuf-fence
Signed-off-by: Chris Wilson 
Cc: Sean Paul 
Cc: Zach Reizner 
---
 drivers/gpu/drm/vgem/Makefile |   2 +-
 drivers/gpu/drm/vgem/vgem_drv.c   |  34 ++
 drivers/gpu/drm/vgem/vgem_drv.h   |  18 
 drivers/gpu/drm/vgem/vgem_fence.c | 217 ++
 include/uapi/drm/vgem_drm.h   |  62 +++
 5 files changed, 332 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/vgem/vgem_fence.c
 create mode 100644 include/uapi/drm/vgem_drm.h

diff --git a/drivers/gpu/drm/vgem/Makefile b/drivers/gpu/drm/vgem/Makefile
index 3f4c7b842028..bfcdea1330e6 100644
--- a/drivers/gpu/drm/vgem/Makefile
+++ b/drivers/gpu/drm/vgem/Makefile
@@ -1,4 +1,4 @@
 ccflags-y := -Iinclude/drm
-vgem-y := vgem_drv.o
+vgem-y := vgem_drv.o vgem_fence.o

 obj-$(CONFIG_DRM_VGEM) += vgem.o
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 69468b5f3d82..56e348701382 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -83,6 +83,34 @@ static const struct vm_operations_struct vgem_gem_vm_ops = {
.close = drm_gem_vm_close,
 };

+static int vgem_open(struct drm_device *dev, struct drm_file *file)
+{
+   struct vgem_file *vfile;
+   int ret;
+
+   vfile = kzalloc(sizeof(*vfile), GFP_KERNEL);
+   if (!vfile)
+   return -ENOMEM;
+
+   file->driver_priv = vfile;
+
+   ret = vgem_fence_open(vfile);
+   if (ret) {
+   kfree(vfile);
+   return ret;
+   }
+
+   return 0;
+}
+
+static void vgem_preclose(struct drm_device *dev, struct drm_file *file)
+{
+   struct vgem_file *vfile = file->driver_priv;
+
+   vgem_fence_close(vfile);
+   kfree(vfile);
+}
+
 /* ioctls */

 static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
@@ -164,6 +192,8 @@ unref:
 }

 static struct drm_ioctl_desc vgem_ioctls[] = {
+   DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
 };

 static int vgem_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -286,9 +316,12 @@ static int vgem_prime_mmap(struct drm_gem_object *obj,

 static struct drm_driver vgem_driver = {
.driver_features= DRIVER_GEM | DRIVER_PRIME,
+   .open   = vgem_open,
+   .preclose   = vgem_preclose,
.gem_free_object_unlocked   = vgem_gem_free_object,
.gem_vm_ops = &vgem_gem_vm_ops,
.ioctls = vgem_ioctls,
+   .num_ioctls = ARRAY_SIZE(vgem_ioctls),
.fops   = &vgem_driver_fops,

.dumb_create= vgem_gem_dumb_create,
@@ -343,5 +376,6 @@ module_init(vgem_init);
 module_exit(vgem_exit);

 MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/vgem/vgem_drv.h b/drivers/gpu/drm/vgem/vgem_drv.h
index 988cbaae7588..88ce21010e28 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.h
+++ b/drivers/gpu/drm/vgem/vgem_drv.h
@@ -32,9 +32,27 @@
 #include 
 #include 

+#include 
+
+struct vgem_file {
+   struct idr fence_idr;
+   struct mutex fence_mutex;
+   u64 fence_context;
+   atomic_t fence_seqno;
+};
+
 #define to_vgem_bo(x) container_of(x, struct drm_vgem_gem_object, base)
 struct drm_vgem_gem_object {
struct drm_gem_object base;
 };

+int vgem_fence_open(struct vgem_file *file);
+int vgem_fence_attach_ioctl(struct drm_device *dev,
+   void *data,
+   struct drm_file *file);
+int vgem_fence_signal_ioctl(struct drm_device *dev,
+   void *data,
+   struct drm_file *file);
+void vgem_fence_close(struct vgem_file *file);
+
 #endif
diff --git a/drivers/gpu/drm/vgem/vgem_fence.c 
b/drivers/gpu/drm/vgem/vgem_fence.c
new file mode 100644
index ..46130e4a3506
--- /dev/null
+++ b/drivers/gpu/drm/vgem/vgem_fence.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentatio

[RFC 4/5] dma-buf/fence-array: add fence_array_get_fences()

2016-06-23 Thread Chris Wilson
On Thu, Jun 23, 2016 at 12:29:49PM -0300, Gustavo Padovan wrote:
> From: Gustavo Padovan 
> 
> This function returns a copy of the array of fences.
> 
> Signed-off-by: Gustavo Padovan 
> ---
>  drivers/dma-buf/fence-array.c | 14 ++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/drivers/dma-buf/fence-array.c b/drivers/dma-buf/fence-array.c
> index 601448a..ce98249 100644
> --- a/drivers/dma-buf/fence-array.c
> +++ b/drivers/dma-buf/fence-array.c
> @@ -33,6 +33,19 @@ static const char *fence_array_get_timeline_name(struct 
> fence *fence)
>   return "unbound";
>  }
>  
> +static struct fence **fence_array_get_fences(struct fence *fence)
> +{
> + struct fence_array *array = to_fence_array(fence);
> + struct fence **fences;
> +
> + fences = kmalloc(array->num_fences * sizeof(*fences), GFP_KERNEL);
> + if (!fences)
> + return NULL;
> +
> + memcpy(fences, array->fences, array->num_fences * sizeof(*fences));
> + return fences;

return kmemdup(array->fences,
   array->num_fences * sizeof(*array->fences),
   GFP_KERNEL);

-- 
Chris Wilson, Intel Open Source Technology Centre


[RFC 3/5] dma-buf/fence: add .get_fences() ops

2016-06-23 Thread Chris Wilson
On Thu, Jun 23, 2016 at 12:29:48PM -0300, Gustavo Padovan wrote:
> From: Gustavo Padovan 
> 
> get_fences() should return a copy of all fences in the fence as some
> fence subclass (such as fence_array) can store more than one fence at
> time.
> 
> Signed-off-by: Gustavo Padovan 
> ---
>  drivers/dma-buf/fence.c | 14 ++
>  include/linux/fence.h   |  3 +++
>  2 files changed, 17 insertions(+)
> 
> diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
> index 4e61afb..f4094fd 100644
> --- a/drivers/dma-buf/fence.c
> +++ b/drivers/dma-buf/fence.c
> @@ -185,6 +185,20 @@ void fence_release(struct kref *kref)
>  }
>  EXPORT_SYMBOL(fence_release);
>  
> +struct fence **fence_get_fences(struct fence *fence)

Returning an array, but not telling the caller how many elements in the
array?

> +{
> + if (fence->ops->get_fences) {
> + return fence->ops->get_fences(fence);
> + } else {
> + struct fence **fences = kmalloc(sizeof(**fences), GFP_KERNEL);

One too many * (=> sizeof(struct fence), not sizeof(struct fence *))

return kmemdup(&fence, sizeof(fence), GFP_KERNEL);

The documentation should emphasize that the fences in the
returned array have a "borrowed" reference (i.e. it does not return a
new reference to each fence).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[RFC 1/5] dma-buf/fence: add .teardown() ops

2016-06-23 Thread Chris Wilson
On Thu, Jun 23, 2016 at 12:29:46PM -0300, Gustavo Padovan wrote:
> From: Gustavo Padovan 
> 
> fence_array requires a function to clean up its state before we
> are able to call fence_put() and release it.

An explanation along the lines of:

As the array of fence callbacks held by an active struct fence_array
each has a reference to the struct fence_array, when the owner of the
fence_array is freed it must dispose of the callback references before
it can free the fence_array. This can not happen simply during
fence_release() because of the extra references and so we need a new
function to run before the final fence_put().

would help, it is not until you use it in 5/5 that it becomes apparent
why it is needed.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[RFC 5/5] dma-buf/sync_file: rework fence storage in struct file

2016-06-23 Thread Chris Wilson
On Thu, Jun 23, 2016 at 12:29:50PM -0300, Gustavo Padovan wrote:
> -static void sync_file_add_pt(struct sync_file *sync_file, int *i,
> +static int sync_file_set_fence(struct sync_file *sync_file,
> +struct fence **fences)
> +{
> + struct fence_array *array;
> +
> + if (sync_file->num_fences == 1) {
> + sync_file->fence = fences[0];

Straightforward pointer assignment.

> + } else {
> + array = fence_array_create(sync_file->num_fences, fences,
> +fence_context_alloc(1), 1, false);
> + if (!array)
> + return -ENOMEM;
> +
> + sync_file->fence = &array->base;

New reference.

Imbalance will promptly go bang after we release the single fence[0].

Would fence_array_create(1, fence) returning fence_get(fence) be too
much of a hack?

I would suggest dropping the exported fence_get_fences() and use a local
instead that could avoid the copy, e.g.

static struct fence **get_fences(struct fence **fence,
unsigned int *num_fences)
{
if (fence_is_array(*fence)) {
struct fence_array *array = to_fence_array(*fence);
*num_fences = array->num_fences;
return array->fences;
} else {
*num_fences = 1;
return fence;
}
}

sync_file_merge() {
int num_fences, num_a_fences, num_b_fences;
struct fence **fences, **a_fences, **b_fences;

a_fences = get_fences(&a, &num_a_fences);
b_fences = get_fences(&b, &num_b_fences);

num_fences = num_a_fences + num_b_fences;

>  static void sync_file_free(struct kref *kref)
>  {
>   struct sync_file *sync_file = container_of(kref, struct sync_file,
>kref);
> - int i;
> -
> - for (i = 0; i < sync_file->num_fences; ++i) {
> - fence_remove_callback(sync_file->cbs[i].fence,
> -   &sync_file->cbs[i].cb);
> - fence_put(sync_file->cbs[i].fence);
> - }
>  
> + fence_remove_callback(sync_file->fence, &sync_file->cb);
> + fence_teardown(sync_file->fence);

Hmm. Could we detect the removal of the last callback and propagate that
to the fence_array? (Rather then introduce a manual call to
fence_teardown.)

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH] drm: Promote drm_mm alignment to u64

2016-01-19 Thread Chris Wilson
In places (e.g. i915.ko), the alignment is exported to userspace as u64
and there now exists hardware for which we can indeed utilize a u64
alignment. As such, we need to keep 64bit integers throughout when
handling alignment.

Testcase: igt/gem_exec_alignment
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/drm_mm.c | 37 +
 include/drm/drm_mm.h | 16 
 2 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 66c993840f47..1095084947fa 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -93,12 +93,12 @@

 static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
u64 size,
-   unsigned alignment,
+   u64 alignment,
unsigned long color,
enum drm_mm_search_flags flags);
 static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct 
drm_mm *mm,
u64 size,
-   unsigned alignment,
+   u64 alignment,
unsigned long color,
u64 start,
u64 end,
@@ -172,7 +172,7 @@ static void drm_mm_interval_tree_add_node(struct 
drm_mm_node *hole_node,

 static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 struct drm_mm_node *node,
-u64 size, unsigned alignment,
+u64 size, u64 alignment,
 unsigned long color,
 enum drm_mm_allocator_flags flags)
 {
@@ -191,10 +191,9 @@ static void drm_mm_insert_helper(struct drm_mm_node 
*hole_node,
adj_start = adj_end - size;

if (alignment) {
-   u64 tmp = adj_start;
-   unsigned rem;
+   u64 rem;

-   rem = do_div(tmp, alignment);
+   div64_u64_rem(adj_start, alignment, );
if (rem) {
if (flags & DRM_MM_CREATE_TOP)
adj_start -= rem;
@@ -308,7 +307,7 @@ EXPORT_SYMBOL(drm_mm_reserve_node);
  * 0 on success, -ENOSPC if there's no suitable hole.
  */
 int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node,
-  u64 size, unsigned alignment,
+  u64 size, u64 alignment,
   unsigned long color,
   enum drm_mm_search_flags sflags,
   enum drm_mm_allocator_flags aflags)
@@ -327,7 +326,7 @@ EXPORT_SYMBOL(drm_mm_insert_node_generic);

 static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
   struct drm_mm_node *node,
-  u64 size, unsigned alignment,
+  u64 size, u64 alignment,
   unsigned long color,
   u64 start, u64 end,
   enum drm_mm_allocator_flags flags)
@@ -352,10 +351,9 @@ static void drm_mm_insert_helper_range(struct drm_mm_node 
*hole_node,
adj_start = adj_end - size;

if (alignment) {
-   u64 tmp = adj_start;
-   unsigned rem;
+   u64 rem;

-   rem = do_div(tmp, alignment);
+   div64_u64_rem(adj_start, alignment, );
if (rem) {
if (flags & DRM_MM_CREATE_TOP)
adj_start -= rem;
@@ -409,7 +407,7 @@ static void drm_mm_insert_helper_range(struct drm_mm_node 
*hole_node,
  * 0 on success, -ENOSPC if there's no suitable hole.
  */
 int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node 
*node,
-   u64 size, unsigned alignment,
+   u64 size, u64 alignment,
unsigned long color,
u64 start, u64 end,
enum drm_mm_search_flags sflags,
@@ -473,16 +471,15 @@ void drm_mm_remove_node(struct drm_mm_node *node)
 }
 EXPORT_SYMBOL(drm_mm_remove_node);

-static int check_free_hole(u64 start, u64 end, u64 size, unsigned alignment)
+static int check_free_hole(u64 start, u64 end, u64 size, u64 alignment)
 {
if (end - start < size)
return 0;

if (alignment) {
-   u64 tmp = start;
-   unsigned rem;
+   

[RFC libdrm] intel: Add support for softpin

2016-01-25 Thread Chris Wilson
On Wed, Sep 09, 2015 at 04:07:10PM +0200, Michał Winiarski wrote:
> Softpin allows userspace to take greater control of GPU virtual address
> space and eliminates the need of relocations. It can also be used to
> mirror addresses between GPU and CPU (shared virtual memory).
> Calls to drm_intel_bo_emit_reloc are still required to build the list of
> drm_i915_gem_exec_objects at exec time, but no entries in relocs are
> created. Self-relocs don't make any sense for softpinned objects and can
> indicate a programming errors, thus are forbidden. Softpinned objects
> are marked by asterisk in debug dumps.

This patch gets the kernel interface wrong.

>  static int
> +drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo 
> *target_bo)
> +{
> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
> + if (bo_gem->has_error)
> + return -ENOMEM;
> +
> + if (target_bo_gem->has_error) {
> + bo_gem->has_error = true;
> + return -ENOMEM;
> + }
> +
> + if (!target_bo_gem->is_softpin)
> + return -EINVAL;
> + if (target_bo_gem == bo_gem)
> + return -EINVAL;
> +
> + if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) {
> + int new_size = bo_gem->softpin_target_size * 2;
> + if (new_size == 0)
> + new_size = bufmgr_gem->max_relocs;
> +
> + bo_gem->softpin_target = realloc(bo_gem->softpin_target, 
> new_size *
> + sizeof(drm_intel_bo *));
> + if (!bo_gem->softpin_target)
> + return -ENOMEM;
> +
> + bo_gem->softpin_target_size = new_size;
> + }
> + bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo;
> + drm_intel_gem_bo_reference(target_bo);
> + bo_gem->softpin_target_count++;
> +
> + return 0;
> +}

Short-circuiting the reloc handling like this is broken without
instructing the kernel via the alternative mechanisms about the special
handling the buffer requires EXEC_OBJECT_WRITE, EXEC_OBJECT_NEEDS_GTT).

>  void
>  drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
> @@ -1998,6 +2084,12 @@ drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int 
> start)
>   }
>   bo_gem->reloc_count = start;
>  
> + for (i = 0; i < bo_gem->softpin_target_count; i++) {
> + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) 
> bo_gem->softpin_target[i];
> + drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, 
> time.tv_sec);
> + }
> + bo_gem->softpin_target_count = 0;

This would have better pursued using a batch manager.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH] drm/i915: Remove select to deleted STOP_MACHINE from Kconfig

2016-01-25 Thread Chris Wilson
On Mon, Jan 25, 2016 at 12:14:41PM +0100, Andreas Ziegler wrote:
> Commit 86fffe4a61dd ("kernel: remove stop_machine() Kconfig
> dependency") removed the option STOP_MACHINE from init/Kconfig, but
> missed a select to this option from the DRM_I915 option.

The select didn't exist on the branch on which the patch was applied, so
it couldn't remove it.

Commit 5bab6f60cb4 ("drm/i915: Serialise updates to GGTT with access
through GGTT on Braswel") depended upon a working stop_machine() and so
forced its selection. However, commit 86fffe4a61dd ("kernel: remove
stop_machine() Kconfig dependency") removed the option STOP_MACHINE from
init/Kconfig and ensured that stop_machine() universally works.

Remove this now obsolete select statement.

> Signed-off-by: Andreas Ziegler 

With a little bit of story correction,
Revieweed-by: Chris Wilson 
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH v2 2/4] dma-buf/sync_file: rework fence storage in struct file

2016-07-01 Thread Chris Wilson
On Fri, Jul 01, 2016 at 10:05:05AM -0300, Gustavo Padovan wrote:
> @@ -333,16 +384,16 @@ static long sync_file_ioctl_fence_info(struct sync_file 
> *sync_file,
>   if (!info.num_fences)
>   goto no_fences;
>  
> - if (info.num_fences < sync_file->num_fences)
> + if (info.num_fences < num_fences)
>   return -EINVAL;
>  
> - size = sync_file->num_fences * sizeof(*fence_info);
> + size = num_fences * sizeof(*fence_info);
>   fence_info = kzalloc(size, GFP_KERNEL);

Does size get used elsewhere? Otherwise it would be safer to reduce this
to kcalloc(num_fences, sizeof(*fence_info), GFP_KERNEL)

>   if (!fence_info)
>   return -ENOMEM;
>  

> diff --git a/drivers/staging/android/sync_debug.c 
> b/drivers/staging/android/sync_debug.c
> index 5f57499..0638a06 100644
> --- a/drivers/staging/android/sync_debug.c
> +++ b/drivers/staging/android/sync_debug.c
> @@ -159,10 +159,15 @@ static void sync_print_sync_file(struct seq_file *s,
>   int i;
>  
>   seq_printf(s, "[%p] %s: %s\n", sync_file, sync_file->name,
> -sync_status_str(atomic_read(&sync_file->status)));
> -
> - for (i = 0; i < sync_file->num_fences; ++i)
> - sync_print_fence(s, sync_file->cbs[i].fence, true);
> +sync_status_str(!fence_is_signaled(sync_file->fence)));
> +
> + if (fence_is_array(sync_file->fence)) {
> + struct fence_array *array = to_fence_array(sync_file->fence);

checkpatch will complain about the missing line between decl and code
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH v2 3/4] dma-buf/sync_file: add sync_file_get_fence()

2016-07-01 Thread Chris Wilson
On Fri, Jul 01, 2016 at 10:05:06AM -0300, Gustavo Padovan wrote:
> From: Gustavo Padovan 
> 
> Creates a function that given an sync file descriptor returns a
> fence containing all fences in the sync_file.
> 
> v2: Comments by Daniel Vetter
>   - Adapt to new version of fence_collection_init()
>   - Hold a reference for the fence we return
> 
> v3:
>   - Adapt to use fput() directly
>   - rename to sync_file_get_fence() as we always return one fence
> 
> v4: Adapt to use fence_array
> 
> Signed-off-by: Gustavo Padovan 
> ---
>  drivers/dma-buf/sync_file.c | 24 
>  include/linux/sync_file.h   |  1 +
>  2 files changed, 25 insertions(+)
> 
> diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
> index 12dbf17..3ffaea8 100644
> --- a/drivers/dma-buf/sync_file.c
> +++ b/drivers/dma-buf/sync_file.c
> @@ -116,6 +116,30 @@ err:
>   return NULL;
>  }
>  
> +/**
> + * sync_file_get_fence - get the fence related to the sync_file fd
> + * @fd:  sync_file fd to get the fence from
> + *
> + * Ensures @fd references a valid sync_file and returns a fence that
> + * represents all fence in the sync_file. On error NULL is returned.
> + */
> +struct fence *sync_file_get_fence(int fd)
> +{
> + struct sync_file *sync_file;
> + struct fence *fence;
> +
> + sync_file = sync_file_fdget(fd);
> + if (!sync_file)
> + return NULL;
> +
> + fence = sync_file->fence;
> + fence_get(fence);

Or just fence = get_fence(sync_file->fence);

> + fput(sync_file->file);

Reviewed-by: Chris Wilson 

Using fence-array for this works very nicely, as we can then inspect the
fences returned and handle native fences for fd passed around.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[Intel-gfx] [PATCH 2/3] drm/vgem: Enable dmabuf interface for export

2016-07-01 Thread Chris Wilson
On Fri, Jul 01, 2016 at 05:56:25PM +0100, Matthew Auld wrote:
> > +static struct sg_table *vgem_prime_get_sg_table(struct drm_gem_object *obj)
> > +{
> > +   long n_pages = obj->size >> PAGE_SHIFT;
> > +   struct sg_table *st;
> > +   struct page **pages;
> > +   int ret;
> > +
> > +   st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
> > +   if (st == NULL)
> > +   return ERR_PTR(-ENOMEM);
> > +
> > +   pages = drm_gem_get_pages(obj);
> > +   if (IS_ERR(pages)) {
> > +   ret = PTR_ERR(pages);
> > +   goto err;
> > +   }
> > +
> > +   ret = sg_alloc_table_from_pages(st, pages, n_pages,
> > +   0, obj->size, GFP_KERNEL);
> > +   drm_gem_put_pages(obj, pages, false, false);
> > +   if (ret)
> > +   goto err;
> > +
> > +   return st;
> > +
> > +err:
> > +   kfree(st);
> > +   return ERR_PTR(ret);
> > +}
> > +
> Couldn't this be written more simply as:
> 
> pages = drm_gem_get_pages(obj);
> if (IS_ERR(pages))
> return ERR_CAST(pages);
> 
> st = drm_prime_pages_to_sg(pages, n_pages);
> drm_gem_put_pages(obj, pages, false, false);
> 
> return st;

That looks better, thanks.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Pageflipping bugs in drm-next on at least Ironlake and Ivybridge.

2016-07-06 Thread Chris Wilson
On Wed, Jul 06, 2016 at 12:17:55PM +0200, Mario Kleiner wrote:
> Since i pulled the current drm-next tree i see strong flicker and
> visual corruption during pageflipping, both in my own app, but also
> in KDE4 and KDE5 Plasma with desktop composition enabled. This
> happens on both Intel HD Ironake mobile (Apple MBP 2010) and HD-4000
> Ivybridge mobile (Apple macMini 2012).
> 
> It looks like page flips are not waiting properly for rendering to
> complete, showing partially rendered frames at flip time.
> 
> If i revert Daniel's commit that switches legacy pageflips from the
> old code path to the atomic code, all problems disappear, so
> apparently the atomic code for Intel is not quite ready at least on
> those parts?

Exactly right, we've reverted the enabling patch for the time being.
Daniel Stone has spotted the likely problem, but we also want to review
the handling of state/old_state to see if the same problem has cropped
up elsewhere.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH 06/64] drm: Restore double clflush on the last partial cacheline

2016-07-07 Thread Chris Wilson
This effectively reverts

commit afcd950cafea6e27b739fe7772cbbeed37d05b8b
Author: Chris Wilson 
Date:   Wed Jun 10 15:58:01 2015 +0100

drm: Avoid the double clflush on the last cache line in 
drm_clflush_virt_range()

as we have observed issues with serialisation of the clflush operations
on Baytrail+ Atoms with partial updates. Applying the double flush on the
last cacheline forces that clflush to be ordered with respect to the
previous clflush, and the mfence then protects against prefetches crossing
the clflush boundary.

The same issue can be demonstrated in userspace with igt/gem_exec_flush.

Fixes: afcd950cafea6 (drm: Avoid the double clflush on the last cache...)
Testcase: igt/gem_concurrent_blit
Testcase: igt/gem_partial_pread_pwrite
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92845
Signed-off-by: Chris Wilson 
Cc: dri-devel at lists.freedesktop.org
Cc: Akash Goel 
Cc: Imre Deak 
Cc: Daniel Vetter 
Cc: Jason Ekstrand 
Cc: stable at vger.kernel.org
Reviewed-by: Mika Kuoppala 
---
 drivers/gpu/drm/drm_cache.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index 059f7c39c582..a7916e5f8864 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -136,6 +136,7 @@ drm_clflush_virt_range(void *addr, unsigned long length)
mb();
for (; addr < end; addr += size)
clflushopt(addr);
+   clflushopt(end - 1); /* force serialisation */
mb();
return;
}
-- 
2.8.1



drm/vgem fixes and new ioctl for testing prime

2016-07-11 Thread Chris Wilson
Just a quick resend of the existing vgem patches, all 3 have been acked,
but only the first 2 have reviews. The third involves both new ioctl and
dma-buf/fences, so perhaps people have been reluctant... But now is a
good time! These patches are exercised by intel-gpu-tools (or will be once
the new ioctls are ratified).
-Chris



[PATCH 1/3] drm/vgem: Fix mmaping

2016-07-11 Thread Chris Wilson
The vGEM mmap code has bitrotted slightly and now immediately BUGs.
Since vGEM was last updated, there are new core GEM facilities to
provide more common functions, so let's use those here.

v2: drm_gem_free_mmap_offset() is performed from
drm_gem_object_release() so we can remove the redundant call.

Testcase: igt/vgem_basic/mmap
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96603
Signed-off-by: Chris Wilson 
Cc: Sean Paul 
Cc: Zach Reizner 
Cc: Matthew Auld 
Tested-by: Humberto Israel Perez Rodriguez 
Reviewed-by: Matthew Auld 
Acked-by: Zach Reizner 
---
 drivers/gpu/drm/vgem/vgem_drv.c | 164 +++-
 drivers/gpu/drm/vgem/vgem_drv.h |   6 --
 2 files changed, 61 insertions(+), 109 deletions(-)

diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 35ea5d02a827..c161b6d7e427 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -42,81 +42,38 @@
 #define DRIVER_MAJOR   1
 #define DRIVER_MINOR   0

-void vgem_gem_put_pages(struct drm_vgem_gem_object *obj)
-{
-   drm_gem_put_pages(>base, obj->pages, false, false);
-   obj->pages = NULL;
-}
-
 static void vgem_gem_free_object(struct drm_gem_object *obj)
 {
struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj);

-   drm_gem_free_mmap_offset(obj);
-
-   if (vgem_obj->use_dma_buf && obj->dma_buf) {
-   dma_buf_put(obj->dma_buf);
-   obj->dma_buf = NULL;
-   }
-
drm_gem_object_release(obj);
-
-   if (vgem_obj->pages)
-   vgem_gem_put_pages(vgem_obj);
-
-   vgem_obj->pages = NULL;
-
kfree(vgem_obj);
 }

-int vgem_gem_get_pages(struct drm_vgem_gem_object *obj)
-{
-   struct page **pages;
-
-   if (obj->pages || obj->use_dma_buf)
-   return 0;
-
-   pages = drm_gem_get_pages(>base);
-   if (IS_ERR(pages)) {
-   return PTR_ERR(pages);
-   }
-
-   obj->pages = pages;
-
-   return 0;
-}
-
 static int vgem_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
struct drm_vgem_gem_object *obj = vma->vm_private_data;
-   loff_t num_pages;
-   pgoff_t page_offset;
-   int ret;
-
/* We don't use vmf->pgoff since that has the fake offset */
-   page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
-   PAGE_SHIFT;
-
-   num_pages = DIV_ROUND_UP(obj->base.size, PAGE_SIZE);
-
-   if (page_offset > num_pages)
-   return VM_FAULT_SIGBUS;
-
-   ret = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
-obj->pages[page_offset]);
-   switch (ret) {
-   case 0:
-   return VM_FAULT_NOPAGE;
-   case -ENOMEM:
-   return VM_FAULT_OOM;
-   case -EBUSY:
-   return VM_FAULT_RETRY;
-   case -EFAULT:
-   case -EINVAL:
-   return VM_FAULT_SIGBUS;
-   default:
-   WARN_ON(1);
-   return VM_FAULT_SIGBUS;
+   unsigned long vaddr = (unsigned long)vmf->virtual_address;
+   struct page *page;
+
+   page = shmem_read_mapping_page(file_inode(obj->base.filp)->i_mapping,
+  (vaddr - vma->vm_start) >> PAGE_SHIFT);
+   if (!IS_ERR(page)) {
+   vmf->page = page;
+   return 0;
+   } else switch (PTR_ERR(page)) {
+   case -ENOSPC:
+   case -ENOMEM:
+   return VM_FAULT_OOM;
+   case -EBUSY:
+   return VM_FAULT_RETRY;
+   case -EFAULT:
+   case -EINVAL:
+   return VM_FAULT_SIGBUS;
+   default:
+   WARN_ON_ONCE(PTR_ERR(page));
+   return VM_FAULT_SIGBUS;
}
 }

@@ -134,57 +91,43 @@ static struct drm_gem_object *vgem_gem_create(struct 
drm_device *dev,
  unsigned long size)
 {
struct drm_vgem_gem_object *obj;
-   struct drm_gem_object *gem_object;
-   int err;
-
-   size = roundup(size, PAGE_SIZE);
+   int ret;

obj = kzalloc(sizeof(*obj), GFP_KERNEL);
if (!obj)
return ERR_PTR(-ENOMEM);

-   gem_object = >base;
-
-   err = drm_gem_object_init(dev, gem_object, size);
-   if (err)
-   goto out;
-
-   err = vgem_gem_get_pages(obj);
-   if (err)
-   goto out;
-
-   err = drm_gem_handle_create(file, gem_object, handle);
-   if (err)
-   goto handle_out;
+   ret = drm_gem_object_init(dev, >base, roundup(size, PAGE_SIZE));
+   if (ret)
+   goto err_free;

-   drm_gem_object_unreference_unlocked(gem_object);
+   ret = drm_gem_handle_create(file, >base, handle);
+   drm_gem_object_unreference_u

[PATCH 2/3] drm/vgem: Enable dmabuf interface for export

2016-07-11 Thread Chris Wilson
Enable the standard GEM dma-buf interface provided by the DRM core, but
only for exporting the VGEM object. This allows passing around the VGEM
objects created from the dumb interface and using them as sources
elsewhere. Creating a VGEM object for a foreign handle is not supported.

v2: With additional completeness.
v3: Need to clear the CPU cache upon exporting the dma-addresses.
v4: Use drm_gem_put_pages() as well.
v5: Use drm_prime_pages_to_sg()

Testcase: igt/vgem_basic/dmabuf-*
Testcase: igt/prime_vgem
Signed-off-by: Chris Wilson 
Cc: Sean Paul 
Cc: Zach Reizner 
Acked-by: Zach Reizner 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/vgem/vgem_drv.c | 89 -
 1 file changed, 88 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index c161b6d7e427..b5fb968d2d5c 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -192,14 +192,101 @@ static const struct file_operations vgem_driver_fops = {
.release= drm_release,
 };

+static int vgem_prime_pin(struct drm_gem_object *obj)
+{
+   long n_pages = obj->size >> PAGE_SHIFT;
+   struct page **pages;
+
+   /* Flush the object from the CPU cache so that importers can rely
+* on coherent indirect access via the exported dma-address.
+*/
+   pages = drm_gem_get_pages(obj);
+   if (IS_ERR(pages))
+   return PTR_ERR(pages);
+
+   drm_clflush_pages(pages, n_pages);
+   drm_gem_put_pages(obj, pages, true, false);
+
+   return 0;
+}
+
+static struct sg_table *vgem_prime_get_sg_table(struct drm_gem_object *obj)
+{
+   struct sg_table *st;
+   struct page **pages;
+
+   pages = drm_gem_get_pages(obj);
+   if (IS_ERR(pages))
+   return ERR_CAST(pages);
+
+   st = drm_prime_pages_to_sg(pages, obj->size >> PAGE_SHIFT);
+   drm_gem_put_pages(obj, pages, false, false);
+
+   return st;
+}
+
+static void *vgem_prime_vmap(struct drm_gem_object *obj)
+{
+   long n_pages = obj->size >> PAGE_SHIFT;
+   struct page **pages;
+   void *addr;
+
+   pages = drm_gem_get_pages(obj);
+   if (IS_ERR(pages))
+   return NULL;
+
+   addr = vmap(pages, n_pages, 0, pgprot_writecombine(PAGE_KERNEL_IO));
+   drm_gem_put_pages(obj, pages, false, false);
+
+   return addr;
+}
+
+static void vgem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
+{
+   vunmap(vaddr);
+}
+
+static int vgem_prime_mmap(struct drm_gem_object *obj,
+  struct vm_area_struct *vma)
+{
+   int ret;
+
+   if (obj->size < vma->vm_end - vma->vm_start)
+   return -EINVAL;
+
+   if (!obj->filp)
+   return -ENODEV;
+
+   ret = obj->filp->f_op->mmap(obj->filp, vma);
+   if (ret)
+   return ret;
+
+   fput(vma->vm_file);
+   vma->vm_file = get_file(obj->filp);
+   vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+   vma->vm_page_prot = 
pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+
+   return 0;
+}
+
 static struct drm_driver vgem_driver = {
-   .driver_features= DRIVER_GEM,
+   .driver_features= DRIVER_GEM | DRIVER_PRIME,
.gem_free_object_unlocked   = vgem_gem_free_object,
.gem_vm_ops = _gem_vm_ops,
.ioctls = vgem_ioctls,
.fops   = _driver_fops,
+
.dumb_create= vgem_gem_dumb_create,
.dumb_map_offset= vgem_gem_dumb_map,
+
+   .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+   .gem_prime_pin = vgem_prime_pin,
+   .gem_prime_export = drm_gem_prime_export,
+   .gem_prime_get_sg_table = vgem_prime_get_sg_table,
+   .gem_prime_vmap = vgem_prime_vmap,
+   .gem_prime_vunmap = vgem_prime_vunmap,
+   .gem_prime_mmap = vgem_prime_mmap,
+
.name   = DRIVER_NAME,
.desc   = DRIVER_DESC,
.date   = DRIVER_DATE,
-- 
2.8.1



[PATCH 3/3] drm/vgem: Attach sw fences to exported vGEM dma-buf (ioctl)

2016-07-11 Thread Chris Wilson
vGEM buffers are useful for passing data between software clients and
hardware renders. By allowing the user to create and attach fences to
the exported vGEM buffers (on the dma-buf), the user can implement a
deferred renderer and queue hardware operations like flipping and then
signal the buffer readiness (i.e. this allows the user to schedule
operations out-of-order, but have them complete in-order).

This also makes it much easier to write tightly controlled testcases for
dma-buf fencing and signaling between hardware drivers.

Testcase: igt/vgem_basic/dmabuf-fence
Signed-off-by: Chris Wilson 
Cc: Sean Paul 
Cc: Zach Reizner 
Cc: Gustavo Padovan 
Acked-by: Zach Reizner 
---
 drivers/gpu/drm/vgem/Makefile |   2 +-
 drivers/gpu/drm/vgem/vgem_drv.c   |  34 ++
 drivers/gpu/drm/vgem/vgem_drv.h   |  18 
 drivers/gpu/drm/vgem/vgem_fence.c | 220 ++
 include/uapi/drm/vgem_drm.h   |  62 +++
 5 files changed, 335 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/vgem/vgem_fence.c
 create mode 100644 include/uapi/drm/vgem_drm.h

diff --git a/drivers/gpu/drm/vgem/Makefile b/drivers/gpu/drm/vgem/Makefile
index 3f4c7b842028..bfcdea1330e6 100644
--- a/drivers/gpu/drm/vgem/Makefile
+++ b/drivers/gpu/drm/vgem/Makefile
@@ -1,4 +1,4 @@
 ccflags-y := -Iinclude/drm
-vgem-y := vgem_drv.o
+vgem-y := vgem_drv.o vgem_fence.o

 obj-$(CONFIG_DRM_VGEM) += vgem.o
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index b5fb968d2d5c..2659e5cda857 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -83,6 +83,34 @@ static const struct vm_operations_struct vgem_gem_vm_ops = {
.close = drm_gem_vm_close,
 };

+static int vgem_open(struct drm_device *dev, struct drm_file *file)
+{
+   struct vgem_file *vfile;
+   int ret;
+
+   vfile = kzalloc(sizeof(*vfile), GFP_KERNEL);
+   if (!vfile)
+   return -ENOMEM;
+
+   file->driver_priv = vfile;
+
+   ret = vgem_fence_open(vfile);
+   if (ret) {
+   kfree(vfile);
+   return ret;
+   }
+
+   return 0;
+}
+
+static void vgem_preclose(struct drm_device *dev, struct drm_file *file)
+{
+   struct vgem_file *vfile = file->driver_priv;
+
+   vgem_fence_close(vfile);
+   kfree(vfile);
+}
+
 /* ioctls */

 static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
@@ -164,6 +192,8 @@ unref:
 }

 static struct drm_ioctl_desc vgem_ioctls[] = {
+   DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
 };

 static int vgem_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -271,9 +301,12 @@ static int vgem_prime_mmap(struct drm_gem_object *obj,

 static struct drm_driver vgem_driver = {
.driver_features= DRIVER_GEM | DRIVER_PRIME,
+   .open   = vgem_open,
+   .preclose   = vgem_preclose,
.gem_free_object_unlocked   = vgem_gem_free_object,
.gem_vm_ops = _gem_vm_ops,
.ioctls = vgem_ioctls,
+   .num_ioctls = ARRAY_SIZE(vgem_ioctls),
.fops   = _driver_fops,

.dumb_create= vgem_gem_dumb_create,
@@ -328,5 +361,6 @@ module_init(vgem_init);
 module_exit(vgem_exit);

 MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/vgem/vgem_drv.h b/drivers/gpu/drm/vgem/vgem_drv.h
index 988cbaae7588..88ce21010e28 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.h
+++ b/drivers/gpu/drm/vgem/vgem_drv.h
@@ -32,9 +32,27 @@
 #include 
 #include 

+#include 
+
+struct vgem_file {
+   struct idr fence_idr;
+   struct mutex fence_mutex;
+   u64 fence_context;
+   atomic_t fence_seqno;
+};
+
 #define to_vgem_bo(x) container_of(x, struct drm_vgem_gem_object, base)
 struct drm_vgem_gem_object {
struct drm_gem_object base;
 };

+int vgem_fence_open(struct vgem_file *file);
+int vgem_fence_attach_ioctl(struct drm_device *dev,
+   void *data,
+   struct drm_file *file);
+int vgem_fence_signal_ioctl(struct drm_device *dev,
+   void *data,
+   struct drm_file *file);
+void vgem_fence_close(struct vgem_file *file);
+
 #endif
diff --git a/drivers/gpu/drm/vgem/vgem_fence.c 
b/drivers/gpu/drm/vgem/vgem_fence.c
new file mode 100644
index ..649e9e1cee35
--- /dev/null
+++ b/drivers/gpu/drm/vgem/vgem_fence.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this s

[PATCH 3/3] drm/vgem: Attach sw fences to exported vGEM dma-buf (ioctl)

2016-07-11 Thread Chris Wilson
On Mon, Jul 11, 2016 at 12:10:40PM -0300, Gustavo Padovan wrote:
> 2016-07-11 Chris Wilson :
> 
> > vGEM buffers are useful for passing data between software clients and
> > hardware renders. By allowing the user to create and attach fences to
> > the exported vGEM buffers (on the dma-buf), the user can implement a
> > deferred renderer and queue hardware operations like flipping and then
> > signal the buffer readiness (i.e. this allows the user to schedule
> > operations out-of-order, but have them complete in-order).
> > 
> > This also makes it much easier to write tightly controlled testcases for
> > dma-buf fencing and signaling between hardware drivers.
> > 
> > Testcase: igt/vgem_basic/dmabuf-fence
> > Signed-off-by: Chris Wilson 
> > Cc: Sean Paul 
> > Cc: Zach Reizner 
> > Cc: Gustavo Padovan 
> > Acked-by: Zach Reizner 
> > ---
> >  drivers/gpu/drm/vgem/Makefile |   2 +-
> >  drivers/gpu/drm/vgem/vgem_drv.c   |  34 ++
> >  drivers/gpu/drm/vgem/vgem_drv.h   |  18 
> >  drivers/gpu/drm/vgem/vgem_fence.c | 220 
> > ++
> >  include/uapi/drm/vgem_drm.h   |  62 +++
> >  5 files changed, 335 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/gpu/drm/vgem/vgem_fence.c
> >  create mode 100644 include/uapi/drm/vgem_drm.h
> > 
> > diff --git a/drivers/gpu/drm/vgem/Makefile b/drivers/gpu/drm/vgem/Makefile
> > index 3f4c7b842028..bfcdea1330e6 100644
> > --- a/drivers/gpu/drm/vgem/Makefile
> > +++ b/drivers/gpu/drm/vgem/Makefile
> > @@ -1,4 +1,4 @@
> >  ccflags-y := -Iinclude/drm
> > -vgem-y := vgem_drv.o
> > +vgem-y := vgem_drv.o vgem_fence.o
> >  
> >  obj-$(CONFIG_DRM_VGEM) += vgem.o
> > diff --git a/drivers/gpu/drm/vgem/vgem_drv.c 
> > b/drivers/gpu/drm/vgem/vgem_drv.c
> > index b5fb968d2d5c..2659e5cda857 100644
> > --- a/drivers/gpu/drm/vgem/vgem_drv.c
> > +++ b/drivers/gpu/drm/vgem/vgem_drv.c
> > @@ -83,6 +83,34 @@ static const struct vm_operations_struct vgem_gem_vm_ops 
> > = {
> > .close = drm_gem_vm_close,
> >  };
> >  
> > +static int vgem_open(struct drm_device *dev, struct drm_file *file)
> > +{
> > +   struct vgem_file *vfile;
> > +   int ret;
> > +
> > +   vfile = kzalloc(sizeof(*vfile), GFP_KERNEL);
> > +   if (!vfile)
> > +   return -ENOMEM;
> > +
> > +   file->driver_priv = vfile;
> > +
> > +   ret = vgem_fence_open(vfile);
> > +   if (ret) {
> > +   kfree(vfile);
> > +   return ret;
> > +   }
> > +
> > +   return 0;
> > +}
> > +
> > +static void vgem_preclose(struct drm_device *dev, struct drm_file *file)
> > +{
> > +   struct vgem_file *vfile = file->driver_priv;
> > +
> > +   vgem_fence_close(vfile);
> > +   kfree(vfile);
> > +}
> > +
> >  /* ioctls */
> >  
> >  static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
> > @@ -164,6 +192,8 @@ unref:
> >  }
> >  
> >  static struct drm_ioctl_desc vgem_ioctls[] = {
> > +   DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, 
> > DRM_AUTH|DRM_RENDER_ALLOW),
> > +   DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, 
> > DRM_AUTH|DRM_RENDER_ALLOW),
> >  };
> >  
> >  static int vgem_mmap(struct file *filp, struct vm_area_struct *vma)
> > @@ -271,9 +301,12 @@ static int vgem_prime_mmap(struct drm_gem_object *obj,
> >  
> >  static struct drm_driver vgem_driver = {
> > .driver_features= DRIVER_GEM | DRIVER_PRIME,
> > +   .open   = vgem_open,
> > +   .preclose   = vgem_preclose,
> > .gem_free_object_unlocked   = vgem_gem_free_object,
> > .gem_vm_ops = _gem_vm_ops,
> > .ioctls = vgem_ioctls,
> > +   .num_ioctls = ARRAY_SIZE(vgem_ioctls),
> > .fops   = _driver_fops,
> >  
> > .dumb_create= vgem_gem_dumb_create,
> > @@ -328,5 +361,6 @@ module_init(vgem_init);
> >  module_exit(vgem_exit);
> >  
> >  MODULE_AUTHOR("Red Hat, Inc.");
> > +MODULE_AUTHOR("Intel Corporation");
> >  MODULE_DESCRIPTION(DRIVER_DESC);
> >  MODULE_LICENSE("GPL and additional rights");
> > diff --git a/drivers/gpu/drm/vgem/vgem_drv.h 
> > b/drivers/gpu/drm/vgem/vgem_drv.h
> > index 988cbaae7588..88ce21010e28 100644
> > --- a/drivers/

[RFC] dma-buf: Import/export the implicit fences on the dma-buf

2016-07-11 Thread Chris Wilson
When dealing with user interfaces that utilize explicit fences, it is
convenient to sometimes create those fences after the fact, i.e. to
query the dma-buf for the current set of implicit fences, encapsulate
those into a sync_file and hand that fd back to userspace.
Correspondingly, being able to add an explicit fence back into the mix
of fences being tracked by the dma-buf allows that userspace fence to be
included in any implicit tracking.

Signed-off-by: Chris Wilson 
Cc: Gustavo Padovan 
Cc: Rob Clark 
Cc: Sumit Semwal 
Cc: Daniel Vetter 
Cc: dri-devel at lists.freedesktop.org
---

Gustavo, could you look at the sync-file/fence-array handling? There's
definitely room for a cleaner sync_file_create() API.

I was thinking about this for the "why not sync-file" question Gustavo
posed for the vgem_fences. I wanted to add an ioctl to the vgem to allow
exporting and creating fences from sync-file for testing passing 
explicit userspaces around between drivers, and realised that I was just
writing a generic mechanism that only required dma-buf.

Whilst this interface could be used for lazily creating explicit fences,
drivers will also likely want to support specific ioctl to skip the
dmabuf creation, I think this interfaces will be useful with the vgem
fences for testing sync_file handling in drivers (on i915 at the moment,
my tests for sync_file involve sleeping and a few white lies). So
fulfilling a similar role in driver testing to debugfs/sw_sync?
(sw_sync is still more apt for testing timelines etc, vgem feels more
apt for ease of testing rendering.)
-Chris

---
 drivers/dma-buf/dma-buf.c| 100 +++
 include/uapi/linux/dma-buf.h |   9 
 2 files changed, 109 insertions(+)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 41fbce0c273a..6f066a8affd7 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -26,11 +26,13 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -254,6 +256,97 @@ out:
return events;
 }

+static long dma_buf_import_fence_ioctl(struct dma_buf *dmabuf,
+  struct dma_buf_fence  __user *arg)
+{
+   struct reservation_object *resv = dmabuf->resv;
+   struct dma_buf_fence cmd;
+   struct fence *fence;
+   int ret;
+
+   if (copy_from_user(, arg, sizeof(cmd)))
+   return -EFAULT;
+
+   fence = NULL;
+   //fence = sync_file_get_fence(cmd.fd);
+   if (!fence)
+   return -EINVAL;
+
+   mutex_lock(>lock.base);
+   if (cmd.flags & DMA_BUF_FENCE_WRITE)
+   reservation_object_add_excl_fence(resv, fence);
+   else if ((ret = reservation_object_reserve_shared(resv)) == 0)
+   reservation_object_add_shared_fence(resv, fence);
+   mutex_unlock(>lock.base);
+
+   fence_put(fence);
+   return ret;
+}
+
+static long dma_buf_export_fence_ioctl(struct dma_buf *dmabuf,
+  struct dma_buf_fence  __user *arg)
+{
+   struct reservation_object *resv = dmabuf->resv;
+   struct dma_buf_fence cmd;
+   struct fence *excl, **shared;
+   struct sync_file *sync = NULL;
+   unsigned int count, n;
+   int ret;
+
+   if (get_user(cmd.flags, >flags))
+   return -EFAULT;
+
+   ret = reservation_object_get_fences_rcu(resv, , , );
+   if (ret)
+   return ret;
+
+   if (cmd.flags & DMA_BUF_FENCE_WRITE) {
+   if (excl) {
+   sync = sync_file_create(excl);
+   if (!sync) {
+   ret = -ENOMEM;
+   fence_put(excl);
+   }
+   }
+   for (n = 0; n < count; n++)
+   fence_put(shared[n]);
+   kfree(shared);
+   } else {
+   if (count) {
+   struct fence_array *array;
+
+   array = fence_array_create(count, shared, 0, 0, false);
+   if (!array) {
+   for (n = 0; n < count; n++)
+   fence_put(shared[n]);
+   kfree(shared);
+   } else
+   sync = sync_file_create(>base);
+   if (!sync) {
+   ret = -ENOMEM;
+   fence_put(>base);
+   }
+   }
+   fence_put(excl);
+   }
+   if (ret)
+   return ret;
+
+   cmd.fd = get_unused_fd_flags(O_CLOEXEC);
+   if (cmd.fd < 0) {
+   fput(sync->file);
+   return cmd.fd;
+   }
+
+   if (put_user(cmd.fd, >fd)) {
+   fput(sync->file);
+   

[Intel-gfx] [PATCH 2/3] drm/i915/guc: downgrade some DRM_ERROR() messages to DRM_WARN()

2016-07-12 Thread Chris Wilson
On Tue, Jul 12, 2016 at 10:20:43AM +0100, Tvrtko Ursulin wrote:
> On 11/07/16 19:01, Dave Gordon wrote:
> >@@ -553,8 +551,8 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
> > if (db_ret.db_status == GUC_DOORBELL_DISABLED)
> > break;
> >
> >-DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n",
> >-  db_cmp.cookie, db_ret.cookie);
> >+DRM_WARN("Cookie mismatch. Expected %d, found %d\n",
> >+ db_cmp.cookie, db_ret.cookie);
> 
> This one is interesting, error is propagated out a bit but then
> ignored in actual command submission.
> 
> If the above message means command will not be submitted error is
> probably more appropriate. Or perhaps we cannot tell if the command
> was submitted or not in this case?

It's insignificant. An actual error would result in a GPU hang, and
without being recorded in the error state any message here is useless.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH 3/3] drm/vgem: Attach sw fences to exported vGEM dma-buf (ioctl)

2016-07-12 Thread Chris Wilson
On Tue, Jul 12, 2016 at 12:44:17PM +0200, Daniel Vetter wrote:
> On Mon, Jul 11, 2016 at 04:24:45PM +0100, Chris Wilson wrote:
> > That doesn't fit the out-of-order unbound nature of the interface. The
> > interface is just a collection of fences that userspace associates with
> > the buffer that it may signal at any time. (Having no strict timeline is
> > an advantage!)
> 
> Fences on the same timeline are supposed to be signalled in-order. If you
> want full out-of-order fences then you need to grab a new timeline number
> for each one. Drivers can and do merge fences on the same timeline and
> just carry the one with the largest seqno around.

Ugh. Timelines simply don't mean anything everywhere - a very leaky
abstraction.

Nevertheless, a fence_context per vgem_fence would do the trick.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[drm-intel:topic/drm-misc 2/2] drivers/gpu/drm/vgem/vgem_drv.c:238:53: error: 'PAGE_KERNEL_IO' undeclared

2016-07-12 Thread Chris Wilson
On Tue, Jul 12, 2016 at 07:23:55PM +0800, kbuild test robot wrote:
> tree:   git://anongit.freedesktop.org/drm-intel topic/drm-misc
> head:   e6f15b763ab2bc47000ec302123e2fb3bf2ad7d4
> commit: e6f15b763ab2bc47000ec302123e2fb3bf2ad7d4 [2/2] drm/vgem: Enable 
> dmabuf interface for export
> config: sparc64-allmodconfig (attached as .config)
> compiler: sparc64-linux-gnu-gcc (Debian 5.3.1-8) 5.3.1 20160205
> reproduce:
> wget 
> https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
>  -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> git checkout e6f15b763ab2bc47000ec302123e2fb3bf2ad7d4
> # save the attached .config to linux build tree
> make.cross ARCH=sparc64 
> 
> All errors (new ones prefixed by >>):
> 
>drivers/gpu/drm/vgem/vgem_drv.c: In function 'vgem_prime_vmap':
> >> drivers/gpu/drm/vgem/vgem_drv.c:238:53: error: 'PAGE_KERNEL_IO' undeclared 
> >> (first use in this function)
>  addr = vmap(pages, n_pages, 0, pgprot_writecombine(PAGE_KERNEL_IO));
> ^
>drivers/gpu/drm/vgem/vgem_drv.c:238:53: note: each undeclared identifier 
> is reported only once for each function it appears in
> 
> vim +/PAGE_KERNEL_IO +238 drivers/gpu/drm/vgem/vgem_drv.c
> 
>232void *addr;
>233
>234pages = drm_gem_get_pages(obj);
>235if (IS_ERR(pages))
>236return NULL;
>237
>  > 238addr = vmap(pages, n_pages, 0, 
> pgprot_writecombine(PAGE_KERNEL_IO));

sparc64

I guess we need an ifdef PAGE_KERNEL_IO ?

or just rely on pgprot_writecombine(PAGE_KERNEL) being good enough
-Chris


-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH] drm/vgem: Use PAGE_KERNEL in place of x86-specific PAGE_KERNEL_IO

2016-07-12 Thread Chris Wilson
Since PAGE_KERNEL_IO is specific to x86 and equivalent to PAGE_KERNEL
for our wrapping with pgprot_writecombine(), just use the common define.

   drivers/gpu/drm/vgem/vgem_drv.c: In function 'vgem_prime_vmap':
>> drivers/gpu/drm/vgem/vgem_drv.c:238:53: error: 'PAGE_KERNEL_IO' undeclared 
>> (first use in this function)
 addr = vmap(pages, n_pages, 0, pgprot_writecombine(PAGE_KERNEL_IO));

Reported-by: 0day
Fixes: e6f15b763ab2 ("drm/vgem: Enable dmabuf interface for export")
Signed-off-by: Chris Wilson 
Cc: Matthew Auld 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/vgem/vgem_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index b5fb968d2d5c..29c2aab3c1a7 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -235,7 +235,7 @@ static void *vgem_prime_vmap(struct drm_gem_object *obj)
if (IS_ERR(pages))
return NULL;

-   addr = vmap(pages, n_pages, 0, pgprot_writecombine(PAGE_KERNEL_IO));
+   addr = vmap(pages, n_pages, 0, pgprot_writecombine(PAGE_KERNEL));
drm_gem_put_pages(obj, pages, false, false);

return addr;
-- 
2.8.1



[RFC] dma-buf: Import/export the implicit fences on the dma-buf

2016-07-12 Thread Chris Wilson
On Tue, Jul 12, 2016 at 01:53:56PM +0200, Christian König wrote:
> Am 11.07.2016 um 23:59 schrieb Chris Wilson:
> >When dealing with user interfaces that utilize explicit fences, it is
> >convenient to sometimes create those fences after the fact, i.e. to
> >query the dma-buf for the current set of implicit fences, encapsulate
> >those into a sync_file and hand that fd back to userspace.
> >Correspondingly, being able to add an explicit fence back into the mix
> >of fences being tracked by the dma-buf allows that userspace fence to be
> >included in any implicit tracking.
> 
> Well I think that this is a rather questionable interface.
> 
> For example how do you deal with race conditions? E.g. between
> querying the fences from the reservation object and adding a new one
> we could gain new fences because of the kernel swapping things out
> or another thread making some submission with this buffer.
> 
> Additional to that IIRC reservation_object_add_excl_fence()
> currently replaces all shared fences with the exclusive one. A
> malicious application could use this to trick the kernel driver into
> thinking that this buffer object is idle while it is still accessed
> by some operation. At least with GPU operations you can easily take
> over the system when you manage to get access to a page table with
> this.

The only difference here is that we believe the GPU drivers to enforce
the ordering between each other. So we can either insert a wait before
adding the exclusive fence, or we can just not expose an import ioctl.
Extracting the set of fences isn't an issue? (That's the part that has a
more legitimate usecase.)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH v2] drm/vgem: Attach sw fences to exported vGEM dma-buf (ioctl)

2016-07-12 Thread Chris Wilson
vGEM buffers are useful for passing data between software clients and
hardware renders. By allowing the user to create and attach fences to
the exported vGEM buffers (on the dma-buf), the user can implement a
deferred renderer and queue hardware operations like flipping and then
signal the buffer readiness (i.e. this allows the user to schedule
operations out-of-order, but have them complete in-order).

This also makes it much easier to write tightly controlled testcases for
dma-buf fencing and signaling between hardware drivers.

v2: Don't pretend the fences exist in an ordered timeline, but allocate
a separate fence-context for each fence so that the fences are
unordered.

Testcase: igt/vgem_basic/dmabuf-fence
Signed-off-by: Chris Wilson 
Cc: Sean Paul 
Cc: Zach Reizner 
Cc: Gustavo Padovan 
Cc: Daniel Vetter 
Acked-by: Zach Reizner 
---
 drivers/gpu/drm/vgem/Makefile |   2 +-
 drivers/gpu/drm/vgem/vgem_drv.c   |  34 +++
 drivers/gpu/drm/vgem/vgem_drv.h   |  16 +++
 drivers/gpu/drm/vgem/vgem_fence.c | 207 ++
 include/uapi/drm/vgem_drm.h   |  62 
 5 files changed, 320 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/vgem/vgem_fence.c
 create mode 100644 include/uapi/drm/vgem_drm.h

diff --git a/drivers/gpu/drm/vgem/Makefile b/drivers/gpu/drm/vgem/Makefile
index 3f4c7b842028..bfcdea1330e6 100644
--- a/drivers/gpu/drm/vgem/Makefile
+++ b/drivers/gpu/drm/vgem/Makefile
@@ -1,4 +1,4 @@
 ccflags-y := -Iinclude/drm
-vgem-y := vgem_drv.o
+vgem-y := vgem_drv.o vgem_fence.o

 obj-$(CONFIG_DRM_VGEM) += vgem.o
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 29c2aab3c1a7..c15bafb06665 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -83,6 +83,34 @@ static const struct vm_operations_struct vgem_gem_vm_ops = {
.close = drm_gem_vm_close,
 };

+static int vgem_open(struct drm_device *dev, struct drm_file *file)
+{
+   struct vgem_file *vfile;
+   int ret;
+
+   vfile = kzalloc(sizeof(*vfile), GFP_KERNEL);
+   if (!vfile)
+   return -ENOMEM;
+
+   file->driver_priv = vfile;
+
+   ret = vgem_fence_open(vfile);
+   if (ret) {
+   kfree(vfile);
+   return ret;
+   }
+
+   return 0;
+}
+
+static void vgem_preclose(struct drm_device *dev, struct drm_file *file)
+{
+   struct vgem_file *vfile = file->driver_priv;
+
+   vgem_fence_close(vfile);
+   kfree(vfile);
+}
+
 /* ioctls */

 static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
@@ -164,6 +192,8 @@ unref:
 }

 static struct drm_ioctl_desc vgem_ioctls[] = {
+   DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
 };

 static int vgem_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -271,9 +301,12 @@ static int vgem_prime_mmap(struct drm_gem_object *obj,

 static struct drm_driver vgem_driver = {
.driver_features= DRIVER_GEM | DRIVER_PRIME,
+   .open   = vgem_open,
+   .preclose   = vgem_preclose,
.gem_free_object_unlocked   = vgem_gem_free_object,
.gem_vm_ops = _gem_vm_ops,
.ioctls = vgem_ioctls,
+   .num_ioctls = ARRAY_SIZE(vgem_ioctls),
.fops   = _driver_fops,

.dumb_create= vgem_gem_dumb_create,
@@ -328,5 +361,6 @@ module_init(vgem_init);
 module_exit(vgem_exit);

 MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/vgem/vgem_drv.h b/drivers/gpu/drm/vgem/vgem_drv.h
index 988cbaae7588..1f8798ad329c 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.h
+++ b/drivers/gpu/drm/vgem/vgem_drv.h
@@ -32,9 +32,25 @@
 #include 
 #include 

+#include 
+
+struct vgem_file {
+   struct idr fence_idr;
+   struct mutex fence_mutex;
+};
+
 #define to_vgem_bo(x) container_of(x, struct drm_vgem_gem_object, base)
 struct drm_vgem_gem_object {
struct drm_gem_object base;
 };

+int vgem_fence_open(struct vgem_file *file);
+int vgem_fence_attach_ioctl(struct drm_device *dev,
+   void *data,
+   struct drm_file *file);
+int vgem_fence_signal_ioctl(struct drm_device *dev,
+   void *data,
+   struct drm_file *file);
+void vgem_fence_close(struct vgem_file *file);
+
 #endif
diff --git a/drivers/gpu/drm/vgem/vgem_fence.c 
b/drivers/gpu/drm/vgem/vgem_fence.c
new file mode 100644
index ..63095331c446
--- /dev/null
+++ b/drivers/gpu/drm/vgem/vgem_fence.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2016 

[PATCH] drm: Don't overwrite user ioctl arg unless requested

2016-07-12 Thread Chris Wilson
Currently, we completely ignore the user when it comes to the in/out
direction of the ioctl argument, as we simply cannot trust userspace.
(For example, they might request a copy of the modified ioctl argument
when the driver is not expecting such and so leak kernel stack.)
However, blindly copying over the target address may also lead to a
spurious EFAULT, and a failure after the ioctl was completed
successfully. This is important in order to avoid an ABI break when
extending an ioctl from IOR to IORW. Similar to how we only copy the
intersection of the kernel arg size and the user arg size, we only want
to copy back the kernel arg data iff both the kernel and userspace
request the copy.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/drm_ioctl.c | 50 -
 1 file changed, 22 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 2c87c1df0910..33af4a5ddca1 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -648,7 +648,7 @@ long drm_ioctl(struct file *filp,
int retcode = -EINVAL;
char stack_kdata[128];
char *kdata = NULL;
-   unsigned int usize, asize, drv_size;
+   unsigned int in_size, out_size, drv_size, ksize;
bool is_driver_ioctl;

dev = file_priv->minor->dev;
@@ -671,9 +671,12 @@ long drm_ioctl(struct file *filp,
}

drv_size = _IOC_SIZE(ioctl->cmd);
-   usize = _IOC_SIZE(cmd);
-   asize = max(usize, drv_size);
-   cmd = ioctl->cmd;
+   out_size = in_size = _IOC_SIZE(cmd);
+   if ((cmd & ioctl->cmd & IOC_IN) == 0)
+   in_size = 0;
+   if ((cmd & ioctl->cmd & IOC_OUT) == 0)
+   out_size = 0;
+   ksize = max(max(in_size, out_size), drv_size);

DRM_DEBUG("pid=%d, dev=0x%lx, auth=%d, %s\n",
  task_pid_nr(current),
@@ -693,30 +696,24 @@ long drm_ioctl(struct file *filp,
if (unlikely(retcode))
goto err_i1;

-   if (cmd & (IOC_IN | IOC_OUT)) {
-   if (asize <= sizeof(stack_kdata)) {
-   kdata = stack_kdata;
-   } else {
-   kdata = kmalloc(asize, GFP_KERNEL);
-   if (!kdata) {
-   retcode = -ENOMEM;
-   goto err_i1;
-   }
+   if (ksize <= sizeof(stack_kdata)) {
+   kdata = stack_kdata;
+   } else {
+   kdata = kmalloc(ksize, GFP_KERNEL);
+   if (!kdata) {
+   retcode = -ENOMEM;
+   goto err_i1;
}
-   if (asize > usize)
-   memset(kdata + usize, 0, asize - usize);
}

-   if (cmd & IOC_IN) {
-   if (copy_from_user(kdata, (void __user *)arg,
-  usize) != 0) {
-   retcode = -EFAULT;
-   goto err_i1;
-   }
-   } else if (cmd & IOC_OUT) {
-   memset(kdata, 0, usize);
+   if (copy_from_user(kdata, (void __user *)arg, in_size) != 0) {
+   retcode = -EFAULT;
+   goto err_i1;
}

+   if (ksize > in_size)
+   memset(kdata + in_size, 0, ksize - in_size);
+
/* Enforce sane locking for kms driver ioctls. Core ioctls are
 * too messy still. */
if ((drm_core_check_feature(dev, DRIVER_MODESET) && is_driver_ioctl) ||
@@ -728,11 +725,8 @@ long drm_ioctl(struct file *filp,
mutex_unlock(_global_mutex);
}

-   if (cmd & IOC_OUT) {
-   if (copy_to_user((void __user *)arg, kdata,
-usize) != 0)
-   retcode = -EFAULT;
-   }
+   if (copy_to_user((void __user *)arg, kdata, out_size) != 0)
+   retcode = -EFAULT;

   err_i1:
if (!ioctl)
-- 
2.8.1



[PATCH 2/9] async: Introduce kfence, a N:M completion mechanism

2016-07-13 Thread Chris Wilson
On Wed, Jul 13, 2016 at 11:38:52AM +0200, Peter Zijlstra wrote:
> On Fri, Jun 24, 2016 at 10:08:46AM +0100, Chris Wilson wrote:
> > diff --git a/kernel/async.c b/kernel/async.c
> > index d2edd6efec56..d0bcb7cc4884 100644
> > --- a/kernel/async.c
> > +++ b/kernel/async.c
> > @@ -50,6 +50,7 @@ asynchronous and synchronous parts of the kernel.
> >  
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> 
> So why does this live in async.c? It got its own header, why not also
> give it its own .c file?

I started in kernel/async (since my first goal was fine-grained
async_work serialisation). It is still in kernel/async.c as the embedded
fence inside the async_work needs a return code to integrate. I should
have done that before posting...

> Also, I'm not a particular fan of the k* naming, but I see 'fence' is
> already taken.

Agreed, I really want to rename the dma-buf fence to struct dma_fence -
we would need to do that whilst dma-buf fencing is still in its infancy.

> > +/**
> > + * DOC: kfence overview
> > + *
> > + * kfences provide synchronisation barriers between multiple processes.
> > + * They are very similar to completions, or a pthread_barrier. Where
> > + * kfence differs from completions is their ability to track multiple
> > + * event sources rather than being a singular "completion event". Similar
> > + * to completions, multiple processes or other kfences can listen or wait
> > + * upon a kfence to signal its completion.
> > + *
> > + * The kfence is a one-shot flag, signaling that work has progressed past
> > + * a certain point (as measured by completion of all events the kfence is
> > + * listening for) and the waiters upon kfence may proceed.
> > + *
> > + * kfences provide both signaling and waiting routines:
> > + *
> > + * kfence_pending()
> > + *
> > + * indicates that the kfence should itself wait for another signal. A
> > + * kfence created by kfence_create() starts in the pending state.
> 
> I would much prefer:
> 
>  *  - kfence_pending(): indicates that the kfence should itself wait for
>  *another signal. A kfence created by kfence_create() starts in the
>  *pending state.
> 
> Which is much clearer in what text belongs where.

Ok, I was just copying the style from
Documentation/scheduler/completion.txt

> Also, what !? I don't get what this function does.

Hmm. Something more like:

"To check the state of a kfence without changing it in any way, call
kfence_pending(), which returns true if the kfence is still waiting for
its event sources to be signaled."

s/signaled/completed/ depending on kfence_signal() vs kfence_complete()

> > + *
> > + * kfence_signal()
> > + *
> > + * undoes the earlier pending notification and allows the fence to complete
> > + * if all pending events have then been signaled.
> 
> So I know _signal() is the posix thing, but seeing how we already
> completions, how about being consistent with those and use _complete()
> for this?

Possibly, but we also have the dma-buf fences to try and be fairly
consistent with. struct completion is definitely a closer sibling
though. The biggest conceptual change from completions though is that a
kfence will be signaled multiple times before it is complete - I think
that is a strong argument in favour of using _signal().

> > + *
> > + * kfence_wait()
> > + *
> > + * allows the caller to sleep (uninterruptibly) until the fence is 
> > complete.
> 
> whitespace to separate the description of kfence_wait() from whatever
> else follows.
> 
> > + * Meanwhile,
> > + *
> > + * kfence_complete()
> > + *
> > + * reports whether or not the kfence has been passed.
> 
> kfence_done(), again to match completions.

Ok, will do a spin with completion naming convention and see how that
fits in (and complete the extraction to a separate .c)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[RFC] dma-buf: Rename struct fence to dma_fence

2016-07-13 Thread Chris Wilson
I plan to usurp the short name of struct fence for a core kernel struct,
and so I need to rename the specialised fence/timeline for DMA
operations to make room.

As an indication of the scale of the flag day:

 91 files changed, 904 insertions(+), 880 deletions(-)

with the greatest victim being amdgpu.

Just the highlights shown below.
-Chris

---
 drivers/base/Kconfig|   6 +-
 drivers/dma-buf/Makefile|   2 +-
 drivers/dma-buf/dma-buf.c   |  28 +-
 drivers/dma-buf/dma-fence.c | 535 
 drivers/dma-buf/fence.c | 532 ---
 drivers/dma-buf/reservation.c   |  90 ++--
 drivers/dma-buf/seqno-fence.c   |  18 +-
 drivers/dma-buf/sw_sync.c   |  44 +-
 drivers/dma-buf/sync_debug.c|   9 +-
 drivers/dma-buf/sync_debug.h|  13 +-
 drivers/dma-buf/sync_file.c |  30 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |  56 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c   |   8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  18 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |  22 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |  16 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   |  50 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  18 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c  |  24 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c|  56 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_test.c|  12 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h   |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c |  18 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c |  22 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  56 +--
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c   |   8 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   |   8 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   |  16 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c  |   8 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c  |   8 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c   |   6 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c   |   6 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |   6 +-
 drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h |   4 +-
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c   |  42 +-
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h   |  24 +-
 drivers/gpu/drm/amd/scheduler/sched_fence.c |  22 +-
 drivers/gpu/drm/drm_atomic_helper.c |   6 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem.c   |   6 +-
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c   |  46 +-
 drivers/gpu/drm/etnaviv/etnaviv_gpu.h   |   4 +-
 drivers/gpu/drm/imx/ipuv3-crtc.c|  12 +-
 drivers/gpu/drm/msm/msm_drv.h   |   2 +-
 drivers/gpu/drm/msm/msm_fence.c |  30 +-
 drivers/gpu/drm/msm/msm_fence.h |   2 +-
 drivers/gpu/drm/msm/msm_gem.c   |  14 +-
 drivers/gpu/drm/msm/msm_gem.h   |   2 +-
 drivers/gpu/drm/msm/msm_gem_submit.c|   2 +-
 drivers/gpu/drm/msm/msm_gpu.c   |   2 +-
 drivers/gpu/drm/nouveau/nouveau_bo.c|   6 +-
 drivers/gpu/drm/nouveau/nouveau_fence.c |  68 +--
 drivers/gpu/drm/nouveau/nouveau_fence.h |   6 +-
 drivers/gpu/drm/nouveau/nouveau_gem.c   |   2 +-
 drivers/gpu/drm/nouveau/nv04_fence.c|   2 +-
 drivers/gpu/drm/nouveau/nv10_fence.c|   2 +-
 drivers/gpu/drm/nouveau/nv17_fence.c|   2 +-
 drivers/gpu/drm/nouveau/nv50_fence.c|   2 +-
 drivers/gpu/drm/nouveau/nv84_fence.c|   2 +-
 drivers/gpu/drm/qxl/qxl_drv.h   |   4 +-
 drivers/gpu/drm/qxl/qxl_release.c   |  27 +-
 drivers/gpu/drm/radeon/radeon.h |  10 +-
 drivers/gpu/drm/radeon/radeon_device.c  |   2 +-
 drivers/gpu/drm/radeon/radeon_display.c |   8 +-
 drivers/gpu/drm/radeon/radeon_fence.c   |  50 +--
 drivers/gpu/drm/radeon/radeon_sync.c|   6 +-
 drivers/gpu/drm/radeon/radeon_uvd.c |   2 +-
 drivers/gpu/drm/ttm/ttm_bo.c|  24 +-
 drivers/gpu/drm/ttm/ttm_bo_util.c   |   2 +-
 drivers/gpu/drm/ttm/ttm_execbuf_util.c  |   3 +-
 drivers/gpu/drm/virtio/virtgpu_display.c|   2 +-
 drivers/gpu/drm/virtio/virtgpu_drv.h|   2 +-
 drivers/gpu/drm/virtio/virtgpu_fence.c  |  22 +-
 

[RFC] dma-buf: Rename struct fence to dma_fence

2016-07-13 Thread Chris Wilson
On Wed, Jul 13, 2016 at 11:54:50PM +0900, Inki Dae wrote:
> Hi,
> 
> 2016-07-13 23:10 GMT+09:00 Chris Wilson :
> > I plan to usurp the short name of struct fence for a core kernel struct,
> > and so I need to rename the specialised fence/timeline for DMA
> > operations to make room.
> >
> > As an indication of the scale of the flag day:
> >
> >  91 files changed, 904 insertions(+), 880 deletions(-)
> 
> Seems the files-changed stat and the patch code below are inconsistent. i.e.,
> I cannot see modified codes for Android sync driver.

The cut'n'paste doesn't include the renames which the patch below does
(i.e. it should be a more accurate representation of lines changed by
ignoring the lines moved).

> And Android sync driver - explicit fence - can use a fence object
> regardless of DMA buffer. So it looks reasonable to use 'fence' as-is.
> Was there any changes for Android sync driver to be dependent on DMA buffer?

This was based on Gustavo Padovan's destaged sync tree, so all the
Android changes should be inside drivers/dma-buf/*sync*

I was using grep to find all users of struct fence and callers of
fence_*() so I don't think I missed any drivers/staging/ - and obviously
this will have to repeated closer to the flag day.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH 1/2] drm/sun4i: Remove redundant call to drm_connector_unregister_all()

2016-07-13 Thread Chris Wilson
drm_connector_unregister_all() is automatically called by
drm_dev_unregister() and so the manual call can be dropped.

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Cc: Maxime Ripard 
Cc: David Airlie 
Cc: Chen-Yu Tsai 
Cc: dri-devel at lists.freedesktop.org
Cc: linux-arm-kernel at lists.infradead.org
---
 drivers/gpu/drm/sun4i/sun4i_drv.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c 
b/drivers/gpu/drm/sun4i/sun4i_drv.c
index 4dc543e1db10..7092daaf6c43 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -185,7 +185,6 @@ static void sun4i_drv_unbind(struct device *dev)
 {
struct drm_device *drm = dev_get_drvdata(dev);

-   drm_connector_unregister_all(drm);
drm_dev_unregister(drm);
drm_kms_helper_poll_fini(drm);
sun4i_framebuffer_free(drm);
-- 
2.8.1



[PATCH 2/2] drm: Unexport drm_connector_unregister_all()

2016-07-13 Thread Chris Wilson
This has now been removed from all drivers as it is performed centrally
as a part of device unregistration for modesetting drivers. With the last
user gone, we can unexport it from the DRM module. That requires us to
move the code slightly to avoid the need for a forward declaration.

Signed-off-by: Chris Wilson 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: dri-devel at lists.freedesktop.org
---
 drivers/gpu/drm/drm_crtc.c | 29 +
 include/drm/drm_crtc.h |  3 ---
 2 files changed, 9 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index c85963f4f1dc..f65b75949c20 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -1115,6 +1115,15 @@ void drm_connector_unregister(struct drm_connector 
*connector)
 }
 EXPORT_SYMBOL(drm_connector_unregister);

+static void drm_connector_unregister_all(struct drm_device *dev)
+{
+   struct drm_connector *connector;
+
+   /* FIXME: taking the mode config mutex ends up in a clash with sysfs */
+   list_for_each_entry(connector, >mode_config.connector_list, head)
+   drm_connector_unregister(connector);
+}
+
 static int drm_connector_register_all(struct drm_device *dev)
 {
struct drm_connector *connector;
@@ -1138,26 +1147,6 @@ err:
return ret;
 }

-/**
- * drm_connector_unregister_all - unregister connector userspace interfaces
- * @dev: drm device
- *
- * This functions unregisters all connectors from sysfs and other places so
- * that userspace can no longer access them. Drivers should call this as the
- * first step tearing down the device instace, or when the underlying
- * physical device disappeared (e.g. USB unplug), right before calling
- * drm_dev_unregister().
- */
-void drm_connector_unregister_all(struct drm_device *dev)
-{
-   struct drm_connector *connector;
-
-   /* FIXME: taking the mode config mutex ends up in a clash with sysfs */
-   list_for_each_entry(connector, >mode_config.connector_list, head)
-   drm_connector_unregister(connector);
-}
-EXPORT_SYMBOL(drm_connector_unregister_all);
-
 static int drm_encoder_register_all(struct drm_device *dev)
 {
struct drm_encoder *encoder;
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index ddaa7243af55..b1e72322ebd6 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -2588,9 +2588,6 @@ static inline unsigned drm_connector_index(struct 
drm_connector *connector)
return connector->connector_id;
 }

-/* helpers to {un}register all connectors from sysfs for device */
-extern void drm_connector_unregister_all(struct drm_device *dev);
-
 extern __printf(5, 6)
 int drm_encoder_init(struct drm_device *dev,
 struct drm_encoder *encoder,
-- 
2.8.1



[PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

2016-07-13 Thread Chris Wilson
On Tue, Jul 12, 2016 at 06:49:34PM +0200, Peter Wu wrote:
> The FBIOPUT_CON2FBMAP ioctl takes a console_lock(). When this is called
> while nouveau was runtime suspended, a deadlock would occur due to
> nouveau_fbcon_set_suspend also trying to obtain console_lock().
> 
> Fix this by delaying the drm_fb_helper_set_suspend call. Based on the
> i915 code (which was done for performance reasons though).
> 
> Cc: Chris Wilson 
> Cc: Daniel Vetter 
> Signed-off-by: Peter Wu 
> ---
> Tested on top of v4.7-rc5, the deadlock is gone.
> ---
>  drivers/gpu/drm/nouveau/nouveau_drm.c   |  4 +--
>  drivers/gpu/drm/nouveau/nouveau_drv.h   |  1 +
>  drivers/gpu/drm/nouveau/nouveau_fbcon.c | 54 
> -
>  drivers/gpu/drm/nouveau/nouveau_fbcon.h |  2 +-
>  4 files changed, 50 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c 
> b/drivers/gpu/drm/nouveau/nouveau_drm.c
> index 11f8dd9..f9a2c10 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drm.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
> @@ -552,7 +552,7 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime)
>  
>   if (dev->mode_config.num_crtc) {
>   NV_INFO(drm, "suspending console...\n");
> - nouveau_fbcon_set_suspend(dev, 1);
> + nouveau_fbcon_set_suspend(dev, FBINFO_STATE_SUSPENDED, true);
>   NV_INFO(drm, "suspending display...\n");
>   ret = nouveau_display_suspend(dev, runtime);
>   if (ret)
> @@ -635,7 +635,7 @@ nouveau_do_resume(struct drm_device *dev, bool runtime)
>   NV_INFO(drm, "resuming display...\n");
>   nouveau_display_resume(dev, runtime);
>   NV_INFO(drm, "resuming console...\n");
> - nouveau_fbcon_set_suspend(dev, 0);
> + nouveau_fbcon_set_suspend(dev, FBINFO_STATE_RUNNING, false);
>   }
>  
>   return 0;
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h 
> b/drivers/gpu/drm/nouveau/nouveau_drv.h
> index 822a021..a743d19 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
> @@ -147,6 +147,7 @@ struct nouveau_drm {
>   struct nouveau_channel *channel;
>   struct nvkm_gpuobj *notify;
>   struct nouveau_fbdev *fbcon;
> + struct work_struct fbdev_suspend_work;
>   struct nvif_object nvsw;
>   struct nvif_object ntfy;
>   struct nvif_notify flip;
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c 
> b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> index d1f248f..089156a 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> @@ -492,19 +492,53 @@ static const struct drm_fb_helper_funcs 
> nouveau_fbcon_helper_funcs = {
>   .fb_probe = nouveau_fbcon_create,
>  };
>  
> +static void nouveau_fbcon_suspend_worker(struct work_struct *work)
> +{
> + nouveau_fbcon_set_suspend(container_of(work,
> +struct nouveau_drm,
> +fbdev_suspend_work)->dev,
> +   FBINFO_STATE_RUNNING,
> +   true);
> +}
> +
>  void
> -nouveau_fbcon_set_suspend(struct drm_device *dev, int state)
> +nouveau_fbcon_set_suspend(struct drm_device *dev, int state, bool 
> synchronous)
>  {
>   struct nouveau_drm *drm = nouveau_drm(dev);
> - if (drm->fbcon) {
> - console_lock();
> - if (state == FBINFO_STATE_RUNNING)
> - nouveau_fbcon_accel_restore(dev);
> - drm_fb_helper_set_suspend(>fbcon->helper, state);
> + if (!drm->fbcon)
> + return;
> +
> + if (synchronous) {
> + /* Flush any pending work to turn the console on, and then
> +  * wait to turn it off. It must be synchronous as we are
> +  * about to suspend or unload the driver.
> +  *
> +  * Note that from within the work-handler, we cannot flush
> +  * ourselves, so only flush outstanding work upon suspend!
> +  */
>   if (state != FBINFO_STATE_RUNNING)
> - nouveau_fbcon_accel_save_disable(dev);
> - console_unlock();
> + flush_work(>fbdev_suspend_work);
> + console_lock();
> + } else {
> + /*
> +  * The console lock can be pretty contented on resume due
> +  * to all the printk activity.  Try to keep it out of the hot
> +  * path of resume if possible.  This also prevents a de

[PATCH 1/2] drm/sun4i: Remove redundant call to drm_connector_unregister_all()

2016-07-13 Thread Chris Wilson
On Wed, Jul 13, 2016 at 10:56:58AM -0700, Sean Paul wrote:
> On Wed, Jul 13, 2016 at 9:39 AM, Chris Wilson  
> wrote:
> > drm_connector_unregister_all() is automatically called by
> > drm_dev_unregister() and so the manual call can be dropped.
> >
> 
> The documentation for drm_connector_unregister_all says "Drivers
> should call this [...] right before calling drm_dev_unregister()". If
> this is no longer true, could you update that comment as part of this
> series?

That is done. (The comment block is entirely removed so that we don't
distract authors with superfluous functions that they cannot call
themselves, i.e. Daniel wanted only the DRM interfaces documented.)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[Intel-gfx] [PATCH v3] drm/i915/skl: Add support for the SAGV, fix underrun hangs

2016-07-13 Thread Chris Wilson
On Wed, Jul 13, 2016 at 11:17:52AM -0700, Matt Roper wrote:
> On Wed, Jul 13, 2016 at 09:12:09PM +0300, Ville Syrjälä wrote:
> > We have wait_for()/_wait_for() for polling stuff.
> 
> Those just block until a condition becomes true, right?  In this case my
> understanding from the bspec is that we need to keep re-writing the SAGV
> disable until it sticks.

the condition is an arbitrary expression such as

static inline bool sagv_disabled(struct drm_i915_private *dev_priv)
{
u32 tmp;

if (sandybridge_pcode_write(dev_priv,
GEN9_PCODE_SAGV_CONTROL,
GEN9_SAGV_DISABLE))
goto error;

if (sandybridge_pcode_read(dev_priv,
   GEN9_PCODE_SAGV_CONTROL,
   ))
goto error;

return tmp & 1;

error:
DRM_ERROR("Failed to disable the SAGV\n");
return true;
}


ret = wait_for(sagv_disabled(dev_priv), 1);
if (ret)
DRM_ERROR("Timed out waiting for SAGV to be disabled\n");

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH v2] drm/vgem: Attach sw fences to exported vGEM dma-buf (ioctl)

2016-07-13 Thread Chris Wilson
On Tue, Jul 12, 2016 at 01:46:21PM +0100, Chris Wilson wrote:
> vGEM buffers are useful for passing data between software clients and
> hardware renders. By allowing the user to create and attach fences to
> the exported vGEM buffers (on the dma-buf), the user can implement a
> deferred renderer and queue hardware operations like flipping and then
> signal the buffer readiness (i.e. this allows the user to schedule
> operations out-of-order, but have them complete in-order).
> 
> This also makes it much easier to write tightly controlled testcases for
> dma-buf fencing and signaling between hardware drivers.
> 
> v2: Don't pretend the fences exist in an ordered timeline, but allocate
> a separate fence-context for each fence so that the fences are
> unordered.
> 
> Testcase: igt/vgem_basic/dmabuf-fence
> Signed-off-by: Chris Wilson 
> Cc: Sean Paul 
> Cc: Zach Reizner 
> Cc: Gustavo Padovan 
> Cc: Daniel Vetter 
> Acked-by: Zach Reizner 

For purely selfish reasons that this enables more testing for i915,
poke?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[Beignet] [PATCH] intel: Export pooled EU and min no. of eus in a pool.

2016-07-13 Thread Chris Wilson
On Wed, Jul 13, 2016 at 11:44:09AM +0100, Arun Siluvery wrote:
> On 06/07/2016 05:51, Yang Rong wrote:
> >Update kernel interface with new I915_GETPARAM ioctl entries for
> >pooled EU and min no. of eus in a pool. Add a wrapping function
> >for each parameter. Userspace drivers need these values when decide
> >the thread count. This kernel enabled pooled eu by default for BXT
> >and for fused down 2x6 parts it is advised to turn it off.
> >
> >But there is another HW issue in these parts (fused
> >down 2x6 parts) before C0 that requires Pooled EU to be enabled as a
> >workaround. In this case the pool configuration changes depending upon
> >which subslice is disabled and the no. of eus in a pool is different,
> >So userspace need to know min no. of eus in a pool.
> >
> >Signed-off-by: Yang Rong 
> >---
> 
> Could you check this and give comments/ACK to merge this to libdrm?
> Kernel changes are merged in drm-intel.
> 
> regards
> Arun
> 
> >  include/drm/i915_drm.h   |  2 ++
> >  intel/intel_bufmgr.h |  3 +++
> >  intel/intel_bufmgr_gem.c | 32 
> >  3 files changed, 37 insertions(+)
> >
> >diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> >index c4ce6b2..eb611a7 100644
> >--- a/include/drm/i915_drm.h
> >+++ b/include/drm/i915_drm.h
> >@@ -357,6 +357,8 @@ typedef struct drm_i915_irq_wait {
> >  #define I915_PARAM_HAS_GPU_RESET35
> >  #define I915_PARAM_HAS_RESOURCE_STREAMER 36
> >  #define I915_PARAM_HAS_EXEC_SOFTPIN 37
> >+#define I915_PARAM_HAS_POOLED_EU 38
> >+#define I915_PARAM_MIN_EU_IN_POOL39
> >
> >  typedef struct drm_i915_getparam {
> > __s32 param;
> >diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
> >index a1abbcd..8370694 100644
> >--- a/intel/intel_bufmgr.h
> >+++ b/intel/intel_bufmgr.h
> >@@ -273,6 +273,9 @@ int drm_intel_get_reset_stats(drm_intel_context *ctx,
> >  int drm_intel_get_subslice_total(int fd, unsigned int *subslice_total);
> >  int drm_intel_get_eu_total(int fd, unsigned int *eu_total);
> >
> >+int drm_intel_get_pooled_eu(int fd, unsigned int *has_pooled_eu);
> >+int drm_intel_get_min_eu_in_pool(int fd, unsigned int *min_eu);

Do we export more than 2 billion eu? Or more than 2 billion states for
the bool has_pooled_eu?

Could we not just use

int drm_intel_get_pooled_eu(int fd);

ret < 0 => error occurred / unknown
ret == 0 => no
ret > 0 => yes

and same for get_min_eu.

> >+
> >  /** @{ Compatibility defines to keep old code building despite the symbol 
> > rename
> >   * from dri_* to drm_intel_*
> >   */
> >diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> >index 0a4012b..b8bb654 100644
> >--- a/intel/intel_bufmgr_gem.c
> >+++ b/intel/intel_bufmgr_gem.c
> >@@ -3237,6 +3237,38 @@ drm_intel_get_eu_total(int fd, unsigned int *eu_total)
> > return 0;
> >  }
> >
> >+int
> >+drm_intel_get_pooled_eu(int fd, unsigned int *has_pooled_eu)
> >+{
> >+drm_i915_getparam_t gp;
> >+int ret;
> >+
> >+memclear(gp);
> >+gp.value = (int*)has_pooled_eu;
> >+gp.param = I915_PARAM_HAS_POOLED_EU;
> >+ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, );
> >+if (ret)

save a few electrons

int drm_intel_get_pooled_eu(int fd)
{
drm_i915_getparam_t gp;
int ret;

memclear(gp);
gp.param = I915_PARAM_HAS_POOLED_EU;
gp.value = 
if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, ))
return -errno;

return ret;
}

(save a few more by calling this int __getparam(int fd, unsigned name))
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[Intel-gfx] [PATCH v2] drm/vgem: Attach sw fences to exported vGEM dma-buf (ioctl)

2016-07-13 Thread Chris Wilson
On Wed, Jul 13, 2016 at 05:29:21PM -0300, Gustavo Padovan wrote:
> 2016-07-12 Chris Wilson :
> 
> > vGEM buffers are useful for passing data between software clients and
> > hardware renders. By allowing the user to create and attach fences to
> > the exported vGEM buffers (on the dma-buf), the user can implement a
> > deferred renderer and queue hardware operations like flipping and then
> > signal the buffer readiness (i.e. this allows the user to schedule
> > operations out-of-order, but have them complete in-order).
> > 
> > This also makes it much easier to write tightly controlled testcases for
> > dma-buf fencing and signaling between hardware drivers.
> > 
> > v2: Don't pretend the fences exist in an ordered timeline, but allocate
> > a separate fence-context for each fence so that the fences are
> > unordered.
> > 
> > Testcase: igt/vgem_basic/dmabuf-fence
> > Signed-off-by: Chris Wilson 
> > Cc: Sean Paul 
> > Cc: Zach Reizner 
> > Cc: Gustavo Padovan 
> > Cc: Daniel Vetter 
> > Acked-by: Zach Reizner 
> > ---
> >  drivers/gpu/drm/vgem/Makefile |   2 +-
> >  drivers/gpu/drm/vgem/vgem_drv.c   |  34 +++
> >  drivers/gpu/drm/vgem/vgem_drv.h   |  16 +++
> >  drivers/gpu/drm/vgem/vgem_fence.c | 207 
> > ++
> >  include/uapi/drm/vgem_drm.h   |  62 
> >  5 files changed, 320 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/gpu/drm/vgem/vgem_fence.c
> >  create mode 100644 include/uapi/drm/vgem_drm.h
> > 
> > diff --git a/drivers/gpu/drm/vgem/Makefile b/drivers/gpu/drm/vgem/Makefile
> > index 3f4c7b842028..bfcdea1330e6 100644
> > --- a/drivers/gpu/drm/vgem/Makefile
> > +++ b/drivers/gpu/drm/vgem/Makefile
> > @@ -1,4 +1,4 @@
> >  ccflags-y := -Iinclude/drm
> > -vgem-y := vgem_drv.o
> > +vgem-y := vgem_drv.o vgem_fence.o
> >  
> >  obj-$(CONFIG_DRM_VGEM) += vgem.o
> > diff --git a/drivers/gpu/drm/vgem/vgem_drv.c 
> > b/drivers/gpu/drm/vgem/vgem_drv.c
> > index 29c2aab3c1a7..c15bafb06665 100644
> > --- a/drivers/gpu/drm/vgem/vgem_drv.c
> > +++ b/drivers/gpu/drm/vgem/vgem_drv.c
> > @@ -83,6 +83,34 @@ static const struct vm_operations_struct vgem_gem_vm_ops 
> > = {
> > .close = drm_gem_vm_close,
> >  };
> >  
> > +static int vgem_open(struct drm_device *dev, struct drm_file *file)
> > +{
> > +   struct vgem_file *vfile;
> > +   int ret;
> > +
> > +   vfile = kzalloc(sizeof(*vfile), GFP_KERNEL);
> > +   if (!vfile)
> > +   return -ENOMEM;
> > +
> > +   file->driver_priv = vfile;
> > +
> > +   ret = vgem_fence_open(vfile);
> > +   if (ret) {
> > +   kfree(vfile);
> > +   return ret;
> > +   }
> > +
> > +   return 0;
> > +}
> > +
> > +static void vgem_preclose(struct drm_device *dev, struct drm_file *file)
> > +{
> > +   struct vgem_file *vfile = file->driver_priv;
> > +
> > +   vgem_fence_close(vfile);
> > +   kfree(vfile);
> > +}
> > +
> >  /* ioctls */
> >  
> >  static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
> > @@ -164,6 +192,8 @@ unref:
> >  }
> >  
> >  static struct drm_ioctl_desc vgem_ioctls[] = {
> > +   DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, 
> > DRM_AUTH|DRM_RENDER_ALLOW),
> > +   DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, 
> > DRM_AUTH|DRM_RENDER_ALLOW),
> >  };
> >  
> >  static int vgem_mmap(struct file *filp, struct vm_area_struct *vma)
> > @@ -271,9 +301,12 @@ static int vgem_prime_mmap(struct drm_gem_object *obj,
> >  
> >  static struct drm_driver vgem_driver = {
> > .driver_features= DRIVER_GEM | DRIVER_PRIME,
> > +   .open   = vgem_open,
> > +   .preclose   = vgem_preclose,
> > .gem_free_object_unlocked   = vgem_gem_free_object,
> > .gem_vm_ops = _gem_vm_ops,
> > .ioctls = vgem_ioctls,
> > +   .num_ioctls = ARRAY_SIZE(vgem_ioctls),
> > .fops   = _driver_fops,
> >  
> > .dumb_create= vgem_gem_dumb_create,
> > @@ -328,5 +361,6 @@ module_init(vgem_init);
> >  module_exit(vgem_exit);
> >  
> >  MODULE_AUTHOR("Red Hat, Inc.");
> > +MODULE_AUTHOR("Intel Corporation");
> >  MODULE_DESCRIPTION(DRIVER_DESC);
>

[PATCH v3] drm/vgem: Attach sw fences to exported vGEM dma-buf (ioctl)

2016-07-14 Thread Chris Wilson
vGEM buffers are useful for passing data between software clients and
hardware renders. By allowing the user to create and attach fences to
the exported vGEM buffers (on the dma-buf), the user can implement a
deferred renderer and queue hardware operations like flipping and then
signal the buffer readiness (i.e. this allows the user to schedule
operations out-of-order, but have them complete in-order).

This also makes it much easier to write tightly controlled testcases for
dma-buf fencing and signaling between hardware drivers.

v2: Don't pretend the fences exist in an ordered timeline, but allocate
a separate fence-context for each fence so that the fences are
unordered.
v3: Make the debug output more interesting, and so the signaled status.

Testcase: igt/vgem_basic/dmabuf-fence
Signed-off-by: Chris Wilson 
Cc: Sean Paul 
Cc: Zach Reizner 
Cc: Gustavo Padovan 
Cc: Daniel Vetter 
Acked-by: Zach Reizner 
---
 drivers/gpu/drm/vgem/Makefile |   2 +-
 drivers/gpu/drm/vgem/vgem_drv.c   |  34 +++
 drivers/gpu/drm/vgem/vgem_drv.h   |  16 +++
 drivers/gpu/drm/vgem/vgem_fence.c | 207 ++
 include/uapi/drm/vgem_drm.h   |  62 
 5 files changed, 320 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/vgem/vgem_fence.c
 create mode 100644 include/uapi/drm/vgem_drm.h

diff --git a/drivers/gpu/drm/vgem/Makefile b/drivers/gpu/drm/vgem/Makefile
index 3f4c7b842028..bfcdea1330e6 100644
--- a/drivers/gpu/drm/vgem/Makefile
+++ b/drivers/gpu/drm/vgem/Makefile
@@ -1,4 +1,4 @@
 ccflags-y := -Iinclude/drm
-vgem-y := vgem_drv.o
+vgem-y := vgem_drv.o vgem_fence.o

 obj-$(CONFIG_DRM_VGEM) += vgem.o
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 29c2aab3c1a7..c15bafb06665 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -83,6 +83,34 @@ static const struct vm_operations_struct vgem_gem_vm_ops = {
.close = drm_gem_vm_close,
 };

+static int vgem_open(struct drm_device *dev, struct drm_file *file)
+{
+   struct vgem_file *vfile;
+   int ret;
+
+   vfile = kzalloc(sizeof(*vfile), GFP_KERNEL);
+   if (!vfile)
+   return -ENOMEM;
+
+   file->driver_priv = vfile;
+
+   ret = vgem_fence_open(vfile);
+   if (ret) {
+   kfree(vfile);
+   return ret;
+   }
+
+   return 0;
+}
+
+static void vgem_preclose(struct drm_device *dev, struct drm_file *file)
+{
+   struct vgem_file *vfile = file->driver_priv;
+
+   vgem_fence_close(vfile);
+   kfree(vfile);
+}
+
 /* ioctls */

 static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
@@ -164,6 +192,8 @@ unref:
 }

 static struct drm_ioctl_desc vgem_ioctls[] = {
+   DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
 };

 static int vgem_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -271,9 +301,12 @@ static int vgem_prime_mmap(struct drm_gem_object *obj,

 static struct drm_driver vgem_driver = {
.driver_features= DRIVER_GEM | DRIVER_PRIME,
+   .open   = vgem_open,
+   .preclose   = vgem_preclose,
.gem_free_object_unlocked   = vgem_gem_free_object,
.gem_vm_ops = _gem_vm_ops,
.ioctls = vgem_ioctls,
+   .num_ioctls = ARRAY_SIZE(vgem_ioctls),
.fops   = _driver_fops,

.dumb_create= vgem_gem_dumb_create,
@@ -328,5 +361,6 @@ module_init(vgem_init);
 module_exit(vgem_exit);

 MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/vgem/vgem_drv.h b/drivers/gpu/drm/vgem/vgem_drv.h
index 988cbaae7588..1f8798ad329c 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.h
+++ b/drivers/gpu/drm/vgem/vgem_drv.h
@@ -32,9 +32,25 @@
 #include 
 #include 

+#include 
+
+struct vgem_file {
+   struct idr fence_idr;
+   struct mutex fence_mutex;
+};
+
 #define to_vgem_bo(x) container_of(x, struct drm_vgem_gem_object, base)
 struct drm_vgem_gem_object {
struct drm_gem_object base;
 };

+int vgem_fence_open(struct vgem_file *file);
+int vgem_fence_attach_ioctl(struct drm_device *dev,
+   void *data,
+   struct drm_file *file);
+int vgem_fence_signal_ioctl(struct drm_device *dev,
+   void *data,
+   struct drm_file *file);
+void vgem_fence_close(struct vgem_file *file);
+
 #endif
diff --git a/drivers/gpu/drm/vgem/vgem_fence.c 
b/drivers/gpu/drm/vgem/vgem_fence.c
new file mode 100644
index ..b7da11419ad6
--- /dev/null
+++ b/drive

[PATCH v3] drm/vgem: Attach sw fences to exported vGEM dma-buf (ioctl)

2016-07-14 Thread Chris Wilson
On Thu, Jul 14, 2016 at 10:12:17AM +0200, Daniel Vetter wrote:
> On Thu, Jul 14, 2016 at 08:04:19AM +0100, Chris Wilson wrote:
> > vGEM buffers are useful for passing data between software clients and
> > hardware renders. By allowing the user to create and attach fences to
> > the exported vGEM buffers (on the dma-buf), the user can implement a
> > deferred renderer and queue hardware operations like flipping and then
> > signal the buffer readiness (i.e. this allows the user to schedule
> > operations out-of-order, but have them complete in-order).
> > 
> > This also makes it much easier to write tightly controlled testcases for
> > dma-buf fencing and signaling between hardware drivers.
> > 
> > v2: Don't pretend the fences exist in an ordered timeline, but allocate
> > a separate fence-context for each fence so that the fences are
> > unordered.
> > v3: Make the debug output more interesting, and so the signaled status.
> > 
> > Testcase: igt/vgem_basic/dmabuf-fence
> > Signed-off-by: Chris Wilson 
> > Cc: Sean Paul 
> > Cc: Zach Reizner 
> > Cc: Gustavo Padovan 
> > Cc: Daniel Vetter 
> > Acked-by: Zach Reizner 
> 
> One thing I completely forgotten: This allows userspace to hang kernel
> drivers. i915 (and other gpu drivers) can recover using hangcheck, but
> dumber drivers (v4l, if that ever happens) probably never expect such a
> case. We've had a similar discussion with the userspace fences exposed in
> sw_fence, and decided to move all those ioctl into debugfs. I think we
> should do the same for this vgem-based debugging of implicit sync. Sorry
> for realizing this this late.

One of the very tests I make is to ensure that we recover from such a
hang. I don't see the difference between this and any of the other ways
userspace can shoot itself (and others) in the foot.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[Intel-gfx] [PATCH v3] drm/vgem: Attach sw fences to exported vGEM dma-buf (ioctl)

2016-07-14 Thread Chris Wilson
On Thu, Jul 14, 2016 at 10:59:04AM +0100, Chris Wilson wrote:
> On Thu, Jul 14, 2016 at 10:12:17AM +0200, Daniel Vetter wrote:
> > On Thu, Jul 14, 2016 at 08:04:19AM +0100, Chris Wilson wrote:
> > > vGEM buffers are useful for passing data between software clients and
> > > hardware renders. By allowing the user to create and attach fences to
> > > the exported vGEM buffers (on the dma-buf), the user can implement a
> > > deferred renderer and queue hardware operations like flipping and then
> > > signal the buffer readiness (i.e. this allows the user to schedule
> > > operations out-of-order, but have them complete in-order).
> > > 
> > > This also makes it much easier to write tightly controlled testcases for
> > > dma-buf fencing and signaling between hardware drivers.
> > > 
> > > v2: Don't pretend the fences exist in an ordered timeline, but allocate
> > > a separate fence-context for each fence so that the fences are
> > > unordered.
> > > v3: Make the debug output more interesting, and so the signaled status.
> > > 
> > > Testcase: igt/vgem_basic/dmabuf-fence
> > > Signed-off-by: Chris Wilson 
> > > Cc: Sean Paul 
> > > Cc: Zach Reizner 
> > > Cc: Gustavo Padovan 
> > > Cc: Daniel Vetter 
> > > Acked-by: Zach Reizner 
> > 
> > One thing I completely forgotten: This allows userspace to hang kernel
> > drivers. i915 (and other gpu drivers) can recover using hangcheck, but
> > dumber drivers (v4l, if that ever happens) probably never except such a
> > case. We've had a similar discusion with the userspace fences exposed in
> > sw_fence, and decided to move all those ioctl into debugfs. I think we
> > should do the same for this vgem-based debugging of implicit sync. Sorry
> > for realizing this this late.
> 
> One of the very tests I make is to ensure that we recover from such a
> hang. I don't see the difference between this any of the other ways
> userspace can shoot itself (and others) in the foot.

So one solution would be to make vgem fences automatically timeout (with
a flag for root to override for the sake of testing hang detection).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[Intel-gfx] [PATCH v3] drm/vgem: Attach sw fences to exported vGEM dma-buf (ioctl)

2016-07-14 Thread Chris Wilson
On Thu, Jul 14, 2016 at 11:11:02AM +0100, Chris Wilson wrote:
> So one solution would be to make vgem fences automatically timeout (with
> a flag for root to override for the sake of testing hang detection).

diff --git a/drivers/gpu/drm/vgem/vgem_fence.c 
b/drivers/gpu/drm/vgem/vgem_fence.c
index b7da11419ad6..17c63c9a8ea0 100644
--- a/drivers/gpu/drm/vgem/vgem_fence.c
+++ b/drivers/gpu/drm/vgem/vgem_fence.c
@@ -28,6 +28,7 @@
 struct vgem_fence {
struct fence base;
struct spinlock lock;
+   struct timer_list timer;
 };

 static const char *vgem_fence_get_driver_name(struct fence *fence)
@@ -50,6 +51,14 @@ static bool vgem_fence_enable_signaling(struct fence *fence)
return true;
 }

+static void vgem_fence_release(struct fence *base)
+{
+   struct vgem_fence *fence = container_of(base, typeof(*fence), base);
+
+   del_timer_sync(>timer);
+   fence_free(>base);
+}
+
 static void vgem_fence_value_str(struct fence *fence, char *str, int size)
 {
snprintf(str, size, "%u", fence->seqno);
@@ -67,11 +76,21 @@ const struct fence_ops vgem_fence_ops = {
.enable_signaling = vgem_fence_enable_signaling,
.signaled = vgem_fence_signaled,
.wait = fence_default_wait,
+   .release = vgem_fence_release,
+
.fence_value_str = vgem_fence_value_str,
.timeline_value_str = vgem_fence_timeline_value_str,
 };

-static struct fence *vgem_fence_create(struct vgem_file *vfile)
+static void vgem_fence_timeout(unsigned long data)
+{
+   struct vgem_fence *fence = (struct vgem_fence *)data;
+
+   fence_signal(>base);
+}
+
+static struct fence *vgem_fence_create(struct vgem_file *vfile,
+  unsigned int flags)
 {
struct vgem_fence *fence;

@@ -83,6 +102,12 @@ static struct fence *vgem_fence_create(struct vgem_file 
*vfile)
fence_init(>base, _fence_ops, >lock,
   fence_context_alloc(1), 1);

+   setup_timer(>timer, vgem_fence_timeout, (unsigned long)fence);
+
+   /* We force the fence to expire within 10s to prevent driver hangs */
+   if (!(flags & VGEM_FENCE_NOTIMEOUT))
+   mod_timer(>timer, 10*HZ);
+
return >base;
 }

@@ -114,9 +139,12 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
struct fence *fence;
int ret;

-   if (arg->flags & ~VGEM_FENCE_WRITE)
+   if (arg->flags & ~(VGEM_FENCE_WRITE | VGEM_FENCE_NOTIMEOUT))
return -EINVAL;

+   if (arg->flags & VGEM_FENCE_NOTIMEOUT && !capable(CAP_SYS_ADMIN))
+   return -EPERM;
+
if (arg->pad)
return -EINVAL;

@@ -128,7 +156,7 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
if (ret)
goto out;

-   fence = vgem_fence_create(vfile);
+   fence = vgem_fence_create(vfile, arg->flags);
if (!fence) {
ret = -ENOMEM;
goto out;
diff --git a/include/uapi/drm/vgem_drm.h b/include/uapi/drm/vgem_drm.h
index 352d2fae8de9..55fd08750773 100644
--- a/include/uapi/drm/vgem_drm.h
+++ b/include/uapi/drm/vgem_drm.h
@@ -45,7 +45,8 @@ extern "C" {
 struct drm_vgem_fence_attach {
__u32 handle;
__u32 flags;
-#define VGEM_FENCE_WRITE 0x1
+#define VGEM_FENCE_WRITE   0x1
+#define VGEM_FENCE_NOTIMEOUT   0x2
__u32 out_fence;
__u32 pad;
 };


-- 
Chris Wilson, Intel Open Source Technology Centre


[Intel-gfx] [PATCH v3] drm/vgem: Attach sw fences to exported vGEM dma-buf (ioctl)

2016-07-14 Thread Chris Wilson
On Thu, Jul 14, 2016 at 02:40:59PM +0200, Daniel Vetter wrote:
> On Thu, Jul 14, 2016 at 11:11:02AM +0100, Chris Wilson wrote:
> > On Thu, Jul 14, 2016 at 10:59:04AM +0100, Chris Wilson wrote:
> > > On Thu, Jul 14, 2016 at 10:12:17AM +0200, Daniel Vetter wrote:
> > > > On Thu, Jul 14, 2016 at 08:04:19AM +0100, Chris Wilson wrote:
> > > > > vGEM buffers are useful for passing data between software clients and
> > > > > hardware renders. By allowing the user to create and attach fences to
> > > > > the exported vGEM buffers (on the dma-buf), the user can implement a
> > > > > deferred renderer and queue hardware operations like flipping and then
> > > > > signal the buffer readiness (i.e. this allows the user to schedule
> > > > > operations out-of-order, but have them complete in-order).
> > > > > 
> > > > > This also makes it much easier to write tightly controlled testcases 
> > > > > for
> > > > > dma-buf fencing and signaling between hardware drivers.
> > > > > 
> > > > > v2: Don't pretend the fences exist in an ordered timeline, but 
> > > > > allocate
> > > > > a separate fence-context for each fence so that the fences are
> > > > > unordered.
> > > > > v3: Make the debug output more interesting, and so the signaled 
> > > > > status.
> > > > > 
> > > > > Testcase: igt/vgem_basic/dmabuf-fence
> > > > > Signed-off-by: Chris Wilson 
> > > > > Cc: Sean Paul 
> > > > > Cc: Zach Reizner 
> > > > > Cc: Gustavo Padovan 
> > > > > Cc: Daniel Vetter 
> > > > > Acked-by: Zach Reizner 
> > > > 
> > > > One thing I completely forgotten: This allows userspace to hang kernel
> > > > drivers. i915 (and other gpu drivers) can recover using hangcheck, but
> > > > dumber drivers (v4l, if that ever happens) probably never except such a
> > > > case. We've had a similar discusion with the userspace fences exposed in
> > > > sw_fence, and decided to move all those ioctl into debugfs. I think we
> > > > should do the same for this vgem-based debugging of implicit sync. Sorry
> > > > for realizing this this late.
> > > 
> > > One of the very tests I make is to ensure that we recover from such a
> > > hang. I don't see the difference between this any of the other ways
> > > userspace can shoot itself (and others) in the foot.
> > 
> > So one solution would be to make vgem fences automatically timeout (with
> > a flag for root to override for the sake of testing hang detection).
> 
> The problem is other drivers. E.g. right now atomic helpers assume that
> fences will signal, and can't recover if they don't. This is why drivers
> where things might fail must have some recovery (hangcheck, timeout) to
> make sure dma_fences always signal.

Urm, all the atomic helpers should work with fails. The waits on dma-buf
should be before any hardware is modified and so cancellation is trivial.
Anyone using a foreign fence (or even native) must cope with the fact that it may not
meet some deadline.

They have to. Anyone sharing a i915 dma-buf is susceptible to all kinds
of (unprivileged) fun.

> Imo not even root should be allowed to break this, since it could put
> drivers into a non-recoverable state. I think this must be restricted to
> something known-unsafe-don't-enable-on-production like debugfs.

Providing fences is extremely useful, even for software buffers. (For
the sake of argument, just imagine an asynchronous multithreaded llvmpipe
wanting to support client fences for deferred rendering.) The only
question in my mind is how much cotton wool to use.

> Other solutions which I don't like:
> - Everyone needs to be able to recover. Given how much effort it is to
>   just keep i915 hangcheck in working order I think that's totally
>   illusionary to assume. At least once world+dog (atomic, v4l, ...) all
>   consume/produce fences, subsystems where the usual assumption holds that
>   async ops complete.
> 
> - Really long timeouts are allowed for root in vgem. Could lead to even
>   more fun in testing i915 hangchecks I think, so don't like that much
>   either.

The whole point is in testing our handling before we become susceptible
to real world fail - because as you point out, not everyone guarantees
that a fence will be signaled. I can't simply pass around i915 dma-buf
simply because we may unwind them and in the process completely curtail
being able to test a foreign fence that hangs.

> I think the best option is to just do the same 

[Intel-gfx] [PATCH v3] drm/vgem: Attach sw fences to exported vGEM dma-buf (ioctl)

2016-07-14 Thread Chris Wilson
On Thu, Jul 14, 2016 at 02:23:04PM +0100, Chris Wilson wrote:
> The biggest reason I had against going the sw_sync only route was that
> vgem should provide unprivileged fences and that through the bookkeeping
> in vgem we can keep them safe, ensure that we don't leak random buffers
> or fences. (And I need a source of foreign dma-buf with implicit fence
> tracking with which I can try and break the driver.)

And for testing passing around content + fences is more useful than
passing fences alone.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[Intel-gfx] [PATCH v3] drm/vgem: Attach sw fences to exported vGEM dma-buf (ioctl)

2016-07-14 Thread Chris Wilson
On Thu, Jul 14, 2016 at 04:36:37PM +0200, Daniel Vetter wrote:
> On Thu, Jul 14, 2016 at 02:39:54PM +0100, Chris Wilson wrote:
> > On Thu, Jul 14, 2016 at 02:23:04PM +0100, Chris Wilson wrote:
> > > The biggest reason I had against going the sw_sync only route was that
> > > vgem should provide unprivileged fences and that through the bookkeeping
> > > in vgem we can keep them safe, ensure that we don't leak random buffers
> > > or fences. (And I need a source of foriegn dma-buf with implicit fence
> > > tracking with which I can try and break the driver.)
> > 
> > And for testing passing around content + fences is more useful than
> > passing fences alone.
> 
> Yup, agreed. But having fences free-standing isn't a real issue since
> their refcounted and the userspace parts (sync_file) will get cleaned up
on process exit latest. I'm not advocating for any behaviour change at
> all, just for hiding these things in debugfs.

It's just a choice of api. We could equally hide it behind a separate
config flag.

First question, are we happy that there is a legitimate usecase for fences
on vgem?

If so, what enforced timeout on the fence should we use?

(I think that this ioctl api is correct, I don't foresee sw_sync being
viable for unprivileged use.)

Then we can restrict this patch to add the safe interface, enable a bunch
more tests and get on with discussing how to break the kernel "safely"!
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH v4] drm/vgem: Attach sw fences to exported vGEM dma-buf (ioctl)

2016-07-15 Thread Chris Wilson
vGEM buffers are useful for passing data between software clients and
hardware renders. By allowing the user to create and attach fences to
the exported vGEM buffers (on the dma-buf), the user can implement a
deferred renderer and queue hardware operations like flipping and then
signal the buffer readiness (i.e. this allows the user to schedule
operations out-of-order, but have them complete in-order).

This also makes it much easier to write tightly controlled testcases for
dma-buf fencing and signaling between hardware drivers.

v2: Don't pretend the fences exist in an ordered timeline, but allocate
a separate fence-context for each fence so that the fences are
unordered.
v3: Make the debug output more interesting, and show the signaled status.
v4: Automatically signal the fence to prevent userspace from
indefinitely hanging drivers.

Testcase: igt/vgem_basic/dmabuf-fence
Testcase: igt/vgem_slow/nohang
Signed-off-by: Chris Wilson 
Cc: Sean Paul 
Cc: Zach Reizner 
Cc: Gustavo Padovan 
Cc: Daniel Vetter 
Acked-by: Zach Reizner 
---
 drivers/gpu/drm/vgem/Makefile |   2 +-
 drivers/gpu/drm/vgem/vgem_drv.c   |  34 +
 drivers/gpu/drm/vgem/vgem_drv.h   |  16 +++
 drivers/gpu/drm/vgem/vgem_fence.c | 283 ++
 include/uapi/drm/vgem_drm.h   |  62 +
 5 files changed, 396 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/vgem/vgem_fence.c
 create mode 100644 include/uapi/drm/vgem_drm.h

diff --git a/drivers/gpu/drm/vgem/Makefile b/drivers/gpu/drm/vgem/Makefile
index 3f4c7b842028..bfcdea1330e6 100644
--- a/drivers/gpu/drm/vgem/Makefile
+++ b/drivers/gpu/drm/vgem/Makefile
@@ -1,4 +1,4 @@
 ccflags-y := -Iinclude/drm
-vgem-y := vgem_drv.o
+vgem-y := vgem_drv.o vgem_fence.o

 obj-$(CONFIG_DRM_VGEM) += vgem.o
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 29c2aab3c1a7..c15bafb06665 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -83,6 +83,34 @@ static const struct vm_operations_struct vgem_gem_vm_ops = {
.close = drm_gem_vm_close,
 };

+static int vgem_open(struct drm_device *dev, struct drm_file *file)
+{
+   struct vgem_file *vfile;
+   int ret;
+
+   vfile = kzalloc(sizeof(*vfile), GFP_KERNEL);
+   if (!vfile)
+   return -ENOMEM;
+
+   file->driver_priv = vfile;
+
+   ret = vgem_fence_open(vfile);
+   if (ret) {
+   kfree(vfile);
+   return ret;
+   }
+
+   return 0;
+}
+
+static void vgem_preclose(struct drm_device *dev, struct drm_file *file)
+{
+   struct vgem_file *vfile = file->driver_priv;
+
+   vgem_fence_close(vfile);
+   kfree(vfile);
+}
+
 /* ioctls */

 static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
@@ -164,6 +192,8 @@ unref:
 }

 static struct drm_ioctl_desc vgem_ioctls[] = {
+   DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
 };

 static int vgem_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -271,9 +301,12 @@ static int vgem_prime_mmap(struct drm_gem_object *obj,

 static struct drm_driver vgem_driver = {
.driver_features= DRIVER_GEM | DRIVER_PRIME,
+   .open   = vgem_open,
+   .preclose   = vgem_preclose,
.gem_free_object_unlocked   = vgem_gem_free_object,
.gem_vm_ops = _gem_vm_ops,
.ioctls = vgem_ioctls,
+   .num_ioctls = ARRAY_SIZE(vgem_ioctls),
.fops   = _driver_fops,

.dumb_create= vgem_gem_dumb_create,
@@ -328,5 +361,6 @@ module_init(vgem_init);
 module_exit(vgem_exit);

 MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/vgem/vgem_drv.h b/drivers/gpu/drm/vgem/vgem_drv.h
index 988cbaae7588..1f8798ad329c 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.h
+++ b/drivers/gpu/drm/vgem/vgem_drv.h
@@ -32,9 +32,25 @@
 #include 
 #include 

+#include 
+
+struct vgem_file {
+   struct idr fence_idr;
+   struct mutex fence_mutex;
+};
+
 #define to_vgem_bo(x) container_of(x, struct drm_vgem_gem_object, base)
 struct drm_vgem_gem_object {
struct drm_gem_object base;
 };

+int vgem_fence_open(struct vgem_file *file);
+int vgem_fence_attach_ioctl(struct drm_device *dev,
+   void *data,
+   struct drm_file *file);
+int vgem_fence_signal_ioctl(struct drm_device *dev,
+   void *data,
+   struct drm_file *file);
+void vgem_fence_close(struct vgem_file *file);
+
 #endif
diff --git a/drivers/gpu/drm/vgem/vgem_fence.c 

[Beignet] [Patch V2] intel: Export pooled EU and min no. of eus in a pool.

2016-07-15 Thread Chris Wilson
On Fri, Jul 15, 2016 at 11:37:41AM +0100, Arun Siluvery wrote:
> On 15/07/2016 08:08, Yang Rong wrote:
> >Update kernel interface with new I915_GETPARAM ioctl entries for
> >pooled EU and min no. of eus in a pool. Add a wrapping function
> >for each parameter. Userspace drivers need these values when decide
> >the thread count. This kernel enabled pooled eu by default for BXT
> >and for fused down 2x6 parts it is advised to turn it off.
> >
> >But there is another HW issue in these parts (fused
> >down 2x6 parts) before C0 that requires Pooled EU to be enabled as a
> >workaround. In this case the pool configuration changes depending upon
> >which subslice is disabled and the no. of eus in a pool is different,
> >So userspace need to know min no. of eus in a pool.
> >
> >V2: use return value as the query results.
> > ret < 0 when error, ret = 0 when not support, and ret > 0 indicate
> > query results.(Chris)
> >
> >Signed-off-by: Yang Rong 
> >---
> 
> [+ chris, intel-gfx]
> 
> 
> regards
> Arun
> 
> >  include/drm/i915_drm.h   |  2 ++
> >  intel/intel_bufmgr.h |  3 +++
> >  intel/intel_bufmgr_gem.c | 32 
> >  3 files changed, 37 insertions(+)
> >
> >diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> >index c4ce6b2..eb611a7 100644
> >--- a/include/drm/i915_drm.h
> >+++ b/include/drm/i915_drm.h
> >@@ -357,6 +357,8 @@ typedef struct drm_i915_irq_wait {
> >  #define I915_PARAM_HAS_GPU_RESET35
> >  #define I915_PARAM_HAS_RESOURCE_STREAMER 36
> >  #define I915_PARAM_HAS_EXEC_SOFTPIN 37
> >+#define I915_PARAM_HAS_POOLED_EU 38
> >+#define I915_PARAM_MIN_EU_IN_POOL39
> >
> >  typedef struct drm_i915_getparam {
> > __s32 param;
> >diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
> >index a1abbcd..96a4d9d 100644
> >--- a/intel/intel_bufmgr.h
> >+++ b/intel/intel_bufmgr.h
> >@@ -273,6 +273,9 @@ int drm_intel_get_reset_stats(drm_intel_context *ctx,
> >  int drm_intel_get_subslice_total(int fd, unsigned int *subslice_total);
> >  int drm_intel_get_eu_total(int fd, unsigned int *eu_total);
> >
> >+int drm_intel_get_pooled_eu(int fd);
> >+int drm_intel_get_min_eu_in_pool(int fd);
> >+
> >  /** @{ Compatibility defines to keep old code building despite the symbol 
> > rename
> >   * from dri_* to drm_intel_*
> >   */
> >diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> >index 0a4012b..4d9899d 100644
> >--- a/intel/intel_bufmgr_gem.c
> >+++ b/intel/intel_bufmgr_gem.c
> >@@ -3237,6 +3237,38 @@ drm_intel_get_eu_total(int fd, unsigned int *eu_total)
> > return 0;
> >  }
> >
> >+int
> >+drm_intel_get_pooled_eu(int fd)
> >+{
> >+drm_i915_getparam_t gp;
> >+int ret;
> >+
> >+memclear(gp);
> >+gp.param = I915_PARAM_HAS_POOLED_EU;
> >+gp.value = 
> >+ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, );
> >+if (ret)
> >+return -errno;

Do I need to point out how the above is broken?

if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, ))
return -errno;
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

2016-07-15 Thread Chris Wilson
On Fri, Jul 15, 2016 at 01:26:22PM +0200, Peter Wu wrote:
> On Wed, Jul 13, 2016 at 06:17:47PM +0100, Chris Wilson wrote:
> > Hmm, since suspend_work can theorectically rearm itself, this should be
> > cancel_work_sync().
> 
> How so? The worker calls with state = FBINFO_STATE_RUNNING and
> synchronous = true, so schedule_work() can never be called.

No wories then, I feel victim to having to read the code again.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[Intel-gfx] [PATCH 06/11] drm/dp-mst: Remove tx_down_in_progress

2016-07-15 Thread Chris Wilson
On Fri, Jul 15, 2016 at 09:48:03PM +0200, Daniel Vetter wrote:
> Just replicates whether the list is empty or not. Nuke code
> to avoid writing docs for it!
> 
> Cc: Dave Airlie 
> Signed-off-by: Daniel Vetter 

Reviewed-by: Chris Wilson 

Unless Dave to planned to use it elsewhere, tx_msg_downq is indeed
redundant.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[Intel-gfx] [PATCH 09/11] drm/i915: Clean up kerneldoc for intel_lrc.c

2016-07-15 Thread Chris Wilson
On Fri, Jul 15, 2016 at 09:48:06PM +0200, Daniel Vetter wrote:
> Fairly minimal, there's still lots of functions without any docs, and
> which aren't static. But probably we want to first clean this up some more.
> 
> - Drop the bogus const. Marking argument pointers themselves (instead of
>   what they point at) as const provides roughly 0 value. And it's confusing,
>   since the data the pointer points at _is_ being changed.

It served its purpose of getting the compiler to sanity check that batch
was unchanged as we passed it around the various obscuration macros.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH 02/11] drm/doc: Add kerneldoc for @index

2016-07-15 Thread Chris Wilson
On Fri, Jul 15, 2016 at 09:47:59PM +0200, Daniel Vetter wrote:
> Was forgotten when adding them all over. 0-day should complain about
> new missing kernel-doc, not sure why that wasn't caught/fixed.
> 
> Cc: Chris Wilson 
> Signed-off-by: Daniel Vetter 
> ---
>  include/drm/drm_crtc.h | 26 +-
>  1 file changed, 21 insertions(+), 5 deletions(-)
> 
> diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
> index ebf078685f86..656f189f1198 100644
> --- a/include/drm/drm_crtc.h
> +++ b/include/drm/drm_crtc.h
> @@ -783,7 +783,10 @@ struct drm_crtc {
>   struct drm_plane *primary;
>   struct drm_plane *cursor;
>  
> - /* position inside the mode_config.list, can be used as a [] idx */
> + /**
> +  * @index: Position inside the mode_config.list, can be used as an array

For all:

@index: Fixed position inside the mode_config.list, can be used as an
array index. The @index is assigned when the crtc is inserted into the
list, and it will remain at that position inside the list until the
module is unloaded.

Just want to emphasis the unchanging nature of the @index. Second
sentence may be overkill.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Introduce fences for N:M completion variables [v2]

2016-07-17 Thread Chris Wilson
Peter Zijlstra gave a lot of feedback and thanks to him, I think both
the function names and docs are a lot more sane. There is also a good
consensus on renaming dma-buf's struct fence to be struct dma_fence,
allowing for use of the cleaner name for the core struct.

A quick overview of a fence is that it is a struct completion that waits
on multiple events (rather than just one).
-Chris



[PATCH v2 1/7] kfence: Introduce kfence, a N:M completion mechanism

2016-07-17 Thread Chris Wilson
Completions are a simple synchronization mechanism, suitable for 1:M
barriers where many waiters maybe waiting for a single event. However,
some event driven mechanisms require a graph of events where one event
may depend upon several earlier events. The kfence extends the struct
completion to be able to asynchronously wait upon several event sources,
including completions and other fences forming the basis on which an
acyclic dependency graph can be built. Most often this is used to create
a set of interdependent tasks that can be run concurrently but yet
serialised where necessary. For example, the kernel init sequence has
many tasks that could be run in parallel so long as their dependencies
on previous tasks have been completed. Similarly we have the problem of
assigning interdependent tasks to multiple hardware execution engines,
be they used for rendering or for display. kfences provides a building
block which can be used for determining an order in which tasks can
execute.

Signed-off-by: Chris Wilson 
Cc: Sumit Semwal 
Cc: Shuah Khan 
Cc: Tejun Heo 
Cc: Daniel Vetter 
Cc: Andrew Morton 
Cc: Ingo Molnar 
Cc: Kees Cook 
Cc: Thomas Gleixner 
Cc: "Paul E. McKenney" 
Cc: Dan Williams 
Cc: Andrey Ryabinin 
Cc: Davidlohr Bueso 
Cc: Nikolay Aleksandrov 
Cc: "David S. Miller" 
Cc: "Peter Zijlstra (Intel)" 
Cc: Rasmus Villemoes 
Cc: Andy Shevchenko 
Cc: Dmitry Vyukov 
Cc: Alexander Potapenko 
Cc: linux-kernel at vger.kernel.org
Cc: linux-media at vger.kernel.org
Cc: dri-devel at lists.freedesktop.org
Cc: linaro-mm-sig at lists.linaro.org
---
 include/linux/kfence.h|  64 
 kernel/Makefile   |   2 +-
 kernel/kfence.c   | 431 +++
 lib/Kconfig.debug |  23 ++
 lib/Makefile  |   1 +
 lib/test-kfence.c | 536 ++
 tools/testing/selftests/lib/kfence.sh |  10 +
 7 files changed, 1066 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/kfence.h
 create mode 100644 kernel/kfence.c
 create mode 100644 lib/test-kfence.c
 create mode 100755 tools/testing/selftests/lib/kfence.sh

diff --git a/include/linux/kfence.h b/include/linux/kfence.h
new file mode 100644
index ..6e32385b3b8c
--- /dev/null
+++ b/include/linux/kfence.h
@@ -0,0 +1,64 @@
+/*
+ * kfence.h - library routines for N:M synchronisation points
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#ifndef _KFENCE_H_
+#define _KFENCE_H_
+
+#include 
+#include 
+#include  /* for NOTIFY_DONE */
+#include 
+
+struct completion;
+
+struct kfence {
+   wait_queue_head_t wait;
+   unsigned long flags;
+   struct kref kref;
+   atomic_t pending;
+};
+
+#define KFENCE_CHECKED_BIT 0 /* used internally for DAG checking */
+#define KFENCE_PRIVATE_BIT 1 /* available for use by owner */
+#define KFENCE_MASK(~3)
+
+#define __kfence_call __aligned(4)
+typedef int (*kfence_notify_t)(struct kfence *);
+
+void kfence_init(struct kfence *fence, kfence_notify_t fn);
+
+struct kfence *kfence_get(struct kfence *fence);
+void kfence_put(struct kfence *fence);
+
+void kfence_await(struct kfence *fence);
+int kfence_await_kfence(struct kfence *fence,
+   struct kfence *after,
+   gfp_t gfp);
+int kfence_await_completion(struct kfence *fence,
+   struct completion *x,
+   gfp_t gfp);
+void kfence_complete(struct kfence *fence);
+void kfence_wake_up_all(struct kfence *fence);
+void kfence_wait(struct kfence *fence);
+
+/**
+ * kfence_done - report when the fence has been passed
+ * @fence: the kfence to query
+ *
+ * kfence_done() reports true when the fence is no longer waiting for any
+ * events and has completed its fence-complete notification.
+ *
+ * Returns true when the fence has been passed, false otherwise.
+ */
+static inline bool kfence_done(const struct kfence *fence)
+{
+   return atomic_read(>pending) < 0;
+}
+
+#endif /* _KFENCE_H_ */
diff --git a/kernel/Makefile b/kernel/Makefile
index e2ec54e2b952..ff11f31b7ec9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y = fork.o exec_domain.o panic.o \
extable.o params.o \
kthread.o sys_ni.o nsproxy.o \
notifier.o ksysfs.o cred.o reboot.o \
-   async.o range.o smpboot.o
+   async.o kfence.o range.o smpboot.o

 obj-$(CONFIG_MULTIUSER) += groups.o

diff --git a/kernel/kfence.c b/kernel/kfence.c
new file mode 100644
index ..693af9da545a
--- /dev/null
+++ b/kernel/kfence.c
@@ -0,0 +1,431 @@
+/*
+ * (C) Copyright 2016 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the 

[PATCH v2 2/7] kfence: Wrap hrtimer to provide a time source for a kfence

2016-07-17 Thread Chris Wilson
A common requirement when scheduling a task is that it should not be
begun until a certain point in time is passed (e.g.
queue_delayed_work()).  kfence_await_hrtimer() causes the kfence to
asynchronously wait until after the appropriate time before being woken.

Signed-off-by: Chris Wilson 
Cc: Sumit Semwal 
Cc: Shuah Khan 
Cc: Tejun Heo 
Cc: Daniel Vetter 
Cc: Andrew Morton 
Cc: Ingo Molnar 
Cc: Kees Cook 
Cc: Thomas Gleixner 
Cc: "Paul E. McKenney" 
Cc: Dan Williams 
Cc: Andrey Ryabinin 
Cc: Davidlohr Bueso 
Cc: Nikolay Aleksandrov 
Cc: "David S. Miller" 
Cc: "Peter Zijlstra (Intel)" 
Cc: Rasmus Villemoes 
Cc: Andy Shevchenko 
Cc: Dmitry Vyukov 
Cc: Alexander Potapenko 
Cc: linux-kernel at vger.kernel.org
Cc: linux-media at vger.kernel.org
Cc: dri-devel at lists.freedesktop.org
Cc: linaro-mm-sig at lists.linaro.org
---
 include/linux/kfence.h |  5 +
 kernel/kfence.c| 58 ++
 lib/test-kfence.c  | 44 ++
 3 files changed, 107 insertions(+)

diff --git a/include/linux/kfence.h b/include/linux/kfence.h
index 6e32385b3b8c..76a2f95dfb70 100644
--- a/include/linux/kfence.h
+++ b/include/linux/kfence.h
@@ -16,6 +16,7 @@
 #include 

 struct completion;
+enum hrtimer_mode;

 struct kfence {
wait_queue_head_t wait;
@@ -43,6 +44,10 @@ int kfence_await_kfence(struct kfence *fence,
 int kfence_await_completion(struct kfence *fence,
struct completion *x,
gfp_t gfp);
+int kfence_await_hrtimer(struct kfence *fence,
+clockid_t clock, enum hrtimer_mode mode,
+ktime_t delay, u64 slack,
+gfp_t gfp);
 void kfence_complete(struct kfence *fence);
 void kfence_wake_up_all(struct kfence *fence);
 void kfence_wait(struct kfence *fence);
diff --git a/kernel/kfence.c b/kernel/kfence.c
index 693af9da545a..59c27910a749 100644
--- a/kernel/kfence.c
+++ b/kernel/kfence.c
@@ -48,6 +48,9 @@
  * - kfence_await_completion(): the kfence asynchronously waits upon a
  *   completion
  *
+ * - kfence_await_hrtimer(): the kfence asynchronously wait for an expiration
+ *   of a timer
+ *
  * A kfence is initialised using kfence_init(), and starts off awaiting an
  * event. Once you have finished setting up the fence, including adding
  * all of its asynchronous waits, call kfence_complete().
@@ -429,3 +432,58 @@ int kfence_await_completion(struct kfence *fence,
return pending;
 }
 EXPORT_SYMBOL_GPL(kfence_await_completion);
+
+struct timer_cb {
+   struct hrtimer timer;
+   struct kfence *fence;
+};
+
+static enum hrtimer_restart
+timer_kfence_wake(struct hrtimer *timer)
+{
+   struct timer_cb *cb = container_of(timer, typeof(*cb), timer);
+
+   kfence_complete(cb->fence);
+   kfence_put(cb->fence);
+   kfree(cb);
+
+   return HRTIMER_NORESTART;
+}
+
+/**
+ * kfence_await_hrtimer - set the fence to wait for a period of time
+ * @fence: this kfence
+ * @clock: which clock to program
+ * @mode: delay given as relative or absolute
+ * @delay: how long or until what time to wait
+ * @slack: how much slack that may be applied to the delay
+ *
+ * kfence_await_hrtimer() causes the @fence to wait for a period of time, or
+ * until a certain point in time. It is a convenience wrapper around
+ * hrtimer_start_range_ns(). For more details on @clock, @mode, @delay and
+ * @slack please consult the hrtimer documentation.
+ *
+ * Returns 1 if the delay was successfully added to the @fence, or a negative
+ * error code on failure.
+ */
+int kfence_await_hrtimer(struct kfence *fence,
+clockid_t clock, enum hrtimer_mode mode,
+ktime_t delay, u64 slack,
+gfp_t gfp)
+{
+   struct timer_cb *cb;
+
+   cb = kmalloc(sizeof(*cb), gfp);
+   if (!cb)
+   return -ENOMEM;
+
+   cb->fence = kfence_get(fence);
+   kfence_await(fence);
+
+   hrtimer_init(&cb->timer, clock, mode);
+   cb->timer.function = timer_kfence_wake;
+
+   hrtimer_start_range_ns(&cb->timer, delay, slack, mode);
+   return 1;
+}
+EXPORT_SYMBOL_GPL(kfence_await_hrtimer);
diff --git a/lib/test-kfence.c b/lib/test-kfence.c
index b40719fce967..1b0853fda7c3 100644
--- a/lib/test-kfence.c
+++ b/lib/test-kfence.c
@@ -352,6 +352,44 @@ static int __init test_completion(void)
return 0;
 }

+static int __init test_delay(void)
+{
+   struct kfence *fence;
+   ktime_t delay;
+   int ret;
+
+   /* Test use of a hrtimer as an event source for kfences */
+   pr_debug("%s\n", __func__);
+
+   fence = alloc_kfence();
+   if (!fence)
+   return -ENOMEM;
+
+   delay = ktime_get();
+
+   ret = kfence_await_hrtimer(fence, CLOCK_MONOTONIC, HRTIMER_MODE_REL,
+  ms_to_ktime(1), 1 << 10,
+  

[PATCH v2 3/7] kfence: Extend kfences for listening on DMA fences

2016-07-17 Thread Chris Wilson
dma-buf provides an interfaces for receiving notifications from DMA
hardware, and for implicitly tracking fences used for rendering into
dma-buf. We want to be able to use these event sources along with kfence
for easy collection and combining with other events.

Signed-off-by: Chris Wilson 
Cc: Sumit Semwal 
Cc: Shuah Khan 
Cc: Tejun Heo 
Cc: Daniel Vetter 
Cc: Andrew Morton 
Cc: Ingo Molnar 
Cc: Kees Cook 
Cc: Thomas Gleixner 
Cc: "Paul E. McKenney" 
Cc: Dan Williams 
Cc: Andrey Ryabinin 
Cc: Davidlohr Bueso 
Cc: Nikolay Aleksandrov 
Cc: "David S. Miller" 
Cc: "Peter Zijlstra (Intel)" 
Cc: Rasmus Villemoes 
Cc: Andy Shevchenko 
Cc: Dmitry Vyukov 
Cc: Alexander Potapenko 
Cc: linux-kernel at vger.kernel.org
Cc: linux-media at vger.kernel.org
Cc: dri-devel at lists.freedesktop.org
Cc: linaro-mm-sig at lists.linaro.org
---
 drivers/dma-buf/fence.c   | 58 +++
 drivers/dma-buf/reservation.c | 48 +++
 include/linux/fence.h |  6 +
 include/linux/kfence.h|  2 ++
 include/linux/reservation.h   |  7 ++
 kernel/kfence.c   |  8 ++
 6 files changed, 129 insertions(+)

diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
index 7b05dbe9b296..3f06b3b1b4cc 100644
--- a/drivers/dma-buf/fence.c
+++ b/drivers/dma-buf/fence.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 

 #define CREATE_TRACE_POINTS
 #include 
@@ -530,3 +531,60 @@ fence_init(struct fence *fence, const struct fence_ops 
*ops,
trace_fence_init(fence);
 }
 EXPORT_SYMBOL(fence_init);
+
+struct dma_fence_cb {
+   struct fence_cb base;
+   struct kfence *fence;
+};
+
+static void dma_kfence_wake(struct fence *dma, struct fence_cb *data)
+{
+   struct dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+
+   kfence_complete(cb->fence);
+   kfence_put(cb->fence);
+   kfree(cb);
+}
+
+/**
+ * kfence_await_dma_fence - set the fence to wait upon a DMA fence
+ * @fence: this kfence
+ * @dma: target DMA fence to wait upon
+ * @gfp: the allowed allocation type
+ *
+ * kfence_await_dma_fence() causes the @fence to wait upon completion of a DMA fence.
+ *
+ * Returns 1 if the @fence was successfully added to the waitqueue of @dma, 0
+ * if @dma was already signaled (and so not added), or a negative error code.
+ */
+int kfence_await_dma_fence(struct kfence *fence, struct fence *dma, gfp_t gfp)
+{
+   struct dma_fence_cb *cb;
+   int ret;
+
+   if (fence_is_signaled(dma))
+   return 0;
+
+   cb = kmalloc(sizeof(*cb), gfp);
+   if (!cb) {
+   if (!gfpflags_allow_blocking(gfp))
+   return -ENOMEM;
+
+   return fence_wait(dma, false);
+   }
+
+   cb->fence = kfence_get(fence);
+   kfence_await(fence);
+
+   ret = fence_add_callback(dma, &cb->base, dma_kfence_wake);
+   if (ret == 0) {
+   ret = 1;
+   } else {
+   dma_kfence_wake(dma, &cb->base);
+   if (ret == -ENOENT) /* fence already signaled */
+   ret = 0;
+   }
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(kfence_await_dma_fence);
diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index 9566a62ad8e3..138b792af0c3 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -543,3 +543,51 @@ unlock_retry:
goto retry;
 }
 EXPORT_SYMBOL_GPL(reservation_object_test_signaled_rcu);
+
+/**
+ * kfence_await_reservation - set the fence to wait upon a reservation_object
+ * @fence: this kfence
+ * @resv: target reservation_object (collection of DMA fences) to wait upon
+ * @write: Wait for read or read/write access
+ * @gfp: the allowed allocation type
+ *
+ * kfence_await_reservation() causes the @fence to wait upon completion of the
+ * reservation object (a collection of DMA fences), either for read access
+ * or for read/write access.
+ *
+ * Returns 1 if the @fence was successfully added to the waitqueues of @resv, 0
+ * if @resv was already signaled (and so not added), or a negative error code.
+ */
+int kfence_await_reservation(struct kfence *fence,
+struct reservation_object *resv,
+bool write,
+gfp_t gfp)
+{
+   struct fence *excl, **shared;
+   unsigned int count, i;
+   int ret;
+
+   ret = reservation_object_get_fences_rcu(resv, &excl, &count, &shared);
+   if (ret)
+   return ret;
+
+   if (write) {
+   for (i = 0; i < count; i++) {
+   ret |= kfence_await_dma_fence(fence, shared[i], gfp);
+   if (ret < 0)
+   goto out;
+   }
+   }
+
+   if (excl)
+   ret |= kfence_await_dma_fence(fence, excl, gfp);
+
+out:
+   fence_put(excl);
+   for (i = 0; i < count; i++)
+   fence_put(s

[PATCH v2 4/7] async: Add kselftests for async-domains

2016-07-17 Thread Chris Wilson
A preparatory patch for adding new features (and their tests). First we
want to add coverage of existing features to kselftest.

Signed-off-by: Chris Wilson 
---
 lib/Kconfig.debug   |   9 ++
 lib/Makefile|   1 +
 lib/test-async-domain.c | 131 
 tools/testing/selftests/lib/Makefile|   2 +-
 tools/testing/selftests/lib/async-domain.sh |  10 +++
 5 files changed, 152 insertions(+), 1 deletion(-)
 create mode 100644 lib/test-async-domain.c
 create mode 100755 tools/testing/selftests/lib/async-domain.sh

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index df1182d41f06..4b180aed88b6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1784,6 +1784,15 @@ config KFENCE_CHECK_DAG
  graphs (DAG), as otherwise the cycles in the graph means that they
  will never be signaled (or the corresponding task executed).

+config ASYNC_DOMAIN_SELFTEST
+   tristate "Asynchronous domain self tests"
+   depends on DEBUG_KERNEL
+   default n
+   help
+ Self tests for the asynchronous task execution. This option is not useful for
+ distributions or general kernels, but only for kernel developers
+ working on the async_domain facility.
+
  Say N if you are unsure.

 config BACKTRACE_SELF_TEST
diff --git a/lib/Makefile b/lib/Makefile
index 943781cfe8d1..5864053cf63e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -28,6 +28,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o

 obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
+obj-$(CONFIG_ASYNC_DOMAIN_SELFTEST) += test-async-domain.o
 obj-$(CONFIG_KFENCE_SELFTEST) += test-kfence.o
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/test-async-domain.c b/lib/test-async-domain.c
new file mode 100644
index ..558a71414fb6
--- /dev/null
+++ b/lib/test-async-domain.c
@@ -0,0 +1,131 @@
+/*
+ * Test cases for async-domain facility.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include 
+#include 
+#include 
+
+static void task_A(void *data, async_cookie_t cookie)
+{
+   long *result = data;
+   smp_store_mb(*result, 'A');
+}
+
+static void task_B(void *data, async_cookie_t cookie)
+{
+   long *result = data;
+   usleep_range(100, 200);
+   smp_store_mb(*result, 'B');
+}
+
+static int __init test_implicit(struct async_domain *domain)
+{
+   const long expected = 'B';
+   long result = 0;
+
+   if (!async_schedule_domain(task_B, &result, domain))
+   return -ENOMEM;
+
+   async_synchronize_full_domain(domain);
+
+   if (READ_ONCE(result) != expected) {
+   pr_warn("%s expected %c [%ld], got %ld\n",
+   __func__, (char)expected, expected, result);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static int __init test_registered(struct async_domain *domain)
+{
+   const long expected = 'B';
+   long result = 0;
+
+   if (!async_schedule_domain(task_B, &result, domain))
+   return -ENOMEM;
+
+   async_synchronize_full();
+
+   if (READ_ONCE(result) != expected) {
+   pr_warn("%s expected %c [%ld], got %ld\n",
+   __func__, (char)expected, expected, result);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static void task_nop(void *data, async_cookie_t cookie)
+{
+   async_cookie_t *result = data;
+   smp_store_mb(*result, cookie);
+}
+
+static int __init perf_nop(int batch, long timeout_us)
+{
+   ktime_t start;
+   async_cookie_t nop, last;
+   long count, delay;
+
+   count = 0;
+   nop = last = 0;
+   start = ktime_get();
+   do {
+   ktime_t delta;
+   int n;
+
+   for (n = 0; n < batch; n++)
+   last = async_schedule(task_nop, &nop);
+   async_synchronize_full();
+   delta = ktime_sub(ktime_get(), start);
+   delay = ktime_to_ns(delta) >> 10;
+   count += batch;
+   } while (delay < timeout_us);
+
+   pr_info("%ld nop tasks (batches of %d) completed in %ldus; last queued 
%lld, saw %lld last\n",
+   count, batch, delay,
+   (long long)last, (long long)READ_ONCE(nop));
+   return 0;
+}
+
+static int __init test_async_domain_init(void)
+{
+   ASYNC_DOMAIN(domain);
+   int ret;
+
+   pr_info("Testing async-domains\n");
+
+   ret = test_implicit(&domain);
+   if (ret)
+   return ret;
+
+   ret = test_registered(&domain);
+   if (ret)
+   return ret;
+
+   ret = perf_nop(1, 100);
+   if (ret)
+   return ret;
+
+   ret = perf_nop(128, 1000);
+   if (ret)
+   return ret;
+
+   async_unregister_domain(&domain);
+

[PATCH v2 5/7] async: Add support for explicit fine-grained barriers

2016-07-17 Thread Chris Wilson
The current async-domain model supports running a multitude of
independent tasks with a coarse synchronisation point. This is
sufficient for its original purpose of allowing independent drivers to
run concurrently during various phases (booting, early resume, late
resume etc), and keep the asynchronous domain out of the synchronous
kernel domains. However, for greater exploitation, drivers themselves
want to schedule multiple tasks within a phase (or between phases) and
control the order of execution within those tasks relative to each
other. To enable this, we extend the synchronisation scheme based upon
kfences and back every task with one. Any task may now wait upon the
kfence before being scheduled, and equally the kfence may be used to
wait on the task itself (rather than waiting on the cookie for all
previous tasks to be completed).

Signed-off-by: Chris Wilson 
Cc: Sumit Semwal 
Cc: Shuah Khan 
Cc: Tejun Heo 
Cc: Daniel Vetter 
Cc: Andrew Morton 
Cc: Ingo Molnar 
Cc: Kees Cook 
Cc: Thomas Gleixner 
Cc: "Paul E. McKenney" 
Cc: Dan Williams 
Cc: Andrey Ryabinin 
Cc: Davidlohr Bueso 
Cc: Nikolay Aleksandrov 
Cc: "David S. Miller" 
Cc: "Peter Zijlstra (Intel)" 
Cc: Rasmus Villemoes 
Cc: Andy Shevchenko 
Cc: Dmitry Vyukov 
Cc: Alexander Potapenko 
Cc: linux-kernel at vger.kernel.org
Cc: linux-media at vger.kernel.org
Cc: dri-devel at lists.freedesktop.org
Cc: linaro-mm-sig at lists.linaro.org
---
 include/linux/async.h   |  60 -
 kernel/async.c  | 234 --
 lib/test-async-domain.c | 324 +++-
 3 files changed, 515 insertions(+), 103 deletions(-)

diff --git a/include/linux/async.h b/include/linux/async.h
index 6b0226bdaadc..e7d7289a9889 100644
--- a/include/linux/async.h
+++ b/include/linux/async.h
@@ -13,38 +13,88 @@
 #define __ASYNC_H__

 #include 
+#include 
 #include 

 typedef u64 async_cookie_t;
 typedef void (*async_func_t) (void *data, async_cookie_t cookie);
+
+struct async_work {
+   struct kfence fence;
+   /* private */
+};
+
 struct async_domain {
struct list_head pending;
unsigned registered:1;
 };

+#define ASYNC_DOMAIN_INIT(_name, _r) { \
+   .pending = LIST_HEAD_INIT(_name.pending),   \
+   .registered = _r\
+}
+
 /*
  * domain participates in global async_synchronize_full
  */
 #define ASYNC_DOMAIN(_name) \
-   struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), 
\
- .registered = 1 }
+   struct async_domain _name = ASYNC_DOMAIN_INIT(_name, 1)

 /*
  * domain is free to go out of scope as soon as all pending work is
  * complete, this domain does not participate in async_synchronize_full
  */
 #define ASYNC_DOMAIN_EXCLUSIVE(_name) \
-   struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), 
\
- .registered = 0 }
+   struct async_domain _name = ASYNC_DOMAIN_INIT(_name, 0)
+
+extern void init_async_domain(struct async_domain *domain, bool registered);

 extern async_cookie_t async_schedule(async_func_t func, void *data);
 extern async_cookie_t async_schedule_domain(async_func_t func, void *data,
struct async_domain *domain);
-void async_unregister_domain(struct async_domain *domain);
+extern void async_unregister_domain(struct async_domain *domain);
 extern void async_synchronize_full(void);
 extern void async_synchronize_full_domain(struct async_domain *domain);
 extern void async_synchronize_cookie(async_cookie_t cookie);
 extern void async_synchronize_cookie_domain(async_cookie_t cookie,
struct async_domain *domain);
+
 extern bool current_is_async(void);
+
+extern struct async_work *
+async_work_create(async_func_t func, void *data, gfp_t gfp);
+
+static inline struct async_work *async_work_get(struct async_work *work)
+{
+   kfence_get(&work->fence);
+   return work;
+}
+
+static inline int
+async_work_after(struct async_work *work, struct kfence *fence)
+{
+   return kfence_await_kfence(&work->fence, fence, GFP_KERNEL);
+}
+
+static inline int
+async_work_before(struct async_work *work, struct kfence *fence)
+{
+   return kfence_await_kfence(fence, &work->fence, GFP_KERNEL);
+}
+
+static inline void async_work_wait(struct async_work *work)
+{
+   kfence_wait(&work->fence);
+}
+
+static inline void async_work_put(struct async_work *work)
+{
+   kfence_put(&work->fence);
+}
+
+extern async_cookie_t queue_async_work(struct async_domain *domain,
+  struct async_work *work,
+  gfp_t gfp);
+extern async_cookie_t schedule_async_work(struct async_work *work);
+
 #endif
diff --git a/kernel/async.c b/kernel/async.c
index d2edd6efec56..0d695919a60d 100644
--- a/ker

[PATCH v2 7/7] async: Introduce a dependency resolver for parallel execution

2016-07-17 Thread Chris Wilson
A challenge in driver initialisation is the coordination of many small
sometimes independent, sometimes interdependent tasks. We would like to
schedule the independent tasks for execution in parallel across as many
cores as possible for rapid initialisation, and then schedule all the
dependent tasks once they have completed, again running as many of those
in parallel as is possible.

Resolving the interdependencies by hand is time consuming and error
prone. Instead, we want to declare what dependencies a particular task
has, and what that task provides, and let a runtime dependency solver
work out what tasks to run and when, and which in parallel. To this end,
we introduce the struct async_dependency_graph building upon the kfence
and async_work from the previous patches to allow for the runtime
computation of the topological task ordering.

The graph is constructed with async_dependency_graph_build(), which
takes the task, its dependencies and what it provides, and builds the
graph of kfences required for ordering. Additional kfences can be
inserted through async_dependency_depends() and
async_dependency_provides() for manual control of the execution order,
and async_dependency_get() retrieves a kfence for inspection or waiting
upon.

Signed-off-by: Chris Wilson 
Cc: Sumit Semwal 
Cc: Shuah Khan 
Cc: Tejun Heo 
Cc: Daniel Vetter 
Cc: Andrew Morton 
Cc: Ingo Molnar 
Cc: Kees Cook 
Cc: Thomas Gleixner 
Cc: "Paul E. McKenney" 
Cc: Dan Williams 
Cc: Andrey Ryabinin 
Cc: Davidlohr Bueso 
Cc: Nikolay Aleksandrov 
Cc: "David S. Miller" 
Cc: "Peter Zijlstra (Intel)" 
Cc: Rasmus Villemoes 
Cc: Andy Shevchenko 
Cc: Dmitry Vyukov 
Cc: Alexander Potapenko 
Cc: linux-kernel at vger.kernel.org
Cc: linux-media at vger.kernel.org
Cc: dri-devel at lists.freedesktop.org
Cc: linaro-mm-sig at lists.linaro.org
---
 include/linux/async.h  |  38 +++
 kernel/async.c | 250 
 lib/Kconfig.debug  |  12 +
 lib/Makefile   |   1 +
 lib/test-async-dependency-graph.c  | 316 +
 .../selftests/lib/async-dependency-graph.sh|  10 +
 6 files changed, 627 insertions(+)
 create mode 100644 lib/test-async-dependency-graph.c
 create mode 100755 tools/testing/selftests/lib/async-dependency-graph.sh

diff --git a/include/linux/async.h b/include/linux/async.h
index de44306f8cb7..0a0040d3fc01 100644
--- a/include/linux/async.h
+++ b/include/linux/async.h
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 

 typedef u64 async_cookie_t;
 typedef void (*async_func_t) (void *data, async_cookie_t cookie);
@@ -101,4 +102,41 @@ extern async_cookie_t queue_async_work(struct async_domain 
*domain,
   gfp_t gfp);
 extern async_cookie_t schedule_async_work(struct async_work *work);

+/* Build a graph of work based on dependencies generated by keywords.
+ * The graph must be acyclic. Can be used to both generate a topological
+ * ordering of tasks, and to execute independent chains of tasks in
+ * parallel.
+ */
+
+struct async_dependency_graph {
+   struct rb_root root;
+   struct list_head list;
+};
+
+#define ASYNC_DEPENDENCY_GRAPH_INIT(_name) {   \
+   .root = RB_ROOT,\
+   .list = LIST_HEAD_INIT(_name.list), \
+}
+
+#define ASYNC_DEPENDENCY_GRAPH(_name) \
+   struct async_dependency_graph _name = ASYNC_DEPENDENCY_GRAPH_INIT(_name)
+
+extern int async_dependency_graph_build(struct async_dependency_graph *adg,
+   async_func_t fn, void *data,
+   const char *depends,
+   const char *provides);
+
+extern int async_dependency_depends(struct async_dependency_graph *adg,
+   struct kfence *fence,
+   const char *depends);
+
+extern int async_dependency_provides(struct async_dependency_graph *adg,
+struct kfence *fence,
+const char *provides);
+
+extern struct kfence *async_dependency_get(struct async_dependency_graph *adg,
+  const char *name);
+
+extern void async_dependency_graph_execute(struct async_dependency_graph *adg);
+
 #endif
diff --git a/kernel/async.c b/kernel/async.c
index 5cfa398a19b2..ac12566f2e0b 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -447,3 +447,253 @@ void init_async_domain(struct async_domain *domain, bool 
registered)
domain->registered = registered;
 }
 EXPORT_SYMBOL_GPL(init_async_domain);
+
+struct async_dependency {
+   struct kfence fence;
+   struct rb_node node;
+   struct list_head link;
+   char name[0];
+};
+
+static struct async_

[PATCH v2 6/7] async: Add execution barriers

2016-07-17 Thread Chris Wilson
A frequent mode of operation is fanning out N tasks to execute in
parallel, collating results, fanning out M tasks, rinse and repeat. This
is also common to the notion of the async/sync kernel domain split.
A barrier provides a mechanism by which all work queued after the
barrier must wait (i.e. not be scheduled) until all work queued before the
barrier is completed.

Signed-off-by: Chris Wilson 
Cc: Sumit Semwal 
Cc: Shuah Khan 
Cc: Tejun Heo 
Cc: Daniel Vetter 
Cc: Andrew Morton 
Cc: Ingo Molnar 
Cc: Kees Cook 
Cc: Thomas Gleixner 
Cc: "Paul E. McKenney" 
Cc: Dan Williams 
Cc: Andrey Ryabinin 
Cc: Davidlohr Bueso 
Cc: Nikolay Aleksandrov 
Cc: "David S. Miller" 
Cc: "Peter Zijlstra (Intel)" 
Cc: Rasmus Villemoes 
Cc: Andy Shevchenko 
Cc: Dmitry Vyukov 
Cc: Alexander Potapenko 
Cc: linux-kernel at vger.kernel.org
Cc: linux-media at vger.kernel.org
Cc: dri-devel at lists.freedesktop.org
Cc: linaro-mm-sig at lists.linaro.org
---
 include/linux/async.h |  4 +++
 kernel/async.c| 72 +++
 2 files changed, 76 insertions(+)

diff --git a/include/linux/async.h b/include/linux/async.h
index e7d7289a9889..de44306f8cb7 100644
--- a/include/linux/async.h
+++ b/include/linux/async.h
@@ -26,6 +26,7 @@ struct async_work {

 struct async_domain {
struct list_head pending;
+   struct kfence *barrier;
unsigned registered:1;
 };

@@ -59,6 +60,9 @@ extern void async_synchronize_cookie(async_cookie_t cookie);
 extern void async_synchronize_cookie_domain(async_cookie_t cookie,
struct async_domain *domain);

+extern void async_barrier(void);
+extern void async_barrier_domain(struct async_domain *domain);
+
 extern bool current_is_async(void);

 extern struct async_work *
diff --git a/kernel/async.c b/kernel/async.c
index 0d695919a60d..5cfa398a19b2 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -154,6 +154,15 @@ struct async_work *async_work_create(async_func_t func, 
void *data, gfp_t gfp)
 }
 EXPORT_SYMBOL_GPL(async_work_create);

+static void async_barrier_delete(struct async_domain *domain)
+{
+   if (!domain->barrier)
+   return;
+
+   kfence_put(domain->barrier);
+   domain->barrier = NULL;
+}
+
 async_cookie_t queue_async_work(struct async_domain *domain,
struct async_work *work,
gfp_t gfp)
@@ -174,6 +183,10 @@ async_cookie_t queue_async_work(struct async_domain 
*domain,
async_pending_count++;
spin_unlock_irqrestore(&async_lock, flags);

+   if (domain->barrier &&
+   !kfence_await_kfence(>base.fence, domain->barrier, gfp))
+   async_barrier_delete(domain);
+
/* mark that this task has queued an async job, used by module init */
current->flags |= PF_USED_ASYNC;

@@ -241,6 +254,63 @@ async_cookie_t async_schedule_domain(async_func_t func, 
void *data,
 }
 EXPORT_SYMBOL_GPL(async_schedule_domain);

+static struct kfence *__async_barrier_create(struct async_domain *domain)
+{
+   struct kfence *fence;
+   struct async_entry *entry;
+   unsigned long flags;
+   int ret;
+
+   fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+   if (!fence)
+   goto out_sync;
+
+   kfence_init(fence, NULL);
+
+   ret = 0;
+   spin_lock_irqsave(&async_lock, flags);
+   list_for_each_entry(entry, &domain->pending, pending_link[0]) {
+   ret |= kfence_await_kfence(fence,
+  &entry->base.fence,
+  GFP_ATOMIC);
+   if (ret < 0)
+   break;
+   }
+   spin_unlock_irqrestore(&async_lock, flags);
+   if (ret <= 0)
+   goto out_put;
+
+   if (domain->barrier)
+   kfence_await_kfence(fence, domain->barrier, GFP_KERNEL);
+
+   kfence_complete(fence);
+   return fence;
+
+out_put:
+   kfence_complete(fence);
+   kfence_put(fence);
+out_sync:
+   async_synchronize_full_domain(domain);
+   return NULL;
+}
+
+void async_barrier(void)
+{
+   async_barrier_domain(_dfl_domain);
+}
+EXPORT_SYMBOL_GPL(async_barrier);
+
+void async_barrier_domain(struct async_domain *domain)
+{
+   struct kfence *barrier = __async_barrier_create(domain);
+
+   if (domain->barrier)
+   kfence_put(domain->barrier);
+
+   domain->barrier = barrier;
+}
+EXPORT_SYMBOL_GPL(async_barrier_domain);
+
 /**
  * async_synchronize_full - synchronize all asynchronous function calls
  *
@@ -264,6 +334,8 @@ EXPORT_SYMBOL_GPL(async_synchronize_full);
 void async_unregister_domain(struct async_domain *domain)
 {
WARN_ON(!list_empty(&domain->pending));
+
+   async_barrier_delete(domain);
domain->registered = 0;
 }
 EXPORT_SYMBOL_GPL(async_unregister_domain);
-- 
2.8.1



[PATCH 02/11] drm/doc: Add kerneldoc for @index

2016-07-18 Thread Chris Wilson
On Mon, Jul 18, 2016 at 09:15:35AM +0200, Daniel Vetter wrote:
> On Fri, Jul 15, 2016 at 09:06:04PM +0100, Chris Wilson wrote:
> > On Fri, Jul 15, 2016 at 09:47:59PM +0200, Daniel Vetter wrote:
> > > Was forgotten when adding them all over. 0-day should complain about
> > > new missing kernel-doc, not sure why that wasn't caught/fixed.
> > > 
> > > Cc: Chris Wilson 
> > > Signed-off-by: Daniel Vetter 
> > > ---
> > >  include/drm/drm_crtc.h | 26 +-
> > >  1 file changed, 21 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
> > > index ebf078685f86..656f189f1198 100644
> > > --- a/include/drm/drm_crtc.h
> > > +++ b/include/drm/drm_crtc.h
> > > @@ -783,7 +783,10 @@ struct drm_crtc {
> > >   struct drm_plane *primary;
> > >   struct drm_plane *cursor;
> > >  
> > > - /* position inside the mode_config.list, can be used as a [] idx */
> > > + /**
> > > +  * @index: Position inside the mode_config.list, can be used as an array
> > 
> > For all:
> > 
> > @index: Fixed position inside the mode_config.list, can be used as an
> > array index. The @index is assigned when the crtc is inserted into the
> > list, and it will remain at that position inside the list until the
> > module is unloaded.
> > 
> > Just want to emphasis the unchanging nature of the @index. Second
> > sentence may be overkill.
> 
> Just adding "It is invariant over the lifetime of $object." for brevity?

Wins.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH] drm/vgem: Remember to offset relative timeouts to mod_timer() by jiffies

2016-07-18 Thread Chris Wilson
mod_timer() takes an absolute jiffies value, not a relative timeout. Also
quietly fix up the missed ret=0, otherwise gcc just always returns that
the fence timed out.

Testcase: igt/vgem_basic/fence
Fixes: 407779848445 ("drm/vgem: Attach sw fences to exported vGEM dma-buf")
Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/vgem/vgem_fence.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/vgem/vgem_fence.c 
b/drivers/gpu/drm/vgem/vgem_fence.c
index e77b52208699..892417ba2622 100644
--- a/drivers/gpu/drm/vgem/vgem_fence.c
+++ b/drivers/gpu/drm/vgem/vgem_fence.c
@@ -107,7 +107,7 @@ static struct fence *vgem_fence_create(struct vgem_file 
*vfile,
setup_timer(>timer, vgem_fence_timeout, (unsigned long)fence);

/* We force the fence to expire within 10s to prevent driver hangs */
-   mod_timer(>timer, VGEM_FENCE_TIMEOUT);
+   mod_timer(>timer, jiffies + VGEM_FENCE_TIMEOUT);

return >base;
 }
@@ -240,7 +240,7 @@ int vgem_fence_signal_ioctl(struct drm_device *dev,
struct vgem_file *vfile = file->driver_priv;
struct drm_vgem_fence_signal *arg = data;
struct fence *fence;
-   int ret;
+   int ret = 0;

if (arg->flags)
return -EINVAL;
-- 
2.8.1



[PATCH] dma-buf: Release module reference on creation failure

2016-07-18 Thread Chris Wilson
If we fail to create the anon file, we need to remember to release the
module reference on the owner.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
Cc: Joonas Lahtinen 
Cc: Sumit Semwal 
Cc: Daniel Vetter 
Cc: linux-media at vger.kernel.org
Cc: dri-devel at lists.freedesktop.org
Cc: linaro-mm-sig at lists.linaro.org
---
 drivers/dma-buf/dma-buf.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 20ce0687b111..ddaee60ae52a 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -334,6 +334,7 @@ struct dma_buf *dma_buf_export(const struct 
dma_buf_export_info *exp_info)
struct reservation_object *resv = exp_info->resv;
struct file *file;
size_t alloc_size = sizeof(struct dma_buf);
+   int ret;

if (!exp_info->resv)
alloc_size += sizeof(struct reservation_object);
@@ -357,8 +358,8 @@ struct dma_buf *dma_buf_export(const struct 
dma_buf_export_info *exp_info)

dmabuf = kzalloc(alloc_size, GFP_KERNEL);
if (!dmabuf) {
-   module_put(exp_info->owner);
-   return ERR_PTR(-ENOMEM);
+   ret = -ENOMEM;
+   goto err_module;
}

dmabuf->priv = exp_info->priv;
@@ -379,8 +380,8 @@ struct dma_buf *dma_buf_export(const struct 
dma_buf_export_info *exp_info)
file = anon_inode_getfile("dmabuf", _buf_fops, dmabuf,
exp_info->flags);
if (IS_ERR(file)) {
-   kfree(dmabuf);
-   return ERR_CAST(file);
+   ret = PTR_ERR(file);
+   goto err_dmabuf;
}

file->f_mode |= FMODE_LSEEK;
@@ -394,6 +395,12 @@ struct dma_buf *dma_buf_export(const struct 
dma_buf_export_info *exp_info)
mutex_unlock(_list.lock);

return dmabuf;
+
+err_dmabuf:
+   kfree(dmabuf);
+err_module:
+   module_put(exp_info->owner);
+   return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(dma_buf_export);

-- 
2.8.1



[PATCH] drm: drm_connector->s/connector_id/index/ for consistency

2016-07-19 Thread Chris Wilson
On Tue, Jul 19, 2016 at 12:57:52PM +0200, Daniel Vetter wrote:
> connector_id in the uapi actually means drm_connector->base.id, which
> is something entirely different. And ->index is also consistent with
> plane/encoder/CRTCS and the various drm_*_index() functions.
> 
> While at it also improve/align the kerneldoc comment.
> 
> v2: Mention where those ids are from ...
> 
> Cc: Chris Wilson 
> Cc: Maarten Lankhorst 
> Acked-by: Chris Wilson 
> Acked-by: Maarten Lankhorst 
> Signed-off-by: Daniel Vetter 
> ---
>  drivers/gpu/drm/drm_crtc.c | 11 +--
>  include/drm/drm_crtc.h | 13 ++---
>  2 files changed, 15 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
> index c456628740dd..f9f2506b1855 100644
> --- a/drivers/gpu/drm/drm_crtc.c
> +++ b/drivers/gpu/drm/drm_crtc.c
> @@ -934,11 +934,10 @@ int drm_connector_init(struct drm_device *dev,
>   connector->dev = dev;
>   connector->funcs = funcs;
>  
> - connector->connector_id = ida_simple_get(>connector_ida, 0, 0, 
> GFP_KERNEL);
> - if (connector->connector_id < 0) {
> - ret = connector->connector_id;
> + ret = ida_simple_get(>connector_ida, 0, 0, GFP_KERNEL);
> + if (ret < 0)
>   goto out_put;
> - }
> + connector->index = ret;

Oh, that is glorious!

>  out_put_id:
>   if (ret)
> - ida_remove(>connector_ida, connector->connector_id);
> + ida_remove(>connector_ida, connector->index);

ret == connector->index (it's only otherwise set on failure).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH -next] drm/vgem: Fix non static symbol warning

2016-07-19 Thread Chris Wilson
On Tue, Jul 19, 2016 at 12:44:22PM +, Wei Yongjun wrote:
> From: Wei Yongjun 
> 
> Fixes the following sparse warning:
> 
> drivers/gpu/drm/vgem/vgem_fence.c:75:24: warning:
>   symbol 'vgem_fence_ops' was not declared. Should it be static?
> 
> Signed-off-by: Wei Yongjun 

Thanks,
Reviewed-by: Chris Wilson 
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH] drm: drm_connector->s/connector_id/index/ for consistency

2016-07-20 Thread Chris Wilson
On Tue, Jul 19, 2016 at 06:25:01PM +0200, Daniel Vetter wrote:
> connector_id in the uapi actually means drm_connector->base.id, which
> is something entirely different. And ->index is also consistent with
> plane/encoder/CRTCS and the various drm_*_index() functions.
> 
> While at it also improve/align the kerneldoc comment.
> 
> v2: Mention where those ids are from ...
> 
> v3: Add -ing to supporting and try to not break the world.
> 
> Cc: Chris Wilson 
> Cc: Maarten Lankhorst 
> Acked-by: Chris Wilson 
> Acked-by: Maarten Lankhorst 
> Signed-off-by: Daniel Vetter 

No longer breaks...
Reviewed-by: Chris Wilson 
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[Intel-gfx] [PATCH 2/7] drm/atomic: Reject attempts to use multiple rotation angles at once

2016-07-20 Thread Chris Wilson
On Wed, Jul 20, 2016 at 04:18:07PM +0300, ville.syrjala at linux.intel.com 
wrote:
> From: Ville Syrjälä 
> 
> The rotation property should only accept exactly one rotation angle
> at once. Let's reject attempts to set none or multiple angles.
> 
> Testcase: igt/kms_rotation_crc/bad-rotation
> Signed-off-by: Ville Syrjälä 
Reviewed-by: Chris Wilson 
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[Intel-gfx] [PATCH 3/7] drm: Add support for optional per-plane rotation property

2016-07-20 Thread Chris Wilson
On Wed, Jul 20, 2016 at 04:18:08PM +0300, ville.syrjala at linux.intel.com 
wrote:
> From: Ville Syrjälä 
> 
> Not all planes on the system may support the same rotations/reflections,
> so make it possible to create a separate property for each plane.
> This way userspace gets told exactly which rotations/reflections are
> possible for each plane.

Hmm, can't see that it would make much difference here, but I was
expecting something like

drm_plane_lookup_rotation_property()
{
return plane->rotation_property ?: 
plane->dev->mode_config.rotation_property;
}

so that it is obvious that the plane takes precedence over the global
property when provided.

You've caught all the spots I can see so,
Reviewed-by: Chris Wilson 
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[Intel-gfx] [PATCH 4/7] drm/i915: Use the per-plane rotation property

2016-07-20 Thread Chris Wilson
On Wed, Jul 20, 2016 at 04:18:09PM +0300, ville.syrjala at linux.intel.com 
wrote:
> From: Ville Syrjälä 
> 
> On certain platforms not all planes support the same set of
> rotations/reflections, so let's use the per-plane property
> for this.
> 
> This is already a problem on SKL when we use the legacy cursor plane
> as it only supports 0|180 whereas the universal planes support
> 0|90|180|270, and it will be a problem on CHV soon.
> 
> Signed-off-by: Ville Syrjälä 

Reviewed-by: Chris Wilson 
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[Intel-gfx] [PATCH 5/7] drm/i915: Use & instead of == to check for rotations

2016-07-20 Thread Chris Wilson
On Wed, Jul 20, 2016 at 04:18:10PM +0300, ville.syrjala at linux.intel.com 
wrote:
> From: Ville Syrjälä 
> 
> Using == to check for 180 degree rotation only works as long as the
> reflection bits aren't set. That will change soon enough for CHV, so
> let's stop doing things the wrong way.
> 
> Signed-off-by: Ville Syrjälä 
Reviewed-by: Chris Wilson 
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH 6/7] drm/i915: Clean up rotation DSPCNTR/DVSCNTR/etc. setup

2016-07-20 Thread Chris Wilson
On Wed, Jul 20, 2016 at 04:18:11PM +0300, ville.syrjala at linux.intel.com 
wrote:
> From: Ville Syrjälä 
> 
> Move the plane control register rotation setup away from the
> coordinate munging code. This will result in neater looking
> code once we add reflection support for CHV.
> 
> Signed-off-by: Ville Syrjälä 
Reviewed-by: Chris Wilson 
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[Intel-gfx] [PATCH 1/7] drm: Add drm_rotation_90_or_270()

2016-07-20 Thread Chris Wilson
On Wed, Jul 20, 2016 at 04:18:06PM +0300, ville.syrjala at linux.intel.com 
wrote:
> From: Ville Syrjälä 
> 
> We have intel_rotation_90_or_270() in i915 to check if the rotation is
> 90 or 270 degrees. Similar checks are elsewhere in drm, so let's move
> the helper into a central place and use it everywhere.
> 
> Signed-off-by: Ville Syrjälä 
Reviewed-by: Chris Wilson 

I didn't check for any other candidates though.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH] drm/vgem: Allow root to inject hanging fences onto dmabufs

2016-07-20 Thread Chris Wilson
When performing driver testing, one factor we want to test is how we
handle a foreign fence that is never signaled. We can wait on that fence
indefinitely, in which case the driver appears hung, or we can take some
remedial action (with risks regarding the state of any shared content).
Whatever the action chosen by the driver, in order to perform the test
we want to disable the safety feature of vgem fence (which is then used
to test implicit dma-buf fencing). This is regarded as a highly
dangerous feature and so hidden behind an expert config option and only
available to root when enabled.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/Kconfig   | 13 +
 drivers/gpu/drm/vgem/vgem_fence.c | 14 --
 include/uapi/drm/vgem_drm.h   |  1 +
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index fc357319de35..2b25bc38fad2 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -228,6 +228,19 @@ config DRM_VGEM
  as used by Mesa's software renderer for enhanced performance.
  If M is selected the module will be called vgem.

+config DRM_VGEM_FENCE_HANG
+   bool "Enable fence hang testing (DANGEROUS)"
+   depends on DRM_VGEM
+   depends on EXPERT
+   depends on !COMPILE_TEST
+   default n
+   help
+ Enables support for root to use indefinite fences.
+ Do not enable this unless you are testing DRM drivers.
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".

 source "drivers/gpu/drm/exynos/Kconfig"

diff --git a/drivers/gpu/drm/vgem/vgem_fence.c 
b/drivers/gpu/drm/vgem/vgem_fence.c
index 5c57c1ffa1f9..91d3d75fc9c5 100644
--- a/drivers/gpu/drm/vgem/vgem_fence.c
+++ b/drivers/gpu/drm/vgem/vgem_fence.c
@@ -107,7 +107,8 @@ static struct fence *vgem_fence_create(struct vgem_file 
*vfile,
setup_timer(>timer, vgem_fence_timeout, (unsigned long)fence);

/* We force the fence to expire within 10s to prevent driver hangs */
-   mod_timer(>timer, jiffies + VGEM_FENCE_TIMEOUT);
+   if ((flags & VGEM_FENCE_NOTIMEOUT) == 0)
+   mod_timer(>timer, jiffies + VGEM_FENCE_TIMEOUT);

return >base;
 }
@@ -160,9 +161,18 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
struct fence *fence;
int ret;

-   if (arg->flags & ~VGEM_FENCE_WRITE)
+   if (arg->flags & ~(VGEM_FENCE_WRITE | VGEM_FENCE_NOTIMEOUT))
return -EINVAL;

+   if (arg->flags & VGEM_FENCE_NOTIMEOUT) {
+   if (config_enabled(CONFIG_DRM_VGEM_FENCE_HANG)) {
+   if (!capable(CAP_SYS_ADMIN))
+   return -EPERM;
+   } else {
+   return -EINVAL;
+   }
+   }
+
if (arg->pad)
return -EINVAL;

diff --git a/include/uapi/drm/vgem_drm.h b/include/uapi/drm/vgem_drm.h
index bf66f5db6da8..55fd08750773 100644
--- a/include/uapi/drm/vgem_drm.h
+++ b/include/uapi/drm/vgem_drm.h
@@ -46,6 +46,7 @@ struct drm_vgem_fence_attach {
__u32 handle;
__u32 flags;
 #define VGEM_FENCE_WRITE   0x1
+#define VGEM_FENCE_NOTIMEOUT   0x2
__u32 out_fence;
__u32 pad;
 };
-- 
2.8.1



[PATCH] drm/vgem: Allow root to inject hanging fences onto dmabufs

2016-07-21 Thread Chris Wilson
On Wed, Jul 20, 2016 at 04:28:40PM -0700, Kristian Høgsberg wrote:
> Why is this useful if only root can use it?

> > When performing driver testing, one factor we want to test is how we
> > handle a foreign fence that is never signaled. We can wait on that fence
> > indefinitely, in which case the driver appears hung, or we can take some
> > remedial action (with risks regarding the state of any shared content).
> > Whatever the action chosen by the driver, in order to perform the test
> > we want to disable the safety feature of vgem fence (which is then used
> > to test implicit dma-buf fencing). This is regarded as a highly
> > dangerous feature and so hidden behind an expert config option and only
> > available to root when enabled.

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH 4/9] drm/i915: Use drm_plane_state.{src,dst,visible}

2016-07-26 Thread Chris Wilson
On Tue, Jul 26, 2016 at 07:06:59PM +0300, ville.syrjala at linux.intel.com 
wrote:
> From: Ville Syrjälä 
> 
> Replace the private drm_rects/flags in intel_plane_state
> with the ones now living in drm_plane_state.
> 
> Signed-off-by: Ville Syrjälä 

Didn't spot any mistakes creeping in, and assuming the patch is
complete:
Reviewed-by: Chris Wilson 
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH 1/9] drm: Warn about negative sizes when calculating scale factor

2016-07-26 Thread Chris Wilson
On Tue, Jul 26, 2016 at 07:06:56PM +0300, ville.syrjala at linux.intel.com 
wrote:
> From: Ville Syrjälä 
> 
> Passing negative width/height to scale factor calculations is not
> legal. Let's WARN if that happens.

Does this get called with user controllable inputs? A quick grep leads
me to drm_primary_helper_update() which suggests no. Did I miss a
potential user controllable WARN->panic?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[PATCH 3/9] drm/plane-helper: Add drm_plane_helper_check_state()

2016-07-26 Thread Chris Wilson
On Tue, Jul 26, 2016 at 07:06:58PM +0300, ville.syrjala at linux.intel.com 
wrote:
> From: Ville Syrjälä 
> 
> Add a version of drm_plane_helper_check_update() which takes a plane
> state instead of having the caller pass in everything.
> 
> And to reduce code duplication, let's reimplement
> drm_plane_helper_check_update() in terms of the new function, by
> having a temporary plane state on the stack.
> 
> Signed-off-by: Ville Syrjälä 
> ---
>  drivers/gpu/drm/drm_plane_helper.c | 136 
> -
>  include/drm/drm_plane_helper.h |   5 ++
>  2 files changed, 110 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_plane_helper.c 
> b/drivers/gpu/drm/drm_plane_helper.c
> index 16c4a7bd7465..1124eb827671 100644
> --- a/drivers/gpu/drm/drm_plane_helper.c
> +++ b/drivers/gpu/drm/drm_plane_helper.c
> @@ -108,14 +108,9 @@ static int get_connectors_for_crtc(struct drm_crtc *crtc,
>  }
>  
>  /**
> - * drm_plane_helper_check_update() - Check plane update for validity
> - * @plane: plane object to update
> - * @crtc: owning CRTC of owning plane
> - * @fb: framebuffer to flip onto plane
> - * @src: source coordinates in 16.16 fixed point
> - * @dest: integer destination coordinates
> + * drm_plane_helper_check_state() - Check plane state for validity
> + * @state: plane state to check
>   * @clip: integer clipping coordinates
> - * @rotation: plane rotation
>   * @min_scale: minimum @src:@dest scaling factor in 16.16 fixed point
>   * @max_scale: maximum @src:@dest scaling factor in 16.16 fixed point
>   * @can_position: is it legal to position the plane such that it
> @@ -123,8 +118,6 @@ static int get_connectors_for_crtc(struct drm_crtc *crtc,
>   *only be false for primary planes.
>   * @can_update_disabled: can the plane be updated while the crtc
>   *   is disabled?
> - * @visible: output parameter indicating whether plane is still visible after
> - *   clipping
>   *
>   * Checks that a desired plane update is valid.  Drivers that provide
>   * their own plane handling rather than helper-provided implementations may
> @@ -134,29 +127,38 @@ static int get_connectors_for_crtc(struct drm_crtc 
> *crtc,
>   * RETURNS:
>   * Zero if update appears valid, error code on failure
>   */
> -int drm_plane_helper_check_update(struct drm_plane *plane,
> -   struct drm_crtc *crtc,
> -   struct drm_framebuffer *fb,
> -   struct drm_rect *src,
> -   struct drm_rect *dest,
> -   const struct drm_rect *clip,
> -   unsigned int rotation,
> -   int min_scale,
> -   int max_scale,
> -   bool can_position,
> -   bool can_update_disabled,
> -   bool *visible)
> +int drm_plane_helper_check_state(struct drm_plane_state *state,
> +  const struct drm_rect *clip,
> +  int min_scale,
> +  int max_scale,
> +  bool can_position,
> +  bool can_update_disabled)
>  {
> + struct drm_crtc *crtc = state->crtc;
> + struct drm_framebuffer *fb = state->fb;
> + struct drm_rect *src = >src;
> + struct drm_rect *dst = >dst;
> + unsigned int rotation = state->rotation;
>   int hscale, vscale;
>  
> + src->x1 = state->src_x;
> + src->y1 = state->src_y;
> + src->x2 = state->src_x + state->src_w;
> + src->y2 = state->src_y + state->src_h;
> +
> + dst->x1 = state->crtc_x;
> + dst->y1 = state->crtc_y;
> + dst->x2 = state->crtc_x + state->crtc_w;
> + dst->y2 = state->crtc_y + state->crtc_h;

It's a little surprising to me that the check writes fields into the
drm_plane_state. Care to at least mention the side-effects in the kernel
doc?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[urgent] drm/fb-helper: Fix the dummy remove_conflicting_framebuffers

2016-08-22 Thread Chris Wilson
We always need to remove conflicting framebuffers if any other fb driver
is enabled, and not just if we are setting up an fbdev ourselves.

Unfortunately remove_conflicting_framebuffers() was incorrectly stubbed
out if !fbdev rather than !fb leading to major memory corruption (and
corrupt filesystems) upon boot.

Fixes: 44adece57e26 ("drm/fb-helper: Add a dummy 
remove_conflicting_framebuffers")
Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Cc: Tobias Jakobi 
Cc: Noralf Trønnes 
Cc: tomi.valkeinen at ti.com
Cc: dh.herrmann at gmail.com
Cc: Alex Deucher 
---
 include/drm/drm_fb_helper.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index 97889a90ff23..f811d755c254 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -282,12 +282,6 @@ drm_pick_cmdline_mode(struct drm_fb_helper_connector 
*fb_helper_conn,
 int drm_fb_helper_add_one_connector(struct drm_fb_helper *fb_helper, struct 
drm_connector *connector);
 int drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper,
   struct drm_connector *connector);
-static inline int
-drm_fb_helper_remove_conflicting_framebuffers(struct apertures_struct *a,
- const char *name, bool primary)
-{
-   return remove_conflicting_framebuffers(a, name, primary);
-}
 #else
 static inline int drm_fb_helper_modinit(void)
 {
@@ -482,11 +476,17 @@ drm_fb_helper_remove_one_connector(struct drm_fb_helper 
*fb_helper,
return 0;
 }

+#endif
+
 static inline int
 drm_fb_helper_remove_conflicting_framebuffers(struct apertures_struct *a,
  const char *name, bool primary)
 {
+#if IS_ENABLED(CONFIG_FB)
+   return remove_conflicting_framebuffers(a, name, primary);
+#else
return 0;
-}
 #endif
+}
+
 #endif
-- 
2.9.3



i865, drm_modeset_lock_all: BUG: unable to handle kernel NULL pointer dereference at 00000104

2016-08-23 Thread Chris Wilson
On Tue, Aug 23, 2016 at 12:58:43PM +0300, Meelis Roos wrote:
> This on a P4 PC with 82865G chipset and onboard Intel graphics. 4.7.0 
> worked fine, current 4.8 git shows NULL pointer dereference as shown 
> below at the end of dmesg.
> 
> [   10.066261] BUG: unable to handle kernel NULL pointer dereference at 
> 0104
> [   10.066273] IP: [] mutex_lock+0xa/0x15
> [   10.066287] *pde =  
> [   10.066295] Oops: 0002 [#1]
> [   10.066302] Modules linked in: i915(+) video i2c_algo_bit drm_kms_helper 
> syscopyarea sysfillrect sysimgblt fb_sys_fops drm iTCO_wdt 
> iTCO_vendor_support ppdev evdev snd_intel8x0 snd_ac97_codec ac97_bus psmouse 
> snd_pcm snd_timer snd pcspkr uhci_hcd ehci_pci soundcore sr_mod ehci_hcd 
> serio_raw i2c_i801 usbcore i2c_smbus cdrom lpc_ich mfd_core rng_core e100 mii 
> floppy parport_pc parport acpi_cpufreq button processor usb_common eeprom 
> lm85 hwmon_vid autofs4
> [   10.066378] CPU: 0 PID: 132 Comm: systemd-udevd Not tainted 
> 4.8.0-rc3-00013-gef0e1ea #34
> [   10.066389] Hardware name: MicroLink   
> /D865GLC, BIOS BF86510A.86A.0077.P25.0508040031 
> 08/04/2005
> [   10.066401] task: f62db800 task.stack: f597
> [   10.066409] EIP: 0060:[] EFLAGS: 00010286 CPU: 0
> [   10.066417] EIP is at mutex_lock+0xa/0x15
> [   10.066424] EAX: 0104 EBX: 0104 ECX:  EDX: 8000
> [   10.066432] ESI:  EDI: 0104 EBP: f5be8000 ESP: f5971b58
> [   10.066439]  DS: 007b ES: 007b FS:  GS: 00e0 SS: 0068
> [   10.066446] CR0: 80050033 CR2: 0104 CR3: 35945000 CR4: 06d0
> [   10.066453] Stack:
> [   10.066459]  f503d740 f824dddf  f61170c0 f61170c0 f82371ae 
> f850f40e 0001
> [   10.066476]  f61170c0 f5971bcc f5be8000 f9c2d401 0001 f8236fcc 
> 0001 
> [   10.066491]  f5144014 f5be8104 0008 f9c5267c 0007 f61170c0 
> f5144400 f9c4ff00
> [   10.066507] Call Trace:
> [   10.066526]  [] ? drm_modeset_lock_all+0x27/0xb3 [drm]
> [   10.066545]  [] ? drm_encoder_cleanup+0x1a/0x132 [drm]
> [   10.066559]  [] ? drm_atomic_helper_connector_reset+0x3f/0x5c 
> [drm_kms_helper]
> [   10.066644]  [] ? intel_dvo_init+0x569/0x788 [i915]

Looks like an incorrect call to drm_encoder_cleanup() from the error
path. If we hit the error path we have never called drm_encoder_init.
Please try:

diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index 47bdf9dad0d3..b9e5a63a7c9e 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -554,7 +554,6 @@ void intel_dvo_init(struct drm_device *dev)
return;
    }

-   drm_encoder_cleanup(_encoder->base);
kfree(intel_dvo);
kfree(intel_connector);
 }

-- 
Chris Wilson, Intel Open Source Technology Centre


i865, drm_modeset_lock_all: BUG: unable to handle kernel NULL pointer dereference at 00000104

2016-08-23 Thread Chris Wilson
On Tue, Aug 23, 2016 at 02:35:03PM +0300, Meelis Roos wrote:
> > Looks like an incorrect call to drm_encoder_cleanup() from the error
> > path. If we hit the error path we have never called drm_encoder_init.
> > Please try:
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_dvo.c 
> > b/drivers/gpu/drm/i915/intel_dvo.c
> > index 47bdf9dad0d3..b9e5a63a7c9e 100644
> > --- a/drivers/gpu/drm/i915/intel_dvo.c
> > +++ b/drivers/gpu/drm/i915/intel_dvo.c
> > @@ -554,7 +554,6 @@ void intel_dvo_init(struct drm_device *dev)
> > return;
> > }
> >  
> > -   drm_encoder_cleanup(_encoder->base);
> > kfree(intel_dvo);
> > kfree(intel_connector);
> >  }
> 
> It works - the BUG is gone.
> 
> Now I get just 
> [drm:__intel_set_cpu_fifo_underrun_reporting [i915]] *ERROR* pipe A underrun

Other than the annoying underrun, is everything else as expected? i.e.
no connected outputs? Have we lost dvo detection?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


<    6   7   8   9   10   11   12   13   14   15   >