Re: [Intel-gfx] [PATCH 23/41] drm/i915: Move object release to a freelist + worker

2016-10-20 Thread Chris Wilson
On Tue, Oct 18, 2016 at 10:51:53AM +0100, John Harrison wrote:
> On 14/10/2016 13:18, Chris Wilson wrote:
> >@@ -338,11 +345,10 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
> > case I915_TILING_Y:
> > args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
> > break;
> >+default:
> > case I915_TILING_NONE:
> > args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
> > break;
> >-default:
> >-DRM_ERROR("unknown tiling mode\n");
> Why is this change still needed? Now that it returns early on lookup
> failure, there should be no need to ignore broken/unsupported tiling
> modes. So why silence the error message?

We do not emit *ERROR* (a driver error) under direct control of the
user.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 23/41] drm/i915: Move object release to a freelist + worker

2016-10-18 Thread John Harrison

On 14/10/2016 13:18, Chris Wilson wrote:

We want to hide the latency of releasing objects and their backing
storage from the submission, so we move the actual free to a worker.
This allows us to switch to struct_mutex freeing of the object in the
next patch.

Furthermore, if we know that the object we are dereferencing remains valid
for the duration of our access, we can forgo the usual synchronisation
barriers and atomic reference counting. To ensure this we defer freeing
an object til after an RCU grace period, such that any lookup of the
object within an RCU read critical section will remain valid until
after we exit that critical section. We also employ this delay for
rate-limiting the serialisation on reallocation - we have to slow down
object creation in order to prevent resource starvation (in particular,
files).

v2: Return early in i915_gem_tiling() ioctl to skip over superfluous
work on error.

Signed-off-by: Chris Wilson 
---
  drivers/gpu/drm/i915/i915_debugfs.c  |  15 ++-
  drivers/gpu/drm/i915/i915_drv.c  |  19 ++--
  drivers/gpu/drm/i915/i915_drv.h  |  44 +++-
  drivers/gpu/drm/i915/i915_gem.c  | 166 +--
  drivers/gpu/drm/i915/i915_gem_shrinker.c |  14 ++-
  drivers/gpu/drm/i915/i915_gem_tiling.c   |  21 ++--
  6 files changed, 202 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 118bd35f750c..27fd5370f0cc 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4873,10 +4873,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops,
  #define DROP_BOUND 0x2
  #define DROP_RETIRE 0x4
  #define DROP_ACTIVE 0x8
-#define DROP_ALL (DROP_UNBOUND | \
- DROP_BOUND | \
- DROP_RETIRE | \
- DROP_ACTIVE)
+#define DROP_FREED 0x10
+#define DROP_ALL (DROP_UNBOUND | \
+ DROP_BOUND| \
+ DROP_RETIRE   | \
+ DROP_ACTIVE   | \
+ DROP_FREED)
  static int
  i915_drop_caches_get(void *data, u64 *val)
  {
@@ -4920,6 +4922,11 @@ i915_drop_caches_set(void *data, u64 val)
  unlock:
mutex_unlock(>struct_mutex);
  
+	if (val & DROP_FREED) {

+   synchronize_rcu();
+   flush_work(_priv->mm.free_work);
+   }
+
return ret;
  }
  
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c

index 5b72da6d45a2..c46f96d8bb38 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -537,14 +537,17 @@ static const struct vga_switcheroo_client_ops 
i915_switcheroo_ops = {
.can_switch = i915_switcheroo_can_switch,
  };
  
-static void i915_gem_fini(struct drm_device *dev)

+static void i915_gem_fini(struct drm_i915_private *dev_priv)
  {
-   mutex_lock(>struct_mutex);
-   i915_gem_cleanup_engines(dev);
-   i915_gem_context_fini(dev);
-   mutex_unlock(>struct_mutex);
+   mutex_lock(_priv->drm.struct_mutex);
+   i915_gem_cleanup_engines(_priv->drm);
+   i915_gem_context_fini(_priv->drm);
+   mutex_unlock(_priv->drm.struct_mutex);
+
+   synchronize_rcu();
+   flush_work(_priv->mm.free_work);
  
-	WARN_ON(!list_empty(_i915(dev)->context_list));

+   WARN_ON(!list_empty(_priv->context_list));
  }
  
  static int i915_load_modeset_init(struct drm_device *dev)

@@ -619,7 +622,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
  cleanup_gem:
if (i915_gem_suspend(dev))
DRM_ERROR("failed to idle hardware; continuing to unload!\n");
-   i915_gem_fini(dev);
+   i915_gem_fini(dev_priv);
  cleanup_irq:
intel_guc_fini(dev);
drm_irq_uninstall(dev);
@@ -1299,7 +1302,7 @@ void i915_driver_unload(struct drm_device *dev)
drain_workqueue(dev_priv->wq);
  
  	intel_guc_fini(dev);

-   i915_gem_fini(dev);
+   i915_gem_fini(dev_priv);
intel_fbc_cleanup_cfb(dev_priv);
  
  	intel_power_domains_fini(dev_priv);

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e066284aace9..e2fe50b6b493 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1355,8 +1355,8 @@ struct i915_gem_mm {
struct list_head bound_list;
/**
 * List of objects which are not bound to the GTT (thus
-* are idle and not used by the GPU) but still have
-* (presumably uncached) pages still attached.
+* are idle and not used by the GPU). These objects may or may
+* not actually have any pages attached.
 */
struct list_head unbound_list;
  
@@ -1365,6 +1365,12 @@ struct i915_gem_mm {

 */
struct list_head userfault_list;
  
+	/**

+* List of objects which are pending destruction.
+*/
+   struct llist_head free_list;
+   struct work_struct free_work;
+
/** Usable portion of 

Re: [Intel-gfx] [PATCH 23/41] drm/i915: Move object release to a freelist + worker

2016-10-18 Thread Joonas Lahtinen
On ti, 2016-10-18 at 12:19 +0300, Joonas Lahtinen wrote:
> With the change from John,
> 

"Now that the change from John was done" would have been better
wording.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 23/41] drm/i915: Move object release to a freelist + worker

2016-10-18 Thread Joonas Lahtinen
On pe, 2016-10-14 at 13:18 +0100, Chris Wilson wrote:
> We want to hide the latency of releasing objects and their backing
> storage from the submission, so we move the actual free to a worker.
> This allows us to switch to struct_mutex freeing of the object in the
> next patch.
> 
> Furthermore, if we know that the object we are dereferencing remains valid
> for the duration of our access, we can forgo the usual synchronisation
> barriers and atomic reference counting. To ensure this we defer freeing
> an object til after an RCU grace period, such that any lookup of the
> object within an RCU read critical section will remain valid until
> after we exit that critical section. We also employ this delay for
> rate-limiting the serialisation on reallocation - we have to slow down
> object creation in order to prevent resource starvation (in particular,
> files).
> 
> v2: Return early in i915_gem_tiling() ioctl to skip over superfluous
> work on error.
> 
> Signed-off-by: Chris Wilson 

With the change from John,

Reviewed-by: Joonas Lahtinen 

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 23/41] drm/i915: Move object release to a freelist + worker

2016-10-14 Thread Chris Wilson
We want to hide the latency of releasing objects and their backing
storage from the submission, so we move the actual free to a worker.
This allows us to switch to struct_mutex freeing of the object in the
next patch.

Furthermore, if we know that the object we are dereferencing remains valid
for the duration of our access, we can forgo the usual synchronisation
barriers and atomic reference counting. To ensure this we defer freeing
an object til after an RCU grace period, such that any lookup of the
object within an RCU read critical section will remain valid until
after we exit that critical section. We also employ this delay for
rate-limiting the serialisation on reallocation - we have to slow down
object creation in order to prevent resource starvation (in particular,
files).

v2: Return early in i915_gem_tiling() ioctl to skip over superfluous
work on error.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  15 ++-
 drivers/gpu/drm/i915/i915_drv.c  |  19 ++--
 drivers/gpu/drm/i915/i915_drv.h  |  44 +++-
 drivers/gpu/drm/i915/i915_gem.c  | 166 +--
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  14 ++-
 drivers/gpu/drm/i915/i915_gem_tiling.c   |  21 ++--
 6 files changed, 202 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 118bd35f750c..27fd5370f0cc 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4873,10 +4873,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops,
 #define DROP_BOUND 0x2
 #define DROP_RETIRE 0x4
 #define DROP_ACTIVE 0x8
-#define DROP_ALL (DROP_UNBOUND | \
- DROP_BOUND | \
- DROP_RETIRE | \
- DROP_ACTIVE)
+#define DROP_FREED 0x10
+#define DROP_ALL (DROP_UNBOUND | \
+ DROP_BOUND| \
+ DROP_RETIRE   | \
+ DROP_ACTIVE   | \
+ DROP_FREED)
 static int
 i915_drop_caches_get(void *data, u64 *val)
 {
@@ -4920,6 +4922,11 @@ i915_drop_caches_set(void *data, u64 val)
 unlock:
mutex_unlock(>struct_mutex);
 
+   if (val & DROP_FREED) {
+   synchronize_rcu();
+   flush_work(_priv->mm.free_work);
+   }
+
return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5b72da6d45a2..c46f96d8bb38 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -537,14 +537,17 @@ static const struct vga_switcheroo_client_ops 
i915_switcheroo_ops = {
.can_switch = i915_switcheroo_can_switch,
 };
 
-static void i915_gem_fini(struct drm_device *dev)
+static void i915_gem_fini(struct drm_i915_private *dev_priv)
 {
-   mutex_lock(>struct_mutex);
-   i915_gem_cleanup_engines(dev);
-   i915_gem_context_fini(dev);
-   mutex_unlock(>struct_mutex);
+   mutex_lock(_priv->drm.struct_mutex);
+   i915_gem_cleanup_engines(_priv->drm);
+   i915_gem_context_fini(_priv->drm);
+   mutex_unlock(_priv->drm.struct_mutex);
+
+   synchronize_rcu();
+   flush_work(_priv->mm.free_work);
 
-   WARN_ON(!list_empty(_i915(dev)->context_list));
+   WARN_ON(!list_empty(_priv->context_list));
 }
 
 static int i915_load_modeset_init(struct drm_device *dev)
@@ -619,7 +622,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 cleanup_gem:
if (i915_gem_suspend(dev))
DRM_ERROR("failed to idle hardware; continuing to unload!\n");
-   i915_gem_fini(dev);
+   i915_gem_fini(dev_priv);
 cleanup_irq:
intel_guc_fini(dev);
drm_irq_uninstall(dev);
@@ -1299,7 +1302,7 @@ void i915_driver_unload(struct drm_device *dev)
drain_workqueue(dev_priv->wq);
 
intel_guc_fini(dev);
-   i915_gem_fini(dev);
+   i915_gem_fini(dev_priv);
intel_fbc_cleanup_cfb(dev_priv);
 
intel_power_domains_fini(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e066284aace9..e2fe50b6b493 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1355,8 +1355,8 @@ struct i915_gem_mm {
struct list_head bound_list;
/**
 * List of objects which are not bound to the GTT (thus
-* are idle and not used by the GPU) but still have
-* (presumably uncached) pages still attached.
+* are idle and not used by the GPU). These objects may or may
+* not actually have any pages attached.
 */
struct list_head unbound_list;
 
@@ -1365,6 +1365,12 @@ struct i915_gem_mm {
 */
struct list_head userfault_list;
 
+   /**
+* List of objects which are pending destruction.
+*/
+   struct llist_head free_list;
+   struct work_struct free_work;
+
/** Usable portion of the GTT for GEM */
unsigned long stolen_base; /*