Re: [Intel-gfx] [PATCH 23/42] drm/i915: Move object release to a freelist + worker

2016-10-11 Thread John Harrison

On 07/10/2016 10:46, Chris Wilson wrote:

We want to hide the latency of releasing objects and their backing
storage from the submission, so we move the actual free to a worker.
This allows us to switch to struct_mutex freeing of the object in the
next patch.

Furthermore, if we know that the object we are dereferencing remains valid
for the duration of our access, we can forgo the usual synchronisation
barriers and atomic reference counting. To ensure this we defer freeing
an object til after an RCU grace period, such that any lookup of the
object within an RCU read critical section will remain valid until
after we exit that critical section. We also employ this delay for
rate-limiting the serialisation on reallocation - we have to slow down
object creation in order to prevent resource starvation (in particular,
files).

Signed-off-by: Chris Wilson 
---
  drivers/gpu/drm/i915/i915_debugfs.c  |  15 ++-
  drivers/gpu/drm/i915/i915_drv.c  |   3 +
  drivers/gpu/drm/i915/i915_drv.h  |  44 +++-
  drivers/gpu/drm/i915/i915_gem.c  | 166 +--
  drivers/gpu/drm/i915/i915_gem_shrinker.c |  14 ++-
  drivers/gpu/drm/i915/i915_gem_tiling.c   |  21 ++--
  6 files changed, 193 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index b807ddf65e04..144013875513 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4872,10 +4872,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops,
  #define DROP_BOUND 0x2
  #define DROP_RETIRE 0x4
  #define DROP_ACTIVE 0x8
-#define DROP_ALL (DROP_UNBOUND | \
- DROP_BOUND | \
- DROP_RETIRE | \
- DROP_ACTIVE)
+#define DROP_FREED 0x10
+#define DROP_ALL (DROP_UNBOUND | \
+ DROP_BOUND| \
+ DROP_RETIRE   | \
+ DROP_ACTIVE   | \
+ DROP_FREED)
  static int
  i915_drop_caches_get(void *data, u64 *val)
  {
@@ -4919,6 +4921,11 @@ i915_drop_caches_set(void *data, u64 val)
  unlock:
mutex_unlock(>struct_mutex);
  
+	if (val & DROP_FREED) {

+   synchronize_rcu();
+   flush_work(_priv->mm.free_work);
+   }
+
return ret;
  }
  
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c

index 89d322215c84..f7d48f97993d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -563,6 +563,9 @@ static void i915_gem_fini(struct drm_device *dev)
i915_gem_context_fini(dev);
mutex_unlock(>struct_mutex);
  
+	synchronize_rcu();

+   flush_work(_priv->mm.free_work);
+
WARN_ON(!list_empty(_i915(dev)->context_list));
  }
  
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h

index 7e9df190c891..e79a5cb78b5d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1354,11 +1354,17 @@ struct i915_gem_mm {
struct list_head bound_list;
/**
 * List of objects which are not bound to the GTT (thus
-* are idle and not used by the GPU) but still have
-* (presumably uncached) pages still attached.
+* are idle and not used by the GPU). These objects may or may
+* not actually have any pages attached.
 */
struct list_head unbound_list;
  
+	/**

+* List of objects which are pending destruction.
+*/
+   struct llist_head free_list;
+   struct work_struct free_work;
+
/** Usable portion of the GTT for GEM */
unsigned long stolen_base; /* limited to low memory (32-bit) */
  
@@ -2214,6 +2220,10 @@ struct drm_i915_gem_object {

/** Stolen memory for this object, instead of being backed by shmem. */
struct drm_mm_node *stolen;
struct list_head global_list;
+   union {
+   struct rcu_head rcu;
+   struct llist_node freed;
+   };
  
  	/** Used in execbuf to temporarily hold a ref */

struct list_head obj_exec_link;
@@ -2333,10 +2343,38 @@ to_intel_bo(struct drm_gem_object *gem)
return container_of(gem, struct drm_i915_gem_object, base);
  }
  
+/**

+ * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
+ * @filp: DRM file private date
+ * @handle: userspace handle
+ *
+ * Returns:
+ *
+ * A pointer to the object named by the handle if such exists on @filp, NULL
+ * otherwise. This object is only valid whilst under the RCU read lock, and
+ * note carefully the object may be in the process of being destroyed.
+ */
+static inline struct drm_i915_gem_object *
+i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle)
+{
+#ifdef CONFIG_LOCKDEP
+   WARN_ON(!lock_is_held(_lock_map));
+#endif
+   return idr_find(>object_idr, handle);
+}
+
  static inline struct drm_i915_gem_object *
  i915_gem_object_lookup(struct drm_file *file, u32 

[Intel-gfx] [PATCH 23/42] drm/i915: Move object release to a freelist + worker

2016-10-07 Thread Chris Wilson
We want to hide the latency of releasing objects and their backing
storage from the submission, so we move the actual free to a worker.
This allows us to switch to struct_mutex freeing of the object in the
next patch.

Furthermore, if we know that the object we are dereferencing remains valid
for the duration of our access, we can forgo the usual synchronisation
barriers and atomic reference counting. To ensure this we defer freeing
an object til after an RCU grace period, such that any lookup of the
object within an RCU read critical section will remain valid until
after we exit that critical section. We also employ this delay for
rate-limiting the serialisation on reallocation - we have to slow down
object creation in order to prevent resource starvation (in particular,
files).

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  15 ++-
 drivers/gpu/drm/i915/i915_drv.c  |   3 +
 drivers/gpu/drm/i915/i915_drv.h  |  44 +++-
 drivers/gpu/drm/i915/i915_gem.c  | 166 +--
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  14 ++-
 drivers/gpu/drm/i915/i915_gem_tiling.c   |  21 ++--
 6 files changed, 193 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index b807ddf65e04..144013875513 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4872,10 +4872,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops,
 #define DROP_BOUND 0x2
 #define DROP_RETIRE 0x4
 #define DROP_ACTIVE 0x8
-#define DROP_ALL (DROP_UNBOUND | \
- DROP_BOUND | \
- DROP_RETIRE | \
- DROP_ACTIVE)
+#define DROP_FREED 0x10
+#define DROP_ALL (DROP_UNBOUND | \
+ DROP_BOUND| \
+ DROP_RETIRE   | \
+ DROP_ACTIVE   | \
+ DROP_FREED)
 static int
 i915_drop_caches_get(void *data, u64 *val)
 {
@@ -4919,6 +4921,11 @@ i915_drop_caches_set(void *data, u64 val)
 unlock:
mutex_unlock(>struct_mutex);
 
+   if (val & DROP_FREED) {
+   synchronize_rcu();
+   flush_work(_priv->mm.free_work);
+   }
+
return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 89d322215c84..f7d48f97993d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -563,6 +563,9 @@ static void i915_gem_fini(struct drm_device *dev)
i915_gem_context_fini(dev);
mutex_unlock(>struct_mutex);
 
+   synchronize_rcu();
+   flush_work(_priv->mm.free_work);
+
WARN_ON(!list_empty(_i915(dev)->context_list));
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7e9df190c891..e79a5cb78b5d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1354,11 +1354,17 @@ struct i915_gem_mm {
struct list_head bound_list;
/**
 * List of objects which are not bound to the GTT (thus
-* are idle and not used by the GPU) but still have
-* (presumably uncached) pages still attached.
+* are idle and not used by the GPU). These objects may or may
+* not actually have any pages attached.
 */
struct list_head unbound_list;
 
+   /**
+* List of objects which are pending destruction.
+*/
+   struct llist_head free_list;
+   struct work_struct free_work;
+
/** Usable portion of the GTT for GEM */
unsigned long stolen_base; /* limited to low memory (32-bit) */
 
@@ -2214,6 +2220,10 @@ struct drm_i915_gem_object {
/** Stolen memory for this object, instead of being backed by shmem. */
struct drm_mm_node *stolen;
struct list_head global_list;
+   union {
+   struct rcu_head rcu;
+   struct llist_node freed;
+   };
 
/** Used in execbuf to temporarily hold a ref */
struct list_head obj_exec_link;
@@ -2333,10 +2343,38 @@ to_intel_bo(struct drm_gem_object *gem)
return container_of(gem, struct drm_i915_gem_object, base);
 }
 
+/**
+ * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
+ * @filp: DRM file private date
+ * @handle: userspace handle
+ *
+ * Returns:
+ *
+ * A pointer to the object named by the handle if such exists on @filp, NULL
+ * otherwise. This object is only valid whilst under the RCU read lock, and
+ * note carefully the object may be in the process of being destroyed.
+ */
+static inline struct drm_i915_gem_object *
+i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle)
+{
+#ifdef CONFIG_LOCKDEP
+   WARN_ON(!lock_is_held(_lock_map));
+#endif
+   return idr_find(>object_idr, handle);
+}
+
 static inline struct drm_i915_gem_object *
 i915_gem_object_lookup(struct drm_file *file, u32 handle)
 {
-   return