This patch doesn't seem to be the panacea that I had hoped, although
I've yet to thoroughly test if it actually improves some of the other
tests which caused trouble.

What occurs is the following (assume just 2 procs)
1. proc A gets to execbuf while out of memory, gets struct_mutex.
2. OOM killer comes in and chooses proc B
3. proc B closes it's fds, which requires struct mutex, blocks
4, OOM killer waits for B to die before killing another process (this
part is speculative)

In order to let the OOM complete, we defer processing the context
destruction parts which are those that require struct_mutex.

This patch has not really been cleaned up, I am only posting it for
posterity. (Several of the hunks are only relevant to full PPGTT patches
which are not merged).

Signed-off-by: Ben Widawsky <[email protected]>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  6 ++-
 drivers/gpu/drm/i915/i915_drv.h         |  9 +++--
 drivers/gpu/drm/i915/i915_gem.c         |  2 +-
 drivers/gpu/drm/i915/i915_gem_context.c | 68 ++++++++++++++++++++++++---------
 4 files changed, 61 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 835a43e..e6b5f3e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1717,11 +1717,13 @@ static void gen6_ppgtt_info(struct seq_file *m, struct 
drm_device *dev)
                struct drm_i915_file_private *file_priv = file->driver_priv;
                struct i915_hw_ppgtt *pvt_ppgtt;
 
-               pvt_ppgtt = ctx_to_ppgtt(file_priv->private_default_ctx);
+               pvt_ppgtt =
+                       ctx_to_ppgtt(file_priv->ctx_info->private_default_ctx);
                seq_printf(m, "proc: %s\n",
                           get_pid_task(file->pid, PIDTYPE_PID)->comm);
                seq_puts(m, "  default context:\n");
-               idr_for_each(&file_priv->context_idr, per_file_ctx, m);
+               idr_for_each(&file_priv->ctx_info->context_idr, per_file_ctx,
+                            m);
        }
        seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK));
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6fdab40..931fc42 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1735,15 +1735,18 @@ struct drm_i915_gem_request {
 
 struct drm_i915_file_private {
        struct drm_i915_private *dev_priv;
-
        struct {
                spinlock_t lock;
                struct list_head request_list;
                struct delayed_work idle_work;
        } mm;
-       struct idr context_idr;
+       struct i915_gem_ctx_info {
+               struct drm_i915_private *dev_priv;
+               struct idr context_idr;
+               struct work_struct close_work;
+               struct i915_hw_context *private_default_ctx;
+       } *ctx_info;
 
-       struct i915_hw_context *private_default_ctx;
        atomic_t rps_wait_boost;
 };
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6312d61..a32f6df 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2326,7 +2326,7 @@ static void i915_set_reset_status(struct 
intel_ring_buffer *ring,
        if (request->ctx && request->ctx->id != DEFAULT_CONTEXT_ID)
                hs = &request->ctx->hang_stats;
        else if (request->file_priv)
-               hs = &request->file_priv->private_default_ctx->hang_stats;
+               hs = 
&request->file_priv->ctx_info->private_default_ctx->hang_stats;
 
        if (hs) {
                if (guilty) {
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index f55f0a9..770b394 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -209,8 +209,8 @@ __create_hw_context(struct drm_device *dev,
        if (file_priv == NULL)
                return ctx;
 
-       ret = idr_alloc(&file_priv->context_idr, ctx, DEFAULT_CONTEXT_ID, 0,
-                       GFP_KERNEL);
+       ret = idr_alloc(&file_priv->ctx_info->context_idr,
+                       ctx, DEFAULT_CONTEXT_ID, 0, GFP_KERNEL);
        if (ret < 0)
                goto err_out;
 
@@ -481,29 +481,54 @@ static int context_idr_cleanup(int id, void *p, void 
*data)
        return 0;
 }
 
+static void i915_gem_context_close_work_handler(struct work_struct *work)
+{
+       struct i915_gem_ctx_info *ctx_info = container_of(work,
+                                                         struct 
i915_gem_ctx_info,
+                                                         close_work);
+       struct drm_i915_private *dev_priv = ctx_info->dev_priv;
+
+       mutex_lock(&dev_priv->dev->struct_mutex);
+       idr_for_each(&ctx_info->context_idr, context_idr_cleanup, NULL);
+       i915_gem_context_unreference(ctx_info->private_default_ctx);
+       idr_destroy(&ctx_info->context_idr);
+       mutex_unlock(&dev_priv->dev->struct_mutex);
+
+       kfree(ctx_info);
+}
+
 int i915_gem_context_open(struct drm_device *dev, struct drm_file *file)
 {
        struct drm_i915_file_private *file_priv = file->driver_priv;
        struct drm_i915_private *dev_priv = dev->dev_private;
 
+       file_priv->ctx_info = kzalloc(sizeof(*file_priv->ctx_info), GFP_KERNEL);
+       if (!file_priv->ctx_info)
+               return -ENOMEM;
+
+       file_priv->ctx_info->dev_priv = file_priv->dev_priv;
+       INIT_WORK(&file_priv->ctx_info->close_work,
+                 i915_gem_context_close_work_handler);
+
        if (!HAS_HW_CONTEXTS(dev)) {
                /* Cheat for hang stats */
-               file_priv->private_default_ctx =
+               file_priv->ctx_info->private_default_ctx =
                        kzalloc(sizeof(struct i915_hw_context), GFP_KERNEL);
-               file_priv->private_default_ctx->vm = &dev_priv->gtt.base;
+               file_priv->ctx_info->private_default_ctx->vm =
+                       &dev_priv->gtt.base;
                return 0;
        }
 
-       idr_init(&file_priv->context_idr);
+       idr_init(&file_priv->ctx_info->context_idr);
 
        mutex_lock(&dev->struct_mutex);
-       file_priv->private_default_ctx =
+       file_priv->ctx_info->private_default_ctx =
                i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev));
        mutex_unlock(&dev->struct_mutex);
 
-       if (IS_ERR(file_priv->private_default_ctx)) {
-               idr_destroy(&file_priv->context_idr);
-               return PTR_ERR(file_priv->private_default_ctx);
+       if (IS_ERR(file_priv->ctx_info->private_default_ctx)) {
+               idr_destroy(&file_priv->ctx_info->context_idr);
+               return PTR_ERR(file_priv->ctx_info->private_default_ctx);
        }
 
        return 0;
@@ -511,27 +536,34 @@ int i915_gem_context_open(struct drm_device *dev, struct 
drm_file *file)
 
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)
 {
+       struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_file_private *file_priv = file->driver_priv;
 
        if (!HAS_HW_CONTEXTS(dev)) {
-               kfree(file_priv->private_default_ctx);
+               kfree(file_priv->ctx_info->private_default_ctx);
+               kfree(file_priv->ctx_info);
                return;
        }
 
-       mutex_lock(&dev->struct_mutex);
-       idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
-       i915_gem_context_unreference(file_priv->private_default_ctx);
-       idr_destroy(&file_priv->context_idr);
-       mutex_unlock(&dev->struct_mutex);
+       if (mutex_trylock(&dev->struct_mutex)) {
+               idr_for_each(&file_priv->ctx_info->context_idr,
+                            context_idr_cleanup, NULL);
+               
i915_gem_context_unreference(file_priv->ctx_info->private_default_ctx);
+               idr_destroy(&file_priv->ctx_info->context_idr);
+               mutex_unlock(&dev->struct_mutex);
+               kfree(file_priv->ctx_info);
+       } else {
+               queue_work(dev_priv->wq, &file_priv->ctx_info->close_work);
+       }
 }
 
 struct i915_hw_context *
 i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
 {
        if (!HAS_HW_CONTEXTS(file_priv->dev_priv->dev))
-               return file_priv->private_default_ctx;
+               return file_priv->ctx_info->private_default_ctx;
 
-       return (struct i915_hw_context *)idr_find(&file_priv->context_idr, id);
+       return (struct i915_hw_context 
*)idr_find(&file_priv->ctx_info->context_idr, id);
 }
 
 static inline int
@@ -773,7 +805,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, 
void *data,
                return -ENOENT;
        }
 
-       idr_remove(&ctx->file_priv->context_idr, ctx->id);
+       idr_remove(&ctx->file_priv->ctx_info->context_idr, ctx->id);
        i915_gem_context_unreference(ctx);
        mutex_unlock(&dev->struct_mutex);
 
-- 
1.8.4.2

_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to