From: Thomas Hellström <[email protected]>

With the huge number of sites where multiple-object locking is
needed in the driver, it becomes difficult to avoid recursive
ww_acquire_ctx initialization, and the function prototypes become
bloated passing the ww_acquire_ctx around. Furthermore it's not
always easy to get the -EDEADLK handling correct and to follow it.

Introduce a i915_gem_do_ww utility that tries to remedy all these problems
by enclosing parts of a ww transaction in the following way:

my_function() {
        struct i915_gem_ww_ctx *ww, template;
        int err;
        bool interruptible = true;

        i915_do_ww(ww, &template, err, interruptible) {
                err = ww_transaction_part(ww);
        }
        return err;
}

The utility will automatically look up an active ww_acquire_ctx if one
is initialized previously in the call chain, and if one found will
forward the -EDEADLK instead of handling it, which takes care of the
recursive initalization. Using the utility also discourages nested ww
unlocking / relocking that is both very fragile and hard to follow.

To look up and register an active ww_acquire_ctx, use a
driver-wide hash table for now. But noting that a task could only have
a single active ww_acqurie_ctx per ww_class, the active CTX is really
task state and a generic version of this utility in the ww_mutex code
could thus probably use a quick lookup from a list in the
struct task_struct.

Signed-off-by: Thomas Hellström <[email protected]>
---
 drivers/gpu/drm/i915/i915_gem_ww.c  | 74 ++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_ww.h  | 56 +++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_globals.c |  1 +
 drivers/gpu/drm/i915/i915_globals.h |  1 +
 4 files changed, 130 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_ww.c 
b/drivers/gpu/drm/i915/i915_gem_ww.c
index 3490b72cf613..6247af1dba87 100644
--- a/drivers/gpu/drm/i915/i915_gem_ww.c
+++ b/drivers/gpu/drm/i915/i915_gem_ww.c
@@ -1,10 +1,12 @@
 // SPDX-License-Identifier: MIT
 /*
- * Copyright © 2020 Intel Corporation
+ * Copyright © 2019 Intel Corporation
  */
+#include <linux/rhashtable.h>
 #include <linux/dma-resv.h>
 #include <linux/stacktrace.h>
 #include "i915_gem_ww.h"
+#include "i915_globals.h"
 #include "gem/i915_gem_object.h"
 
 void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr)
@@ -70,3 +72,73 @@ int __must_check i915_gem_ww_ctx_backoff(struct 
i915_gem_ww_ctx *ww)
 
        return ret;
 }
+
+static struct rhashtable ww_ht;
+static const struct rhashtable_params ww_params = {
+       .key_len = sizeof(struct task_struct *),
+       .key_offset = offsetof(struct i915_gem_ww_ctx, ctx.task),
+       .head_offset = offsetof(struct i915_gem_ww_ctx, head),
+       .min_size = 128,
+};
+
+static void i915_ww_item_free(void *ptr, void *arg)
+{
+       WARN_ON_ONCE(1);
+}
+
+static void i915_global_ww_exit(void)
+{
+       rhashtable_free_and_destroy(&ww_ht, i915_ww_item_free, NULL);
+}
+
+static void i915_global_ww_shrink(void)
+{
+}
+
+static struct i915_global global = {
+       .shrink = i915_global_ww_shrink,
+       .exit = i915_global_ww_exit,
+};
+
+int __init i915_global_ww_init(void)
+{
+       int ret = rhashtable_init(&ww_ht, &ww_params);
+
+       if (ret)
+               return ret;
+
+       i915_global_register(&global);
+
+       return 0;
+}
+
+void __i915_gem_ww_mark_unused(struct i915_gem_ww_ctx *ww)
+{
+       GEM_WARN_ON(rhashtable_remove_fast(&ww_ht, &ww->head, ww_params));
+}
+
+/**
+ * __i915_gem_ww_locate_or_use - return the task's i915_gem_ww_ctx context
+ * to use.
+ *
+ * @template: The context to use if there was none initialized previously
+ * in the call chain.
+ *
+ * RETURN: The task's i915_gem_ww_ctx context.
+ */
+struct i915_gem_ww_ctx *
+__i915_gem_ww_locate_or_use(struct i915_gem_ww_ctx *template)
+{
+       struct i915_gem_ww_ctx *tmp;
+
+       /* ctx.task is the hash key, so set it first. */
+       template->ctx.task = current;
+
+       /*
+        * Ideally we'd just hook the active context to the
+        * struct task_struct. But for now use a hash table.
+        */
+       tmp = rhashtable_lookup_get_insert_fast(&ww_ht, &template->head,
+                                               ww_params);
+       return tmp;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_ww.h 
b/drivers/gpu/drm/i915/i915_gem_ww.h
index 94fdf8c5f89b..b844596067c7 100644
--- a/drivers/gpu/drm/i915/i915_gem_ww.h
+++ b/drivers/gpu/drm/i915/i915_gem_ww.h
@@ -6,18 +6,72 @@
 #define __I915_GEM_WW_H__
 
 #include <linux/stackdepot.h>
+#include <linux/rhashtable-types.h>
 #include <drm/drm_drv.h>
 
 struct i915_gem_ww_ctx {
        struct ww_acquire_ctx ctx;
+       struct rhash_head head;
        struct list_head obj_list;
        struct drm_i915_gem_object *contended;
        depot_stack_handle_t contended_bt;
-       bool intr;
+       u32 call_depth;
+       unsigned short intr;
+       unsigned short loop;
 };
 
 void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr);
 void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx);
 int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx);
 void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj);
+
+/* Internal functions used by the inlines! Don't use. */
+void __i915_gem_ww_mark_unused(struct i915_gem_ww_ctx *ww);
+struct i915_gem_ww_ctx *
+__i915_gem_ww_locate_or_use(struct i915_gem_ww_ctx *template);
+
+static inline int __i915_gem_ww_fini(struct i915_gem_ww_ctx *ww, int err)
+{
+       ww->loop = 0;
+       if (ww->call_depth) {
+               ww->call_depth--;
+               return err;
+       }
+
+       if (err == -EDEADLK) {
+               err = i915_gem_ww_ctx_backoff(ww);
+               if (!err)
+                       ww->loop = 1;
+       }
+
+       if (!ww->loop) {
+               i915_gem_ww_ctx_fini(ww);
+               __i915_gem_ww_mark_unused(ww);
+       }
+
+       return err;
+}
+
+static inline struct i915_gem_ww_ctx *
+__i915_gem_ww_init(struct i915_gem_ww_ctx *template, bool intr)
+{
+       struct i915_gem_ww_ctx *ww = __i915_gem_ww_locate_or_use(template);
+
+       if (!ww) {
+               ww = template;
+               ww->call_depth = 0;
+               i915_gem_ww_ctx_init(ww, intr);
+       } else {
+               ww->call_depth++;
+       }
+
+       ww->loop = 1;
+
+       return ww;
+}
+
+#define i915_gem_do_ww(_ww, _template, _err, _intr)                    \
+       for ((_ww) = __i915_gem_ww_init(_template, _intr); (_ww)->loop; \
+            _err = __i915_gem_ww_fini(_ww, _err))
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_globals.c 
b/drivers/gpu/drm/i915/i915_globals.c
index 3aa213684293..9087cc8c2ee3 100644
--- a/drivers/gpu/drm/i915/i915_globals.c
+++ b/drivers/gpu/drm/i915/i915_globals.c
@@ -94,6 +94,7 @@ static __initconst int (* const initfn[])(void) = {
        i915_global_request_init,
        i915_global_scheduler_init,
        i915_global_vma_init,
+       i915_global_ww_init,
 };
 
 int __init i915_globals_init(void)
diff --git a/drivers/gpu/drm/i915/i915_globals.h 
b/drivers/gpu/drm/i915/i915_globals.h
index b2f5cd9b9b1a..5976b460ee39 100644
--- a/drivers/gpu/drm/i915/i915_globals.h
+++ b/drivers/gpu/drm/i915/i915_globals.h
@@ -34,5 +34,6 @@ int i915_global_objects_init(void);
 int i915_global_request_init(void);
 int i915_global_scheduler_init(void);
 int i915_global_vma_init(void);
+int i915_global_ww_init(void);
 
 #endif /* _I915_GLOBALS_H_ */
-- 
2.25.1

_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to