[PATCH 08/18] drm/i915/guc: Defer context unpin until scheduling is disabled

2021-07-21 Thread Matthew Brost
With GuC scheduling, it isn't safe to unpin a context while scheduling
is enabled for that context, as the GuC may touch some of the pinned
state (e.g. LRC). To ensure scheduling isn't enabled when an unpin is
done, a callback is added to intel_context_unpin when pin count == 1
to disable scheduling for that context. When the response CTB is
received it is safe to do the final unpin.

Future patches may add a heuristic / delay to schedule the disable
callback to avoid thrashing on schedule enable / disable.

v2:
 (John H)
  - s/drm_dbg/drm_err
 (Daniel)
  - Clean up sched state function

Cc: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_context.c   |   4 +-
 drivers/gpu/drm/i915/gt/intel_context.h   |  27 +++-
 drivers/gpu/drm/i915/gt/intel_context_types.h |   2 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|   2 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |   3 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 146 +-
 6 files changed, 180 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index ad7197c5910f..3d5b4116617f 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -306,9 +306,9 @@ int __intel_context_do_pin(struct intel_context *ce)
return err;
 }
 
-void intel_context_unpin(struct intel_context *ce)
+void __intel_context_do_unpin(struct intel_context *ce, int sub)
 {
-   if (!atomic_dec_and_test(&ce->pin_count))
+   if (!atomic_sub_and_test(sub, &ce->pin_count))
return;
 
CE_TRACE(ce, "unpin\n");
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index b10cbe8fee99..974ef85320c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -113,7 +113,32 @@ static inline void __intel_context_pin(struct 
intel_context *ce)
atomic_inc(&ce->pin_count);
 }
 
-void intel_context_unpin(struct intel_context *ce);
+void __intel_context_do_unpin(struct intel_context *ce, int sub);
+
+static inline void intel_context_sched_disable_unpin(struct intel_context *ce)
+{
+   __intel_context_do_unpin(ce, 2);
+}
+
+static inline void intel_context_unpin(struct intel_context *ce)
+{
+   if (!ce->ops->sched_disable) {
+   __intel_context_do_unpin(ce, 1);
+   } else {
+   /*
+* Move ownership of this pin to the scheduling disable which is
+* an async operation. When that operation completes the above
+* intel_context_sched_disable_unpin is called potentially
+* unpinning the context.
+*/
+   while (!atomic_add_unless(&ce->pin_count, -1, 1)) {
+   if (atomic_cmpxchg(&ce->pin_count, 1, 2) == 1) {
+   ce->ops->sched_disable(ce);
+   break;
+   }
+   }
+   }
+}
 
 void intel_context_enter_engine(struct intel_context *ce);
 void intel_context_exit_engine(struct intel_context *ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index e0e3a937f709..4a5518d295c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -43,6 +43,8 @@ struct intel_context_ops {
void (*enter)(struct intel_context *ce);
void (*exit)(struct intel_context *ce);
 
+   void (*sched_disable)(struct intel_context *ce);
+
void (*reset)(struct intel_context *ce);
void (*destroy)(struct kref *kref);
 };
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 7fd6c3e343e4..4d470ebeda95 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -248,6 +248,8 @@ int intel_guc_reset_engine(struct intel_guc *guc,
 
 int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
  const u32 *msg, u32 len);
+int intel_guc_sched_done_process_msg(struct intel_guc *guc,
+const u32 *msg, u32 len);
 
 void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 28ff82c5be45..019b25ff1888 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -932,6 +932,9 @@ static int ct_process_request(struct intel_guc_ct *ct, 
struct ct_incoming_msg *r
ret = intel_guc_deregister_done_process_msg(guc, payload,
len);
break;
+   case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+   ret = intel_guc_sched_done_process_msg(guc, payload, 

[PATCH 08/18] drm/i915/guc: Defer context unpin until scheduling is disabled

2021-07-20 Thread Matthew Brost
With GuC scheduling, it isn't safe to unpin a context while scheduling
is enabled for that context, as the GuC may touch some of the pinned
state (e.g. LRC). To ensure scheduling isn't enabled when an unpin is
done, a callback is added to intel_context_unpin when pin count == 1
to disable scheduling for that context. When the response CTB is
received it is safe to do the final unpin.

Future patches may add a heuristic / delay to schedule the disable
callback to avoid thrashing on schedule enable / disable.

v2:
 (John H)
  - s/drm_dbg/drm_err
 (Daniel)
  - Clean up sched state function

Cc: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_context.c   |   4 +-
 drivers/gpu/drm/i915/gt/intel_context.h   |  27 +++-
 drivers/gpu/drm/i915/gt/intel_context_types.h |   2 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|   2 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |   3 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 146 +-
 6 files changed, 180 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index ad7197c5910f..3d5b4116617f 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -306,9 +306,9 @@ int __intel_context_do_pin(struct intel_context *ce)
return err;
 }
 
-void intel_context_unpin(struct intel_context *ce)
+void __intel_context_do_unpin(struct intel_context *ce, int sub)
 {
-   if (!atomic_dec_and_test(&ce->pin_count))
+   if (!atomic_sub_and_test(sub, &ce->pin_count))
return;
 
CE_TRACE(ce, "unpin\n");
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index b10cbe8fee99..974ef85320c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -113,7 +113,32 @@ static inline void __intel_context_pin(struct 
intel_context *ce)
atomic_inc(&ce->pin_count);
 }
 
-void intel_context_unpin(struct intel_context *ce);
+void __intel_context_do_unpin(struct intel_context *ce, int sub);
+
+static inline void intel_context_sched_disable_unpin(struct intel_context *ce)
+{
+   __intel_context_do_unpin(ce, 2);
+}
+
+static inline void intel_context_unpin(struct intel_context *ce)
+{
+   if (!ce->ops->sched_disable) {
+   __intel_context_do_unpin(ce, 1);
+   } else {
+   /*
+* Move ownership of this pin to the scheduling disable which is
+* an async operation. When that operation completes the above
+* intel_context_sched_disable_unpin is called potentially
+* unpinning the context.
+*/
+   while (!atomic_add_unless(&ce->pin_count, -1, 1)) {
+   if (atomic_cmpxchg(&ce->pin_count, 1, 2) == 1) {
+   ce->ops->sched_disable(ce);
+   break;
+   }
+   }
+   }
+}
 
 void intel_context_enter_engine(struct intel_context *ce);
 void intel_context_exit_engine(struct intel_context *ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index e0e3a937f709..4a5518d295c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -43,6 +43,8 @@ struct intel_context_ops {
void (*enter)(struct intel_context *ce);
void (*exit)(struct intel_context *ce);
 
+   void (*sched_disable)(struct intel_context *ce);
+
void (*reset)(struct intel_context *ce);
void (*destroy)(struct kref *kref);
 };
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 30773cd699f5..03b7222b04a2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -241,6 +241,8 @@ int intel_guc_reset_engine(struct intel_guc *guc,
 
 int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
  const u32 *msg, u32 len);
+int intel_guc_sched_done_process_msg(struct intel_guc *guc,
+const u32 *msg, u32 len);
 
 void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 28ff82c5be45..019b25ff1888 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -932,6 +932,9 @@ static int ct_process_request(struct intel_guc_ct *ct, 
struct ct_incoming_msg *r
ret = intel_guc_deregister_done_process_msg(guc, payload,
len);
break;
+   case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+   ret = intel_guc_sched_done_process_msg(guc, payload,