From: Tomer Tayar <tta...@habana.ai>

Refactor the CS IOCTL handling by gathering common code into
sub-functions, to ease the future addition of new CS types.

Signed-off-by: Tomer Tayar <tta...@habana.ai>
Reviewed-by: Oded Gabbay <ogab...@kernel.org>
Signed-off-by: Oded Gabbay <ogab...@kernel.org>
---
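Note (illustration only, not part of the patch): the new hl_cs_sanity_checks()
rejects combinations of CS type flags by masking off HL_CS_FLAGS_FORCE_RESTORE
and requiring the remainder to be zero or a power of two. A minimal,
self-contained user-space sketch of that check follows; the flag values and the
is_power_of_2() helper below are stand-ins for the kernel definitions.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Stand-ins for the kernel flag bits; values are illustrative only. */
	#define HL_CS_FLAGS_FORCE_RESTORE	0x1
	#define HL_CS_FLAGS_SIGNAL		0x2
	#define HL_CS_FLAGS_WAIT		0x4
	#define HL_CS_FLAGS_COLLECTIVE_WAIT	0x8

	/* Mirrors the kernel's is_power_of_2(): true iff exactly one bit is set. */
	static bool is_power_of_2(uint32_t n)
	{
		return n != 0 && (n & (n - 1)) == 0;
	}

	int main(void)
	{
		uint32_t cs_flags = HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT;
		uint32_t cs_type_flags = cs_flags & ~HL_CS_FLAGS_FORCE_RESTORE;

		/* Zero flags means CS_TYPE_DEFAULT; more than one set bit is rejected. */
		if (cs_type_flags && !is_power_of_2(cs_type_flags))
			printf("rejected: CS type flags are mutually exclusive\n");
		else
			printf("accepted\n");

		return 0;
	}

With the values above, SIGNAL | WAIT survives the mask as 0x6, which is not a
power of two, so the submission is rejected, matching the dev_err() path in
hl_cs_sanity_checks().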
 .../habanalabs/common/command_submission.c    | 419 ++++++++++--------
 1 file changed, 224 insertions(+), 195 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index e9529f3efc1b..3e6f4e5ef7ec 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -11,8 +11,6 @@
 #include <linux/uaccess.h>
 #include <linux/slab.h>
 
-#define HL_CS_FLAGS_SIG_WAIT   (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT)
-
 static void job_wq_completion(struct work_struct *work);
 static long _hl_cs_wait_ioctl(struct hl_device *hdev,
                struct hl_ctx *ctx, u64 timeout_us, u64 seq);
@@ -660,44 +658,114 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
        return job;
 }
 
-static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
-                               u32 num_chunks, u64 *cs_seq)
+static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
+{
+       if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
+               return CS_TYPE_SIGNAL;
+       else if (cs_type_flags & HL_CS_FLAGS_WAIT)
+               return CS_TYPE_WAIT;
+       else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
+               return CS_TYPE_COLLECTIVE_WAIT;
+       else
+               return CS_TYPE_DEFAULT;
+}
+
+static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 {
        struct hl_device *hdev = hpriv->hdev;
-       struct hl_cs_chunk *cs_chunk_array;
-       struct hl_cs_counters_atomic *cntr;
-       struct hl_cs_job *job;
-       struct hl_cs *cs;
-       struct hl_cb *cb;
-       bool int_queues_only = true;
-       u32 size_to_copy;
-       int rc, i;
+       struct hl_ctx *ctx = hpriv->ctx;
+       u32 cs_type_flags, num_chunks;
+       enum hl_cs_type cs_type;
 
-       cntr = &hdev->aggregated_cs_counters;
-       *cs_seq = ULLONG_MAX;
+       if (hl_device_disabled_or_in_reset(hdev)) {
+               dev_warn_ratelimited(hdev->dev,
+                       "Device is %s. Can't submit new CS\n",
+                       atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+               return -EBUSY;
+       }
+
+       cs_type_flags = args->in.cs_flags & ~HL_CS_FLAGS_FORCE_RESTORE;
+
+       if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
+               dev_err(hdev->dev,
+                       "CS type flags are mutually exclusive, context %d\n",
+                       ctx->asid);
+               return -EINVAL;
+       }
+
+       cs_type = hl_cs_get_cs_type(cs_type_flags);
+       num_chunks = args->in.num_chunks_execute;
+
+       if (unlikely((cs_type != CS_TYPE_DEFAULT) &&
+                                       !hdev->supports_sync_stream)) {
+               dev_err(hdev->dev, "Sync stream CS is not supported\n");
+               return -EINVAL;
+       }
+
+       if (cs_type == CS_TYPE_DEFAULT) {
+               if (!num_chunks) {
+                       dev_err(hdev->dev,
+                               "Got execute CS with 0 chunks, context %d\n",
+                               ctx->asid);
+                       return -EINVAL;
+               }
+       } else if (num_chunks != 1) {
+               dev_err(hdev->dev,
+                       "Sync stream CS mandates one chunk only, context %d\n",
+                       ctx->asid);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int hl_cs_copy_chunk_array(struct hl_device *hdev,
+                                       struct hl_cs_chunk **cs_chunk_array,
+                                       void __user *chunks, u32 num_chunks)
+{
+       u32 size_to_copy;
 
        if (num_chunks > HL_MAX_JOBS_PER_CS) {
                dev_err(hdev->dev,
                        "Number of chunks can NOT be larger than %d\n",
                        HL_MAX_JOBS_PER_CS);
-               rc = -EINVAL;
-               goto out;
+               return -EINVAL;
        }
 
-       cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
+       *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
                                        GFP_ATOMIC);
-       if (!cs_chunk_array) {
-               rc = -ENOMEM;
-               goto out;
-       }
+       if (!*cs_chunk_array)
+               return -ENOMEM;
 
        size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
-       if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
+       if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
                dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
-               rc = -EFAULT;
-               goto free_cs_chunk_array;
+               kfree(*cs_chunk_array);
+               return -EFAULT;
        }
 
+       return 0;
+}
+
+static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
+                               u32 num_chunks, u64 *cs_seq)
+{
+       bool int_queues_only = true;
+       struct hl_device *hdev = hpriv->hdev;
+       struct hl_cs_chunk *cs_chunk_array;
+       struct hl_cs_counters_atomic *cntr;
+       struct hl_cs_job *job;
+       struct hl_cs *cs;
+       struct hl_cb *cb;
+       int rc, i;
+
+       cntr = &hdev->aggregated_cs_counters;
+       *cs_seq = ULLONG_MAX;
+
+       rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
+       if (rc)
+               goto out;
+
        /* increment refcnt for context */
        hl_ctx_get(hdev, hpriv->ctx);
 
@@ -828,6 +896,108 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
        return rc;
 }
 
+static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
+                               u64 *cs_seq)
+{
+       struct hl_device *hdev = hpriv->hdev;
+       struct hl_ctx *ctx = hpriv->ctx;
+       bool need_soft_reset = false;
+       int rc = 0, do_ctx_switch;
+       void __user *chunks;
+       u32 num_chunks, tmp;
+       long ret;
+
+       do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
+
+       if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
+               mutex_lock(&hpriv->restore_phase_mutex);
+
+               if (do_ctx_switch) {
+                       rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
+                       if (rc) {
+                               dev_err_ratelimited(hdev->dev,
+                                       "Failed to switch to context %d, 
rejecting CS! %d\n",
+                                       ctx->asid, rc);
+                               /*
+                                * If we timedout, or if the device is not IDLE
+                                * while we want to do context-switch (-EBUSY),
+                                * we need to soft-reset because QMAN is
+                                * probably stuck. However, we can't call to
+                                * reset here directly because of deadlock, so
+                                * need to do it at the very end of this
+                                * function
+                                */
+                               if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
+                                       need_soft_reset = true;
+                               mutex_unlock(&hpriv->restore_phase_mutex);
+                               goto out;
+                       }
+               }
+
+               hdev->asic_funcs->restore_phase_topology(hdev);
+
+               chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
+               num_chunks = args->in.num_chunks_restore;
+
+               if (!num_chunks) {
+                       dev_dbg(hdev->dev,
+                               "Need to run restore phase but restore CS is 
empty\n");
+                       rc = 0;
+               } else {
+                       rc = cs_ioctl_default(hpriv, chunks, num_chunks,
+                                               cs_seq);
+               }
+
+               mutex_unlock(&hpriv->restore_phase_mutex);
+
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "Failed to submit restore CS for context %d 
(%d)\n",
+                               ctx->asid, rc);
+                       goto out;
+               }
+
+               /* Need to wait for restore completion before execution phase */
+               if (num_chunks) {
+wait_again:
+                       ret = _hl_cs_wait_ioctl(hdev, ctx,
+                                       jiffies_to_usecs(hdev->timeout_jiffies),
+                                       *cs_seq);
+                       if (ret <= 0) {
+                               if (ret == -ERESTARTSYS) {
+                                       usleep_range(100, 200);
+                                       goto wait_again;
+                               }
+
+                               dev_err(hdev->dev,
+                                       "Restore CS for context %d failed to 
complete %ld\n",
+                                       ctx->asid, ret);
+                               rc = -ENOEXEC;
+                               goto out;
+                       }
+               }
+
+               ctx->thread_ctx_switch_wait_token = 1;
+
+       } else if (!ctx->thread_ctx_switch_wait_token) {
+               rc = hl_poll_timeout_memory(hdev,
+                       &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
+                       100, jiffies_to_usecs(hdev->timeout_jiffies), false);
+
+               if (rc == -ETIMEDOUT) {
+                       dev_err(hdev->dev,
+                               "context switch phase timeout (%d)\n", tmp);
+                       goto out;
+               }
+       }
+
+out:
+       if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
+               hl_device_reset(hdev, false, false);
+
+       return rc;
+}
+
 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
                struct hl_cs_chunk *chunk, u64 *signal_seq)
 {
@@ -935,43 +1105,25 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
                                void __user *chunks, u32 num_chunks,
                                u64 *cs_seq)
 {
-       struct hl_device *hdev = hpriv->hdev;
-       struct hl_ctx *ctx = hpriv->ctx;
        struct hl_cs_chunk *cs_chunk_array, *chunk;
        struct hw_queue_properties *hw_queue_prop;
-       struct hl_fence *sig_fence = NULL;
-       struct hl_cs_counters_atomic *cntr;
+       struct hl_device *hdev = hpriv->hdev;
        struct hl_cs_compl *sig_waitcs_cmpl;
-       struct hl_cs *cs;
+       u32 q_idx, collective_engine_id = 0;
+       struct hl_cs_counters_atomic *cntr;
+       struct hl_fence *sig_fence = NULL;
+       struct hl_ctx *ctx = hpriv->ctx;
        enum hl_queue_type q_type;
-       u32 size_to_copy, q_idx, collective_engine_id = 0;
+       struct hl_cs *cs;
        u64 signal_seq;
        int rc;
 
        *cs_seq = ULLONG_MAX;
        cntr = &hdev->aggregated_cs_counters;
 
-       if (num_chunks > HL_MAX_JOBS_PER_CS) {
-               dev_err(hdev->dev,
-                       "Number of chunks can NOT be larger than %d\n",
-                       HL_MAX_JOBS_PER_CS);
-               rc = -EINVAL;
-               goto out;
-       }
-
-       cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
-                                       GFP_ATOMIC);
-       if (!cs_chunk_array) {
-               rc = -ENOMEM;
+       rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
+       if (rc)
                goto out;
-       }
-
-       size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
-       if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
-               dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
-               rc = -EFAULT;
-               goto free_cs_chunk_array;
-       }
 
        /* currently it is guaranteed to have only one chunk */
        chunk = &cs_chunk_array[0];
@@ -1108,158 +1260,38 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 {
-       struct hl_device *hdev = hpriv->hdev;
        union hl_cs_args *args = data;
-       struct hl_ctx *ctx = hpriv->ctx;
-       void __user *chunks_execute, *chunks_restore;
        enum hl_cs_type cs_type;
-       u32 num_chunks_execute, num_chunks_restore, sig_wait_flags;
        u64 cs_seq = ULONG_MAX;
-       int rc, do_ctx_switch;
-       bool need_soft_reset = false;
-
-       if (hl_device_disabled_or_in_reset(hdev)) {
-               dev_warn_ratelimited(hdev->dev,
-                       "Device is %s. Can't submit new CS\n",
-                       atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
-               rc = -EBUSY;
-               goto out;
-       }
-
-       sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT;
-
-       if (unlikely(sig_wait_flags == HL_CS_FLAGS_SIG_WAIT)) {
-               dev_err(hdev->dev,
-                       "Signal and wait CS flags are mutually exclusive, 
context %d\n",
-               ctx->asid);
-               rc = -EINVAL;
-               goto out;
-       }
+       void __user *chunks;
+       u32 num_chunks;
+       int rc;
 
-       if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) &&
-                       (!hdev->supports_sync_stream))) {
-               dev_err(hdev->dev, "Sync stream CS is not supported\n");
-               rc = -EINVAL;
+       rc = hl_cs_sanity_checks(hpriv, args);
+       if (rc)
                goto out;
-       }
-
-       if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL)
-               cs_type = CS_TYPE_SIGNAL;
-       else if (args->in.cs_flags & HL_CS_FLAGS_WAIT)
-               cs_type = CS_TYPE_WAIT;
-       else if (args->in.cs_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
-               cs_type = CS_TYPE_COLLECTIVE_WAIT;
-       else
-               cs_type = CS_TYPE_DEFAULT;
 
-       chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
-       num_chunks_execute = args->in.num_chunks_execute;
-
-       if (cs_type == CS_TYPE_DEFAULT) {
-               if (!num_chunks_execute) {
-                       dev_err(hdev->dev,
-                               "Got execute CS with 0 chunks, context %d\n",
-                               ctx->asid);
-                       rc = -EINVAL;
-                       goto out;
-               }
-       } else if (num_chunks_execute != 1) {
-               dev_err(hdev->dev,
-                       "Sync stream CS mandates one chunk only, context %d\n",
-                       ctx->asid);
-               rc = -EINVAL;
+       rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
+       if (rc)
                goto out;
-       }
-
-       do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
-
-       if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
-               long ret;
-
-               chunks_restore =
-                       (void __user *) (uintptr_t) args->in.chunks_restore;
-               num_chunks_restore = args->in.num_chunks_restore;
-
-               mutex_lock(&hpriv->restore_phase_mutex);
-
-               if (do_ctx_switch) {
-                       rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
-                       if (rc) {
-                               dev_err_ratelimited(hdev->dev,
-                                       "Failed to switch to context %d, 
rejecting CS! %d\n",
-                                       ctx->asid, rc);
-                               /*
-                                * If we timedout, or if the device is not IDLE
-                                * while we want to do context-switch (-EBUSY),
-                                * we need to soft-reset because QMAN is
-                                * probably stuck. However, we can't call to
-                                * reset here directly because of deadlock, so
-                                * need to do it at the very end of this
-                                * function
-                                */
-                               if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
-                                       need_soft_reset = true;
-                               mutex_unlock(&hpriv->restore_phase_mutex);
-                               goto out;
-                       }
-               }
-
-               hdev->asic_funcs->restore_phase_topology(hdev);
-
-               if (!num_chunks_restore) {
-                       dev_dbg(hdev->dev,
-                       "Need to run restore phase but restore CS is empty\n");
-                       rc = 0;
-               } else {
-                       rc = cs_ioctl_default(hpriv, chunks_restore,
-                                               num_chunks_restore, &cs_seq);
-               }
-
-               mutex_unlock(&hpriv->restore_phase_mutex);
-
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "Failed to submit restore CS for context %d 
(%d)\n",
-                               ctx->asid, rc);
-                       goto out;
-               }
-
-               /* Need to wait for restore completion before execution phase */
-               if (num_chunks_restore) {
-                       ret = _hl_cs_wait_ioctl(hdev, ctx,
-                                       jiffies_to_usecs(hdev->timeout_jiffies),
-                                       cs_seq);
-                       if (ret <= 0) {
-                               dev_err(hdev->dev,
-                                       "Restore CS for context %d failed to 
complete %ld\n",
-                                       ctx->asid, ret);
-                               rc = -ENOEXEC;
-                               goto out;
-                       }
-               }
-
-               ctx->thread_ctx_switch_wait_token = 1;
-       } else if (!ctx->thread_ctx_switch_wait_token) {
-               u32 tmp;
-
-               rc = hl_poll_timeout_memory(hdev,
-                       &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
-                       100, jiffies_to_usecs(hdev->timeout_jiffies), false);
 
-               if (rc == -ETIMEDOUT) {
-                       dev_err(hdev->dev,
-                               "context switch phase timeout (%d)\n", tmp);
-                       goto out;
-               }
+       cs_type = hl_cs_get_cs_type(args->in.cs_flags &
+                                       ~HL_CS_FLAGS_FORCE_RESTORE);
+       chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
+       num_chunks = args->in.num_chunks_execute;
+
+       switch (cs_type) {
+       case CS_TYPE_SIGNAL:
+       case CS_TYPE_WAIT:
+       case CS_TYPE_COLLECTIVE_WAIT:
+               rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
+                                               &cs_seq);
+               break;
+       default:
+               rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq);
+               break;
        }
 
-       if (cs_type == CS_TYPE_DEFAULT)
-               rc = cs_ioctl_default(hpriv, chunks_execute, num_chunks_execute,
-                                       &cs_seq);
-       else
-               rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks_execute,
-                                               num_chunks_execute, &cs_seq);
-
 out:
        if (rc != -EAGAIN) {
                memset(args, 0, sizeof(*args));
@@ -1267,9 +1299,6 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
                args->out.seq = cs_seq;
        }
 
-       if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset))
-               hl_device_reset(hdev, false, false);
-
        return rc;
 }
 
-- 
2.17.1
