From: farah kassabri <fkassa...@habana.ai>

When the driver needs to abort waiters for interrupts, e.g. when a
critical event occurs and the driver needs to do a hard reset, the
driver will complete the fence to wake up the waiting thread, and
will set the fence error indication.
In that case the return value of the completion API will be greater
than 0, since it returns the remaining timeout. Because a positive
value normally indicates successful completion, the driver must check
the fence error indication and mark the wait as aborted.

Signed-off-by: farah kassabri <fkassa...@habana.ai>
Reviewed-by: Oded Gabbay <ogab...@kernel.org>
Signed-off-by: Oded Gabbay <ogab...@kernel.org>
---
 .../habanalabs/common/command_submission.c    | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/accel/habanalabs/common/command_submission.c 
b/drivers/accel/habanalabs/common/command_submission.c
index 02ac6d754fba..396bbf8652b7 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -3449,7 +3449,15 @@ static int _hl_interrupt_wait_ioctl(struct hl_device 
*hdev, struct hl_ctx *ctx,
        completion_rc = 
wait_for_completion_interruptible_timeout(&pend->fence.completion,
                                                                timeout);
        if (completion_rc > 0) {
-               *status = HL_WAIT_CS_STATUS_COMPLETED;
+               if (pend->fence.error == -EIO) {
+                       dev_err_ratelimited(hdev->dev,
+                                       "interrupt based wait ioctl 
aborted(error:%d) due to a reset cycle initiated\n",
+                                       pend->fence.error);
+                       rc = -EIO;
+                       *status = HL_WAIT_CS_STATUS_ABORTED;
+               } else {
+                       *status = HL_WAIT_CS_STATUS_COMPLETED;
+               }
        } else {
                if (completion_rc == -ERESTARTSYS) {
                        dev_err_ratelimited(hdev->dev,
@@ -3458,21 +3466,13 @@ static int _hl_interrupt_wait_ioctl(struct hl_device 
*hdev, struct hl_ctx *ctx,
                        rc = -EINTR;
                        *status = HL_WAIT_CS_STATUS_ABORTED;
                } else {
-                       if (pend->fence.error == -EIO) {
-                               dev_err_ratelimited(hdev->dev,
-                                               "interrupt based wait ioctl 
aborted(error:%d) due to a reset cycle initiated\n",
-                                               pend->fence.error);
-                               rc = -EIO;
-                               *status = HL_WAIT_CS_STATUS_ABORTED;
-                       } else {
-                               /* The wait has timed-out. We don't know 
anything beyond that
-                                * because the workload wasn't submitted 
through the driver.
-                                * Therefore, from driver's perspective, the 
workload is still
-                                * executing.
-                                */
-                               rc = 0;
-                               *status = HL_WAIT_CS_STATUS_BUSY;
-                       }
+                       /* The wait has timed-out. We don't know anything 
beyond that
+                        * because the workload was not submitted through the 
driver.
+                        * Therefore, from driver's perspective, the workload 
is still
+                        * executing.
+                        */
+                       rc = 0;
+                       *status = HL_WAIT_CS_STATUS_BUSY;
                }
        }
 
-- 
2.40.1

Reply via email to