During GPU reset, IOCTL still  submit command to GPU.
No it won't. We disable the GPU scheduler during GPU reset, so that isn't an issue.

Do you have a concrete problem (e.g. dmesg?) you ran into?

Regards,
Christian.

Am 21.04.2017 um 11:00 schrieb He, Hongbo:
Hi Christian:

During GPU reset, IOCTL still  submit command to GPU.
That will result in lots of strange problems when we test this feature.

I have no better idea to handle this case. At that time, I have tried with 
return error directly rather than waiting GPU reset completion.
But that doesn't work and it lead to other problems.

Thanks
Roger(Hongbo.He)

-----Original Message-----
From: Christian König [mailto:deathsim...@vodafone.de]
Sent: Friday, April 21, 2017 4:27 PM
To: He, Hongbo; amd-gfx@lists.freedesktop.org
Cc: Zhou, David(ChunMing)
Subject: Re: [PATCH 3/3] drm/amdgpu: fix gpu reset issue

NAK, that is exactly what we wanted to avoid.

We used to have an exclusive lock for this and it cause a whole bunch of 
problems.

Please elaborate why that should be necessary.

Regards,
Christian.

Am 21.04.2017 um 09:08 schrieb Roger.He:
Change-Id: Ib77d33a09f348ebf2e3a9d7861411f4b951ebf7c
Signed-off-by: Roger.He <hongbo...@amd.com>
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  6 ++++++
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 +++++++++++++++-
   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 ++++
   drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c  |  7 ++++++-
   4 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 71364f5..ab0ffa8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1526,6 +1526,7 @@ struct amdgpu_device {
        atomic64_t                      num_bytes_moved;
        atomic64_t                      num_evictions;
        atomic_t                        gpu_reset_counter;
+       atomic_t                        gpu_reset_state;
/* data for buffer migration throttling */
        struct {
@@ -1851,6 +1852,11 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
   #define amdgpu_psp_check_fw_loading_status(adev, i)
(adev)->firmware.funcs->check_fw_loading_status((adev), (i))
/* Common functions */
+static inline int amdgpu_device_is_reset(struct amdgpu_device *adev)
+{
+       return atomic_read(&adev->gpu_reset_state);
+}
+
   int amdgpu_gpu_reset(struct amdgpu_device *adev);
   bool amdgpu_need_backup(struct amdgpu_device *adev);
   void amdgpu_pci_config_reset(struct amdgpu_device *adev); diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f882496..0fb4716 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1894,6 +1894,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        mutex_init(&adev->grbm_idx_mutex);
        mutex_init(&adev->mn_lock);
        hash_init(adev->mn_hash);
+       atomic_set(&adev->gpu_reset_state, 0);
amdgpu_check_arguments(adev); @@ -2655,6 +2656,18 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
   }
/**
+ * amdgpu_device_set_reset_state - set gpu reset state
+ *
+ * @adev: amdgpu device pointer
+ * @state: true when start to reset gpu; false: reset done  */ static
+inline void amdgpu_device_set_reset_state(struct amdgpu_device *adev,
+                                                       bool state)
+{
+       atomic_set(&adev->gpu_reset_state, state); }
+
+/**
    * amdgpu_gpu_reset - reset the asic
    *
    * @adev: amdgpu device pointer
@@ -2678,7 +2691,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
        }
atomic_inc(&adev->gpu_reset_counter);
-
+       amdgpu_device_set_reset_state(adev, true);
        /* block TTM */
        resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
        /* store modesetting */
@@ -2811,6 +2824,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
                dev_info(adev->dev, "GPU reset failed\n");
        }
+ amdgpu_device_set_reset_state(adev, false);
        return r;
   }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ead00d7..8cc14af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -679,11 +679,15 @@ long amdgpu_drm_ioctl(struct file *filp,
        struct drm_file *file_priv = filp->private_data;
        struct drm_device *dev;
        long ret;
+
        dev = file_priv->minor->dev;
        ret = pm_runtime_get_sync(dev->dev);
        if (ret < 0)
                return ret;
+ while (amdgpu_device_is_reset(dev->dev_private))
+               msleep(100);
+
        ret = drm_ioctl(filp, cmd, arg);
pm_runtime_mark_last_busy(dev->dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
index 2648291..22b8059 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
@@ -36,10 +36,15 @@
   long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned 
long arg)
   {
        unsigned int nr = DRM_IOCTL_NR(cmd);
+       struct drm_file *file_priv = filp->private_data;
+       struct amdgpu_device *adev = file_priv->minor->dev->dev_private;
        int ret;
- if (nr < DRM_COMMAND_BASE)
+       if (nr < DRM_COMMAND_BASE) {
+               while (amdgpu_device_is_reset(adev))
+                       msleep(100);
                return drm_compat_ioctl(filp, cmd, arg);
+       }
ret = amdgpu_drm_ioctl(filp, cmd, arg);


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to