Am 16.05.2017 um 08:44 schrieb Chunming Zhou:
The following ioctls will return -ENODEV after VRAM has been lost:
amdgpu_cs_ioctl
amdgpu_cs_wait_ioctl
amdgpu_cs_wait_fences_ioctl
amdgpu_gem_va_ioctl
amdgpu_info_ioctl

Change-Id: I8970cde3301b7cfeb4263cc0f0e54aece215c98e
Signed-off-by: Chunming Zhou <[email protected]>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  4 ++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     |  9 +++++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  4 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c    |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    | 10 ++++++++++
  5 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f9da215..dcd6203 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -855,6 +855,7 @@ struct amdgpu_fpriv {
        struct amdgpu_ctx_mgr   ctx_mgr;
        spinlock_t              sem_handles_lock;
        struct idr              sem_handles;
+       u32                     vram_lost_counter;
  };
/*
@@ -1607,6 +1608,7 @@ struct amdgpu_device {
        atomic64_t                      num_bytes_moved;
        atomic64_t                      num_evictions;
        atomic_t                        gpu_reset_counter;
+       atomic_t                        vram_lost_counter;
/* data for buffer migration throttling */
        struct {
@@ -2005,6 +2007,8 @@ static inline void amdgpu_unregister_atpx_handler(void) {}
  extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
  extern const int amdgpu_max_kms_ioctl;
+bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
+                         struct amdgpu_fpriv *fpriv);
  int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags);
  int amdgpu_driver_unload_kms(struct drm_device *dev);
  void amdgpu_driver_lastclose_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index b803412..911aa02 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1097,6 +1097,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
  int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
  {
        struct amdgpu_device *adev = dev->dev_private;
+       struct amdgpu_fpriv *fpriv = filp->driver_priv;
        union drm_amdgpu_cs *cs = data;
        struct amdgpu_cs_parser parser = {};
        bool reserved_buffers = false;
@@ -1104,6 +1105,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
if (!adev->accel_working)
                return -EBUSY;
+       if (amdgpu_kms_vram_lost(adev, fpriv))
+               return -ENODEV;
parser.adev = adev;
        parser.filp = filp;
@@ -1165,12 +1168,15 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void 
*data,
  {
        union drm_amdgpu_wait_cs *wait = data;
        struct amdgpu_device *adev = dev->dev_private;
+       struct amdgpu_fpriv *fpriv = filp->driver_priv;
        unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
        struct amdgpu_ring *ring = NULL;
        struct amdgpu_ctx *ctx;
        struct fence *fence;
        long r;
+       if (amdgpu_kms_vram_lost(adev, fpriv))
+               return -ENODEV;
        r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
                               wait->in.ring, &ring);
        if (r)
@@ -1344,12 +1350,15 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, 
void *data,
                                struct drm_file *filp)
  {
        struct amdgpu_device *adev = dev->dev_private;
+       struct amdgpu_fpriv *fpriv = filp->driver_priv;
        union drm_amdgpu_wait_fences *wait = data;
        uint32_t fence_count = wait->in.fence_count;
        struct drm_amdgpu_fence *fences_user;
        struct drm_amdgpu_fence *fences;
        int r;
+       if (amdgpu_kms_vram_lost(adev, fpriv))
+               return -ENODEV;
        /* Get the fences from userspace */
        fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
                        GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 58af9ea..417b8f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2919,8 +2919,10 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
                        if (r)
                                goto out;
                        vram_lost = amdgpu_check_vram_lost(adev);
-                       if (vram_lost)
+                       if (vram_lost) {
                                DRM_ERROR("VRAM is lost!\n");
+                               atomic_inc(&adev->vram_lost_counter);
+                       }
                        r = amdgpu_ttm_recover_gart(adev);
                        if (r)
                                goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d8275ef..0f0b736 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -776,6 +776,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
if (!adev->vm_manager.enabled)
                return -ENOTTY;
+       if (amdgpu_kms_vram_lost(adev, fpriv))
+               return -ENODEV;

We should only block AMDGPU_VA_OP_MAP and AMDGPU_VA_OP_REPLACE here and still allow AMDGPU_VA_OP_UNMAP and AMDGPU_VA_OP_CLEAR.

BTW: How should the UMD (user-mode driver) recover from that situation? Should it completely close the fd and recreate it?

That might be tricky for processes like X or the Compositor. Should we have an IOCTL to reset the vram_lost counter for an fd?

Christian.

if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {
                dev_err(&dev->pdev->dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 368829a..a231aa1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -235,6 +235,7 @@ static int amdgpu_firmware_info(struct 
drm_amdgpu_info_firmware *fw_info,
  static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct 
drm_file *filp)
  {
        struct amdgpu_device *adev = dev->dev_private;
+       struct amdgpu_fpriv *fpriv = filp->driver_priv;
        struct drm_amdgpu_info *info = data;
        struct amdgpu_mode_info *minfo = &adev->mode_info;
        void __user *out = (void __user *)(uintptr_t)info->return_pointer;
@@ -247,6 +248,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
if (!info->return_size || !info->return_pointer)
                return -EINVAL;
+       if (amdgpu_kms_vram_lost(adev, fpriv))
+               return -ENODEV;
switch (info->query) {
        case AMDGPU_INFO_VIRTUAL_RANGE: {
@@ -779,6 +782,12 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev)
        vga_switcheroo_process_delayed_switch();
  }
+bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
+                         struct amdgpu_fpriv *fpriv)
+{
+       return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter);
+}
+
  /**
   * amdgpu_driver_open_kms - drm callback for open
   *
@@ -833,6 +842,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct 
drm_file *file_priv)
        amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
+       fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
        file_priv->driver_priv = fpriv;
out_suspend:


_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to