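Various bug fixes and improvements that accumulated over the last two
years:

- hqd_load now receives the queue's mm_struct together with a write
  pointer shift and mask. The user-mode write pointer is fetched with
  the new read_user_wptr() helper, which temporarily adopts the target
  mm when the caller has no mm of its own, and is written to
  CP_HQD_PQ_WPTR.
- On gfx v7/v8, hqd_load programs the HQD registers directly from the
  MQD and sets DOORBELL_EN and CP_HQD_ACTIVE itself; update_mqd no
  longer sets them in the MQD. On gfx v8 the HIQ (vmid 0) is also
  registered in RLC_CP_SCHEDULERS.
- hqd_destroy now receives the MQD, maps the KFD preemption type to a
  dequeue request type, waits for the IQ timer and IQ_REQ_PEND to be
  safe before issuing the dequeue request, and fixes the inverted
  CP_HQD_ACTIVE test in the wait loop. The timeout is now jiffies
  based.
- enum kfd_preempt_type moves from kfd_priv.h to kgd_kfd_interface.h.
- radeon_kfd.c is adapted to the new callback signatures.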

Signed-off-by: Felix Kuehling <felix.kuehl...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h         |  16 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c  | 130 +++++++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c  | 165 ++++++++++++++++++---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  |   7 +-
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c      |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h       |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c   |  23 +--
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c    |  16 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |   5 -
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h    |  11 +-
 drivers/gpu/drm/radeon/radeon_kfd.c                |  12 +-
 11 files changed, 322 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index b8802a5..8d689ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -26,6 +26,7 @@
 #define AMDGPU_AMDKFD_H_INCLUDED
 
 #include <linux/types.h>
+#include <linux/mmu_context.h>
 #include <kgd_kfd_interface.h>
 
 struct amdgpu_device;
@@ -60,4 +61,19 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
 
 uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
 
+#define read_user_wptr(mmptr, wptr, dst)                               \
+       ({                                                              \
+               bool valid = false;                                     \
+               if ((mmptr) && (wptr)) {                                \
+                       if ((mmptr) == current->mm) {                   \
+                               valid = !get_user((dst), (wptr));       \
+                       } else if (current->mm == NULL) {               \
+                               use_mm(mmptr);                          \
+                               valid = !get_user((dst), (wptr));       \
+                               unuse_mm(mmptr);                        \
+                       }                                               \
+               }                                                       \
+               valid;                                                  \
+       })
+
 #endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index dfb8c74..994d262 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -39,6 +39,12 @@
 #include "gmc/gmc_7_1_sh_mask.h"
 #include "cik_structs.h"
 
+enum hqd_dequeue_request_type {
+       NO_ACTION = 0,
+       DRAIN_PIPE,
+       RESET_WAVES
+};
+
 enum {
        MAX_TRAPID = 8,         /* 3 bits in the bitfield. */
        MAX_WATCH_ADDRESSES = 4
@@ -96,12 +102,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
                                uint32_t hpd_size, uint64_t hpd_gpu_addr);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-                       uint32_t queue_id, uint32_t __user *wptr);
+                       uint32_t queue_id, uint32_t __user *wptr,
+                       uint32_t wptr_shift, uint32_t wptr_mask,
+                       struct mm_struct *mm);
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
                                uint32_t pipe_id, uint32_t queue_id);
 
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+                               enum kfd_preempt_type reset_type,
                                unsigned int utimeout, uint32_t pipe_id,
                                uint32_t queue_id);
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
@@ -290,20 +299,38 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
 }
 
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-                       uint32_t queue_id, uint32_t __user *wptr)
+                       uint32_t queue_id, uint32_t __user *wptr,
+                       uint32_t wptr_shift, uint32_t wptr_mask,
+                       struct mm_struct *mm)
 {
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
-       uint32_t wptr_shadow, is_wptr_shadow_valid;
        struct cik_mqd *m;
+       uint32_t *mqd_hqd;
+       uint32_t reg, wptr_val, data;
 
        m = get_mqd(mqd);
 
-       is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
-       if (is_wptr_shadow_valid)
-               m->cp_hqd_pq_wptr = wptr_shadow;
-
        acquire_queue(kgd, pipe_id, queue_id);
-       gfx_v7_0_mqd_commit(adev, m);
+
+       /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
+       mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+       for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
+               WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+       /* Copy userspace write pointer value to register.
+        * Activate doorbell logic to monitor subsequent changes.
+        */
+       data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+                            CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+       WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+       if (read_user_wptr(mm, wptr, wptr_val))
+               WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+       data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+       WREG32(mmCP_HQD_ACTIVE, data);
+
        release_queue(kgd);
 
        return 0;
@@ -382,30 +409,99 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
        return false;
 }
 
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+                               enum kfd_preempt_type reset_type,
                                unsigned int utimeout, uint32_t pipe_id,
                                uint32_t queue_id)
 {
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t temp;
-       int timeout = utimeout;
+       enum hqd_dequeue_request_type type;
+       unsigned long flags, end_jiffies;
+       int retry;
 
        acquire_queue(kgd, pipe_id, queue_id);
        WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
 
-       WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
+       switch (reset_type) {
+       case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+               type = DRAIN_PIPE;
+               break;
+       case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+               type = RESET_WAVES;
+               break;
+       default:
+               type = DRAIN_PIPE;
+               break;
+       }
 
+       /* Workaround: If IQ timer is active and the wait time is close to or
+        * equal to 0, dequeueing is not safe. Wait until either the wait time
+        * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
+        * cleared before continuing. Also, ensure wait times are set to at
+        * least 0x3.
+        */
+       local_irq_save(flags);
+       preempt_disable();
+       retry = 5000; /* wait for 500 usecs at maximum */
+       while (true) {
+               temp = RREG32(mmCP_HQD_IQ_TIMER);
+               if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+                       pr_debug("HW is processing IQ\n");
+                       goto loop;
+               }
+               if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+                       if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+                                       == 3) /* SEM-rearm is safe */
+                               break;
+                       /* Wait time 3 is safe for CP, but our MMIO read/write
+                        * time is close to 1 microsecond, so check for 10 to
+                        * leave more buffer room
+                        */
+                       if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+                                       >= 10)
+                               break;
+                       pr_debug("IQ timer is active\n");
+               } else
+                       break;
+loop:
+               if (!retry) {
+                       pr_err("CP HQD IQ timer status time out\n");
+                       break;
+               }
+               ndelay(100);
+               --retry;
+       }
+       retry = 1000;
+       while (true) {
+               temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+               if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+                       break;
+               pr_debug("Dequeue request is pending\n");
+
+               if (!retry) {
+                       pr_err("CP HQD dequeue request time out\n");
+                       break;
+               }
+               ndelay(100);
+               --retry;
+       }
+       local_irq_restore(flags);
+       preempt_enable();
+
+       WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+
+       end_jiffies = (utimeout * HZ / 1000) + jiffies;
        while (true) {
                temp = RREG32(mmCP_HQD_ACTIVE);
-               if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
+               if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
                        break;
-               if (timeout <= 0) {
-                       pr_err("kfd: cp queue preemption time out.\n");
+               if (time_after(jiffies, end_jiffies)) {
+                       pr_err("cp queue preemption time out\n");
                        release_queue(kgd);
                        return -ETIME;
                }
-               msleep(20);
-               timeout -= 20;
+               usleep_range(500, 1000);
        }
 
        release_queue(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 0fccd30..29a6f5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -39,6 +39,12 @@
 #include "vi_structs.h"
 #include "vid.h"
 
+enum hqd_dequeue_request_type {
+       NO_ACTION = 0,
+       DRAIN_PIPE,
+       RESET_WAVES
+};
+
 struct cik_sdma_rlc_registers;
 
 /*
@@ -55,12 +61,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
                uint32_t hpd_size, uint64_t hpd_gpu_addr);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-               uint32_t queue_id, uint32_t __user *wptr);
+                       uint32_t queue_id, uint32_t __user *wptr,
+                       uint32_t wptr_shift, uint32_t wptr_mask,
+                       struct mm_struct *mm);
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
                uint32_t pipe_id, uint32_t queue_id);
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+                               enum kfd_preempt_type reset_type,
                                unsigned int utimeout, uint32_t pipe_id,
                                uint32_t queue_id);
 static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
@@ -244,20 +253,67 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
 }
 
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-                       uint32_t queue_id, uint32_t __user *wptr)
+                       uint32_t queue_id, uint32_t __user *wptr,
+                       uint32_t wptr_shift, uint32_t wptr_mask,
+                       struct mm_struct *mm)
 {
-       struct vi_mqd *m;
-       uint32_t shadow_wptr, valid_wptr;
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
+       struct vi_mqd *m;
+       uint32_t *mqd_hqd;
+       uint32_t reg, wptr_val, data;
 
        m = get_mqd(mqd);
 
-       valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
-       if (valid_wptr == 0)
-               m->cp_hqd_pq_wptr = shadow_wptr;
-
        acquire_queue(kgd, pipe_id, queue_id);
-       gfx_v8_0_mqd_commit(adev, mqd);
+
+       /* HIQ is set during driver init period with vmid set to 0 */
+       if (m->cp_hqd_vmid == 0) {
+               uint32_t value, mec, pipe;
+
+               mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+               pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+               pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+                       mec, pipe, queue_id);
+               value = RREG32(mmRLC_CP_SCHEDULERS);
+               value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+                       ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+               WREG32(mmRLC_CP_SCHEDULERS, value);
+       }
+
+       /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_ERROR. */
+       mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+       for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++)
+               WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+       /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+        * This is safe since EOP RPTR==WPTR for any inactive HQD
+        * on ASICs that do not support context-save.
+        * EOP writes/reads can start anywhere in the ring.
+        */
+       if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) {
+               WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
+               WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
+               WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
+       }
+
+       for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
+               WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+       /* Copy userspace write pointer value to register.
+        * Activate doorbell logic to monitor subsequent changes.
+        */
+       data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+                            CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+       WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+       if (read_user_wptr(mm, wptr, wptr_val))
+               WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+       data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+       WREG32(mmCP_HQD_ACTIVE, data);
+
        release_queue(kgd);
 
        return 0;
@@ -308,29 +364,102 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
        return false;
 }
 
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+                               enum kfd_preempt_type reset_type,
                                unsigned int utimeout, uint32_t pipe_id,
                                uint32_t queue_id)
 {
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t temp;
-       int timeout = utimeout;
+       enum hqd_dequeue_request_type type;
+       unsigned long flags, end_jiffies;
+       int retry;
+       struct vi_mqd *m = get_mqd(mqd);
 
        acquire_queue(kgd, pipe_id, queue_id);
 
-       WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
+       if (m->cp_hqd_vmid == 0)
+               WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);
+
+       switch (reset_type) {
+       case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+               type = DRAIN_PIPE;
+               break;
+       case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+               type = RESET_WAVES;
+               break;
+       default:
+               type = DRAIN_PIPE;
+               break;
+       }
+
+       /* Workaround: If IQ timer is active and the wait time is close to or
+        * equal to 0, dequeueing is not safe. Wait until either the wait time
+        * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
+        * cleared before continuing. Also, ensure wait times are set to at
+        * least 0x3.
+        */
+       local_irq_save(flags);
+       preempt_disable();
+       retry = 5000; /* wait for 500 usecs at maximum */
+       while (true) {
+               temp = RREG32(mmCP_HQD_IQ_TIMER);
+               if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+                       pr_debug("HW is processing IQ\n");
+                       goto loop;
+               }
+               if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+                       if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+                                       == 3) /* SEM-rearm is safe */
+                               break;
+                       /* Wait time 3 is safe for CP, but our MMIO read/write
+                        * time is close to 1 microsecond, so check for 10 to
+                        * leave more buffer room
+                        */
+                       if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+                                       >= 10)
+                               break;
+                       pr_debug("IQ timer is active\n");
+               } else
+                       break;
+loop:
+               if (!retry) {
+                       pr_err("CP HQD IQ timer status time out\n");
+                       break;
+               }
+               ndelay(100);
+               --retry;
+       }
+       retry = 1000;
+       while (true) {
+               temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+               if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+                       break;
+               pr_debug("Dequeue request is pending\n");
 
+               if (!retry) {
+                       pr_err("CP HQD dequeue request time out\n");
+                       break;
+               }
+               ndelay(100);
+               --retry;
+       }
+       local_irq_restore(flags);
+       preempt_enable();
+
+       WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+
+       end_jiffies = (utimeout * HZ / 1000) + jiffies;
        while (true) {
                temp = RREG32(mmCP_HQD_ACTIVE);
-               if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
+               if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
                        break;
-               if (timeout <= 0) {
-                       pr_err("kfd: cp queue preemption time out.\n");
+               if (time_after(jiffies, end_jiffies)) {
+                       pr_err("cp queue preemption time out.\n");
                        release_queue(kgd);
                        return -ETIME;
                }
-               msleep(20);
-               timeout -= 20;
+               usleep_range(500, 1000);
        }
 
        release_queue(kgd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 5dac29d..3891fe5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -268,8 +268,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
        pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
                        q->pipe, q->queue);
 
-       retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
-                       q->queue, (uint32_t __user *) q->properties.write_ptr);
+       retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties,
+                              q->process->mm);
        if (retval)
                goto out_uninit_mqd;
 
@@ -585,8 +585,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
        if (retval)
                goto out_deallocate_sdma_queue;
 
-       retval = mqd->load_mqd(mqd, q->mqd, 0,
-                               0, NULL);
+       retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
        if (retval)
                goto out_uninit_mqd;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 0e4d4a9..681b639 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -143,7 +143,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
                kq->queue->pipe = KFD_CIK_HIQ_PIPE;
                kq->queue->queue = KFD_CIK_HIQ_QUEUE;
                kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe,
-                                       kq->queue->queue, NULL);
+                                 kq->queue->queue, &kq->queue->properties,
+                                 NULL);
        } else {
                /* allocate fence for DIQ */
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 213a71e..1f3a6ba 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -67,7 +67,8 @@ struct mqd_manager {
 
        int     (*load_mqd)(struct mqd_manager *mm, void *mqd,
                                uint32_t pipe_id, uint32_t queue_id,
-                               uint32_t __user *wptr);
+                               struct queue_properties *p,
+                               struct mm_struct *mms);
 
        int     (*update_mqd)(struct mqd_manager *mm, void *mqd,
                                struct queue_properties *q);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 7e0ec6b..44ffd23 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -144,15 +144,21 @@ static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
 }
 
 static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
-                       uint32_t queue_id, uint32_t __user *wptr)
+                   uint32_t queue_id, struct queue_properties *p,
+                   struct mm_struct *mms)
 {
-       return mm->dev->kfd2kgd->hqd_load
-               (mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
+       /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+       uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+       uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1);
+
+       return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+                                         (uint32_t __user *)p->write_ptr,
+                                         wptr_shift, wptr_mask, mms);
 }
 
 static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
-                       uint32_t pipe_id, uint32_t queue_id,
-                       uint32_t __user *wptr)
+                        uint32_t pipe_id, uint32_t queue_id,
+                        struct queue_properties *p, struct mm_struct *mms)
 {
        return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd);
 }
@@ -176,20 +182,17 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
        m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
        m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
        m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
-       m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
-                                       DOORBELL_OFFSET(q->doorbell_off);
+       m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off);
 
        m->cp_hqd_vmid = q->vmid;
 
        if (q->format == KFD_QUEUE_FORMAT_AQL)
                m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
 
-       m->cp_hqd_active = 0;
        q->is_active = false;
        if (q->queue_size > 0 &&
                        q->queue_address != 0 &&
                        q->queue_percent > 0) {
-               m->cp_hqd_active = 1;
                q->is_active = true;
        }
 
@@ -239,7 +242,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
                        unsigned int timeout, uint32_t pipe_id,
                        uint32_t queue_id)
 {
-       return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout,
+       return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, mqd, type, timeout,
                                        pipe_id, queue_id);
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 98a930e..73cbfe1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -94,10 +94,15 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
 
 static int load_mqd(struct mqd_manager *mm, void *mqd,
                        uint32_t pipe_id, uint32_t queue_id,
-                       uint32_t __user *wptr)
+                       struct queue_properties *p, struct mm_struct *mms)
 {
-       return mm->dev->kfd2kgd->hqd_load
-               (mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
+       /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+       uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+       uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1);
+
+       return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+                                         (uint32_t __user *)p->write_ptr,
+                                         wptr_shift, wptr_mask, mms);
 }
 
 static int __update_mqd(struct mqd_manager *mm, void *mqd,
@@ -122,7 +127,6 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
        m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
 
        m->cp_hqd_pq_doorbell_control =
-               1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN__SHIFT |
                q->doorbell_off <<
                        CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
        pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
@@ -159,12 +163,10 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
                                2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
        }
 
-       m->cp_hqd_active = 0;
        q->is_active = false;
        if (q->queue_size > 0 &&
                        q->queue_address != 0 &&
                        q->queue_percent > 0) {
-               m->cp_hqd_active = 1;
                q->is_active = true;
        }
 
@@ -184,7 +186,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
                        uint32_t queue_id)
 {
        return mm->dev->kfd2kgd->hqd_destroy
-               (mm->dev->kgd, type, timeout,
+               (mm->dev->kgd, mqd, type, timeout,
                pipe_id, queue_id);
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index f0d55cc0..30ce92c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -239,11 +239,6 @@ enum kfd_preempt_type_filter {
        KFD_PREEMPT_TYPE_FILTER_BY_PASID
 };
 
-enum kfd_preempt_type {
-       KFD_PREEMPT_TYPE_WAVEFRONT,
-       KFD_PREEMPT_TYPE_WAVEFRONT_RESET
-};
-
 /**
  * enum kfd_queue_type
  *
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 36f3766..ffafda0 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -41,6 +41,11 @@ struct kgd_dev;
 
 struct kgd_mem;
 
+enum kfd_preempt_type {
+       KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0,
+       KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
+};
+
 enum kgd_memory_pool {
        KGD_POOL_SYSTEM_CACHEABLE = 1,
        KGD_POOL_SYSTEM_WRITECOMBINE = 2,
@@ -153,14 +158,16 @@ struct kfd2kgd_calls {
        int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id);
 
        int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-                       uint32_t queue_id, uint32_t __user *wptr);
+                       uint32_t queue_id, uint32_t __user *wptr,
+                       uint32_t wptr_shift, uint32_t wptr_mask,
+                       struct mm_struct *mm);
 
        int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd);
 
        bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address,
                                uint32_t pipe_id, uint32_t queue_id);
 
-       int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type,
+       int (*hqd_destroy)(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
                                unsigned int timeout, uint32_t pipe_id,
                                uint32_t queue_id);
 
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index a2ab6dc..695117a 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -75,12 +75,14 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
                                uint32_t hpd_size, uint64_t hpd_gpu_addr);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-                       uint32_t queue_id, uint32_t __user *wptr);
+                       uint32_t queue_id, uint32_t __user *wptr,
+                       uint32_t wptr_shift, uint32_t wptr_mask,
+                       struct mm_struct *mm);
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
                                uint32_t pipe_id, uint32_t queue_id);
 
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
                                unsigned int timeout, uint32_t pipe_id,
                                uint32_t queue_id);
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
@@ -482,7 +484,9 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
 }
 
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-                       uint32_t queue_id, uint32_t __user *wptr)
+                       uint32_t queue_id, uint32_t __user *wptr,
+                       uint32_t wptr_shift, uint32_t wptr_mask,
+                       struct mm_struct *mm)
 {
        uint32_t wptr_shadow, is_wptr_shadow_valid;
        struct cik_mqd *m;
@@ -636,7 +640,7 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
        return false;
 }
 
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
                                unsigned int timeout, uint32_t pipe_id,
                                uint32_t queue_id)
 {
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to