From: Ben Goz <ben....@amd.com>

The mqd_manager module handles MQD data structures. MQD stands for Memory Queue
Descriptor, which is used by the H/W to keep the HSA queue state in memory.

Signed-off-by: Ben Goz <ben....@amd.com>
Signed-off-by: Oded Gabbay <oded.gab...@amd.com>
---
 drivers/gpu/hsa/radeon/Makefile               |   2 +-
 drivers/gpu/hsa/radeon/cik_mqds.h             | 251 ++++++++++++++
 drivers/gpu/hsa/radeon/cik_regs.h             |   1 +
 drivers/gpu/hsa/radeon/kfd_mqd_manager.c      | 453 ++++++++++++++++++++++++++
 drivers/gpu/hsa/radeon/kfd_mqd_manager.h      |  48 +++
 drivers/gpu/hsa/radeon/kfd_priv.h             |  26 ++
 drivers/gpu/hsa/radeon/kfd_sched_cik_static.c |  10 -
 drivers/gpu/hsa/radeon/kfd_vidmem.c           |  36 ++
 8 files changed, 816 insertions(+), 11 deletions(-)
 create mode 100644 drivers/gpu/hsa/radeon/cik_mqds.h
 create mode 100644 drivers/gpu/hsa/radeon/kfd_mqd_manager.c
 create mode 100644 drivers/gpu/hsa/radeon/kfd_mqd_manager.h

diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index 18e1639..c87b518 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -6,6 +6,6 @@ radeon_kfd-y    := kfd_module.o kfd_device.o kfd_chardev.o \
                kfd_pasid.o kfd_topology.o kfd_process.o \
                kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
                kfd_vidmem.o kfd_interrupt.o kfd_aperture.o \
-               kfd_queue.o kfd_hw_pointer_store.o
+               kfd_queue.o kfd_hw_pointer_store.o kfd_mqd_manager.o
 
 obj-$(CONFIG_HSA_RADEON)       += radeon_kfd.o
diff --git a/drivers/gpu/hsa/radeon/cik_mqds.h 
b/drivers/gpu/hsa/radeon/cik_mqds.h
new file mode 100644
index 0000000..58945c8
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/cik_mqds.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#ifndef CIK_MQDS_H_
+#define CIK_MQDS_H_
+
+#pragma pack(push, 4)
+
+/*
+ * Five consecutive 32-bit words named after the CP_HPD_* registers.
+ * NOTE(review): presumably a per-pipe HPD/EOP register image; nothing in
+ * this patch reads or writes the structure - confirm against its users.
+ */
+struct cik_hpd_registers {
+       u32 cp_hpd_roq_offsets;
+       u32 cp_hpd_eop_base_addr;
+       u32 cp_hpd_eop_base_addr_hi;
+       u32 cp_hpd_eop_vmid;
+       u32 cp_hpd_eop_control;
+};
+
+/*
+ * Per-queue register image, embedded in struct cik_mqd as "queue_state".
+ * load_mqd() in kfd_mqd_manager.c copies most of these fields into the
+ * CP_MQD_xxx / CP_HQD_xxx registers of the same name, so the field names
+ * follow the register names.
+ */
+struct cik_hqd_registers {
+       u32 cp_mqd_base_addr;
+       u32 cp_mqd_base_addr_hi;
+       u32 cp_hqd_active;
+       u32 cp_hqd_vmid;
+       u32 cp_hqd_persistent_state;
+       u32 cp_hqd_pipe_priority;
+       u32 cp_hqd_queue_priority;
+       u32 cp_hqd_quantum;
+       u32 cp_hqd_pq_base;
+       u32 cp_hqd_pq_base_hi;
+       u32 cp_hqd_pq_rptr;
+       u32 cp_hqd_pq_rptr_report_addr;
+       u32 cp_hqd_pq_rptr_report_addr_hi;
+       u32 cp_hqd_pq_wptr_poll_addr;
+       u32 cp_hqd_pq_wptr_poll_addr_hi;
+       u32 cp_hqd_pq_doorbell_control;
+       u32 cp_hqd_pq_wptr;
+       u32 cp_hqd_pq_control;
+       u32 cp_hqd_ib_base_addr;
+       u32 cp_hqd_ib_base_addr_hi;
+       u32 cp_hqd_ib_rptr;
+       u32 cp_hqd_ib_control;
+       u32 cp_hqd_iq_timer;
+       u32 cp_hqd_iq_rptr;
+       u32 cp_hqd_dequeue_request;
+       u32 cp_hqd_dma_offload;
+       u32 cp_hqd_sema_cmd;
+       u32 cp_hqd_msg_type;
+       u32 cp_hqd_atomic0_preop_lo;
+       u32 cp_hqd_atomic0_preop_hi;
+       u32 cp_hqd_atomic1_preop_lo;
+       u32 cp_hqd_atomic1_preop_hi;
+       u32 cp_hqd_hq_scheduler0;
+       u32 cp_hqd_hq_scheduler1;
+       u32 cp_mqd_control;
+};
+
+/*
+ * MQD layout used by kfd_mqd_manager.c.
+ * init_mqd()/init_mqd_hiq() zero the whole structure and then fill in
+ * header, pipeline_stat_enable, the static_thread_mgmt masks and parts of
+ * queue_state; update_mqd()/update_mqd_hiq() fill in the rest of
+ * queue_state from the queue properties.
+ */
+struct cik_mqd {
+       u32 header;
+       u32 dispatch_initiator;
+       u32 dimensions[3];
+       u32 start_idx[3];
+       u32 num_threads[3];
+       u32 pipeline_stat_enable;
+       u32 perf_counter_enable;
+       u32 pgm[2];
+       u32 tba[2];
+       u32 tma[2];
+       u32 pgm_rsrc[2];
+       u32 vmid;
+       u32 resource_limits;
+       u32 static_thread_mgmt01[2];
+       u32 tmp_ring_size;
+       u32 static_thread_mgmt23[2];
+       u32 restart[3];
+       u32 thread_trace_enable;
+       u32 reserved1;
+       u32 user_data[16];
+       u32 vgtcs_invoke_count[2];
+       /* Register image copied to the CP_MQD_xxx / CP_HQD_xxx registers by load_mqd(). */
+       struct cik_hqd_registers queue_state;
+       u32 dequeue_cntr;
+       u32 interrupt_queue[64];
+};
+
+/* This structure represents the MQD used for CP scheduling queues.
+ * Taken from Gfx72_cp_program_spec.pdf.
+ * It is a flat list of 32-bit words; nothing in this patch accesses it.
+ */
+struct cik_compute_mqd {
+       u32 header;
+       u32 compute_dispatch_initiator;
+       u32 compute_dim_x;
+       u32 compute_dim_y;
+       u32 compute_dim_z;
+       u32 compute_start_x;
+       u32 compute_start_y;
+       u32 compute_start_z;
+       u32 compute_num_thread_x;
+       u32 compute_num_thread_y;
+       u32 compute_num_thread_z;
+       u32 compute_pipelinestat_enable;
+       u32 compute_perfcount_enable;
+       u32 compute_pgm_lo;
+       u32 compute_pgm_hi;
+       u32 compute_tba_lo;
+       u32 compute_tba_hi;
+       u32 compute_tma_lo;
+       u32 compute_tma_hi;
+       u32 compute_pgm_rsrc1;
+       u32 compute_pgm_rsrc2;
+       u32 compute_vmid;
+       u32 compute_resource_limits;
+       u32 compute_static_thread_mgmt_se0;
+       u32 compute_static_thread_mgmt_se1;
+       u32 compute_tmpring_size;
+       u32 compute_static_thread_mgmt_se2;
+       u32 compute_static_thread_mgmt_se3;
+       u32 compute_restart_x;
+       u32 compute_restart_y;
+       u32 compute_restart_z;
+       u32 compute_thread_trace_enable;
+       u32 compute_misc_reserved;
+       u32 compute_user_data[16];
+       u32 vgt_csinvoc_count_lo;
+       u32 vgt_csinvoc_count_hi;
+       /*
+        * NOTE(review): the "51" suffix looks like a typo; the other MQD
+        * layouts in this header name this word cp_mqd_base_addr - confirm.
+        */
+       u32 cp_mqd_base_addr51;
+       u32 cp_mqd_base_addr_hi;
+       u32 cp_hqd_active;
+       u32 cp_hqd_vmid;
+       u32 cp_hqd_persistent_state;
+       u32 cp_hqd_pipe_priority;
+       u32 cp_hqd_queue_priority;
+       u32 cp_hqd_quantum;
+       u32 cp_hqd_pq_base;
+       u32 cp_hqd_pq_base_hi;
+       u32 cp_hqd_pq_rptr;
+       u32 cp_hqd_pq_rptr_report_addr;
+       u32 cp_hqd_pq_rptr_report_addr_hi;
+       u32 cp_hqd_pq_wptr_poll_addr;
+       u32 cp_hqd_pq_wptr_poll_addr_hi;
+       u32 cp_hqd_pq_doorbell_control;
+       u32 cp_hqd_pq_wptr;
+       u32 cp_hqd_pq_control;
+       u32 cp_hqd_ib_base_addr;
+       u32 cp_hqd_ib_base_addr_hi;
+       u32 cp_hqd_ib_rptr;
+       u32 cp_hqd_ib_control;
+       u32 cp_hqd_iq_timer;
+       u32 cp_hqd_iq_rptr;
+       u32 cp_hqd_dequeue_request;
+       u32 cp_hqd_dma_offload;
+       u32 cp_hqd_sema_cmd;
+       u32 cp_hqd_msg_type;
+       u32 cp_hqd_atomic0_preop_lo;
+       u32 cp_hqd_atomic0_preop_hi;
+       u32 cp_hqd_atomic1_preop_lo;
+       u32 cp_hqd_atomic1_preop_hi;
+       u32 cp_hqd_hq_scheduler0;
+       u32 cp_hqd_hq_scheduler1;
+       u32 cp_mqd_control;
+       u32 reserved1[10];
+       u32 cp_mqd_query_time_lo;
+       u32 cp_mqd_query_time_hi;
+       u32 reserved2[4];
+       u32 cp_mqd_connect_start_time_lo;
+       u32 cp_mqd_connect_start_time_hi;
+       u32 cp_mqd_connect_end_time_lo;
+       u32 cp_mqd_connect_end_time_hi;
+       u32 cp_mqd_connect_end_wf_count;
+       u32 cp_mqd_connect_end_pq_rptr;
+       u32 cp_mqd_connect_end_pq_wptr;
+       u32 cp_mqd_connect_end_ib_rptr;
+       u32 reserved3[18];
+};
+
+/* This structure represents the MQD layout shared by all *IQs.
+ * Taken from Gfx73_CPC_Eng_Init_Prog.pdf.
+ * The first 128 words are reserved; the queue register image follows.
+ * Nothing in this patch accesses the structure.
+ */
+struct cik_interface_mqd {
+       u32 reserved1[128];
+       u32 cp_mqd_base_addr;
+       u32 cp_mqd_base_addr_hi;
+       u32 cp_hqd_active;
+       u32 cp_hqd_vmid;
+       u32 cp_hqd_persistent_state;
+       u32 cp_hqd_pipe_priority;
+       u32 cp_hqd_queue_priority;
+       u32 cp_hqd_quantum;
+       u32 cp_hqd_pq_base;
+       u32 cp_hqd_pq_base_hi;
+       u32 cp_hqd_pq_rptr;
+       u32 cp_hqd_pq_rptr_report_addr;
+       u32 cp_hqd_pq_rptr_report_addr_hi;
+       u32 cp_hqd_pq_wptr_poll_addr;
+       u32 cp_hqd_pq_wptr_poll_addr_hi;
+       u32 cp_hqd_pq_doorbell_control;
+       u32 cp_hqd_pq_wptr;
+       u32 cp_hqd_pq_control;
+       u32 cp_hqd_ib_base_addr;
+       u32 cp_hqd_ib_base_addr_hi;
+       u32 cp_hqd_ib_rptr;
+       u32 cp_hqd_ib_control;
+       u32 cp_hqd_iq_timer;
+       u32 cp_hqd_iq_rptr;
+       u32 cp_hqd_dequeue_request;
+       u32 cp_hqd_dma_offload;
+       u32 cp_hqd_sema_cmd;
+       u32 cp_hqd_msg_type;
+       u32 cp_hqd_atomic0_preop_lo;
+       u32 cp_hqd_atomic0_preop_hi;
+       u32 cp_hqd_atomic1_preop_lo;
+       u32 cp_hqd_atomic1_preop_hi;
+       u32 cp_hqd_hq_status0;
+       u32 cp_hqd_hq_control0;
+       u32 cp_mqd_control;
+       u32 reserved2[3];
+       u32 cp_hqd_hq_status1;
+       u32 cp_hqd_hq_control1;
+       u32 reserved3[16];
+       u32 cp_hqd_hq_status2;
+       u32 cp_hqd_hq_control2;
+       u32 cp_hqd_hq_status3;
+       u32 cp_hqd_hq_control3;
+       u32 reserved4[2];
+       u32 cp_mqd_query_time_lo;
+       u32 cp_mqd_query_time_hi;
+       u32 reserved5[48];
+       u32 cp_mqd_skip_process[16];
+};
+
+#pragma pack(pop)
+
+
+#endif /* CIK_MQDS_H_ */
diff --git a/drivers/gpu/hsa/radeon/cik_regs.h 
b/drivers/gpu/hsa/radeon/cik_regs.h
index 93f7b34..fa5ec01 100644
--- a/drivers/gpu/hsa/radeon/cik_regs.h
+++ b/drivers/gpu/hsa/radeon/cik_regs.h
@@ -168,6 +168,7 @@
 
 #define CP_HQD_DEQUEUE_REQUEST                         0xC974
 #define        DEQUEUE_REQUEST_DRAIN                           1
+#define DEQUEUE_REQUEST_RESET                          2
 #define                DEQUEUE_INT                                     (1U << 
8)
 
 #define CP_HQD_SEMA_CMD                                        0xC97Cu
diff --git a/drivers/gpu/hsa/radeon/kfd_mqd_manager.c 
b/drivers/gpu/hsa/radeon/kfd_mqd_manager.c
new file mode 100644
index 0000000..14b248f
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_mqd_manager.c
@@ -0,0 +1,453 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#include <linux/jiffies.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "cik_mqds.h"
+#include "cik_regs.h"
+
+/*
+ * Return the low 32 bits of a 64-bit value.
+ *
+ * Deliberately not marked "inline": the function is called from other
+ * files through the prototype in kfd_priv.h, so an out-of-line definition
+ * with external linkage has to exist in this file.
+ */
+uint32_t lower_32(uint64_t x)
+{
+       return (uint32_t)x;
+}
+
+/*
+ * Return the high 32 bits of a 64-bit value.
+ *
+ * Deliberately not marked "inline": the function is called from other
+ * files through the prototype in kfd_priv.h, so an out-of-line definition
+ * with external linkage has to exist in this file.
+ */
+uint32_t upper_32(uint64_t x)
+{
+       return (uint32_t)(x >> 32);
+}
+
+/*
+ * Spin on the CPU for roughly @ms milliseconds (jiffies resolution).
+ *
+ * The deadline is computed relative to the current jiffies value.
+ * Comparing jiffies against the raw @ms argument would treat it as an
+ * absolute tick count, so the function would either return at once or
+ * spin for a time unrelated to @ms.
+ *
+ * Not marked "inline" because it is exported through the prototype in
+ * kfd_priv.h and needs an out-of-line definition.
+ */
+void busy_wait(unsigned long ms)
+{
+       unsigned long deadline = jiffies + msecs_to_jiffies(ms);
+
+       while (time_before(jiffies, deadline))
+               cpu_relax();
+}
+
+/* View an opaque MQD pointer as the CIK MQD layout. */
+static inline struct cik_mqd *get_mqd(void *mqd)
+{
+       struct cik_mqd *cik = mqd;
+
+       return cik;
+}
+
+/*
+ * Allocate, map and initialise the MQD of a compute queue.
+ *
+ * @mm:          MQD manager; supplies the device and the update_mqd callback
+ * @mqd:         out: kernel pointer to the freshly initialised MQD
+ * @mqd_mem_obj: out: memory object backing the MQD
+ * @gart_addr:   out, optional: address returned by
+ *               radeon_kfd_vidmem_alloc_map() for the MQD
+ * @q:           queue properties, passed on to mm->update_mqd()
+ *
+ * Returns -ENOMEM when the allocation/mapping fails, otherwise the result
+ * of mm->update_mqd().
+ */
+static int init_mqd(struct mqd_manager *mm, void **mqd,
+               kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+               struct queue_properties *q)
+{
+       uint64_t addr;
+       struct cik_mqd *m;
+       int retval;
+
+       /* mqd_mem_obj is checked too, as init_mqd_hiq() already does. */
+       BUG_ON(!mm || !q || !mqd || !mqd_mem_obj);
+
+       pr_debug("kfd: In func %s\n", __func__);
+
+       retval = radeon_kfd_vidmem_alloc_map(
+                               mm->dev,
+                               mqd_mem_obj,
+                               (void **)&m,
+                               &addr,
+                               ALIGN(sizeof(struct cik_mqd), 256));
+
+       if (retval != 0)
+               return -ENOMEM;
+
+       memset(m, 0, sizeof(struct cik_mqd));
+
+       m->header = 0xC0310800;
+       m->pipeline_stat_enable = 1;
+       m->static_thread_mgmt01[0] = 0xFFFFFFFF;
+       m->static_thread_mgmt01[1] = 0xFFFFFFFF;
+       m->static_thread_mgmt23[0] = 0xFFFFFFFF;
+       m->static_thread_mgmt23[1] = 0xFFFFFFFF;
+
+       m->queue_state.cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE;
+
+       /* The MQD records its own address. */
+       m->queue_state.cp_mqd_control             = MQD_CONTROL_PRIV_STATE_EN;
+       m->queue_state.cp_mqd_base_addr           = lower_32(addr);
+       m->queue_state.cp_mqd_base_addr_hi        = upper_32(addr);
+
+       /*
+        * Although WinKFD writes this, I suspect it should not be necessary.
+        * (The value used to be stored twice in a row; one store is enough.)
+        */
+       m->queue_state.cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN;
+
+       m->queue_state.cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
+                       QUANTUM_DURATION(10);
+
+       m->queue_state.cp_hqd_pipe_priority = 1;
+       m->queue_state.cp_hqd_queue_priority = 15;
+
+       *mqd = m;
+       if (gart_addr != NULL)
+               *gart_addr = addr;
+
+       /* Fill in the fields that depend on the queue properties. */
+       retval = mm->update_mqd(mm, m, q);
+
+       return retval;
+}
+
+/*
+ * Release the memory object backing an MQD through
+ * radeon_kfd_vidmem_free_unmap(). @mqd itself is only checked for NULL.
+ */
+static void uninit_mqd(struct mqd_manager *mm, void *mqd,
+               kfd_mem_obj mqd_mem_obj)
+{
+       BUG_ON(!mqd || !mm);
+
+       radeon_kfd_vidmem_free_unmap(mm->dev, mqd_mem_obj);
+}
+
+/*
+ * Program the hardware queue registers from the register image saved in
+ * the MQD (m->queue_state).
+ *
+ * Each WRITE_REG() copies one queue_state field into the CP_MQD_xxx or
+ * CP_HQD_xxx register of the same name on mm->dev. CP_HQD_ACTIVE is
+ * written last. Always returns 0.
+ *
+ * NOTE(review): the function does not select a queue itself; presumably
+ * the caller brackets it with acquire_hqd()/release_hqd() - confirm.
+ */
+static int load_mqd(struct mqd_manager *mm, void *mqd)
+{
+       struct cik_mqd *m;
+
+       BUG_ON(!mm || !mqd);
+
+       m = get_mqd(mqd);
+
+       WRITE_REG(mm->dev, CP_MQD_BASE_ADDR, m->queue_state.cp_mqd_base_addr);
+       WRITE_REG(mm->dev, CP_MQD_BASE_ADDR_HI, 
m->queue_state.cp_mqd_base_addr_hi);
+       WRITE_REG(mm->dev, CP_MQD_CONTROL, m->queue_state.cp_mqd_control);
+
+       WRITE_REG(mm->dev, CP_HQD_PQ_BASE, m->queue_state.cp_hqd_pq_base);
+       WRITE_REG(mm->dev, CP_HQD_PQ_BASE_HI, m->queue_state.cp_hqd_pq_base_hi);
+       WRITE_REG(mm->dev, CP_HQD_PQ_CONTROL, m->queue_state.cp_hqd_pq_control);
+
+       WRITE_REG(mm->dev, CP_HQD_IB_CONTROL, m->queue_state.cp_hqd_ib_control);
+       WRITE_REG(mm->dev, CP_HQD_IB_BASE_ADDR, 
m->queue_state.cp_hqd_ib_base_addr);
+       WRITE_REG(mm->dev, CP_HQD_IB_BASE_ADDR_HI, 
m->queue_state.cp_hqd_ib_base_addr_hi);
+
+       WRITE_REG(mm->dev, CP_HQD_IB_RPTR, m->queue_state.cp_hqd_ib_rptr);
+
+       WRITE_REG(mm->dev, CP_HQD_PERSISTENT_STATE, 
m->queue_state.cp_hqd_persistent_state);
+       WRITE_REG(mm->dev, CP_HQD_SEMA_CMD, m->queue_state.cp_hqd_sema_cmd);
+       WRITE_REG(mm->dev, CP_HQD_MSG_TYPE, m->queue_state.cp_hqd_msg_type);
+
+       WRITE_REG(mm->dev, CP_HQD_ATOMIC0_PREOP_LO, 
m->queue_state.cp_hqd_atomic0_preop_lo);
+       WRITE_REG(mm->dev, CP_HQD_ATOMIC0_PREOP_HI, 
m->queue_state.cp_hqd_atomic0_preop_hi);
+       WRITE_REG(mm->dev, CP_HQD_ATOMIC1_PREOP_LO, 
m->queue_state.cp_hqd_atomic1_preop_lo);
+       WRITE_REG(mm->dev, CP_HQD_ATOMIC1_PREOP_HI, 
m->queue_state.cp_hqd_atomic1_preop_hi);
+
+       WRITE_REG(mm->dev, CP_HQD_PQ_RPTR_REPORT_ADDR, 
m->queue_state.cp_hqd_pq_rptr_report_addr);
+       WRITE_REG(mm->dev, CP_HQD_PQ_RPTR_REPORT_ADDR_HI, 
m->queue_state.cp_hqd_pq_rptr_report_addr_hi);
+       WRITE_REG(mm->dev, CP_HQD_PQ_RPTR, m->queue_state.cp_hqd_pq_rptr);
+
+       WRITE_REG(mm->dev, CP_HQD_PQ_WPTR_POLL_ADDR, 
m->queue_state.cp_hqd_pq_wptr_poll_addr);
+       WRITE_REG(mm->dev, CP_HQD_PQ_WPTR_POLL_ADDR_HI, 
m->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
+
+       WRITE_REG(mm->dev, CP_HQD_PQ_DOORBELL_CONTROL, 
m->queue_state.cp_hqd_pq_doorbell_control);
+
+       WRITE_REG(mm->dev, CP_HQD_VMID, m->queue_state.cp_hqd_vmid);
+
+       WRITE_REG(mm->dev, CP_HQD_QUANTUM, m->queue_state.cp_hqd_quantum);
+
+       WRITE_REG(mm->dev, CP_HQD_PIPE_PRIORITY, 
m->queue_state.cp_hqd_pipe_priority);
+       WRITE_REG(mm->dev, CP_HQD_QUEUE_PRIORITY, 
m->queue_state.cp_hqd_queue_priority);
+
+       WRITE_REG(mm->dev, CP_HQD_HQ_SCHEDULER0, 
m->queue_state.cp_hqd_hq_scheduler0);
+       WRITE_REG(mm->dev, CP_HQD_HQ_SCHEDULER1, 
m->queue_state.cp_hqd_hq_scheduler1);
+
+       /* The active flag goes in last, after the rest of the queue state. */
+       WRITE_REG(mm->dev, CP_HQD_ACTIVE, m->queue_state.cp_hqd_active);
+
+       return 0;
+}
+
+/*
+ * Refresh the queue-dependent part of a compute MQD from @q.
+ *
+ * Rewrites the PQ control word, ring base, read-pointer report address,
+ * doorbell control and VMID in m->queue_state, then recomputes the active
+ * flag: the queue is marked active (in the MQD and in q->is_active) only
+ * when its size, address and percent are all non-zero.
+ *
+ * Always returns 0.
+ */
+static int update_mqd(struct mqd_manager *mm, void *mqd,
+               struct queue_properties *q)
+{
+       struct cik_mqd *m;
+       unsigned int size_dwords;
+
+       BUG_ON(!mm || !q || !mqd);
+
+       pr_debug("kfd: In func %s\n", __func__);
+
+       m = get_mqd(mqd);
+       m->queue_state.cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
+                       DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN;
+
+       /*
+        * The size field is log2(queue size in dwords) - 1; ffs() is
+        * 1-based, hence the second -1. A queue of fewer than two dwords
+        * (including the size-0 "inactive" case handled below) has no valid
+        * encoding: ffs() - 2 would be negative and set every bit of the
+        * control word, so the field is left at zero instead.
+        */
+       size_dwords = q->queue_size / sizeof(unsigned int);
+       if (size_dwords >= 2)
+               m->queue_state.cp_hqd_pq_control |= ffs(size_dwords) - 1 - 1;
+
+       /* The ring base is stored shifted right by 8 bits. */
+       m->queue_state.cp_hqd_pq_base =
+                       lower_32((uint64_t)q->queue_address >> 8);
+       m->queue_state.cp_hqd_pq_base_hi =
+                       upper_32((uint64_t)q->queue_address >> 8);
+       m->queue_state.cp_hqd_pq_rptr_report_addr =
+                       lower_32((uint64_t)q->read_ptr);
+       m->queue_state.cp_hqd_pq_rptr_report_addr_hi =
+                       upper_32((uint64_t)q->read_ptr);
+       m->queue_state.cp_hqd_pq_doorbell_control = DOORBELL_EN |
+                       DOORBELL_OFFSET(q->doorbell_off);
+
+       m->queue_state.cp_hqd_vmid = q->vmid;
+
+       m->queue_state.cp_hqd_active = 0;
+       q->is_active = false;
+       if (q->queue_size > 0 &&
+                       q->queue_address != 0 &&
+                       q->queue_percent > 0) {
+               m->queue_state.cp_hqd_active = 1;
+               q->is_active = true;
+       }
+
+       return 0;
+}
+
+/*
+ * Ask the hardware to dequeue the currently selected queue and wait for
+ * it to become inactive.
+ *
+ * @mm:      MQD manager (supplies the device)
+ * @mqd:     MQD of the queue; only checked for NULL here
+ * @type:    KFD_PREEMPT_TYPE_WAVEFRONT_RESET issues DEQUEUE_REQUEST_RESET,
+ *           anything else issues DEQUEUE_REQUEST_DRAIN
+ * @timeout: time to wait in milliseconds; 0 means wait without limit
+ *
+ * Returns 0 once CP_HQD_ACTIVE reads back as 0, or -EBUSY when a non-zero
+ * @timeout expires first.
+ *
+ * The wait is measured against a jiffies deadline so that @timeout really
+ * is in milliseconds, as the error message states.
+ */
+static int destroy_mqd(struct mqd_manager *mm, void *mqd,
+               enum kfd_preempt_type type, unsigned int timeout)
+{
+       unsigned long deadline;
+       bool sync;
+
+       BUG_ON(!mm || !mqd);
+
+       pr_debug("kfd: In func %s\n", __func__);
+
+       /* Stop new submissions through the doorbell before dequeueing. */
+       WRITE_REG(mm->dev, CP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+       if (type == KFD_PREEMPT_TYPE_WAVEFRONT_RESET)
+               WRITE_REG(mm->dev, CP_HQD_DEQUEUE_REQUEST,
+                               DEQUEUE_REQUEST_RESET);
+       else
+               WRITE_REG(mm->dev, CP_HQD_DEQUEUE_REQUEST,
+                               DEQUEUE_REQUEST_DRAIN);
+
+       sync = (timeout > 0);
+       deadline = jiffies + msecs_to_jiffies(timeout);
+
+       while (READ_REG(mm->dev, CP_HQD_ACTIVE) != 0) {
+               if (sync && time_after(jiffies, deadline)) {
+                       pr_err("kfd: cp queue preemption time out (%ums)\n",
+                                       timeout);
+                       return -EBUSY;
+               }
+               cpu_relax();
+       }
+
+       return 0;
+}
+
+/*
+ * Compose an SRBM_GFX_CNTL value from a micro-engine, pipe, queue and
+ * VMID by OR-ing the MEID/PIPEID/QUEUEID/VMID field macros together.
+ */
+static inline uint32_t make_srbm_gfx_cntl_mpqv(unsigned int me,
+                                               unsigned int pipe,
+                                               unsigned int queue,
+                                               unsigned int vmid)
+{
+       uint32_t cntl = QUEUEID(queue);
+
+       cntl |= VMID(vmid);
+       cntl |= MEID(me);
+       cntl |= PIPEID(pipe);
+
+       return cntl;
+}
+
+/* Return shared_resources.first_compute_pipe of the manager's device. */
+static inline uint32_t get_first_pipe_offset(struct mqd_manager *mm)
+{
+       struct kfd_dev *kfd;
+
+       BUG_ON(!mm);
+
+       kfd = mm->dev;
+       return kfd->shared_resources.first_compute_pipe;
+}
+
+/*
+ * Take the SRBM index lock and point SRBM_GFX_CNTL at the given
+ * pipe/queue/VMID. The lock is still held on return; release_hqd()
+ * drops it.
+ *
+ * @pipe is relative to the first compute pipe. It is converted to an
+ * absolute pipe number and split into a 1-based MEC index and a pipe
+ * index within that MEC, four pipes per MEC.
+ */
+static void acquire_hqd(struct mqd_manager *mm, unsigned int pipe,
+               unsigned int queue, unsigned int vmid)
+{
+       unsigned int abs_pipe, mec, pipe_in_mec;
+
+       BUG_ON(!mm);
+
+       radeon_kfd_lock_srbm_index(mm->dev);
+
+       abs_pipe = pipe + get_first_pipe_offset(mm);
+       pipe_in_mec = abs_pipe % 4;
+       mec = abs_pipe / 4 + 1;
+
+       pr_debug("kfd: acquire mec: %d pipe: %d queue: %d vmid: %d\n",
+                       mec, pipe_in_mec, queue, vmid);
+
+       WRITE_REG(mm->dev, SRBM_GFX_CNTL,
+                       make_srbm_gfx_cntl_mpqv(mec, pipe_in_mec, queue, vmid));
+}
+
+/*
+ * Undo acquire_hqd(): write 0 to SRBM_GFX_CNTL and drop the SRBM index
+ * lock.
+ */
+static void release_hqd(struct mqd_manager *mm)
+{
+       BUG_ON(!mm);
+
+       /* Be nice to KGD, reset indexed CP registers to the GFX pipe. */
+       WRITE_REG(mm->dev, SRBM_GFX_CNTL, 0);
+
+       radeon_kfd_unlock_srbm_index(mm->dev);
+}
+
+/*
+ * Report whether the currently selected hardware queue is active and
+ * bound to the ring described by @q.
+ *
+ * Returns true only when CP_HQD_ACTIVE is non-zero and the
+ * CP_HQD_PQ_BASE/CP_HQD_PQ_BASE_HI registers hold q->queue_address >> 8.
+ * @mqd is only checked for NULL; its contents are not consulted.
+ */
+bool is_occupied(struct mqd_manager *mm, void *mqd, struct queue_properties *q)
+{
+       uint32_t low, high;
+
+       BUG_ON(!mm || !mqd || !q);
+
+       if (!READ_REG(mm->dev, CP_HQD_ACTIVE))
+               return false;
+
+       /* Same encoding as update_mqd() uses for the ring base. */
+       low = lower_32((uint64_t)q->queue_address >> 8);
+       high = upper_32((uint64_t)q->queue_address >> 8);
+
+       return low == READ_REG(mm->dev, CP_HQD_PQ_BASE) &&
+               high == READ_REG(mm->dev, CP_HQD_PQ_BASE_HI);
+}
+
+/*
+ * Per-manager initialisation hook. There is nothing to set up for CIK,
+ * so it only validates @mm and reports success (0).
+ */
+static int initialize(struct mqd_manager *mm)
+{
+       BUG_ON(mm == NULL);
+
+       return 0;
+}
+
+/* Per-manager teardown hook; it only validates @mm. */
+static void uninitialize(struct mqd_manager *mm)
+{
+       BUG_ON(mm == NULL);
+}
+
+/*
+ * HIQ MQD Implementation
+ */
+
+/*
+ * Allocate, map and initialise the MQD of the HIQ.
+ *
+ * Same sequence as init_mqd(), except that the allocation is rounded up
+ * to PAGE_SIZE and cp_hqd_ib_control is set without IB_ATC_EN.
+ *
+ * @mm:          MQD manager; supplies the device and the update_mqd callback
+ * @mqd:         out: kernel pointer to the freshly initialised MQD
+ * @mqd_mem_obj: out: memory object backing the MQD
+ * @gart_addr:   out, optional: address returned by
+ *               radeon_kfd_vidmem_alloc_map() for the MQD
+ * @q:           queue properties, passed on to mm->update_mqd()
+ *
+ * Returns -ENOMEM when the allocation/mapping fails, otherwise the result
+ * of mm->update_mqd().
+ */
+static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+               kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+               struct queue_properties *q)
+{
+       struct cik_mqd *hiq;
+       uint64_t gpu_addr;
+       int err;
+
+       BUG_ON(!mm || !q || !mqd || !mqd_mem_obj);
+
+       pr_debug("kfd: In func %s\n", __func__);
+
+       err = radeon_kfd_vidmem_alloc_map(mm->dev, mqd_mem_obj, (void **)&hiq,
+                       &gpu_addr, ALIGN(sizeof(struct cik_mqd), PAGE_SIZE));
+       if (err)
+               return -ENOMEM;
+
+       memset(hiq, 0, sizeof(*hiq));
+
+       hiq->header = 0xC0310800;
+       hiq->pipeline_stat_enable = 1;
+       hiq->static_thread_mgmt01[0] = 0xFFFFFFFF;
+       hiq->static_thread_mgmt01[1] = 0xFFFFFFFF;
+       hiq->static_thread_mgmt23[0] = 0xFFFFFFFF;
+       hiq->static_thread_mgmt23[1] = 0xFFFFFFFF;
+
+       hiq->queue_state.cp_hqd_persistent_state =
+                       DEFAULT_CP_HQD_PERSISTENT_STATE;
+
+       /* The MQD records its own address. */
+       hiq->queue_state.cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN;
+       hiq->queue_state.cp_mqd_base_addr = lower_32(gpu_addr);
+       hiq->queue_state.cp_mqd_base_addr_hi = upper_32(gpu_addr);
+
+       hiq->queue_state.cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE;
+
+       hiq->queue_state.cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
+                       QUANTUM_DURATION(10);
+
+       hiq->queue_state.cp_hqd_pipe_priority = 1;
+       hiq->queue_state.cp_hqd_queue_priority = 15;
+
+       *mqd = hiq;
+       if (gart_addr != NULL)
+               *gart_addr = gpu_addr;
+
+       /* Fill in the fields that depend on the queue properties. */
+       return mm->update_mqd(mm, hiq, q);
+}
+
+/*
+ * Refresh the queue-dependent part of the HIQ MQD from @q.
+ *
+ * Identical to update_mqd() except for the PQ control flags: the HIQ
+ * uses PRIV_STATE | KMD_QUEUE where compute queues use PQ_ATC_EN.
+ *
+ * The queue is marked active (in the MQD and in q->is_active) only when
+ * its size, address and percent are all non-zero. Always returns 0.
+ */
+static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
+               struct queue_properties *q)
+{
+       struct cik_mqd *m;
+       unsigned int size_dwords;
+
+       BUG_ON(!mm || !q || !mqd);
+
+       pr_debug("kfd: In func %s\n", __func__);
+
+       m = get_mqd(mqd);
+       m->queue_state.cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
+                       DEFAULT_MIN_AVAIL_SIZE | PRIV_STATE | KMD_QUEUE;
+
+       /*
+        * The size field is log2(queue size in dwords) - 1; ffs() is
+        * 1-based, hence the second -1. A queue of fewer than two dwords
+        * (including the size-0 "inactive" case handled below) has no valid
+        * encoding: ffs() - 2 would be negative and set every bit of the
+        * control word, so the field is left at zero instead.
+        */
+       size_dwords = q->queue_size / sizeof(unsigned int);
+       if (size_dwords >= 2)
+               m->queue_state.cp_hqd_pq_control |= ffs(size_dwords) - 1 - 1;
+
+       /* The ring base is stored shifted right by 8 bits. */
+       m->queue_state.cp_hqd_pq_base =
+                       lower_32((uint64_t)q->queue_address >> 8);
+       m->queue_state.cp_hqd_pq_base_hi =
+                       upper_32((uint64_t)q->queue_address >> 8);
+       m->queue_state.cp_hqd_pq_rptr_report_addr =
+                       lower_32((uint64_t)q->read_ptr);
+       m->queue_state.cp_hqd_pq_rptr_report_addr_hi =
+                       upper_32((uint64_t)q->read_ptr);
+       m->queue_state.cp_hqd_pq_doorbell_control = DOORBELL_EN |
+                       DOORBELL_OFFSET(q->doorbell_off);
+
+       m->queue_state.cp_hqd_vmid = q->vmid;
+
+       m->queue_state.cp_hqd_active = 0;
+       q->is_active = false;
+       if (q->queue_size > 0 &&
+                       q->queue_address != 0 &&
+                       q->queue_percent > 0) {
+               m->queue_state.cp_hqd_active = 1;
+               q->is_active = true;
+       }
+
+       return 0;
+}
+
+/*
+ * Allocate an MQD manager for @dev and wire up the callbacks for @type.
+ *
+ * KFD_MQD_TYPE_CIK_CP and KFD_MQD_TYPE_CIK_COMPUTE share one callback
+ * set. KFD_MQD_TYPE_CIK_HIQ differs only in init_mqd/update_mqd. Any
+ * other type (e.g. KFD_MQD_TYPE_CIK_SDMA) has no implementation.
+ *
+ * Returns the new manager, or NULL when the allocation fails, the type
+ * is not implemented, or the initialize() hook fails. Nothing stays
+ * allocated on any NULL return.
+ */
+struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
+               struct kfd_dev *dev)
+{
+       struct mqd_manager *mqd;
+
+       BUG_ON(!dev);
+       BUG_ON(type >= KFD_MQD_TYPE_MAX);
+
+       pr_debug("kfd: In func %s\n", __func__);
+
+       mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
+       if (!mqd)
+               return NULL;
+
+       mqd->dev = dev;
+
+       switch (type) {
+       case KFD_MQD_TYPE_CIK_CP:
+       case KFD_MQD_TYPE_CIK_COMPUTE:
+               mqd->init_mqd = init_mqd;
+               mqd->uninit_mqd = uninit_mqd;
+               mqd->load_mqd = load_mqd;
+               mqd->update_mqd = update_mqd;
+               mqd->destroy_mqd = destroy_mqd;
+               mqd->acquire_hqd = acquire_hqd;
+               mqd->release_hqd = release_hqd;
+               mqd->is_occupied = is_occupied;
+               mqd->initialize = initialize;
+               mqd->uninitialize = uninitialize;
+               break;
+       case KFD_MQD_TYPE_CIK_HIQ:
+               mqd->init_mqd = init_mqd_hiq;
+               mqd->uninit_mqd = uninit_mqd;
+               mqd->load_mqd = load_mqd;
+               mqd->update_mqd = update_mqd_hiq;
+               mqd->destroy_mqd = destroy_mqd;
+               mqd->acquire_hqd = acquire_hqd;
+               mqd->release_hqd = release_hqd;
+               mqd->is_occupied = is_occupied;
+               mqd->initialize = initialize;
+               mqd->uninitialize = uninitialize;
+               break;
+       default:
+               /* Unimplemented type: do not leak the manager allocated above. */
+               kfree(mqd);
+               return NULL;
+       }
+
+       if (mqd->initialize(mqd) != 0) {
+               pr_err("kfd: mqd manager initialization failed\n");
+               kfree(mqd);
+               return NULL;
+       }
+       return mqd;
+}
+
+/* SDMA queues should be implemented here once the CP supports them. */
diff --git a/drivers/gpu/hsa/radeon/kfd_mqd_manager.h 
b/drivers/gpu/hsa/radeon/kfd_mqd_manager.h
new file mode 100644
index 0000000..e7b39ee
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_mqd_manager.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#ifndef MQD_MANAGER_H_
+#define MQD_MANAGER_H_
+
+#include "kfd_priv.h"
+
+/*
+ * Table of MQD operations for one queue type on one device.
+ * mqd_manager_init() allocates it and fills in the callbacks.
+ *
+ * init_mqd:     allocate and initialise an MQD; returns it through @mqd,
+ *               its backing object through @mqd_mem_obj and, when
+ *               @gart_addr is non-NULL, its address
+ * load_mqd:     write the MQD's saved register image to the hardware
+ * update_mqd:   refresh the queue-dependent MQD fields from @q
+ * destroy_mqd:  request a dequeue and wait for the queue to go inactive
+ * uninit_mqd:   free the memory object backing an MQD
+ * acquire_hqd:  lock the SRBM index and select pipe/queue/vmid
+ * release_hqd:  reset the selection and unlock the SRBM index
+ * is_occupied:  true when the selected hardware queue is active and bound
+ *               to the ring described by @q
+ * initialize / uninitialize: per-manager setup and teardown hooks
+ */
+struct mqd_manager {
+       int     (*init_mqd)(struct mqd_manager *mm, void **mqd, kfd_mem_obj 
*mqd_mem_obj, uint64_t *gart_addr,
+                           struct queue_properties *q);
+       int     (*load_mqd)(struct mqd_manager *mm, void *mqd);
+       int     (*update_mqd)(struct mqd_manager *mm, void *mqd, struct 
queue_properties *q);
+       int     (*destroy_mqd)(struct mqd_manager *mm, void *mqd, enum 
kfd_preempt_type type, unsigned int timeout);
+       void    (*uninit_mqd)(struct mqd_manager *mm, void *mqd, kfd_mem_obj 
mqd_mem_obj);
+       void    (*acquire_hqd)(struct mqd_manager *mm, unsigned int pipe, 
unsigned int queue, unsigned int vmid);
+       void    (*release_hqd)(struct mqd_manager *mm);
+       bool    (*is_occupied)(struct mqd_manager *mm, void *mqd, struct 
queue_properties *q);
+       int     (*initialize)(struct mqd_manager *mm);
+       void    (*uninitialize)(struct mqd_manager *mm);
+
+       /* NOTE(review): not initialised or taken anywhere in this patch - confirm who owns it. */
+       struct mutex            mqd_mutex;
+       /* Device whose registers the callbacks access. */
+       struct kfd_dev          *dev;
+};
+
+
+#endif /* MQD_MANAGER_H_ */
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h 
b/drivers/gpu/hsa/radeon/kfd_priv.h
index df17387..cc60b48 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -141,6 +141,9 @@ int radeon_kfd_vidmem_gpumap(struct kfd_dev *kfd, 
kfd_mem_obj mem_obj, uint64_t
 void radeon_kfd_vidmem_ungpumap(struct kfd_dev *kfd, kfd_mem_obj mem_obj);
 int radeon_kfd_vidmem_kmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj, void 
**ptr);
 void radeon_kfd_vidmem_unkmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj);
+/*
+ * Allocate a memory object and map it for both CPU (@ptr) and GPU
+ * (@vmid0_address) access in one call; returns 0 on success.
+ */
+int radeon_kfd_vidmem_alloc_map(struct kfd_dev *kfd, kfd_mem_obj *mem_obj, 
void **ptr,
+                               uint64_t *vmid0_address, size_t size);
+/* GPU-unmap, CPU-unmap and free a memory object, in that order. */
+void radeon_kfd_vidmem_free_unmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj);
 
 /* Character device interface */
 int radeon_kfd_chardev_init(void);
@@ -161,6 +164,17 @@ struct kfd_queue {
        struct kfd_scheduler_queue scheduler_queue;
 };
 
+/*
+ * Selects which queues a preemption request applies to.
+ *
+ * The KFD_PRERMPT_* spellings are kept so that existing users keep
+ * building; the KFD_PREEMPT_* aliases have the same values and should be
+ * used by new code.
+ */
+enum kfd_preempt_type_filter {
+       KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE,
+       KFD_PRERMPT_TYPE_FILTER_ALL_QUEUES,
+       KFD_PRERMPT_TYPE_FILTER_BY_PASID,
+       /* Correctly spelled aliases of the two enumerators above. */
+       KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES = KFD_PRERMPT_TYPE_FILTER_ALL_QUEUES,
+       KFD_PREEMPT_TYPE_FILTER_BY_PASID = KFD_PRERMPT_TYPE_FILTER_BY_PASID
+};
+
+/*
+ * How destroy_mqd() in kfd_mqd_manager.c asks the hardware to dequeue a
+ * queue.
+ */
+enum kfd_preempt_type {
+       /* destroy_mqd() writes DEQUEUE_REQUEST_DRAIN */
+       KFD_PREEMPT_TYPE_WAVEFRONT,
+       /* destroy_mqd() writes DEQUEUE_REQUEST_RESET */
+       KFD_PREEMPT_TYPE_WAVEFRONT_RESET
+};
+
 enum kfd_queue_type  {
        KFD_QUEUE_TYPE_COMPUTE,
        KFD_QUEUE_TYPE_SDMA,
@@ -204,6 +218,14 @@ struct queue {
        struct kfd_dev          *device;
 };
 
+/*
+ * MQD flavours accepted by mqd_manager_init().
+ * CIK_COMPUTE and CIK_CP get the same callback set. CIK_HIQ gets its own
+ * init/update callbacks. CIK_SDMA is not implemented in this patch, so
+ * mqd_manager_init() returns NULL for it.
+ */
+enum KFD_MQD_TYPE {
+       KFD_MQD_TYPE_CIK_COMPUTE = 0, /* for no cp scheduling */
+       KFD_MQD_TYPE_CIK_HIQ, /* for hiq */
+       KFD_MQD_TYPE_CIK_CP, /* for cp queues and diq */
+       KFD_MQD_TYPE_CIK_SDMA, /* for sdma queues */
+       KFD_MQD_TYPE_MAX /* number of types; not a valid type itself */
+};
+
 /* Data that is per-process-per device. */
 struct kfd_process_device {
        /* List of all per-device data for a process. Starts from 
kfd_process.per_device_data. */
@@ -325,10 +347,14 @@ int kgd2kfd_resume(struct kfd_dev *dev);
 int kfd_init_apertures(struct kfd_process *process);
 
 /* Queue Context Management */
+/*
+ * Helpers defined in kfd_mqd_manager.c.
+ * They are declared without "inline": a prototype with no body in the
+ * header cannot be inlined by the files that include it, and the files
+ * that call these helpers need an ordinary external declaration.
+ */
+/* Low 32 bits of @x. */
+uint32_t lower_32(uint64_t x);
+/* High 32 bits of @x. */
+uint32_t upper_32(uint64_t x);
+/* Spin on the CPU; @ms is the requested wait in milliseconds. */
+void busy_wait(unsigned long ms);
 
 int init_queue(struct queue **q, struct queue_properties properties);
 void uninit_queue(struct queue *q);
 void print_queue_properties(struct queue_properties *q);
 void print_queue(struct queue *q);
 
+/*
+ * Create an MQD manager of the given type for @dev.
+ * Returns NULL on failure or for a type that has no implementation.
+ */
+struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev 
*dev);
 #endif
diff --git a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c 
b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
index 30561a6..d576d95 100644
--- a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
+++ b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
@@ -182,16 +182,6 @@ struct cik_static_queue {
        uint32_t queue_size_encoded; /* CP_HQD_PQ_CONTROL.QUEUE_SIZE takes the 
queue size as log2(size) - 3. */
 };
 
-static uint32_t lower_32(uint64_t x)
-{
-       return (uint32_t)x;
-}
-
-static uint32_t upper_32(uint64_t x)
-{
-       return (uint32_t)(x >> 32);
-}
-
 /* SRBM_GFX_CNTL provides the MEC/pipe/queue and vmid for many registers that 
are
  * In particular, CP_HQD_* and CP_MQD_* are instanced for each queue. CP_HPD_* 
are instanced for each pipe.
  * SH_MEM_* are instanced per-VMID.
diff --git a/drivers/gpu/hsa/radeon/kfd_vidmem.c 
b/drivers/gpu/hsa/radeon/kfd_vidmem.c
index c8d3770..9713373 100644
--- a/drivers/gpu/hsa/radeon/kfd_vidmem.c
+++ b/drivers/gpu/hsa/radeon/kfd_vidmem.c
@@ -59,3 +59,39 @@ void radeon_kfd_vidmem_unkmap(struct kfd_dev *kfd, 
kfd_mem_obj mem_obj)
 {
        kfd2kgd->unkmap_mem(kfd->kgd, (struct kgd_mem *)mem_obj);
 }
+
+/*
+ * Allocate a memory object and map it for CPU and GPU access.
+ *
+ * @kfd:           device to allocate on
+ * @mem_obj:       out: the allocated memory object
+ * @ptr:           out: kernel mapping from radeon_kfd_vidmem_kmap()
+ * @vmid0_address: out: address from radeon_kfd_vidmem_gpumap()
+ * @size:          allocation size; the alignment passed is PAGE_SIZE
+ *
+ * Returns 0 on success. On failure the steps that already succeeded are
+ * undone in reverse order and the failing step's error code is returned.
+ */
+int radeon_kfd_vidmem_alloc_map(struct kfd_dev *kfd, kfd_mem_obj *mem_obj,
+                               void **ptr, uint64_t *vmid0_address,
+                               size_t size)
+{
+       int err;
+
+       err = radeon_kfd_vidmem_alloc(kfd, size, PAGE_SIZE,
+                       KFD_MEMPOOL_SYSTEM_WRITECOMBINE, mem_obj);
+       if (err != 0)
+               return err;
+
+       err = radeon_kfd_vidmem_kmap(kfd, *mem_obj, ptr);
+       if (err != 0) {
+               radeon_kfd_vidmem_free(kfd, *mem_obj);
+               return err;
+       }
+
+       err = radeon_kfd_vidmem_gpumap(kfd, *mem_obj, vmid0_address);
+       if (err != 0) {
+               radeon_kfd_vidmem_unkmap(kfd, *mem_obj);
+               radeon_kfd_vidmem_free(kfd, *mem_obj);
+               return err;
+       }
+
+       return 0;
+}
+
+/*
+ * Tear down a memory object set up by radeon_kfd_vidmem_alloc_map():
+ * remove the GPU mapping, then the kernel mapping, then free the object.
+ */
+void radeon_kfd_vidmem_free_unmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj)
+{
+       /* Reverse order of the alloc/kmap/gpumap sequence. */
+       radeon_kfd_vidmem_ungpumap(kfd, mem_obj);
+
+       radeon_kfd_vidmem_unkmap(kfd, mem_obj);
+
+       radeon_kfd_vidmem_free(kfd, mem_obj);
+}
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to