[PATCH 51/83] hsa/radeon: Add packet manager module

2014-07-10 Thread Oded Gabbay
From: Ben Goz 

The packet manager module builds PM4 packets for the sole use of the CP
scheduler. Those packets are used by the HIQ to submit runlists to the CP.
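
The build_pm4_header() helper in this file fills a PM4 type-3 header: a single
dword carrying the packet type, the IT opcode and a count field equal to the
packet size in dwords minus two. As a rough illustration only (the
authoritative layout lives in kfd_pm4_headers.h, which this patch does not
touch, and may differ in reserved/predication bits), the union it writes to
looks roughly like:

    #include <stdint.h>                 /* userspace stand-in for kernel types */

    /* illustrative sketch, not the definition from kfd_pm4_headers.h */
    typedef union {
        struct {
            uint32_t reserved1:8;       /* low bits, reserved/predication */
            uint32_t opcode:8;          /* IT opcode, e.g. IT_RUN_LIST */
            uint32_t count:14;          /* dwords in the packet minus 2 */
            uint32_t type:2;            /* PM4_TYPE_3 */
        };
        uint32_t u32all;                /* whole header as one dword */
    } PM4_TYPE_3_HEADER;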

Signed-off-by: Ben Goz 
Signed-off-by: Oded Gabbay 
---
 drivers/gpu/hsa/radeon/Makefile |   2 +-
 drivers/gpu/hsa/radeon/kfd_packet_manager.c | 473 
 2 files changed, 474 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/hsa/radeon/kfd_packet_manager.c

diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index f06d925..4978915 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -7,6 +7,6 @@ radeon_kfd-y:= kfd_module.o kfd_device.o kfd_chardev.o \
kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
kfd_vidmem.o kfd_interrupt.o kfd_aperture.o \
kfd_queue.o kfd_hw_pointer_store.o kfd_mqd_manager.o \
-   kfd_kernel_queue.o
+   kfd_kernel_queue.o kfd_packet_manager.o
 
 obj-$(CONFIG_HSA_RADEON)   += radeon_kfd.o
diff --git a/drivers/gpu/hsa/radeon/kfd_packet_manager.c b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
new file mode 100644
index 000..4967b7c
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
@@ -0,0 +1,473 @@
+/*
+ * packet_manager.c
+ *
+ *  Created on: Mar 16, 2014
+ *  Author: ben
+ */
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include "kfd_device_queue_manager.h"
+#include "kfd_kernel_queue.h"
+#include "kfd_priv.h"
+#include "kfd_pm4_headers.h"
+#include "kfd_pm4_opcodes.h"
+#include "cik_mqds.h"
+
+static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
+   unsigned int buffer_size_bytes)
+{
+   unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);
+
+   BUG_ON((temp * sizeof(uint32_t)) > buffer_size_bytes);
+   *wptr = temp;
+}
+
+static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
+{
+   PM4_TYPE_3_HEADER header;
+
+   header.u32all = 0;
+   header.opcode = opcode;
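+   /* count encodes the number of dwords that follow the header, minus one */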
+   header.count = packet_size/sizeof(uint32_t) - 2;
+   header.type = PM4_TYPE_3;
+
+   return header.u32all;
+}
+
+static void pm_calc_rlib_size(struct packet_manager *pm,
+   unsigned int *rlib_size, bool *over_subscription)
+{
+   unsigned int process_count, queue_count;
+
+   BUG_ON(!pm || !rlib_size || !over_subscription);
+
+   process_count = pm->dqm->processes_count;
+   queue_count = pm->dqm->queue_count;
+
+   /* check if there is over subscription */
+   *over_subscription = false;
+   if ((process_count >= VMID_PER_DEVICE) ||
+   queue_count >= PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE) {
+   *over_subscription = true;
+   pr_debug("kfd: over subscribed runlist\n");
+   }
+
+   /* calculate run list ib allocation size */
+   *rlib_size = process_count * sizeof(struct pm4_map_process) +
+queue_count * sizeof(struct pm4_map_queues);
+
+   /* increase the allocation size in case we need a chained run list when over subscription */
+   if (*over_subscription)
+   *rlib_size += sizeof(struct pm4_runlist);
+
+   pr_debug("kfd: runlist ib size %d\n", *rlib_size);
+}
+
+static int pm_allocate_runlist_ib(struct packet_manager *pm,
+   unsigned int **rl_buffer, uint64_t *rl_gpu_buffer,
+   unsigned int *rl_buffer_size, bool *is_over_subscription)
+{
+   int retval;
+
+   BUG_ON(!pm);
+   BUG_ON(pm->allocated == true);
+
+   pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
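+   /* reject the request when the policy does not allow over subscription */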
+   if (*is_over_subscription &&
+   sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION)
+   return -EFAULT;
+
+   retval = radeon_kfd_vidmem_alloc_map(pm->dqm->dev, &pm->ib_buffer_obj,
+   (void **)rl_buffer, rl_gpu_buffer,
+   ALIGN(*rl_buffer_size, PAGE_SIZE));
+   if (retval != 0) {
+   pr_err("kfd: failed to allocate runlist IB\n");
+   return retval;
+   }
+
+   memset(*rl_buffer, 0, *rl_buffer_size);
+   pm->allocated = true;
+   return retval;
+}
+
+static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
+   uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+   struct pm4_runlist *packet;
+
+   BUG_ON(!pm || !buffer || !ib);
+
+   packet = (struct pm4_runlist *)buffer;
+
+   memset(buffer, 0, sizeof(struct pm4_runlist));
+   packet->header.u32all = build_pm4_header(IT_RUN_LIST,
+   sizeof(struct pm4_runlist));
+
+   packet->bitfields4.ib_size = ib_size_in_dwords;
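+   /* when set, chain marks this submission as chained to a further runlist IB */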
+   packet->bitfields4.chain = chain ? 1 : 0;
+   packet->bitfields4.offload_polling = 0;
+   packet->bitfields4.valid = 1;
+   packet->bitfields4.vmid = 0;
+   packet->ordinal2 = lower_32(ib);
+   packet->bitfields3.ib_base_hi = upper_32(ib);
+
+   return 0;
+}
+
+static 
