[PATCH 51/83] hsa/radeon: Add packet manager module
From: Ben Goz

The packet manager module builds PM4 packets for the sole use of the CP
scheduler. Those packets are used by the HIQ to submit runlists to the CP.

Signed-off-by: Ben Goz
Signed-off-by: Oded Gabbay
---
 drivers/gpu/hsa/radeon/Makefile             |   2 +-
 drivers/gpu/hsa/radeon/kfd_packet_manager.c | 502
 2 files changed, 503 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/hsa/radeon/kfd_packet_manager.c

diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index f06d925..4978915 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -7,6 +7,6 @@ radeon_kfd-y:= kfd_module.o kfd_device.o kfd_chardev.o \
 		kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
 		kfd_vidmem.o kfd_interrupt.o kfd_aperture.o \
 		kfd_queue.o kfd_hw_pointer_store.o kfd_mqd_manager.o \
-		kfd_kernel_queue.o
+		kfd_kernel_queue.o kfd_packet_manager.o
 
 obj-$(CONFIG_HSA_RADEON) += radeon_kfd.o
diff --git a/drivers/gpu/hsa/radeon/kfd_packet_manager.c b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
new file mode 100644
index 000..4967b7c
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
@@ -0,0 +1,502 @@
+/*
+ * packet_manager.c
+ *
+ * Created on: Mar 16, 2014
+ * Author: ben
+ */
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include "kfd_device_queue_manager.h"
+#include "kfd_kernel_queue.h"
+#include "kfd_priv.h"
+#include "kfd_pm4_headers.h"
+#include "kfd_pm4_opcodes.h"
+#include "cik_mqds.h"
+
+/*
+ * Advance the write index *wptr (counted in dwords) by increment_bytes.
+ * BUG()s if the new index, converted back to bytes, exceeds
+ * buffer_size_bytes.
+ */
+static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, unsigned int buffer_size_bytes)
+{
+	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);
+
+	BUG_ON((temp * sizeof(uint32_t)) > buffer_size_bytes);
+	*wptr = temp;
+}
+
+/*
+ * Build the header dword of a type-3 PM4 packet that is packet_size bytes
+ * long; the count field holds the packet length in dwords minus 2.
+ */
+static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
+{
+	PM4_TYPE_3_HEADER header;
+
+	header.u32all = 0;
+	header.opcode = opcode;
+	header.count = packet_size/sizeof(uint32_t) - 2;
+	header.type = PM4_TYPE_3;
+
+	return header.u32all;
+}
+
+/*
+ * Compute the runlist IB size in bytes: one pm4_map_process per process plus
+ * one pm4_map_queues per queue. The runlist is flagged as over subscribed when
+ * the process count reaches VMID_PER_DEVICE or the queue count reaches
+ * PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE; in that case room for one
+ * extra pm4_runlist packet is added.
+ */
+static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int *rlib_size, bool *over_subscription)
+{
+	unsigned int process_count, queue_count;
+
+	BUG_ON(!pm || !rlib_size || !over_subscription);
+
+	process_count = pm->dqm->processes_count;
+	queue_count = pm->dqm->queue_count;
+
+	/* check if there is over subscription*/
+	*over_subscription = false;
+	if ((process_count >= VMID_PER_DEVICE) ||
+		queue_count >= PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE) {
+		*over_subscription = true;
+		pr_debug("kfd: over subscribed runlist\n");
+	}
+
+	/* calculate run list ib allocation size */
+	*rlib_size = process_count * sizeof(struct pm4_map_process) +
+			queue_count * sizeof(struct pm4_map_queues);
+
+	/* increase the allocation size in case we need a chained run list when over subscription */
+	if (*over_subscription)
+		*rlib_size += sizeof(struct pm4_runlist);
+
+	pr_debug("kfd: runlist ib size %u\n", *rlib_size);
+}
+
+/*
+ * Size, allocate and zero the runlist IB and mark the packet manager as
+ * allocated.
+ *
+ * Returns 0 on success, -EFAULT if the runlist is over subscribed while
+ * sched_policy is KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION, or the error
+ * code of radeon_kfd_vidmem_alloc_map().
+ */
+static int pm_allocate_runlist_ib(struct packet_manager *pm, unsigned int **rl_buffer, uint64_t *rl_gpu_buffer,
+				unsigned int *rl_buffer_size, bool *is_over_subscription)
+{
+	int retval;
+
+	BUG_ON(!pm);
+	BUG_ON(pm->allocated == true);
+
+	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
+	/* test the flag value: the pointer itself is never NULL here */
+	if (*is_over_subscription &&
+			sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION)
+		return -EFAULT;
+
+	retval = radeon_kfd_vidmem_alloc_map(pm->dqm->dev, &pm->ib_buffer_obj, (void **)rl_buffer,
+					rl_gpu_buffer, ALIGN(*rl_buffer_size, PAGE_SIZE));
+	if (retval != 0) {
+		pr_err("kfd: failed to allocate runlist IB\n");
+		return retval;
+	}
+
+	memset(*rl_buffer, 0, *rl_buffer_size);
+	pm->allocated = true;
+	return retval;
+}
+
+/*
+ * Fill buffer with an IT_RUN_LIST packet that carries the IB address ib and
+ * its size in dwords; chain sets the packet's chain bit. Always returns 0.
+ */
+static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+	struct pm4_runlist *packet;
+
+	BUG_ON(!pm || !buffer || !ib);
+
+	packet = (struct pm4_runlist *)buffer;
+
+	memset(buffer, 0, sizeof(struct pm4_runlist));
+	packet->header.u32all = build_pm4_header(IT_RUN_LIST, sizeof(struct pm4_runlist));
+
+	packet->bitfields4.ib_size = ib_size_in_dwords;
+	packet->bitfields4.chain = chain ? 1 : 0;
+	packet->bitfields4.offload_polling = 0;
+	packet->bitfields4.valid = 1;
+	packet->bitfields4.vmid = 0;
+	packet->ordinal2 = lower_32(ib);
+	packet->bitfields3.ib_base_hi = upper_32(ib);
+
+	return 0;
+}
+
+static
[PATCH 51/83] hsa/radeon: Add packet manager module
From: Ben Goz <ben@amd.com>

The packet manager module builds PM4 packets for the sole use of the CP
scheduler. Those packets are used by the HIQ to submit runlists to the CP.

Signed-off-by: Ben Goz <ben@amd.com>
Signed-off-by: Oded Gabbay <oded.gab...@amd.com>
---
 drivers/gpu/hsa/radeon/Makefile             |   2 +-
 drivers/gpu/hsa/radeon/kfd_packet_manager.c | 502
 2 files changed, 503 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/hsa/radeon/kfd_packet_manager.c

diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index f06d925..4978915 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -7,6 +7,6 @@ radeon_kfd-y:= kfd_module.o kfd_device.o kfd_chardev.o \
 		kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
 		kfd_vidmem.o kfd_interrupt.o kfd_aperture.o \
 		kfd_queue.o kfd_hw_pointer_store.o kfd_mqd_manager.o \
-		kfd_kernel_queue.o
+		kfd_kernel_queue.o kfd_packet_manager.o
 
 obj-$(CONFIG_HSA_RADEON) += radeon_kfd.o
diff --git a/drivers/gpu/hsa/radeon/kfd_packet_manager.c b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
new file mode 100644
index 000..4967b7c
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
@@ -0,0 +1,502 @@
+/*
+ * packet_manager.c
+ *
+ * Created on: Mar 16, 2014
+ * Author: ben
+ */
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include "kfd_device_queue_manager.h"
+#include "kfd_kernel_queue.h"
+#include "kfd_priv.h"
+#include "kfd_pm4_headers.h"
+#include "kfd_pm4_opcodes.h"
+#include "cik_mqds.h"
+
+/*
+ * Advance the write index *wptr (counted in dwords) by increment_bytes.
+ * BUG()s if the new index, converted back to bytes, exceeds
+ * buffer_size_bytes.
+ */
+static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, unsigned int buffer_size_bytes)
+{
+	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);
+
+	BUG_ON((temp * sizeof(uint32_t)) > buffer_size_bytes);
+	*wptr = temp;
+}
+
+/*
+ * Build the header dword of a type-3 PM4 packet that is packet_size bytes
+ * long; the count field holds the packet length in dwords minus 2.
+ */
+static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
+{
+	PM4_TYPE_3_HEADER header;
+
+	header.u32all = 0;
+	header.opcode = opcode;
+	header.count = packet_size/sizeof(uint32_t) - 2;
+	header.type = PM4_TYPE_3;
+
+	return header.u32all;
+}
+
+/*
+ * Compute the runlist IB size in bytes: one pm4_map_process per process plus
+ * one pm4_map_queues per queue. The runlist is flagged as over subscribed when
+ * the process count reaches VMID_PER_DEVICE or the queue count reaches
+ * PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE; in that case room for one
+ * extra pm4_runlist packet is added.
+ */
+static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int *rlib_size, bool *over_subscription)
+{
+	unsigned int process_count, queue_count;
+
+	BUG_ON(!pm || !rlib_size || !over_subscription);
+
+	process_count = pm->dqm->processes_count;
+	queue_count = pm->dqm->queue_count;
+
+	/* check if there is over subscription*/
+	*over_subscription = false;
+	if ((process_count >= VMID_PER_DEVICE) ||
+		queue_count >= PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE) {
+		*over_subscription = true;
+		pr_debug("kfd: over subscribed runlist\n");
+	}
+
+	/* calculate run list ib allocation size */
+	*rlib_size = process_count * sizeof(struct pm4_map_process) +
+			queue_count * sizeof(struct pm4_map_queues);
+
+	/* increase the allocation size in case we need a chained run list when over subscription */
+	if (*over_subscription)
+		*rlib_size += sizeof(struct pm4_runlist);
+
+	pr_debug("kfd: runlist ib size %u\n", *rlib_size);
+}
+
+/*
+ * Size, allocate and zero the runlist IB and mark the packet manager as
+ * allocated.
+ *
+ * Returns 0 on success, -EFAULT if the runlist is over subscribed while
+ * sched_policy is KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION, or the error
+ * code of radeon_kfd_vidmem_alloc_map().
+ */
+static int pm_allocate_runlist_ib(struct packet_manager *pm, unsigned int **rl_buffer, uint64_t *rl_gpu_buffer,
+				unsigned int *rl_buffer_size, bool *is_over_subscription)
+{
+	int retval;
+
+	BUG_ON(!pm);
+	BUG_ON(pm->allocated == true);
+
+	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
+	/* test the flag value: the pointer itself is never NULL here */
+	if (*is_over_subscription &&
+			sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION)
+		return -EFAULT;
+
+	retval = radeon_kfd_vidmem_alloc_map(pm->dqm->dev, &pm->ib_buffer_obj, (void **)rl_buffer,
+					rl_gpu_buffer, ALIGN(*rl_buffer_size, PAGE_SIZE));
+	if (retval != 0) {
+		pr_err("kfd: failed to allocate runlist IB\n");
+		return retval;
+	}
+
+	memset(*rl_buffer, 0, *rl_buffer_size);
+	pm->allocated = true;
+	return retval;
+}
+
+/*
+ * Fill buffer with an IT_RUN_LIST packet that carries the IB address ib and
+ * its size in dwords; chain sets the packet's chain bit. Always returns 0.
+ */
+static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+	struct pm4_runlist *packet;
+
+	BUG_ON(!pm || !buffer || !ib);
+
+	packet = (struct pm4_runlist *)buffer;
+
+	memset(buffer, 0, sizeof(struct pm4_runlist));
+	packet->header.u32all = build_pm4_header(IT_RUN_LIST, sizeof(struct pm4_runlist));
+
+	packet->bitfields4.ib_size = ib_size_in_dwords;
+	packet->bitfields4.chain = chain ? 1 : 0;
+	packet->bitfields4.offload_polling = 0;
+	packet->bitfields4.valid = 1;
+	packet->bitfields4.vmid = 0;
+	packet->ordinal2 = lower_32(ib);
+	packet->bitfields3.ib_base_hi =