[PATCH 20/21] drm/amdgpu: condense mqd programming sequence

2017-03-02 Thread Andres Rodriguez
The MQD structure matches the reg layout. Take advantage of this to
simplify HQD programming.

Note that the ACTIVE field still needs to be programmed last.
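
For illustration, a minimal stand-alone C sketch of the idea (names and the register list are placeholders, not the amdgpu API): the MQD image is laid out in register order, so most of it can be written in a loop, with ACTIVE committed last.

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the HQD register file and the MQD image (cik_structs.h in
 * the real driver). Member order deliberately mirrors register order. */
enum { REG_MQD_BASE_ADDR, REG_HQD_VMID, REG_HQD_PQ_BASE, REG_HQD_ACTIVE, REG_COUNT };

struct mqd_image {
        uint32_t mqd_base_addr;
        uint32_t hqd_vmid;
        uint32_t hqd_pq_base;
        uint32_t hqd_active;
};

static uint32_t regs[REG_COUNT];        /* stand-in for WREG32() */

static void commit_mqd(const struct mqd_image *mqd)
{
        const uint32_t *data = (const uint32_t *)mqd;
        int reg;

        /* program everything up to (but not including) ACTIVE in one loop */
        for (reg = REG_MQD_BASE_ADDR; reg < REG_HQD_ACTIVE; reg++)
                regs[reg] = data[reg - REG_MQD_BASE_ADDR];

        /* ACTIVE last, so the HQD only starts once it is fully programmed */
        regs[REG_HQD_ACTIVE] = data[REG_HQD_ACTIVE - REG_MQD_BASE_ADDR];
}

int main(void)
{
        struct mqd_image mqd = { 0x1000, 2, 0x2000, 1 };

        commit_mqd(&mqd);
        printf("ACTIVE reg = %u\n", regs[REG_HQD_ACTIVE]);
        return 0;
}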

Suggested-by: Felix Kuehling 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 44 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 83 +--
 2 files changed, 22 insertions(+), 105 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index b0b0c89..36994bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -3108,61 +3108,39 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device 
*adev,
mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
 
/* activate the queue */
mqd->cp_hqd_active = 1;
 }
 
 int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
 {
-   u32 tmp;
+   uint32_t tmp;
+   uint32_t mqd_reg;
+   uint32_t *mqd_data;
+
+   /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */
+   mqd_data = &mqd->cp_mqd_base_addr_lo;
 
/* disable wptr polling */
tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
 
-   /* program MQD field to HW */
-   WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
-   WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-   WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
-   WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
-   WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
-   WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
-   WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
-   WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
-   WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, 
mqd->cp_hqd_pq_rptr_report_addr_lo);
-   WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 
mqd->cp_hqd_pq_rptr_report_addr_hi);
-   WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
-   WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
-   WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
-
-   WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control);
-   WREG32(mmCP_HQD_IB_BASE_ADDR, mqd->cp_hqd_ib_base_addr_lo);
-   WREG32(mmCP_HQD_IB_BASE_ADDR_HI, mqd->cp_hqd_ib_base_addr_hi);
-   WREG32(mmCP_HQD_IB_RPTR, mqd->cp_hqd_ib_rptr);
-   WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
-   WREG32(mmCP_HQD_SEMA_CMD, mqd->cp_hqd_sema_cmd);
-   WREG32(mmCP_HQD_MSG_TYPE, mqd->cp_hqd_msg_type);
-   WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, mqd->cp_hqd_atomic0_preop_lo);
-   WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, mqd->cp_hqd_atomic0_preop_hi);
-   WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, mqd->cp_hqd_atomic1_preop_lo);
-   WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, mqd->cp_hqd_atomic1_preop_hi);
-   WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-   WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum);
-   WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority);
-   WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority);
-   WREG32(mmCP_HQD_IQ_RPTR, mqd->cp_hqd_iq_rptr);
+   /* program all HQD registers */
+   for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
+   WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
/* activate the HQD */
-   WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
+   for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; 
mqd_reg++)
+   WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
return 0;
 }
 
 static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
 {
int r;
u64 mqd_gpu_addr;
struct cik_mqd *mqd;
struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 38a5099..63bfdf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4919,99 +4919,38 @@ static void gfx_v8_0_enable_doorbell(struct 
amdgpu_device *adev, bool enable)
tmp = RREG32(mmCP_PQ_STATUS);
tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
WREG32(mmCP_PQ_STATUS, tmp);
 
adev->gfx.doorbell_enabled = true;
 }
 
 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd)
 {
uint32_t tmp;
+   uint32_t mqd_reg;
+   uint32_t *mqd_data;
+
+   /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
+   mqd_data = &mqd->cp_mqd_base_addr_lo;
 
/* disable wptr polling */
tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);

[PATCH 06/21] drm/amdgpu: rename rdev to adev

2017-03-02 Thread Andres Rodriguez
Rename straggler instances of r(adeon)dev to a(mdgpu)dev

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 70 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 14 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  |  2 +-
 4 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index dba8a5b..3200ff9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -53,23 +53,23 @@ int amdgpu_amdkfd_init(void)
if (ret)
kgd2kfd = NULL;
 
 #else
ret = -ENOENT;
 #endif
 
return ret;
 }
 
-bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev)
 {
-   switch (rdev->asic_type) {
+   switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
break;
 #endif
case CHIP_CARRIZO:
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
default:
return false;
@@ -79,119 +79,119 @@ bool amdgpu_amdkfd_load_interface(struct amdgpu_device 
*rdev)
 }
 
 void amdgpu_amdkfd_fini(void)
 {
if (kgd2kfd) {
kgd2kfd->exit();
symbol_put(kgd2kfd_init);
}
 }
 
-void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
if (kgd2kfd)
-   rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
-   rdev->pdev, kfd2kgd);
+   adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
+   adev->pdev, kfd2kgd);
 }
 
-void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
-   if (rdev->kfd) {
+   if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = 0xFF00,
 
.first_compute_pipe = 1,
.compute_pipe_count = 4 - 1,
};
 
-   amdgpu_doorbell_get_kfd_info(rdev,
+   amdgpu_doorbell_get_kfd_info(adev,
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);

-   kgd2kfd->device_init(rdev->kfd, &gpu_resources);
+   kgd2kfd->device_init(adev->kfd, &gpu_resources);
}
 }
 
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
 {
-   if (rdev->kfd) {
-   kgd2kfd->device_exit(rdev->kfd);
-   rdev->kfd = NULL;
+   if (adev->kfd) {
+   kgd2kfd->device_exit(adev->kfd);
+   adev->kfd = NULL;
}
 }
 
-void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
const void *ih_ring_entry)
 {
-   if (rdev->kfd)
-   kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
+   if (adev->kfd)
+   kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
 }
 
-void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
 {
-   if (rdev->kfd)
-   kgd2kfd->suspend(rdev->kfd);
+   if (adev->kfd)
+   kgd2kfd->suspend(adev->kfd);
 }
 
-int amdgpu_amdkfd_resume(struct amdgpu_device *rdev)
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
 {
int r = 0;
 
-   if (rdev->kfd)
-   r = kgd2kfd->resume(rdev->kfd);
+   if (adev->kfd)
+   r = kgd2kfd->resume(adev->kfd);
 
return r;
 }
 
 int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr)
 {
-   struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+   struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
int r;
 
BUG_ON(kgd == NULL);
BUG_ON(gpu_addr == NULL);
BUG_ON(cpu_ptr == NULL);
 
*mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
if ((*mem) == NULL)
return -ENOMEM;
 
-   r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
+   r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
 AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 
&(*mem)->bo);
if (r) {
-   dev_err(rdev->dev,
+   dev_err(adev->dev,
   

[PATCH 11/21] drm/amdkfd: allow split HQD on per-queue granularity v4

2017-03-02 Thread Andres Rodriguez
Update the KGD to KFD interface to allow sharing pipes with queue
granularity instead of pipe granularity.

This allows for more interesting pipe/queue splits.

v2: fix overflow check for res.queue_mask
v3: fix shift overflow when setting res.queue_mask
v4: fix comment in is_pipeline_enabled()
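
For reference, a small self-contained sketch of the flat bit indexing this interface implies (one bit per queue, MEC-major); the counts below are example values, not taken from any specific ASIC.

#include <stdio.h>

#define NUM_MEC            2    /* example values only */
#define NUM_PIPE_PER_MEC   4
#define NUM_QUEUE_PER_PIPE 8

/* flat bit index used by a queue bitmap shared at queue granularity */
static int queue_bit(int mec, int pipe, int queue)
{
        return (mec * NUM_PIPE_PER_MEC + pipe) * NUM_QUEUE_PER_PIPE + queue;
}

int main(void)
{
        /* e.g. MEC0, pipe 2, queue 5 */
        printf("bit index = %d\n", queue_bit(0, 2, 5));
        return 0;
}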

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  22 -
 drivers/gpu/drm/amd/amdkfd/kfd_device.c|   4 +
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 100 ++---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  10 +--
 .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c|   3 +-
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |   2 +-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h|  17 ++--
 drivers/gpu/drm/radeon/radeon_kfd.c|  21 -
 9 files changed, 126 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 3200ff9..8fc5aa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -88,28 +88,44 @@ void amdgpu_amdkfd_fini(void)
 
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
if (kgd2kfd)
adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
adev->pdev, kfd2kgd);
 }
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
+   int i;
+   int last_valid_bit;
if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = 0xFF00,
-
-   .first_compute_pipe = 1,
-   .compute_pipe_count = 4 - 1,
+   .num_mec = adev->gfx.mec.num_mec,
+   .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
+   .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
};
 
+   /* this is going to have a few of the MSBs set that we need to
+* clear */
+   bitmap_complement(gpu_resources.queue_bitmap,
+ adev->gfx.mec.queue_bitmap,
+ KGD_MAX_QUEUES);
+
+   /* According to linux/bitmap.h we shouldn't use bitmap_clear if
+* nbits is not compile time constant */
+   last_valid_bit = adev->gfx.mec.num_mec
+   * adev->gfx.mec.num_pipe_per_mec
+   * adev->gfx.mec.num_queue_per_pipe;
+   for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+   clear_bit(i, gpu_resources.queue_bitmap);
+
amdgpu_doorbell_get_kfd_info(adev,
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);

kgd2kfd->device_init(adev->kfd, &gpu_resources);
}
 }
 
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3f95f7c..88187bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -219,20 +219,24 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int 
pasid,
return AMD_IOMMU_INV_PRI_RSP_INVALID;
 }
 
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
 const struct kgd2kfd_shared_resources *gpu_resources)
 {
unsigned int size;
 
kfd->shared_resources = *gpu_resources;
 
+   /* We only use the first MEC */
+   if (kfd->shared_resources.num_mec > 1)
+   kfd->shared_resources.num_mec = 1;
+
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
kfd->device_info->mqd_size_aligned;
 
/*
 * calculate max size of runlist packet.
 * There can be only 2 packets at once
 */
size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) +
max_num_of_queues_per_device *
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c064dea..5f28720 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -56,35 +56,58 @@ static void deallocate_sdma_queue(struct 
device_queue_manager *dqm,
unsigned int sdma_queue_id);
 
 static inline
 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
 {
if (type == KFD_QUEUE_TYPE_SDMA)
return KFD_MQD_TYPE_SDMA;
return KFD_MQD_TYPE_CP;
 }
 
-unsigned int get_first_pipe(struct 

[PATCH 12/21] drm/amdgpu: remove duplicate magic constants from amdgpu_amdkfd_gfx*.c

2017-03-02 Thread Andres Rodriguez
This information is already available in adev.

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 12 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 12 ++--
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 910f9d3..5254562 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -32,22 +32,20 @@
 #include "gfx_v7_0.h"
 #include "gca/gfx_7_2_d.h"
 #include "gca/gfx_7_2_enum.h"
 #include "gca/gfx_7_2_sh_mask.h"
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 #include "gmc/gmc_7_1_d.h"
 #include "gmc/gmc_7_1_sh_mask.h"
 #include "cik_structs.h"
 
-#define CIK_PIPE_PER_MEC   (4)
-
 enum {
MAX_TRAPID = 8, /* 3 bits in the bitfield. */
MAX_WATCH_ADDRESSES = 4
 };
 
 enum {
ADDRESS_WATCH_REG_ADDR_HI = 0,
ADDRESS_WATCH_REG_ADDR_LO,
ADDRESS_WATCH_REG_CNTL,
ADDRESS_WATCH_REG_MAX
@@ -179,22 +177,24 @@ static void unlock_srbm(struct kgd_dev *kgd)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
WREG32(mmSRBM_GFX_CNTL, 0);
mutex_unlock(&adev->srbm_mutex);
 }
 
 static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t queue_id)
 {
-   uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
-   uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
+   struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+   uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+   uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
 
 static void release_queue(struct kgd_dev *kgd)
 {
unlock_srbm(kgd);
 }
 
 static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
@@ -247,22 +247,22 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, 
uint32_t pipe_id,
/* amdgpu owns the per-pipe state */
return 0;
 }
 
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
 
-   mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
-   pipe = (pipe_id % CIK_PIPE_PER_MEC);
+   mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+   pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
lock_srbm(kgd, mec, pipe, 0, 0);
 
WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
 
unlock_srbm(kgd);
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 5843368..db7410a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -32,22 +32,20 @@
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
 #include "oss/oss_3_0_sh_mask.h"
 #include "oss/oss_3_0_d.h"
 #include "gmc/gmc_8_1_sh_mask.h"
 #include "gmc/gmc_8_1_d.h"
 #include "vi_structs.h"
 #include "vid.h"
 
-#define VI_PIPE_PER_MEC(4)
-
 struct cik_sdma_rlc_registers;
 
 /*
  * Register access functions
  */
 
 static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases);
@@ -140,22 +138,24 @@ static void unlock_srbm(struct kgd_dev *kgd)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
WREG32(mmSRBM_GFX_CNTL, 0);
mutex_unlock(&adev->srbm_mutex);
 }
 
 static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t queue_id)
 {
-   uint32_t mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
-   uint32_t pipe = (pipe_id % VI_PIPE_PER_MEC);
+   struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+   uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+   uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
 
 static void release_queue(struct kgd_dev *kgd)
 {
unlock_srbm(kgd);
 }
 
 static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
@@ -209,22 +209,22 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, 
uint32_t pipe_id,
/* amdgpu owns the per-pipe state */
return 0;
 }
 
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
 
-   mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
-   pipe = (pipe_id % VI_PIPE_PER_MEC);
+   mec = (++pipe_id / 

[PATCH 19/21] drm/amdgpu: implement ring set_priority for gfx_v8 compute v3

2017-03-02 Thread Andres Rodriguez
Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over
other queues on the same pipe. Multiple queues on a pipe are timesliced,
so this gives us full precedence over other queues.

Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the
wave as follows:
0x2: CS_H
0x1: CS_M
0x0: CS_L

The SPI block will then dispatch work according to the policy set by
SPI_ARB_PRIORITY. In the current policy CS_H is higher priority than
gfx.

To prevent CUs from bouncing back and forth between GFX and high
priority compute, which would introduce further latency, we reserve
CUs 2 and above for high priority compute on demand.

v2: fix srbm_select to ring->queue and use ring->funcs->type
v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_*
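
As a rough illustration of how a scheduler priority could map onto the two fields discussed above (a sketch only: the CS_H/CS_M/CS_L encodings come from this commit message, while the queue-priority numbers are assumed):

#include <stdio.h>

enum sched_priority { PRIO_NORMAL, PRIO_HIGH };

struct hqd_prio {
        unsigned int pipe_priority;   /* drives SPI_ARB_PRIORITY: 0x2=CS_H, 0x1=CS_M, 0x0=CS_L */
        unsigned int queue_priority;  /* arbitration among queues on the same pipe (assumed scale) */
};

static struct hqd_prio to_hqd_prio(enum sched_priority p)
{
        struct hqd_prio out = { 0x0, 0 };     /* default: CS_L, lowest queue priority */

        if (p == PRIO_HIGH) {
                out.pipe_priority = 0x2;      /* CS_H: favored by the SPI arbiter over gfx */
                out.queue_priority = 15;      /* assumed "maximum" value for the sketch */
        }
        return out;
}

int main(void)
{
        struct hqd_prio hp = to_hqd_prio(PRIO_HIGH);

        printf("pipe=0x%x queue=%u\n", hp.pipe_priority, hp.queue_priority);
        return 0;
}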

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 96 +-
 3 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e7e92fd..5c12486 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -904,20 +904,23 @@ struct amdgpu_gfx {
uint32_tme_feature_version;
uint32_tce_feature_version;
uint32_tpfp_feature_version;
uint32_trlc_feature_version;
uint32_tmec_feature_version;
uint32_tmec2_feature_version;
struct amdgpu_ring  gfx_ring[AMDGPU_MAX_GFX_RINGS];
unsignednum_gfx_rings;
struct amdgpu_ring  compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
unsignednum_compute_rings;
+   spinlock_t  cu_reserve_lock;
+   uint32_tcu_reserve_pipe_mask;
+   uint32_t
cu_reserve_queue_mask[AMDGPU_MAX_COMPUTE_RINGS];
struct amdgpu_irq_src   eop_irq;
struct amdgpu_irq_src   priv_reg_irq;
struct amdgpu_irq_src   priv_inst_irq;
/* gfx status */
uint32_tgfx_current_status;
/* ce ram size*/
unsignedce_ram_size;
struct amdgpu_cu_info   cu_info;
const struct amdgpu_gfx_funcs   *funcs;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 954e3b9..3f3dc79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1705,20 +1705,21 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* Registers mapping */
/* TODO: block userspace mapping of io register */
spin_lock_init(&adev->mmio_idx_lock);
spin_lock_init(&adev->smc_idx_lock);
spin_lock_init(&adev->pcie_idx_lock);
spin_lock_init(&adev->uvd_ctx_idx_lock);
spin_lock_init(&adev->didt_idx_lock);
spin_lock_init(&adev->gc_cac_idx_lock);
spin_lock_init(&adev->audio_endpt_idx_lock);
spin_lock_init(&adev->mm_stats.lock);
+   spin_lock_init(&adev->gfx.cu_reserve_lock);
 
INIT_LIST_HEAD(&adev->shadow_list);
mutex_init(&adev->shadow_list_lock);

INIT_LIST_HEAD(&adev->gtt_list);
spin_lock_init(&adev->gtt_list_lock);

INIT_LIST_HEAD(&adev->ring_lru_list);
mutex_init(&adev->ring_lru_list_lock);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 5db5bac..38a5099 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -46,21 +46,24 @@
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_enum.h"
 
 #include "dce/dce_10_0_d.h"
 #include "dce/dce_10_0_sh_mask.h"
 
 #include "smu/smu_7_1_3_d.h"
 
 #define GFX8_NUM_GFX_RINGS 1
 #define GFX8_MEC_HPD_SIZE 2048
-
+#define GFX8_CU_RESERVE_RESOURCES 0x45888
+#define GFX8_CU_NUM 8
+#define GFX8_UNRESERVED_CU_NUM 2
+#define GFX8_CU_RESERVE_PIPE_SHIFT 7
 
 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
 
 #define ARRAY_MODE(x)  ((x) << 
GB_TILE_MODE0__ARRAY_MODE__SHIFT)
 #define PIPE_CONFIG(x) ((x) << 
GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
 #define TILE_SPLIT(x)  ((x) << 
GB_TILE_MODE0__TILE_SPLIT__SHIFT)
 #define MICRO_TILE_MODE_NEW(x) ((x) << 
GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
@@ -6667,20 +6670,110 @@ static u32 gfx_v8_0_ring_get_wptr_compute(struct 
amdgpu_ring *ring)
 
 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
 {
struct 

[PATCH 17/21] drm/amdgpu: add parameter to allocate high priority contexts v6

2017-03-02 Thread Andres Rodriguez
Add a new context creation parameter to express a global context priority.

Contexts allocated with AMDGPU_CTX_PRIORITY_HIGH will receive higher
priority to schedule their work than AMDGPU_CTX_PRIORITY_NORMAL
(default) contexts.

v2: Instead of using flags, repurpose __pad
v3: Swap enum values of _NORMAL _HIGH for backwards compatibility
v4: Validate usermode priority and store it
v5: Move priority validation into amdgpu_ctx_ioctl(), headline reword
v6: add UAPI note regarding priorities requiring CAP_SYS_ADMIN
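
The validation rules described above boil down to something like the following stand-alone sketch, with a plain flag standing in for capable(CAP_SYS_ADMIN) and illustrative enum values:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

enum { PRIORITY_NORMAL = 0, PRIORITY_HIGH = 1, MAX_PRIORITY = 2 };   /* illustrative values */

static int validate_ctx_priority(int priority, bool is_admin)
{
        if (priority < 0 || priority >= MAX_PRIORITY)
                return -EINVAL;              /* unknown priority level */
        if (priority == PRIORITY_HIGH && !is_admin)
                return -EACCES;              /* HIGH requires CAP_SYS_ADMIN */
        return 0;
}

int main(void)
{
        printf("high, non-admin: %d\n", validate_ctx_priority(PRIORITY_HIGH, false));
        printf("high, admin:     %d\n", validate_ctx_priority(PRIORITY_HIGH, true));
        return 0;
}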

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   | 38 +++
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  1 +
 include/uapi/drm/amdgpu_drm.h |  8 +-
 4 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 088aa4a..bf15373 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -664,20 +664,21 @@ struct amdgpu_ctx_ring {
struct amd_sched_entity entity;
 };
 
 struct amdgpu_ctx {
struct kref refcount;
struct amdgpu_device*adev;
unsignedreset_counter;
spinlock_t  ring_lock;
struct dma_fence**fences;
struct amdgpu_ctx_ring  rings[AMDGPU_MAX_RINGS];
+   int priority;
bool preamble_presented;
 };
 
 struct amdgpu_ctx_mgr {
struct amdgpu_device*adev;
struct mutexlock;
/* protected by lock */
struct idr  ctx_handles;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 400c66b..8ef6370 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -18,47 +18,56 @@
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  * Authors: monk liu 
  */
 
 #include 
 #include "amdgpu.h"
 
-static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+  int priority,
+  struct amdgpu_ctx *ctx)
 {
unsigned i, j;
int r;
 
+   if (priority < 0 || priority >= AMD_SCHED_MAX_PRIORITY)
+   return -EINVAL;
+
+   if (priority == AMD_SCHED_PRIORITY_HIGH && !capable(CAP_SYS_ADMIN))
+   return -EACCES;
+
memset(ctx, 0, sizeof(*ctx));
ctx->adev = adev;
+   ctx->priority = priority;
kref_init(&ctx->refcount);
spin_lock_init(&ctx->ring_lock);
ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
  sizeof(struct dma_fence*), GFP_KERNEL);
if (!ctx->fences)
return -ENOMEM;
 
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
ctx->rings[i].sequence = 1;
ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
}
 
ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
 
/* create context entity for each ring */
for (i = 0; i < adev->num_rings; i++) {
struct amdgpu_ring *ring = adev->rings[i];
struct amd_sched_rq *rq;
 
-   rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+   rq = &ring->sched.sched_rq[priority];
r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
  rq, amdgpu_sched_jobs);
if (r)
goto failed;
}
 
return 0;
 
 failed:
for (j = 0; j < i; j++)
@@ -83,39 +92,41 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
kfree(ctx->fences);
ctx->fences = NULL;
 
for (i = 0; i < adev->num_rings; i++)
amd_sched_entity_fini(&adev->rings[i]->sched,
  &ctx->rings[i].entity);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
+   uint32_t priority,
uint32_t *id)
 {
struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
struct amdgpu_ctx *ctx;
int r;
 
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
 
mutex_lock(&mgr->lock);
r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
if (r < 0) {
mutex_unlock(&mgr->lock);
kfree(ctx);
return r;
}
+
*id = (uint32_t)r;
-   r = amdgpu_ctx_init(adev, ctx);
+   r = amdgpu_ctx_init(adev, priority, ctx);
if (r) {
idr_remove(&mgr->ctx_handles, *id);

[PATCH 13/21] drm/amdgpu: allocate queues horizontally across pipes

2017-03-02 Thread Andres Rodriguez
Pipes provide better concurrency than queues, so we want to make sure
that apps use queues from different pipes whenever possible.

Optimize for the trivial case where an app consumes rings in order:
adjacent rings should not belong to the same pipe.
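
A tiny stand-alone sketch of the resulting ordering (pipe/queue counts are example values): consecutive ring ids spread across pipes before a pipe is reused.

#include <stdio.h>

#define PIPES_PER_MEC   4    /* example values only */
#define QUEUES_PER_PIPE 8

int main(void)
{
        int ring_id;

        /* "horizontal" allocation: walk pipes first, queues second */
        for (ring_id = 0; ring_id < 8; ring_id++) {
                int pipe  = ring_id % PIPES_PER_MEC;
                int queue = ring_id / PIPES_PER_MEC;

                printf("ring %d -> pipe %d queue %d\n", ring_id, pipe, queue);
        }
        return 0;
}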

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 13 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 78 +++-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 83 +--
 3 files changed, 109 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f9df217..377f58a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1639,20 +1639,33 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
for (i = 0; i < adev->sdma.num_instances; i++)
if (&adev->sdma.instance[i].ring == ring)
break;

if (i < AMDGPU_MAX_SDMA_INSTANCES)
return &adev->sdma.instance[i];
else
return NULL;
 }
 
+static inline bool amdgpu_is_mec_queue_enabled(struct amdgpu_device *adev,
+   int mec, int pipe, int queue)
+{
+   int bit = 0;
+
+   bit += mec * adev->gfx.mec.num_pipe_per_mec
+   * adev->gfx.mec.num_queue_per_pipe;
+   bit += pipe * adev->gfx.mec.num_queue_per_pipe;
+   bit += queue;
+
+   return test_bit(bit, adev->gfx.mec.queue_bitmap);
+}
+
 /*
  * ASICs macro.
  */
 #define amdgpu_asic_set_vga_state(adev, state) 
(adev)->asic_funcs->set_vga_state((adev), (state))
 #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
 #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
 #define amdgpu_asic_set_uvd_clocks(adev, v, d) 
(adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
 #define amdgpu_asic_set_vce_clocks(adev, ev, ec) 
(adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
 #define amdgpu_get_pcie_lanes(adev) (adev)->asic_funcs->get_pcie_lanes((adev))
 #define amdgpu_set_pcie_lanes(adev, l) 
(adev)->asic_funcs->set_pcie_lanes((adev), (l))
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 68265b7..3ca5519 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4720,25 +4720,56 @@ static void gfx_v7_0_gpu_early_init(struct 
amdgpu_device *adev)
case 2:
gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
break;
case 4:
gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
break;
}
adev->gfx.config.gb_addr_config = gb_addr_config;
 }
 
+static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+   int mec, int pipe, int queue)
+{
+   int r;
+   unsigned irq_type;
+   struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+   /* mec0 is me1 */
+   ring->me = mec + 1;
+   ring->pipe = pipe;
+   ring->queue = queue;
+
+   ring->ring_obj = NULL;
+   ring->use_doorbell = true;
+   ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+   sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+   irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+   + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+   + ring->pipe;
+
+   /* type-2 packets are deprecated on MEC, use type-3 instead */
+   r = amdgpu_ring_init(adev, ring, 1024,
+   &adev->gfx.eop_irq, irq_type);
+   if (r)
+   return r;
+
+
+   return 0;
+}
+
 static int gfx_v7_0_sw_init(void *handle)
 {
struct amdgpu_ring *ring;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-   int i, r, ring_id;
+   int i, j, k, r, ring_id;
 
/* EOP Event */
r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
if (r)
return r;
 
/* Privileged reg */
r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
if (r)
return r;
@@ -4772,53 +4803,38 @@ static int gfx_v7_0_sw_init(void *handle)
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
ring->ring_obj = NULL;
sprintf(ring->name, "gfx");
r = amdgpu_ring_init(adev, ring, 1024,
&adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
if (r)
return r;
}
 
-   /* set up the compute queues */
-   for (i = 0, ring_id = 0; i < AMDGPU_MAX_QUEUES; i++) {
-   unsigned irq_type;
-
-   if (!test_bit(i, adev->gfx.mec.queue_bitmap))
-   continue;
-
-   ring = &adev->gfx.compute_ring[ring_id];
-
-  

[PATCH 10/21] drm/amdgpu: teach amdgpu how to enable interrupts for any pipe

2017-03-02 Thread Andres Rodriguez
The current implementation is hardcoded to enable ME1/PIPE0 interrupts
only.

This patch allows amdgpu to enable interrupts for any pipe of ME1.
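
Conceptually the change looks like the sketch below (stand-alone C, with an array standing in for the SRBM-banked CPC_INT_CNTL register and an illustrative bit position): validate the (me, pipe) pair and toggle the enable bit for whichever ME1 pipe was requested.

#include <stdbool.h>
#include <stdio.h>

#define NUM_PIPE_PER_MEC      4              /* example value */
#define TIME_STAMP_INT_ENABLE (1u << 26)     /* illustrative bit position */

static unsigned int cpc_int_cntl[NUM_PIPE_PER_MEC];   /* stand-in for the banked register */

static bool set_compute_eop_irq(int me, int pipe, bool enable)
{
        if (me != 1 || pipe < 0 || pipe >= NUM_PIPE_PER_MEC)
                return false;                          /* only ME1 pipes are configured here */

        if (enable)
                cpc_int_cntl[pipe] |= TIME_STAMP_INT_ENABLE;
        else
                cpc_int_cntl[pipe] &= ~TIME_STAMP_INT_ENABLE;
        return true;
}

int main(void)
{
        printf("pipe 2 enabled: %d\n", set_compute_eop_irq(1, 2, true));
        printf("bad me:         %d\n", set_compute_eop_irq(2, 0, true));
        return 0;
}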

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 48 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 33 
 2 files changed, 34 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index fe46765..68265b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -5032,56 +5032,42 @@ static void gfx_v7_0_set_gfx_eop_interrupt_state(struct 
amdgpu_device *adev,
break;
default:
break;
}
 }
 
 static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device 
*adev,
 int me, int pipe,
 enum 
amdgpu_interrupt_state state)
 {
-   u32 mec_int_cntl, mec_int_cntl_reg;
-
-   /*
-* amdgpu controls only pipe 0 of MEC1. That's why this function only
-* handles the setting of interrupts for this specific pipe. All other
-* pipes' interrupts are set by amdkfd.
+   /* Me 0 is for graphics and Me 2 is reserved for HW scheduling
+* So we should only really be configuring ME 1 i.e. MEC0
 */
-
-   if (me == 1) {
-   switch (pipe) {
-   case 0:
-   mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
-   break;
-   default:
-   DRM_DEBUG("invalid pipe %d\n", pipe);
-   return;
-   }
-   } else {
-   DRM_DEBUG("invalid me %d\n", me);
+   if (me != 1) {
+   DRM_ERROR("Ignoring request to enable interrupts for invalid 
me:%d\n", me);
return;
}
 
-   switch (state) {
-   case AMDGPU_IRQ_STATE_DISABLE:
-   mec_int_cntl = RREG32(mec_int_cntl_reg);
-   mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
-   WREG32(mec_int_cntl_reg, mec_int_cntl);
-   break;
-   case AMDGPU_IRQ_STATE_ENABLE:
-   mec_int_cntl = RREG32(mec_int_cntl_reg);
-   mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
-   WREG32(mec_int_cntl_reg, mec_int_cntl);
-   break;
-   default:
-   break;
+   if (pipe >= adev->gfx.mec.num_pipe_per_mec) {
+   DRM_ERROR("Ignoring request to enable interrupts for invalid "
+   "me:%d pipe:%d\n", pipe, me);
+   return;
}
+
+   mutex_lock(&adev->srbm_mutex);
+   cik_srbm_select(adev, me, pipe, 0, 0);
+
+   WREG32_FIELD(CPC_INT_CNTL, TIME_STAMP_INT_ENABLE,
+   state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
+
+   cik_srbm_select(adev, 0, 0, 0, 0);
+   mutex_unlock(&adev->srbm_mutex);
 }
 
 static int gfx_v7_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
 struct amdgpu_irq_src *src,
 unsigned type,
 enum amdgpu_interrupt_state state)
 {
u32 cp_int_cntl;
 
switch (state) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 1238b3d..861334b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6779,41 +6779,42 @@ static void gfx_v8_0_set_gfx_eop_interrupt_state(struct 
amdgpu_device *adev,
 enum amdgpu_interrupt_state 
state)
 {
WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
 }
 
 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device 
*adev,
 int me, int pipe,
 enum 
amdgpu_interrupt_state state)
 {
-   /*
-* amdgpu controls only pipe 0 of MEC1. That's why this function only
-* handles the setting of interrupts for this specific pipe. All other
-* pipes' interrupts are set by amdkfd.
+   /* Me 0 is for graphics and Me 2 is reserved for HW scheduling
+* So we should only really be configuring ME 1 i.e. MEC0
 */
+   if (me != 1) {
+   DRM_ERROR("Ignoring request to enable interrupts for invalid 
me:%d\n", me);
+   return;
+   }
 
-   if (me == 1) {
-   switch (pipe) {
-   case 0:
-   break;
-   default:
-   DRM_DEBUG("invalid pipe %d\n", pipe);
-   return;
-   }
-   } else {
-   

[PATCH 21/21] drm/amdgpu: workaround tonga HW bug in HQD programming sequence

2017-03-02 Thread Andres Rodriguez
Tonga-based ASICs may experience hangs when an HQD's EOP parameters
are modified.

Work around this HW issue by avoiding writes to these registers on
Tonga ASICs.

Based on the following ROCm commit:
2a0fb8 - drm/amdgpu: Synchronize KFD HQD load protocol with CP scheduler

From the ROCm git repository:
https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver.git

CC: Jay Cornwall 
Suggested-by: Felix Kuehling 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 63bfdf6..a995398 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4931,21 +4931,35 @@ int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 
struct vi_mqd *mqd)
 
/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
mqd_data = &mqd->cp_mqd_base_addr_lo;
 
/* disable wptr polling */
tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
 
/* program all HQD registers */
-   for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
+   for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; 
mqd_reg++)
+   WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+   /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+* This is safe since EOP RPTR==WPTR for any inactive HQD
+* on ASICs that do not support context-save.
+* EOP writes/reads can start anywhere in the ring.
+*/
+   if (adev->asic_type != CHIP_TONGA) {
+   WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
+   WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
+   WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
+   }
+
+   for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; 
mqd_reg++)
WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
/* activate the HQD */
for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; 
mqd_reg++)
WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
return 0;
 }
 
 static int gfx_v8_0_kiq_queue_init(struct amdgpu_ring *ring,
-- 
2.9.3



[PATCH 18/21] drm/amdgpu: add framework for HW specific priority settings v3

2017-03-02 Thread Andres Rodriguez
Add an initial framework for changing the HW priorities of rings. The
framework allows requesting priority changes for the lifetime of an
amdgpu_job. After the job completes the priority will decay to the next
lowest priority for which a request is still valid.

A new ring function set_priority() can now be populated to take care of
the HW specific programming sequence for priority changes.

v2: set priority before emitting IB, and take a ref on amdgpu_job
v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_*
v4: plug amdgpu_ring_restore_priority_cb into amdgpu_job_free_cb
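
The decay behaviour can be modelled with a small refcount-per-level scheme, sketched stand-alone below (names are illustrative, not the amdgpu_ring API): each in-flight job takes a reference on its level, and the effective priority is the highest level that still has a holder.

#include <stdio.h>

enum { PRIO_LOW, PRIO_NORMAL, PRIO_HIGH, PRIO_COUNT };

struct ring_prio {
        int refcount[PRIO_COUNT];
        int current_prio;
};

static void recompute(struct ring_prio *rp)
{
        int p;

        /* fall back to the highest level with an outstanding request */
        for (p = PRIO_COUNT - 1; p > PRIO_LOW; p--)
                if (rp->refcount[p])
                        break;
        rp->current_prio = p;     /* the real driver would call set_priority() here */
}

static void prio_get(struct ring_prio *rp, int prio) { rp->refcount[prio]++; recompute(rp); }
static void prio_put(struct ring_prio *rp, int prio) { rp->refcount[prio]--; recompute(rp); }

int main(void)
{
        struct ring_prio rp = { { 0 }, PRIO_LOW };

        prio_get(&rp, PRIO_HIGH);                 /* high-prio job submitted */
        printf("while running: %d\n", rp.current_prio);
        prio_put(&rp, PRIO_HIGH);                 /* job completed: priority decays */
        printf("after free:    %d\n", rp.current_prio);
        return 0;
}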

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  | 14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 73 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 12 ++
 5 files changed, 100 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index bf15373..e7e92fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -636,21 +636,21 @@ struct amdgpu_flip_work {
 struct amdgpu_ib {
struct amdgpu_sa_bo *sa_bo;
uint32_tlength_dw;
uint64_tgpu_addr;
uint32_t*ptr;
uint32_tflags;
 };
 
 extern const struct amd_sched_backend_ops amdgpu_sched_ops;
 
-int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, int 
priority,
 struct amdgpu_job **job, struct amdgpu_vm *vm);
 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 struct amdgpu_job **job);
 
 void amdgpu_job_free_resources(struct amdgpu_job *job);
 void amdgpu_job_free(struct amdgpu_job *job);
 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
  struct amd_sched_entity *entity, void *owner,
  struct dma_fence **f);
 
@@ -981,20 +981,21 @@ struct amdgpu_cs_parser {
 #define AMDGPU_VM_DOMAIN(1 << 3) /* bit set means in 
virtual memory context */
 
 struct amdgpu_job {
struct amd_sched_jobbase;
struct amdgpu_device*adev;
struct amdgpu_vm*vm;
struct amdgpu_ring  *ring;
struct amdgpu_sync  sync;
struct amdgpu_ib*ibs;
struct dma_fence*fence; /* the hw fence */
+   int priority;
uint32_tpreamble_status;
uint32_tnum_ibs;
void*owner;
uint64_tfence_ctx; /* the fence_context this job uses */
boolvm_needs_flush;
unsignedvm_id;
uint64_tvm_pd_addr;
uint32_tgds_base, gds_size;
uint32_tgws_base, gws_size;
uint32_toa_base, oa_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 605d40e..19ce202 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -179,21 +179,21 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, 
void *data)
 
case AMDGPU_CHUNK_ID_DEPENDENCIES:
break;
 
default:
ret = -EINVAL;
goto free_partial_kdata;
}
}
 
-   ret = amdgpu_job_alloc(p->adev, num_ibs, >job, vm);
+   ret = amdgpu_job_alloc(p->adev, num_ibs, p->ctx->priority, >job, vm);
if (ret)
goto free_all_kdata;
 
if (p->uf_entry.robj)
p->job->uf_addr = uf_offset;
kfree(chunk_array);
return 0;
 
 free_all_kdata:
i = p->nchunks - 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 86a1242..28f8a50 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -32,50 +32,51 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job)
 {
struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
 
DRM_ERROR("ring %s timeout, last signaled seq=%u, last emitted 
seq=%u\n",
  job->base.sched->name,
  atomic_read(&job->ring->fence_drv.last_seq),
  job->ring->fence_drv.sync_seq);
amdgpu_gpu_reset(job->adev);
 }
 
-int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, int 
priority,
 struct amdgpu_job **job, struct amdgpu_vm *vm)
 {
size_t 

[PATCH 15/21] drm/amdgpu: untie user ring ids from kernel ring ids v2

2017-03-02 Thread Andres Rodriguez
Add amdgpu_queue_mgr, a mechanism that decouples usermode's
ring ids from the kernel's ring ids.

The queue manager maintains a per-file descriptor map of user ring ids
to amdgpu_ring pointers. Once a map is created it is permanent (this is
required to maintain FIFO execution guarantees for a context's ring).

Different queue map policies can be configured for each HW IP.
Currently all HW IPs use the identity mapper, i.e. kernel ring id is
equal to the user ring id.

The purpose of this mechanism is to distribute the load across multiple
queues more effectively for HW IPs that support multiple rings.
Userspace clients are unable to check whether a specific resource is in
use by a different client. Therefore, it is up to the kernel driver to
make the optimal choice.

v2: remove amdgpu_queue_mapper_funcs
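
A stand-alone sketch of the identity mapper described above (types and names are illustrative): the first lookup pins user ring i to kernel ring i, and later lookups always return the same ring, so per-context FIFO ordering is preserved.

#include <stdio.h>

#define MAX_RINGS 16

struct ring { int id; };                           /* stand-in for amdgpu_ring */

struct queue_mapper {
        struct ring *queue_map[MAX_RINGS];         /* user ring id -> kernel ring */
};

static struct ring *map_ring_identity(struct queue_mapper *m,
                                      struct ring *hw_rings,
                                      unsigned int user_ring)
{
        if (user_ring >= MAX_RINGS)
                return NULL;
        if (!m->queue_map[user_ring])              /* first use: pin the mapping */
                m->queue_map[user_ring] = &hw_rings[user_ring];
        return m->queue_map[user_ring];
}

int main(void)
{
        struct ring rings[MAX_RINGS] = { { 0 } };
        struct queue_mapper mapper = { { 0 } };
        int i;

        for (i = 0; i < MAX_RINGS; i++)
                rings[i].id = i;

        printf("user ring 3 -> kernel ring %d\n", map_ring_identity(&mapper, rings, 3)->id);
        return 0;
}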

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  22 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c|  70 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 163 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.h |  75 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  |  45 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |   2 +
 8 files changed, 330 insertions(+), 52 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2814aad..0081d0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -17,21 +17,21 @@ amdgpu-y := amdgpu_drv.o
 amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_atombios.o atombios_crtc.o amdgpu_connectors.o \
atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \
amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \
amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
-   amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
+   amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_queue_mgr.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o 
vce_v2_0.o \
amdgpu_amdkfd_gfx_v7.o
 
 amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o 
dce_v6_0.o si_dpm.o si_smc.o
 
 amdgpu-y += \
vi.o mxgpu_vi.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 377f58a..d3f87f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -689,28 +689,45 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, 
struct amdgpu_ring *ring,
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
   struct amdgpu_ring *ring, uint64_t seq);
 
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 struct drm_file *filp);
 
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
 /*
+ * Queue manager related structures
+ */
+struct amdgpu_queue_mapper;
+
+struct amdgpu_queue_mapper {
+   int hw_ip;
+   struct mutexlock;
+   /* protected by lock */
+   struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS];
+};
+
+struct amdgpu_queue_mgr {
+   struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM];
+};
+
+/*
  * file private structure
  */
 
 struct amdgpu_fpriv {
struct amdgpu_vmvm;
struct mutexbo_list_lock;
struct idr  bo_list_handles;
struct amdgpu_ctx_mgr   ctx_mgr;
+   struct amdgpu_queue_mgr queue_mgr;
 };
 
 /*
  * residency list
  */
 
 struct amdgpu_bo_list {
struct mutex lock;
struct amdgpu_bo *gds_obj;
struct amdgpu_bo *gws_obj;
@@ -1723,22 +1740,23 @@ static inline bool amdgpu_is_mec_queue_enabled(struct 
amdgpu_device *adev,
 #define amdgpu_gds_switch(adev, r, v, d, w, a) 
(adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
 
 /* Common functions */
 int amdgpu_gpu_reset(struct amdgpu_device *adev);
 bool amdgpu_need_backup(struct amdgpu_device *adev);
 void amdgpu_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_need_post(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
-int 

[PATCH 09/21] drm/amdgpu: allow split of queues with kfd at queue granularity

2017-03-02 Thread Andres Rodriguez
Previously the queue/pipe split with kfd operated with pipe
granularity. This patch allows amdgpu to take ownership of an arbitrary
set of queues.

It also consolidates the last few magic numbers in the compute
initialization process into mec_init.
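
For reference, the ownership policy reduces to marking bits in a flat per-queue bitmap, roughly as in this stand-alone sketch (counts are example values; the "first pipe of the first MEC" policy matches the patch):

#include <stdio.h>

#define NUM_MEC            2    /* example values only */
#define NUM_PIPE_PER_MEC   4
#define NUM_QUEUE_PER_PIPE 8
#define MAX_QUEUES (NUM_MEC * NUM_PIPE_PER_MEC * NUM_QUEUE_PER_PIPE)

int main(void)
{
        unsigned char amdgpu_owned[MAX_QUEUES] = { 0 };
        int i, count = 0;

        for (i = 0; i < MAX_QUEUES; i++) {
                int pipe = (i / NUM_QUEUE_PER_PIPE) % NUM_PIPE_PER_MEC;
                int mec  = (i / NUM_QUEUE_PER_PIPE) / NUM_PIPE_PER_MEC;

                if (mec == 0 && pipe == 0)   /* policy: amdgpu owns all queues of the first pipe */
                        amdgpu_owned[i] = 1;
        }

        for (i = 0; i < MAX_QUEUES; i++)
                count += amdgpu_owned[i];
        printf("amdgpu owns %d of %d queues; the rest go to the KFD\n", count, MAX_QUEUES);
        return 0;
}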

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |  7 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   | 83 ++---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   | 79 ++-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h |  1 +
 4 files changed, 133 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 15e048c..f9df217 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -39,20 +39,22 @@
 #include 
 #include 
 #include 
 #include 
 #include 
 
 #include 
 #include 
 #include 
 
+#include 
+
 #include "amd_shared.h"
 #include "amdgpu_mode.h"
 #include "amdgpu_ih.h"
 #include "amdgpu_irq.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_ttm.h"
 #include "amdgpu_gds.h"
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
 #include "amdgpu_vm.h"
@@ -766,26 +768,31 @@ struct amdgpu_rlc {
u32 reg_list_format_start;
u32 reg_list_format_separate_start;
u32 starting_offsets_start;
u32 reg_list_format_size_bytes;
u32 reg_list_size_bytes;
 
u32 *register_list_format;
u32 *register_restore;
 };
 
+#define AMDGPU_MAX_QUEUES KGD_MAX_QUEUES
+
 struct amdgpu_mec {
struct amdgpu_bo*hpd_eop_obj;
u64 hpd_eop_gpu_addr;
u32 num_mec;
u32 num_pipe_per_mec;
u32 num_queue_per_pipe;
+
+   /* These are the resources for which amdgpu takes ownership */
+   DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_QUEUES);
 };
 
 struct amdgpu_kiq {
u64 eop_gpu_addr;
struct amdgpu_bo*eop_obj;
struct amdgpu_ring  ring;
struct amdgpu_irq_src   irq;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 2f1faa4..fe46765 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -42,21 +42,20 @@
 #include "gca/gfx_7_2_enum.h"
 #include "gca/gfx_7_2_sh_mask.h"
 
 #include "gmc/gmc_7_0_d.h"
 #include "gmc/gmc_7_0_sh_mask.h"
 
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 
 #define GFX7_NUM_GFX_RINGS 1
-#define GFX7_NUM_COMPUTE_RINGS 8
 #define GFX7_MEC_HPD_SIZE  2048
 
 
 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
 
 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
 MODULE_FIRMWARE("radeon/bonaire_me.bin");
 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
@@ -2795,47 +2794,79 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device 
*adev)
if (unlikely(r != 0))
dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", 
r);
amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
adev->gfx.mec.hpd_eop_obj = NULL;
}
 }
 
+static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev)
+{
+   int i, queue, pipe, mec;
+
+   /* policy for amdgpu compute queue ownership */
+   for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
+   queue = i % adev->gfx.mec.num_queue_per_pipe;
+   pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+   % adev->gfx.mec.num_pipe_per_mec;
+   mec = (i / adev->gfx.mec.num_queue_per_pipe)
+   / adev->gfx.mec.num_pipe_per_mec;
+
+   /* we've run out of HW */
+   if (mec >= adev->gfx.mec.num_mec)
+   break;
+
+   /* policy: amdgpu owns all queues in the first pipe */
+   if (mec == 0 && pipe == 0)
+   set_bit(i, adev->gfx.mec.queue_bitmap);
+   }
+
+   /* update the number of active compute rings */
+   adev->gfx.num_compute_rings =
+   bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_QUEUES);
+
+   /* If you hit this case and edited the policy, you probably just
+* need to increase AMDGPU_MAX_COMPUTE_RINGS */
+   WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS);
+   if (adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)
+   adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
int r;
u32 *hpd;
size_t mec_hpd_size;
 
-   /*
-* KV:2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
-* CI/KB: 1 MEC, 4 

[PATCH 03/21] drm/amdgpu: detect timeout error when deactivating hqd

2017-03-02 Thread Andres Rodriguez
Handle HQD deactivation timeouts instead of ignoring them.
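
The general shape of the fix is the usual bounded-poll pattern, sketched stand-alone below (the fake "hardware" and names are illustrative): report -ETIMEDOUT to the caller instead of pressing on.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static unsigned int fake_active_countdown = 100;   /* fake HW: deactivates after a while */

static bool hqd_active(void)
{
        if (fake_active_countdown)
                fake_active_countdown--;
        return fake_active_countdown != 0;
}

static int hqd_deactivate(unsigned int timeout_iters)
{
        unsigned int i;

        for (i = 0; i < timeout_iters; i++)
                if (!hqd_active())
                        return 0;                  /* queue released */
        return -ETIMEDOUT;                         /* caller must unwind and propagate */
}

int main(void)
{
        printf("deactivate: %d\n", hqd_deactivate(1000));
        return 0;
}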

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 22 --
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 09a..af4b505 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4884,20 +4884,21 @@ static int gfx_v8_0_mqd_commit(struct amdgpu_device 
*adev, struct vi_mqd *mqd)
/* activate the queue */
WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
 
return 0;
 }
 
 static int gfx_v8_0_kiq_queue_init(struct amdgpu_ring *ring,
   struct vi_mqd *mqd,
   u64 mqd_gpu_addr)
 {
+   int r = 0;
struct amdgpu_device *adev = ring->adev;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
uint64_t eop_gpu_addr;
bool is_kiq = false;
 
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
is_kiq = true;
 
if (is_kiq) {
eop_gpu_addr = kiq->eop_gpu_addr;
@@ -4905,34 +4906,45 @@ static int gfx_v8_0_kiq_queue_init(struct amdgpu_ring 
*ring,
} else
eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
ring->queue * GFX8_MEC_HPD_SIZE;
 
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 
gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
 
if (is_kiq) {
-   gfx_v8_0_mqd_deactivate(adev);
+   r = gfx_v8_0_mqd_deactivate(adev);
+   if (r) {
+   dev_err(adev->dev, "failed to deactivate ring %s\n", 
ring->name);
+   goto out_unlock;
+   }
+
gfx_v8_0_enable_doorbell(adev, ring->use_doorbell);
gfx_v8_0_mqd_commit(adev, mqd);
}
 
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
 
if (is_kiq)
gfx_v8_0_kiq_enable(ring);
else
gfx_v8_0_map_queue_enable(&kiq->ring, ring);
 
return 0;
+
+out_unlock:
+   vi_srbm_select(adev, 0, 0, 0, 0);
+   mutex_unlock(&adev->srbm_mutex);
+
+   return r;
 }
 
 static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev)
 {
struct amdgpu_ring *ring = NULL;
int i;
 
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
@@ -5052,24 +5064,30 @@ static int gfx_v8_0_compute_queue_init(struct 
amdgpu_device *adev,
eop_gpu_addr >>= 8;
 
/* init the mqd struct */
memset(mqd, 0, sizeof(struct vi_mqd));
 
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 
gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
 
-   gfx_v8_0_mqd_deactivate(adev);
+   r = gfx_v8_0_mqd_deactivate(adev);
+   if (r) {
+   dev_err(adev->dev, "failed to deactivate ring %s\n", 
ring->name);
+   goto out_unlock;
+   }
+
gfx_v8_0_enable_doorbell(adev, ring->use_doorbell);
gfx_v8_0_mqd_commit(adev, mqd);
 
+out_unlock:
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
 
amdgpu_bo_kunmap(ring->mqd_obj);
 out_unreserve:
amdgpu_bo_unreserve(ring->mqd_obj);
 out:
return r;
 }
 
-- 
2.9.3



[PATCH 08/21] drm/radeon: take ownership of pipe initialization

2017-03-02 Thread Andres Rodriguez
Take ownership of pipe initialization away from KFD.

Note that hpd_eop_gpu_addr was already large enough to accommodate all
pipes.

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/radeon/cik.c| 27 ++-
 drivers/gpu/drm/radeon/radeon_kfd.c | 13 +
 2 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index f6ff41a..82b57ef 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -4588,37 +4588,38 @@ static int cik_cp_compute_resume(struct radeon_device 
*rdev)
return r;
 
/* fix up chicken bits */
tmp = RREG32(CP_CPF_DEBUG);
tmp |= (1 << 23);
WREG32(CP_CPF_DEBUG, tmp);
 
/* init the pipes */
mutex_lock(&rdev->srbm_mutex);
 
-   eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
+   for (i = 0; i < rdev->mec.num_pipe; ++i) {
+   cik_srbm_select(rdev, 0, i, 0, 0);
 
-   cik_srbm_select(rdev, 0, 0, 0, 0);
-
-   /* write the EOP addr */
-   WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
-   WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
+   eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 
2) ;
+   /* write the EOP addr */
+   WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
+   WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 
8);
 
-   /* set the VMID assigned */
-   WREG32(CP_HPD_EOP_VMID, 0);
+   /* set the VMID assigned */
+   WREG32(CP_HPD_EOP_VMID, 0);
 
-   /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-   tmp = RREG32(CP_HPD_EOP_CONTROL);
-   tmp &= ~EOP_SIZE_MASK;
-   tmp |= order_base_2(MEC_HPD_SIZE / 8);
-   WREG32(CP_HPD_EOP_CONTROL, tmp);
+   /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+   tmp = RREG32(CP_HPD_EOP_CONTROL);
+   tmp &= ~EOP_SIZE_MASK;
+   tmp |= order_base_2(MEC_HPD_SIZE / 8);
+   WREG32(CP_HPD_EOP_CONTROL, tmp);
 
+   }
mutex_unlock(&rdev->srbm_mutex);
 
/* init the queues.  Just two for now. */
for (i = 0; i < 2; i++) {
if (i == 0)
idx = CAYMAN_RING_TYPE_CP1_INDEX;
else
idx = CAYMAN_RING_TYPE_CP2_INDEX;
 
if (rdev->ring[idx].mqd_obj == NULL) {
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c 
b/drivers/gpu/drm/radeon/radeon_kfd.c
index 87a9ebb..a06e3b1 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -416,32 +416,21 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev 
*kgd, unsigned int pasid,
/* Mapping vmid to pasid also for IH block */
write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t),
pasid_mapping);
 
return 0;
 }
 
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
-   uint32_t mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
-   uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
-
-   lock_srbm(kgd, mec, pipe, 0, 0);
-   write_register(kgd, CP_HPD_EOP_BASE_ADDR,
-   lower_32_bits(hpd_gpu_addr >> 8));
-   write_register(kgd, CP_HPD_EOP_BASE_ADDR_HI,
-   upper_32_bits(hpd_gpu_addr >> 8));
-   write_register(kgd, CP_HPD_EOP_VMID, 0);
-   write_register(kgd, CP_HPD_EOP_CONTROL, hpd_size);
-   unlock_srbm(kgd);
-
+   /* nothing to do here */
return 0;
 }
 
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
uint32_t mec;
uint32_t pipe;
 
mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
pipe = (pipe_id % CIK_PIPE_PER_MEC);
-- 
2.9.3



[PATCH 07/21] drm/amdgpu: take ownership of per-pipe configuration

2017-03-02 Thread Andres Rodriguez
Make amdgpu the owner of all per-pipe state of the HQDs.

This change will allow us to split the queues between kfd and amdgpu
with a queue granularity instead of pipe granularity.

This patch fixes kfd allocating an HPD_EOP region for its 3 pipes which
goes unused.

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c  | 13 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c  |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  | 28 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 33 +++-
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 45 --
 6 files changed, 49 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b577ec1..15e048c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -769,23 +769,23 @@ struct amdgpu_rlc {
u32 reg_list_format_size_bytes;
u32 reg_list_size_bytes;
 
u32 *register_list_format;
u32 *register_restore;
 };
 
 struct amdgpu_mec {
struct amdgpu_bo *hpd_eop_obj;
u64 hpd_eop_gpu_addr;
-   u32 num_pipe;
u32 num_mec;
-   u32 num_queue;
+   u32 num_pipe_per_mec;
+   u32 num_queue_per_pipe;
 };
 
 struct amdgpu_kiq {
u64 eop_gpu_addr;
struct amdgpu_bo *eop_obj;
struct amdgpu_ring  ring;
struct amdgpu_irq_src   irq;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 038b7ea..910f9d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -237,32 +237,21 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev 
*kgd, unsigned int pasid,
 
/* Mapping vmid to pasid also for IH block */
WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
 
return 0;
 }
 
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
-   struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
-   uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
-   uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
-
-   lock_srbm(kgd, mec, pipe, 0, 0);
-   WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8));
-   WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8));
-   WREG32(mmCP_HPD_EOP_VMID, 0);
-   WREG32(mmCP_HPD_EOP_CONTROL, hpd_size);
-   unlock_srbm(kgd);
-
+   /* amdgpu owns the per-pipe state */
return 0;
 }
 
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
 
mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 2ecef3d..5843368 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -199,20 +199,21 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev 
*kgd, unsigned int pasid,
 
/* Mapping vmid to pasid also for IH block */
WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
 
return 0;
 }
 
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
+   /* amdgpu owns the per-pipe state */
return 0;
 }
 
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
 
mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 03a4cee..2f1faa4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2799,34 +2799,48 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device 
*adev)
 
amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
adev->gfx.mec.hpd_eop_obj = NULL;
}
 }
 
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
int r;
u32 *hpd;
+   size_t mec_hpd_size;
 
/*
 * KV:2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
 * Nonetheless, we assign only 1 pipe because all other pipes will
 * be handled by KFD
 */
-   adev->gfx.mec.num_mec = 1;
-   adev->gfx.mec.num_pipe = 1;
-   adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * 
adev->gfx.mec.num_pipe * 8;
+   switch (adev->asic_type) {
+   

[PATCH 01/21] drm/amdgpu: refactor MQD/HQD initialization

2017-03-02 Thread Andres Rodriguez
The MQD programming sequence currently exists in 3 different places.
Refactor it to absorb all the duplicates.

The success path remains mostly identical except for a slightly
different order in the non-kiq case. This shouldn't matter if the HQD
is disabled.

The error handling paths have been updated to deal with the new code
structure.

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 447 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 417 +++
 2 files changed, 387 insertions(+), 477 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 1f93545..8e1e601 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -42,20 +42,22 @@
 #include "gca/gfx_7_2_sh_mask.h"
 
 #include "gmc/gmc_7_0_d.h"
 #include "gmc/gmc_7_0_sh_mask.h"
 
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 
 #define GFX7_NUM_GFX_RINGS 1
 #define GFX7_NUM_COMPUTE_RINGS 8
+#define GFX7_MEC_HPD_SIZE  2048
+
 
 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
 
 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
 MODULE_FIRMWARE("radeon/bonaire_me.bin");
 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
@@ -2792,40 +2794,38 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device 
*adev)
if (unlikely(r != 0))
dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", 
r);
amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
adev->gfx.mec.hpd_eop_obj = NULL;
}
 }
 
-#define MEC_HPD_SIZE 2048
-
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
int r;
u32 *hpd;
 
/*
 * KV:2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
 * Nonetheless, we assign only 1 pipe because all other pipes will
 * be handled by KFD
 */
adev->gfx.mec.num_mec = 1;
adev->gfx.mec.num_pipe = 1;
adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * 
adev->gfx.mec.num_pipe * 8;
 
if (adev->gfx.mec.hpd_eop_obj == NULL) {
r = amdgpu_bo_create(adev,
-adev->gfx.mec.num_mec 
*adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
+adev->gfx.mec.num_mec * 
adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2,
 PAGE_SIZE, true,
 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 &adev->gfx.mec.hpd_eop_obj);
if (r) {
dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", 
r);
return r;
}
}
 
r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
@@ -2841,21 +2841,21 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
return r;
}
r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
if (r) {
dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
gfx_v7_0_mec_fini(adev);
return r;
}
 
/* clear memory.  Not sure if this is required or not */
-   memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * 
MEC_HPD_SIZE * 2);
+   memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 
GFX7_MEC_HPD_SIZE * 2);
 
amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
return 0;
 }
 
 struct hqd_registers
 {
u32 cp_mqd_base_addr;
@@ -2916,261 +2916,296 @@ struct bonaire_mqd
u32 restart[3];
u32 thread_trace_enable;
u32 reserved1;
u32 user_data[16];
u32 vgtcs_invoke_count[2];
struct hqd_registers queue_state;
u32 dequeue_cntr;
u32 interrupt_queue[64];
 };
 
-/**
- * gfx_v7_0_cp_compute_resume - setup the compute queue registers
- *
- * @adev: amdgpu_device pointer
- *
- * Program the compute queues and test them to make sure they
- * are working.
- * Returns 0 for success, error for failure.
- */
-static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int me, int 
pipe)
 {
-   int r, i, j;
-   u32 tmp;
-   bool use_doorbell = true;
-   u64 hqd_gpu_addr;
-   u64 mqd_gpu_addr;
u64 eop_gpu_addr;
-   u64 wb_gpu_addr;
-   u32 *buf;
-   struct bonaire_mqd *mqd;
-   struct 

[PATCH] Add support for high priority scheduling in amdgpu v4

2017-03-02 Thread Andres Rodriguez
This revision implements Christian's latest feedback:

1) Avoid indirection in amdgpu_queue_manager, call the map functions directly
2) Drop refcounting patch for amdgpu_job
3) Restore ring priority from amdgpu_job_free_cb()


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Added more hqd debug messages

2017-03-02 Thread Jay Cornwall
On Wed, Mar 1, 2017, at 16:28, Zeng, Oak wrote:
> COMPUTE_PGM* registers are per pipe per queue - each queue of each pipe
> has a copy of those registers.

COMPUTE_* are ADC registers. These are instantiated once per pipe. The
values they hold correspond to the most recent values written from the
connected queue (the one selected for execution at a given time) on the
pipe. They're saved to the MQD of the connected queue before a different
queue is selected for execution.

Alex is right. They're not indexed via SRBM_GFX_CNTL.QUEUE.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH xf86-video-amdgpu 6/6] Allow toggling TearFree at runtime via output property

2017-03-02 Thread Alex Deucher
On Thu, Mar 2, 2017 at 4:18 AM, Michel Dänzer  wrote:
> From: Michel Dänzer 
>
> Option "TearFree" now sets the default value of the output property.
> See the manpage update for details.
>
> TearFree is now enabled by default for outputs using rotation or other
> RandR transforms, and for RandR 1.4 slave outputs.
>
> (Ported from radeon commit 58cd1600057e41aade0106d4acf78e23eac6e44f)
>
> Signed-off-by: Michel Dänzer 

Series is:
Reviewed-by: Alex Deucher 

> ---
>  man/amdgpu.man|  15 +++--
>  src/amdgpu_dri2.c |  34 +--
>  src/amdgpu_drv.h  |   2 +-
>  src/amdgpu_kms.c  |  57 ++
>  src/drmmode_display.c | 162 
> +-
>  src/drmmode_display.h |   2 +
>  6 files changed, 221 insertions(+), 51 deletions(-)
>
> diff --git a/man/amdgpu.man b/man/amdgpu.man
> index 0e5c291d..53bd768a 100644
> --- a/man/amdgpu.man
> +++ b/man/amdgpu.man
> @@ -73,10 +73,17 @@ Enable DRI2 page flipping.  The default is
>  .B on.
>  .TP
>  .BI "Option \*qTearFree\*q \*q" boolean \*q
> -Enable tearing prevention using the hardware page flipping mechanism. 
> Requires allocating two
> -separate scanout buffers for each CRTC. Enabling this option currently 
> disables Option
> -\*qEnablePageFlip\*q. The default is
> -.B off.
> +Set the default value of the per-output 'TearFree' property, which controls
> +tearing prevention using the hardware page flipping mechanism. TearFree is
> +on for any CRTC associated with one or more outputs with TearFree on. Two
> +separate scanout buffers need to be allocated for each CRTC with TearFree
> +on. While TearFree is on for any CRTC, it currently prevents clients from 
> using
> +DRI page flipping. If this option is set, the default value of the property 
> is
> +'on' or 'off' accordingly. If this option isn't set, the default value of the
> +property is
> +.B auto,
> +which means that TearFree is on for outputs with rotation or other RandR
> +transforms, and for RandR 1.4 slave outputs, otherwise off.
>  .TP
>  .BI "Option \*qAccelMethod\*q \*q" string \*q
>  Setting this option to
> diff --git a/src/amdgpu_dri2.c b/src/amdgpu_dri2.c
> index a83d2177..8dde2930 100644
> --- a/src/amdgpu_dri2.c
> +++ b/src/amdgpu_dri2.c
> @@ -51,6 +51,7 @@
>  #include "amdgpu_list.h"
>
>  #include 
> +#include 
>
>  #if DRI2INFOREC_VERSION >= 9
>  #define USE_DRI2_PRIME
> @@ -637,13 +638,34 @@ can_flip(ScrnInfoPtr pScrn, DrawablePtr draw,
>  DRI2BufferPtr front, DRI2BufferPtr back)
>  {
> AMDGPUInfoPtr info = AMDGPUPTR(pScrn);
> +   xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(pScrn);
> +   int num_crtcs_on;
> +   int i;
> +
> +   if (draw->type != DRAWABLE_WINDOW ||
> +   !info->allowPageFlip ||
> +   info->hwcursor_disabled ||
> +   info->drmmode.present_flipping ||
> +   !pScrn->vtSema ||
> +   !DRI2CanFlip(draw))
> +   return FALSE;
> +
> +   for (i = 0, num_crtcs_on = 0; i < config->num_crtc; i++) {
> +   xf86CrtcPtr crtc = config->crtc[i];
> +   drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
> +
> +   if (!crtc->enabled)
> +   continue;
> +
> +   if (!drmmode_crtc || drmmode_crtc->rotate.bo ||
> +   drmmode_crtc->scanout[0].bo)
> +   return FALSE;
> +
> +   if (drmmode_crtc->pending_dpms_mode == DPMSModeOn)
> +   num_crtcs_on++;
> +   }
>
> -   return draw->type == DRAWABLE_WINDOW &&
> -   info->allowPageFlip &&
> -   !info->hwcursor_disabled &&
> -   !info->drmmode.present_flipping &&
> -   pScrn->vtSema &&
> -   DRI2CanFlip(draw) && can_exchange(pScrn, draw, front, back);
> +   return num_crtcs_on > 0 && can_exchange(pScrn, draw, front, back);
>  }
>
>  static void
> diff --git a/src/amdgpu_drv.h b/src/amdgpu_drv.h
> index 3a24fa73..2aaafe43 100644
> --- a/src/amdgpu_drv.h
> +++ b/src/amdgpu_drv.h
> @@ -221,7 +221,7 @@ typedef struct {
> Bool use_glamor;
> Bool force_accel;
> Bool shadow_primary;
> -   Bool tear_free;
> +   int tear_free;
>
> /* general */
> OptionInfoPtr Options;
> diff --git a/src/amdgpu_kms.c b/src/amdgpu_kms.c
> index 10a68fbd..bafcb9bb 100644
> --- a/src/amdgpu_kms.c
> +++ b/src/amdgpu_kms.c
> @@ -575,7 +575,6 @@ amdgpu_prime_scanout_do_update(xf86CrtcPtr crtc, unsigned 
> scanout_id)
>  {
> ScrnInfoPtr scrn = crtc->scrn;
> ScreenPtr screen = scrn->pScreen;
> -   AMDGPUInfoPtr info = AMDGPUPTR(scrn);
> drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
> PixmapPtr scanoutpix = crtc->randr_crtc->scanout_pixmap;
> PixmapDirtyUpdatePtr dirty;
> @@ -583,7 +582,7 @@ amdgpu_prime_scanout_do_update(xf86CrtcPtr 

Re: [PATCH xf86-video-amdgpu 3/3] Handle rotation in the driver also with Xorg 1.12-1.18

2017-03-02 Thread Alex Deucher
On Thu, Mar 2, 2017 at 2:59 AM, Michel Dänzer  wrote:
> From: Michel Dänzer 
>
> We cannot use the HW cursor in that case, but in turn we get more
> efficient and less teary updates of rotated outputs.
>
> (Ported from radeon commit f2bc882f1c1082bed9f496cfab6c8f07a76bc122)
>
> Signed-off-by: Michel Dänzer 

Series is:
Reviewed-by: Alex Deucher 


> ---
>  src/drmmode_display.c | 12 +---
>  1 file changed, 5 insertions(+), 7 deletions(-)
>
> diff --git a/src/drmmode_display.c b/src/drmmode_display.c
> index b3c850c5..67dd61b6 100644
> --- a/src/drmmode_display.c
> +++ b/src/drmmode_display.c
> @@ -589,22 +589,20 @@ drmmode_can_use_hw_cursor(xf86CrtcPtr crtc)
>
>  #if XF86_CRTC_VERSION >= 4
>
> +#if XF86_CRTC_VERSION < 7
> +#define XF86DriverTransformOutput TRUE
> +#define XF86DriverTransformNone FALSE
> +#endif
> +
>  static Bool
>  drmmode_handle_transform(xf86CrtcPtr crtc)
>  {
> Bool ret;
>
> -#if XF86_CRTC_VERSION >= 7
> if (crtc->transformPresent || crtc->rotation != RR_Rotate_0)
> crtc->driverIsPerformingTransform = XF86DriverTransformOutput;
> else
> crtc->driverIsPerformingTransform = XF86DriverTransformNone;
> -#else
> -   AMDGPUInfoPtr info = AMDGPUPTR(crtc->scrn);
> -
> -   crtc->driverIsPerformingTransform = crtc->transformPresent ||
> -   (info->tear_free && crtc->rotation != RR_Rotate_0);
> -#endif
>
> ret = xf86CrtcRotate(crtc);
>
> --
> 2.11.0
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH xf86-video-ati] Don't call radeon_cs_flush_indirect & radeon_bo_wait in drmmode_copy_fb

2017-03-02 Thread Alex Deucher
On Thu, Mar 2, 2017 at 2:23 AM, Michel Dänzer  wrote:
> From: Michel Dänzer 
>
> RADEONWindowExposures_oneshot takes care of it.
>
> Signed-off-by: Michel Dänzer 

Reviewed-by: Alex Deucher 

> ---
>  src/drmmode_display.c | 3 ---
>  1 file changed, 3 deletions(-)
>
> diff --git a/src/drmmode_display.c b/src/drmmode_display.c
> index a7904a396..ab11583a6 100644
> --- a/src/drmmode_display.c
> +++ b/src/drmmode_display.c
> @@ -490,9 +490,6 @@ void drmmode_copy_fb(ScrnInfoPtr pScrn, drmmode_ptr 
> drmmode)
>
> FreeScratchGC(gc);
>
> -   radeon_cs_flush_indirect(pScrn);
> -   radeon_bo_wait(info->front_bo);
> -
> pScreen->canDoBGNoneRoot = TRUE;
> destroy_pixmap_for_fbcon(pScrn);
> return;
> --
> 2.11.0
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 15/22] drm/amdgpu: add a mechanism to untie user ring ids from kernel ring ids

2017-03-02 Thread Christian König

On 02.03.2017 at 09:02, Andres Rodriguez wrote:

Add amdgpu_queue_mgr, a mechanism that allows disjointing usermode's
ring ids from the kernel's ring ids.

The queue manager maintains a per-file descriptor map of user ring ids
to amdgpu_ring pointers. Once a map is created it is permanent (this is
required to maintain FIFO execution guarantees for a context's ring).

Different queue map policies can be configured for each HW IP.
Currently all HW IPs use the identity mapper, i.e. kernel ring id is
equal to the user ring id.

The purpose of this mechanism is to distribute the load across multiple
queues more effectively for HW IPs that support multiple rings.
Userspace clients are unable to check whether a specific resource is in
use by a different client. Therefore, it is up to the kernel driver to
make the optimal choice.

Signed-off-by: Andres Rodriguez 
---
  drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  31 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c|  70 
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |   3 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 157 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.h |  75 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  |  45 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |   2 +
  8 files changed, 333 insertions(+), 52 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2814aad..0081d0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
-   amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
+   amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_queue_mgr.o
  
  # add asic specific block

  amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 377f58a..dc79c0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -696,6 +696,31 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
  void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
  
  /*

+ * Queue manager related structures
+ */
+struct amdgpu_queue_mapper;
+
+struct amdgpu_queue_mapper_funcs {
+   /* map a userspace ring id to a kernel ring id */
+   int (*map)(struct amdgpu_device *adev,
+  struct amdgpu_queue_mapper *mapper,
+  int ring,
+  struct amdgpu_ring **out_ring);
+};


Don't add indirection if you don't abstract any hardware difference with it.

Just call the appropriate mapping function directly based on the ring type.
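
A minimal standalone sketch of that suggestion: dispatch on the HW IP type
directly instead of going through mapper->funcs. The map function names mirror
the patch, but the types and bodies here are stubs:

#include <stdio.h>

enum hw_ip { HW_IP_GFX, HW_IP_COMPUTE, HW_IP_DMA };

static int identity_map(int user_ring, int *out_ring)
{
	*out_ring = user_ring;		/* kernel ring id == user ring id */
	return 0;
}

static int lru_map(int user_ring, int *out_ring)
{
	*out_ring = 0;			/* stub: would pick the LRU queue */
	return 0;
}

/* no funcs pointer: the policy is chosen from the ring type */
static int queue_map(enum hw_ip ip, int user_ring, int *out_ring)
{
	switch (ip) {
	case HW_IP_COMPUTE:
		return lru_map(user_ring, out_ring);
	default:
		return identity_map(user_ring, out_ring);
	}
}

int main(void)
{
	int ring;

	queue_map(HW_IP_COMPUTE, 3, &ring);
	printf("user ring 3 -> kernel ring %d\n", ring);
	return 0;
}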

Regards,
Christian.


+
+struct amdgpu_queue_mapper {
+   struct amdgpu_queue_mapper_funcs *funcs;
+   int hw_ip;
+   struct mutex lock;
+   /* protected by lock */
+   struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS];
+};
+
+struct amdgpu_queue_mgr {
+   struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM];
+};
+
+/*
   * file private structure
   */
  
@@ -704,6 +729,7 @@ struct amdgpu_fpriv {

struct mutex bo_list_lock;
struct idr  bo_list_handles;
struct amdgpu_ctx_mgr   ctx_mgr;
+   struct amdgpu_queue_mgr queue_mgr;
  };
  
  /*

@@ -1730,8 +1756,9 @@ bool amdgpu_need_post(struct amdgpu_device *adev);
  void amdgpu_update_display_priority(struct amdgpu_device *adev);
  
  int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);

-int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
-  u32 ip_instance, u32 ring,
+int amdgpu_cs_get_ring(struct amdgpu_device *adev,
+  struct amdgpu_queue_mgr *mgr,
+  u32 ip_type, u32 ip_instance, u32 user_ring,
   struct amdgpu_ring **out_ring);
  void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
  void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 57301f5..605d40e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -29,60 +29,28 @@
  #include 
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
+#include "amdgpu_queue_mgr.h"
  
-int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,

-  u32 ip_instance, u32 ring,
+int amdgpu_cs_get_ring(struct amdgpu_device *adev,
+  

Re: Add support for high priority scheduling in amdgpu

2017-03-02 Thread Christian König

On 01.03.2017 at 18:24, Andres Rodriguez wrote:



On 2017-03-01 12:13 PM, Andres Rodriguez wrote:



On 3/1/2017 6:42 AM, Christian König wrote:
Patches #1-#14 are Acked-by: Christian König.


Patch #15:

Not sure if that is a good idea or not, need to take a closer look 
after digging through the rest.


In general the HW IP is just for the IOCTL API and not for internal 
use inside the driver.

I'll drop this patch and use ring->funcs->type instead.


Patch #16:

Really nice :) I don't have time to look into it in detail, but you 
have one misconception I like to point out:

The queue manager maintains a per-file descriptor map of user ring ids
to amdgpu_ring pointers. Once a map is created it is permanent 
(this is

required to maintain FIFO execution guarantees for a ring).
Actually we don't have a FIFO execution guarantee per ring. We only 
have that per context.


Agreed. I'm using pretty imprecise terminology here which can be 
confusing. I wanted to be more precise than "context", because two 
amdgpu_cs_request submissions to the same context but with a 
different ring field can execute out of order.


I think s/ring/context's ring/ should be enough to clarify here if 
you think so as well.


Yeah, just fix the description a bit and we are good to go.





E.g. commands from different contexts can execute at the same time and out of order.


Making this per file is ok for now, but you should keep in mind that 
we might want to change that sooner or later.


Patches #17 & #18: I need to take a closer look when I have more time, 
but the comments from others sounded valid to me as well.


Patch #19: Raising and lowering the priority of a ring during 
command submission doesn't sound like a good idea to me.
I'm not really sure what would be a better time than at command 
submission.


If it was just SPI priorities we could have static partitioning of 
rings, some high priority and some regular, etc. But that approach 
reduces the number of rings
Sorry, I finished typing something else and forgot this section was 
incomplete. Full reply:


I'm not really sure what would be a better time than at command 
submission.


If it was just SPI priorities we could have static partitioning of 
rings, some high priority and some regular, etc. But that approach 
reduces the number of rings available. It would also require a 
callback at command submission time for CU reservation.


Ok, as Alex wrote as well, I'm not 100% sure if that really works on all 
hardware. But we could give it a try, because I don't see much of a better 
alternative either.




The way you currently have it implemented would also raise the 
priority of already running jobs on the same ring. Keep in mind that 
everything is pipelined here.
That is actually intentional. If there is work already on the ring 
with lower priority we don't want the high priority work to have to 
wait for it to finish executing at regular priority. Therefore the 
work that has already been committed to the ring inherits the higher 
priority level.


I agree this isn't ideal, which is why the LRU ring mapping policy is 
there to make sure this doesn't happen often.


In addition to that, you can't have a fence callback in the job 
structure, because the job structure is freed by the same fence as 
well. So it can happen that you access freed-up memory (but only for 
a very short period of time).
Any strong preference for either 1) refcounting the job structure, or 
2) allocating a new piece of memory to store the callback parameters?


How about option #3, just add that to the job lifetime.

See drivers/gpu/drm/amd/amdgpu/amdgpu_job.c. amdgpu_job_run() is called 
when the job is ready to run.


amdgpu_job_free_cb() is called from a work item after a scheduler job has 
finished executing. If that is too late, we could also add another 
callback to the scheduler for this.


amdgpu_job_free() is called when we directly submitted the job without 
going through the scheduler. Don't touch it that is just for GPU reset 
handling.


Regards,
Christian.



Patches #20-#22 are Acked-by: Christian König.


Regards,
Christian.

On 28.02.2017 at 23:14, Andres Rodriguez wrote:
This patch series introduces a mechanism that allows users with 
sufficient
privileges to categorize their work as "high priority". A userspace 
app can
create a high priority amdgpu context, where any work submitted to 
this context

will receive preferential treatment over any other work.

High priority contexts will be scheduled ahead of other contexts by 
the sw gpu

scheduler. This functionality is generic for all HW blocks.

Optionally, a ring can implement a set_priority() function that allows
programming HW specific features to elevate a ring's priority.
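
A minimal standalone sketch of that optional-hook pattern; the struct layout,
names and priority values are illustrative, not the actual amdgpu interfaces:

#include <stdio.h>

struct ring;

struct ring_funcs {
	/* optional: raise/lower the HW priority of this ring */
	void (*set_priority)(struct ring *ring, int priority);
};

struct ring {
	const struct ring_funcs *funcs;
	const char *name;
};

static void push_job(struct ring *ring, int priority)
{
	/* the SW scheduler path works for every ring; the HW hook is
	 * only invoked when the backend implements it */
	if (ring->funcs->set_priority)
		ring->funcs->set_priority(ring, priority);
	printf("queued job on %s at priority %d\n", ring->name, priority);
}

static void gfx8_compute_set_priority(struct ring *ring, int priority)
{
	/* a real implementation would program SPI scheduling and CU
	 * reservation here */
	printf("%s: HW priority -> %d\n", ring->name, priority);
}

static const struct ring_funcs compute_funcs = {
	.set_priority = gfx8_compute_set_priority,
};

int main(void)
{
	struct ring compute = { &compute_funcs, "comp_0.0.0" };

	push_job(&compute, 1 /* high */);
	return 0;
}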

This patch series implements set_priority() for gfx8 compute rings. 
It takes
advantage of SPI scheduling and CU reservation to provide improved 
frame

latencies for high priority contexts.

For compute + 

[PATCH xf86-video-amdgpu 4/6] Factor out amdgpu_prime_dirty_to_crtc helper

2017-03-02 Thread Michel Dänzer
From: Michel Dänzer 

Cleanup in preparation for the following change, no functional change
intended.

(Ported from radeon commit 649644a88347a6d03de68f8c41db03a82deeb23b)

Signed-off-by: Michel Dänzer 
---
 src/amdgpu_kms.c | 62 ++--
 1 file changed, 33 insertions(+), 29 deletions(-)

diff --git a/src/amdgpu_kms.c b/src/amdgpu_kms.c
index b2d098ad..10a68fbd 100644
--- a/src/amdgpu_kms.c
+++ b/src/amdgpu_kms.c
@@ -549,6 +549,27 @@ call_sync_shared_pixmap(PixmapDirtyUpdatePtr dirty)
 #endif /* HAS_SYNC_SHARED_PIXMAPS */
 
 
+static xf86CrtcPtr
+amdgpu_prime_dirty_to_crtc(PixmapDirtyUpdatePtr dirty)
+{
+   ScreenPtr screen = dirty->slave_dst->drawable.pScreen;
+   ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
+   xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn);
+   int c;
+
+   /* Find the CRTC which is scanning out from this slave pixmap */
+   for (c = 0; c < xf86_config->num_crtc; c++) {
+   xf86CrtcPtr xf86_crtc = xf86_config->crtc[c];
+   drmmode_crtc_private_ptr drmmode_crtc = 
xf86_crtc->driver_private;
+
+   if (drmmode_crtc->scanout[0].pixmap == dirty->slave_dst ||
+   drmmode_crtc->scanout[1].pixmap == dirty->slave_dst)
+   return xf86_crtc;
+   }
+
+   return NULL;
+}
+
 static Bool
 amdgpu_prime_scanout_do_update(xf86CrtcPtr crtc, unsigned scanout_id)
 {
@@ -608,24 +629,16 @@ amdgpu_prime_scanout_update(PixmapDirtyUpdatePtr dirty)
ScreenPtr screen = dirty->slave_dst->drawable.pScreen;
ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
AMDGPUEntPtr pAMDGPUEnt = AMDGPUEntPriv(scrn);
-   xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn);
-   xf86CrtcPtr xf86_crtc = NULL;
-   drmmode_crtc_private_ptr drmmode_crtc = NULL;
+   xf86CrtcPtr xf86_crtc = amdgpu_prime_dirty_to_crtc(dirty);
+   drmmode_crtc_private_ptr drmmode_crtc;
uintptr_t drm_queue_seq;
drmVBlank vbl;
-   int c;
 
-   /* Find the CRTC which is scanning out from this slave pixmap */
-   for (c = 0; c < xf86_config->num_crtc; c++) {
-   xf86_crtc = xf86_config->crtc[c];
-   drmmode_crtc = xf86_crtc->driver_private;
-   if (drmmode_crtc->scanout[0].pixmap == dirty->slave_dst)
-   break;
-   }
+   if (!xf86_crtc || !xf86_crtc->enabled)
+   return;
 
-   if (c == xf86_config->num_crtc ||
-   !xf86_crtc->enabled ||
-   drmmode_crtc->scanout_update_pending ||
+   drmmode_crtc = xf86_crtc->driver_private;
+   if (drmmode_crtc->scanout_update_pending ||
!drmmode_crtc->scanout[0].pixmap ||
drmmode_crtc->pending_dpms_mode != DPMSModeOn)
return;
@@ -671,25 +684,16 @@ amdgpu_prime_scanout_flip(PixmapDirtyUpdatePtr ent)
ScreenPtr screen = ent->slave_dst->drawable.pScreen;
ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
AMDGPUEntPtr pAMDGPUEnt = AMDGPUEntPriv(scrn);
-   xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn);
-   xf86CrtcPtr crtc = NULL;
-   drmmode_crtc_private_ptr drmmode_crtc = NULL;
+   xf86CrtcPtr crtc = amdgpu_prime_dirty_to_crtc(ent);
+   drmmode_crtc_private_ptr drmmode_crtc;
uintptr_t drm_queue_seq;
unsigned scanout_id;
-   int c;
 
-   /* Find the CRTC which is scanning out from this slave pixmap */
-   for (c = 0; c < xf86_config->num_crtc; c++) {
-   crtc = xf86_config->crtc[c];
-   drmmode_crtc = crtc->driver_private;
-   scanout_id = drmmode_crtc->scanout_id;
-   if (drmmode_crtc->scanout[scanout_id].pixmap == ent->slave_dst)
-   break;
-   }
+   if (!crtc || !crtc->enabled)
+   return;
 
-   if (c == xf86_config->num_crtc ||
-   !crtc->enabled ||
-   drmmode_crtc->scanout_update_pending ||
+   drmmode_crtc = crtc->driver_private;
+   if (drmmode_crtc->scanout_update_pending ||
!drmmode_crtc->scanout[drmmode_crtc->scanout_id].pixmap ||
drmmode_crtc->pending_dpms_mode != DPMSModeOn)
return;
-- 
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu 5/6] Factor out drmmode_crtc_scanout_update helper

2017-03-02 Thread Michel Dänzer
From: Michel Dänzer 

Cleanup in preparation for following change, no functional change
intended.

(Ported from radeon commit 305e2cbf335837a2ab6a24e9ff65815afe038296)

Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.c | 85 +--
 1 file changed, 49 insertions(+), 36 deletions(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 463759b9..303848f3 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -621,6 +621,53 @@ drmmode_handle_transform(xf86CrtcPtr crtc)
 
 #endif
 
+static void
+drmmode_crtc_scanout_update(xf86CrtcPtr crtc, DisplayModePtr mode,
+   unsigned scanout_id, int *fb_id, int *x, int *y)
+{
+   ScrnInfoPtr scrn = crtc->scrn;
+   ScreenPtr screen = scrn->pScreen;
+   AMDGPUInfoPtr info = AMDGPUPTR(scrn);
+   drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
+
+   drmmode_crtc_scanout_create(crtc, &drmmode_crtc->scanout[0],
+   mode->HDisplay, mode->VDisplay);
+   if (info->tear_free) {
+   drmmode_crtc_scanout_create(crtc, &drmmode_crtc->scanout[1],
+   mode->HDisplay, mode->VDisplay);
+   }
+
+   if (drmmode_crtc->scanout[0].pixmap &&
+   (!info->tear_free || drmmode_crtc->scanout[1].pixmap)) {
+   RegionPtr region;
+   BoxPtr box;
+
+   if (!drmmode_crtc->scanout_damage) {
+   drmmode_crtc->scanout_damage =
+   DamageCreate(amdgpu_screen_damage_report,
+NULL, DamageReportRawRegion,
+TRUE, screen, NULL);
+   
DamageRegister(&screen->GetScreenPixmap(screen)->drawable,
+  drmmode_crtc->scanout_damage);
+   }
+
+   region = DamageRegion(drmmode_crtc->scanout_damage);
+   RegionUninit(region);
+   region->data = NULL;
+   box = RegionExtents(region);
+   box->x1 = 0;
+   box->y1 = 0;
+   box->x2 = max(box->x2, scrn->virtualX);
+   box->y2 = max(box->y2, scrn->virtualY);
+
+   *fb_id = drmmode_crtc->scanout[scanout_id].fb_id;
+   *x = *y = 0;
+
+   amdgpu_scanout_do_update(crtc, scanout_id);
+   amdgpu_glamor_finish(scrn);
+   }
+}
+
 static Bool
 drmmode_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode,
   Rotation rotation, int x, int y)
@@ -697,42 +744,8 @@ drmmode_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr 
mode,
crtc->driverIsPerformingTransform ||
 #endif
info->shadow_primary)) {
-   for (i = 0; i < (info->tear_free ? 2 : 1); i++) {
-   drmmode_crtc_scanout_create(crtc,
-   
&drmmode_crtc->scanout[i],
-   mode->HDisplay,
-   mode->VDisplay);
-   }
-
-   if (drmmode_crtc->scanout[0].pixmap &&
-   (!info->tear_free || 
drmmode_crtc->scanout[1].pixmap)) {
-   RegionPtr pRegion;
-   BoxPtr pBox;
-
-   if (!drmmode_crtc->scanout_damage) {
-   drmmode_crtc->scanout_damage =
-   
DamageCreate(amdgpu_screen_damage_report,
-NULL, 
DamageReportRawRegion,
-TRUE, pScreen, 
NULL);
-   
DamageRegister(&pScreen->GetScreenPixmap(pScreen)->drawable,
-  
drmmode_crtc->scanout_damage);
-   }
-
-   pRegion = 
DamageRegion(drmmode_crtc->scanout_damage);
-   RegionUninit(pRegion);
-   pRegion->data = NULL;
-   pBox = RegionExtents(pRegion);
-   pBox->x1 = 0;
-   pBox->y1 = 0;
-   pBox->x2 = max(pBox->x2, pScrn->virtualX);
-   pBox->y2 = max(pBox->y2, pScrn->virtualY);
-
-   fb_id = drmmode_crtc->scanout[scanout_id].fb_id;
-   x = y = 0;
-
-   amdgpu_scanout_do_update(crtc, scanout_id);
-   amdgpu_glamor_finish(pScrn);
-   }
+   drmmode_crtc_scanout_update(crtc, mode, 

[PATCH xf86-video-amdgpu 6/6] Allow toggling TearFree at runtime via output property

2017-03-02 Thread Michel Dänzer
From: Michel Dänzer 

Option "TearFree" now sets the default value of the output property.
See the manpage update for details.

TearFree is now enabled by default for outputs using rotation or other
RandR transforms, and for RandR 1.4 slave outputs.

(Ported from radeon commit 58cd1600057e41aade0106d4acf78e23eac6e44f)

Signed-off-by: Michel Dänzer 
---
 man/amdgpu.man|  15 +++--
 src/amdgpu_dri2.c |  34 +--
 src/amdgpu_drv.h  |   2 +-
 src/amdgpu_kms.c  |  57 ++
 src/drmmode_display.c | 162 +-
 src/drmmode_display.h |   2 +
 6 files changed, 221 insertions(+), 51 deletions(-)

diff --git a/man/amdgpu.man b/man/amdgpu.man
index 0e5c291d..53bd768a 100644
--- a/man/amdgpu.man
+++ b/man/amdgpu.man
@@ -73,10 +73,17 @@ Enable DRI2 page flipping.  The default is
 .B on.
 .TP
 .BI "Option \*qTearFree\*q \*q" boolean \*q
-Enable tearing prevention using the hardware page flipping mechanism. Requires 
allocating two
-separate scanout buffers for each CRTC. Enabling this option currently 
disables Option
-\*qEnablePageFlip\*q. The default is
-.B off.
+Set the default value of the per-output 'TearFree' property, which controls
+tearing prevention using the hardware page flipping mechanism. TearFree is
+on for any CRTC associated with one or more outputs with TearFree on. Two
+separate scanout buffers need to be allocated for each CRTC with TearFree
+on. While TearFree is on for any CRTC, it currently prevents clients from using
+DRI page flipping. If this option is set, the default value of the property is
+'on' or 'off' accordingly. If this option isn't set, the default value of the
+property is
+.B auto,
+which means that TearFree is on for outputs with rotation or other RandR
+transforms, and for RandR 1.4 slave outputs, otherwise off.
 .TP
 .BI "Option \*qAccelMethod\*q \*q" string \*q
 Setting this option to
diff --git a/src/amdgpu_dri2.c b/src/amdgpu_dri2.c
index a83d2177..8dde2930 100644
--- a/src/amdgpu_dri2.c
+++ b/src/amdgpu_dri2.c
@@ -51,6 +51,7 @@
 #include "amdgpu_list.h"
 
 #include 
+#include 
 
 #if DRI2INFOREC_VERSION >= 9
 #define USE_DRI2_PRIME
@@ -637,13 +638,34 @@ can_flip(ScrnInfoPtr pScrn, DrawablePtr draw,
 DRI2BufferPtr front, DRI2BufferPtr back)
 {
AMDGPUInfoPtr info = AMDGPUPTR(pScrn);
+   xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(pScrn);
+   int num_crtcs_on;
+   int i;
+
+   if (draw->type != DRAWABLE_WINDOW ||
+   !info->allowPageFlip ||
+   info->hwcursor_disabled ||
+   info->drmmode.present_flipping ||
+   !pScrn->vtSema ||
+   !DRI2CanFlip(draw))
+   return FALSE;
+
+   for (i = 0, num_crtcs_on = 0; i < config->num_crtc; i++) {
+   xf86CrtcPtr crtc = config->crtc[i];
+   drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
+
+   if (!crtc->enabled)
+   continue;
+
+   if (!drmmode_crtc || drmmode_crtc->rotate.bo ||
+   drmmode_crtc->scanout[0].bo)
+   return FALSE;
+
+   if (drmmode_crtc->pending_dpms_mode == DPMSModeOn)
+   num_crtcs_on++;
+   }
 
-   return draw->type == DRAWABLE_WINDOW &&
-   info->allowPageFlip &&
-   !info->hwcursor_disabled &&
-   !info->drmmode.present_flipping &&
-   pScrn->vtSema &&
-   DRI2CanFlip(draw) && can_exchange(pScrn, draw, front, back);
+   return num_crtcs_on > 0 && can_exchange(pScrn, draw, front, back);
 }
 
 static void
diff --git a/src/amdgpu_drv.h b/src/amdgpu_drv.h
index 3a24fa73..2aaafe43 100644
--- a/src/amdgpu_drv.h
+++ b/src/amdgpu_drv.h
@@ -221,7 +221,7 @@ typedef struct {
Bool use_glamor;
Bool force_accel;
Bool shadow_primary;
-   Bool tear_free;
+   int tear_free;
 
/* general */
OptionInfoPtr Options;
diff --git a/src/amdgpu_kms.c b/src/amdgpu_kms.c
index 10a68fbd..bafcb9bb 100644
--- a/src/amdgpu_kms.c
+++ b/src/amdgpu_kms.c
@@ -575,7 +575,6 @@ amdgpu_prime_scanout_do_update(xf86CrtcPtr crtc, unsigned 
scanout_id)
 {
ScrnInfoPtr scrn = crtc->scrn;
ScreenPtr screen = scrn->pScreen;
-   AMDGPUInfoPtr info = AMDGPUPTR(scrn);
drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
PixmapPtr scanoutpix = crtc->randr_crtc->scanout_pixmap;
PixmapDirtyUpdatePtr dirty;
@@ -583,7 +582,7 @@ amdgpu_prime_scanout_do_update(xf86CrtcPtr crtc, unsigned 
scanout_id)
 
xorg_list_for_each_entry(dirty, &screen->pixmap_dirty_list, ent) {
if (dirty->src == scanoutpix && dirty->slave_dst ==
-   drmmode_crtc->scanout[scanout_id ^ info->tear_free].pixmap) 
{
+   drmmode_crtc->scanout[scanout_id ^ 
drmmode_crtc->tear_free].pixmap) {
RegionPtr region;
 
 

[PATCH xf86-video-amdgpu 2/6] Fix flip event data leak if calloc or drmModeAddFB fails

2017-03-02 Thread Michel Dänzer
From: Michel Dänzer 

(Ported from radeon commit 481394e3c9f9f7d88bb66fe9ae8834c87952a8ab)

Signed-off-by: Michel Dänzer 
---
 src/amdgpu_dri2.c | 4 ++--
 src/drmmode_display.c | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/amdgpu_dri2.c b/src/amdgpu_dri2.c
index ede6c0e0..a83d2177 100644
--- a/src/amdgpu_dri2.c
+++ b/src/amdgpu_dri2.c
@@ -465,9 +465,9 @@ xf86CrtcPtr amdgpu_dri2_drawable_crtc(DrawablePtr pDraw, 
Bool consider_disabled)
 static void
 amdgpu_dri2_flip_event_abort(xf86CrtcPtr crtc, void *event_data)
 {
-   AMDGPUInfoPtr info = AMDGPUPTR(crtc->scrn);
+   if (crtc)
+   AMDGPUPTR(crtc->scrn)->drmmode.dri2_flipping = FALSE;
 
-   info->drmmode.dri2_flipping = FALSE;
free(event_data);
 }
 
diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 67dd61b6..bc7b9c33 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -2688,8 +2688,10 @@ error:
amdgpu_drm_abort_entry(drm_queue_seq);
else if (crtc)
drmmode_flip_abort(crtc, flipdata);
-   else if (flipdata && flipdata->flip_count <= 1)
+   else {
+   abort(NULL, data);
free(flipdata);
+   }
 
xf86DrvMsg(scrn->scrnIndex, X_WARNING, "Page flip failed: %s\n",
   strerror(errno));
-- 
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu 3/6] Don't destroy current FB if drmModeAddFB fails

2017-03-02 Thread Michel Dänzer
From: Michel Dänzer 

It would probably result in a black screen.

(Ported from radeon commit 1351e48efe7a2c28eab447e16f36a00fbd02ae48)

Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index bc7b9c33..463759b9 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -2679,7 +2679,8 @@ flip_error:
   strerror(errno));
 
 error:
-   if (flipdata && flipdata->flip_count <= 1) {
+   if (flipdata && flipdata->flip_count <= 1 &&
+   drmmode->fb_id != flipdata->old_fb_id) {
drmModeRmFB(pAMDGPUEnt->fd, drmmode->fb_id);
drmmode->fb_id = flipdata->old_fb_id;
}
-- 
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu 1/6] autogen: add default patch prefix

2017-03-02 Thread Michel Dänzer
From: Mihail Konev 

(Ported from radeon commit 8e6a4e96b7b27559e186f71b5547abb0a80b96dd)

Signed-off-by: Michel Dänzer 
---
 autogen.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/autogen.sh b/autogen.sh
index 0006de8a..c814d5cf 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -9,6 +9,9 @@ cd "$srcdir"
 autoreconf -v --install || exit 1
 cd "$ORIGDIR" || exit $?
 
+git config --local --get format.subjectPrefix >/dev/null 2>&1 ||
+git config --local format.subjectPrefix "PATCH xf86-video-amdgpu"
+
 if test -z "$NOCONFIGURE"; then
 exec "$srcdir"/configure "$@"
 fi
-- 
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 21/22] drm/amdgpu: condense mqd programming sequence

2017-03-02 Thread Andres Rodriguez
The MQD structure matches the reg layout. Take advantage of this to
simplify HQD programming.

Note that the ACTIVE field still needs to be programmed last.

Suggested-by: Felix Kuehling 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 44 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 83 +--
 2 files changed, 22 insertions(+), 105 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index b0b0c89..36994bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -3115,47 +3115,25 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device 
*adev,
 
 int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
 {
-   u32 tmp;
+   uint32_t tmp;
+   uint32_t mqd_reg;
+   uint32_t *mqd_data;
+
+   /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */
+   mqd_data = &mqd->cp_mqd_base_addr_lo;
 
/* disable wptr polling */
tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
 
-   /* program MQD field to HW */
-   WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
-   WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-   WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
-   WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
-   WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
-   WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
-   WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
-   WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
-   WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, 
mqd->cp_hqd_pq_rptr_report_addr_lo);
-   WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 
mqd->cp_hqd_pq_rptr_report_addr_hi);
-   WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
-   WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
-   WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
-
-   WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control);
-   WREG32(mmCP_HQD_IB_BASE_ADDR, mqd->cp_hqd_ib_base_addr_lo);
-   WREG32(mmCP_HQD_IB_BASE_ADDR_HI, mqd->cp_hqd_ib_base_addr_hi);
-   WREG32(mmCP_HQD_IB_RPTR, mqd->cp_hqd_ib_rptr);
-   WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
-   WREG32(mmCP_HQD_SEMA_CMD, mqd->cp_hqd_sema_cmd);
-   WREG32(mmCP_HQD_MSG_TYPE, mqd->cp_hqd_msg_type);
-   WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, mqd->cp_hqd_atomic0_preop_lo);
-   WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, mqd->cp_hqd_atomic0_preop_hi);
-   WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, mqd->cp_hqd_atomic1_preop_lo);
-   WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, mqd->cp_hqd_atomic1_preop_hi);
-   WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-   WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum);
-   WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority);
-   WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority);
-   WREG32(mmCP_HQD_IQ_RPTR, mqd->cp_hqd_iq_rptr);
+   /* program all HQD registers */
+   for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
+   WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
/* activate the HQD */
-   WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
+   for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; 
mqd_reg++)
+   WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 38a5099..63bfdf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4926,85 +4926,24 @@ static void gfx_v8_0_enable_doorbell(struct 
amdgpu_device *adev, bool enable)
 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd)
 {
uint32_t tmp;
+   uint32_t mqd_reg;
+   uint32_t *mqd_data;
+
+   /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
+   mqd_data = &mqd->cp_mqd_base_addr_lo;
 
/* disable wptr polling */
tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
 
-   WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
-   WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
-
-   /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-   WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
-
-   /* enable doorbell? */
-   WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
-
-   /* set pq read/write pointers */
-   WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
-   WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-   

[PATCH 15/22] drm/amdgpu: add a mechanism to untie user ring ids from kernel ring ids

2017-03-02 Thread Andres Rodriguez
Add amdgpu_queue_mgr, a mechanism that allows disjointing usermode's
ring ids from the kernel's ring ids.

The queue manager maintains a per-file descriptor map of user ring ids
to amdgpu_ring pointers. Once a map is created it is permanent (this is
required to maintain FIFO execution guarantees for a context's ring).

Different queue map policies can be configured for each HW IP.
Currently all HW IPs use the identity mapper, i.e. kernel ring id is
equal to the user ring id.

The purpose of this mechanism is to distribute the load across multiple
queues more effectively for HW IPs that support multiple rings.
Userspace clients are unable to check whether a specific resource is in
use by a different client. Therefore, it is up to the kernel driver to
make the optimal choice.
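
A minimal standalone sketch of the per-file-descriptor map described above,
using the identity policy: the first lookup picks a kernel ring and the result
is then cached permanently, so later submissions to the same user ring keep
their FIFO ordering. The array-based cache and all names are illustrative, not
the in-kernel implementation:

#include <stdio.h>

#define MAX_RINGS 8

struct queue_mapper {
	int queue_map[MAX_RINGS];	/* -1 = not mapped yet */
};

static int map_identity(int user_ring)
{
	return user_ring;		/* kernel ring id == user ring id */
}

static int queue_mgr_map(struct queue_mapper *m, int user_ring)
{
	if (user_ring < 0 || user_ring >= MAX_RINGS)
		return -1;
	/* once created, the mapping never changes */
	if (m->queue_map[user_ring] < 0)
		m->queue_map[user_ring] = map_identity(user_ring);
	return m->queue_map[user_ring];
}

int main(void)
{
	struct queue_mapper mapper;
	int i;

	for (i = 0; i < MAX_RINGS; i++)
		mapper.queue_map[i] = -1;

	printf("user ring 2 -> kernel ring %d\n", queue_mgr_map(&mapper, 2));
	printf("user ring 2 -> kernel ring %d (cached)\n",
	       queue_mgr_map(&mapper, 2));
	return 0;
}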

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  31 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c|  70 
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 157 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.h |  75 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  |  45 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |   2 +
 8 files changed, 333 insertions(+), 52 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2814aad..0081d0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
-   amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
+   amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_queue_mgr.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 377f58a..dc79c0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -696,6 +696,31 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
 /*
+ * Queue manager related structures
+ */
+struct amdgpu_queue_mapper;
+
+struct amdgpu_queue_mapper_funcs {
+   /* map a userspace ring id to a kernel ring id */
+   int (*map)(struct amdgpu_device *adev,
+  struct amdgpu_queue_mapper *mapper,
+  int ring,
+  struct amdgpu_ring **out_ring);
+};
+
+struct amdgpu_queue_mapper {
+   struct amdgpu_queue_mapper_funcs *funcs;
+   int hw_ip;
+   struct mutex lock;
+   /* protected by lock */
+   struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS];
+};
+
+struct amdgpu_queue_mgr {
+   struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM];
+};
+
+/*
  * file private structure
  */
 
@@ -704,6 +729,7 @@ struct amdgpu_fpriv {
struct mutex bo_list_lock;
struct idr  bo_list_handles;
struct amdgpu_ctx_mgr   ctx_mgr;
+   struct amdgpu_queue_mgr queue_mgr;
 };
 
 /*
@@ -1730,8 +1756,9 @@ bool amdgpu_need_post(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
-int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
-  u32 ip_instance, u32 ring,
+int amdgpu_cs_get_ring(struct amdgpu_device *adev,
+  struct amdgpu_queue_mgr *mgr,
+  u32 ip_type, u32 ip_instance, u32 user_ring,
   struct amdgpu_ring **out_ring);
 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 57301f5..605d40e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -29,60 +29,28 @@
 #include 
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_queue_mgr.h"
 
-int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
-  u32 ip_instance, u32 ring,
+int amdgpu_cs_get_ring(struct amdgpu_device *adev,
+  struct amdgpu_queue_mgr *mgr,
+  u32 ip_type, u32 ip_instance, u32 user_ring,
   struct amdgpu_ring **out_ring)
 {
+   int r;
+
/* Right now all IPs have only one instance - multiple rings. */
if (ip_instance != 

[PATCH 16/22] drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v2

2017-03-02 Thread Andres Rodriguez
Use an LRU policy to map usermode rings to HW compute queues.

Most compute clients use one queue, and usually the first queue
available. This results in poor pipe/queue work distribution when
multiple compute apps are running. In most cases pipe 0 queue 0 is
the only queue that gets used.

In order to better distribute work across multiple HW queues, we adopt
a policy to map the usermode ring ids to the LRU HW queue.

This fixes a large majority of multi-app compute workloads sharing the
same HW queue, even though 7 other queues are available.

v2: use ring->funcs->type instead of ring->hw_ip
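
A minimal standalone sketch of the LRU pick-and-rotate policy described above.
The real code keeps the rings on adev->ring_lru_list under a mutex; here a
plain array ordered oldest-first stands in for that list:

#include <stdio.h>
#include <string.h>

#define NUM_QUEUES 8

static int lru[NUM_QUEUES];		/* lru[0] = least recently used */

static void lru_init(void)
{
	int i;

	for (i = 0; i < NUM_QUEUES; i++)
		lru[i] = i;
}

static int lru_get(void)
{
	int queue = lru[0];

	/* move the chosen queue to the most-recently-used slot */
	memmove(&lru[0], &lru[1], (NUM_QUEUES - 1) * sizeof(lru[0]));
	lru[NUM_QUEUES - 1] = queue;
	return queue;
}

int main(void)
{
	int i;

	lru_init();
	/* three compute clients asking for "a queue" now spread out
	 * instead of all landing on pipe 0 queue 0 */
	for (i = 0; i < 3; i++)
		printf("client %d -> HW queue %d\n", i, lru_get());
	return 0;
}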

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 52 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  | 57 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |  4 ++
 5 files changed, 117 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index dc79c0e..a4c16a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1517,6 +1517,9 @@ struct amdgpu_device {
/* link all gtt */
spinlock_t  gtt_list_lock;
struct list_head gtt_list;
+   /* keep an lru list of rings by HW IP */
+   struct list_head ring_lru_list;
+   struct mutex ring_lru_list_lock;
 
/* record hw reset is performed */
bool has_hw_reset;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6abb238..954e3b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1719,6 +1719,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(&adev->gtt_list);
spin_lock_init(&adev->gtt_list_lock);
 
+   INIT_LIST_HEAD(&adev->ring_lru_list);
+   mutex_init(&adev->ring_lru_list_lock);
+
if (adev->asic_type >= CHIP_BONAIRE) {
adev->rmmio_base = pci_resource_start(adev->pdev, 5);
adev->rmmio_size = pci_resource_len(adev->pdev, 5);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
index 3918bdb..0cf5d24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
@@ -97,6 +97,44 @@ static struct amdgpu_queue_mapper_funcs identity_mapper = {
.map = amdgpu_identity_map
 };
 
+static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip)
+{
+   switch (hw_ip) {
+   case AMDGPU_HW_IP_GFX:
+   return AMDGPU_RING_TYPE_GFX;
+   case AMDGPU_HW_IP_COMPUTE:
+   return AMDGPU_RING_TYPE_COMPUTE;
+   case AMDGPU_HW_IP_DMA:
+   return AMDGPU_RING_TYPE_SDMA;
+   case AMDGPU_HW_IP_UVD:
+   return AMDGPU_RING_TYPE_UVD;
+   case AMDGPU_HW_IP_VCE:
+   return AMDGPU_RING_TYPE_VCE;
+   default:
+   DRM_ERROR("Invalid HW IP specified %d\n", hw_ip);
+   return -1;
+   }
+}
+
+static int amdgpu_lru_map(struct amdgpu_device *adev,
+ struct amdgpu_queue_mapper *mapper,
+ int user_ring,
+ struct amdgpu_ring **out_ring)
+{
+   int r;
+   int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip);
+
+   r = amdgpu_ring_lru_get(adev, ring_type, out_ring);
+   if (r)
+   return r;
+
+   return update_cached_map(mapper, user_ring, *out_ring);
+}
+
+static struct amdgpu_queue_mapper_funcs lru_mapper = {
+   .map = amdgpu_lru_map
+};
+
 int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
  struct amdgpu_queue_mgr *mgr)
 {
@@ -107,8 +145,18 @@ int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
 
memset(mgr, 0, sizeof(*mgr));
 
-   for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i)
-   amdgpu_queue_mapper_init(&mgr->mapper[i], i, &identity_mapper);
+   for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) {
+   switch (i) {
+   case AMDGPU_HW_IP_COMPUTE:
+   amdgpu_queue_mapper_init(&mgr->mapper[i], i,
+        &lru_mapper);
+   break;
+   default:
+   amdgpu_queue_mapper_init(&mgr->mapper[i], i,
+        &identity_mapper);
+   break;
+   }
+   }
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 43cd539..31c6274 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -180,6 +180,8 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 
if 

[PATCH 05/22] drm/amdgpu: unify MQD programming sequence for kfd and amdgpu

2017-03-02 Thread Andres Rodriguez
Use the same gfx_*_mqd_commit function for kfd and amdgpu codepaths.

This removes the last duplicates of this programming sequence.

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 51 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 49 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 38 -
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h |  5 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 44 ++-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h |  5 +++
 6 files changed, 97 insertions(+), 95 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 1a0a5f7..038b7ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -29,6 +29,7 @@
 #include "cikd.h"
 #include "cik_sdma.h"
 #include "amdgpu_ucode.h"
+#include "gfx_v7_0.h"
 #include "gca/gfx_7_2_d.h"
 #include "gca/gfx_7_2_enum.h"
 #include "gca/gfx_7_2_sh_mask.h"
@@ -309,55 +310,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
m = get_mqd(mqd);
 
is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
-
-   acquire_queue(kgd, pipe_id, queue_id);
-   WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
-   WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
-   WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
-
-   WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
-   WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
-   WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
-
-   WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
-   WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
-   WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
-
-   WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
-
-   WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
-   WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
-   WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
-
-   WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo);
-   WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi);
-   WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo);
-   WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi);
-
-   WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
-   WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-   m->cp_hqd_pq_rptr_report_addr_hi);
-
-   WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
-
-   WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo);
-   WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi);
-
-   WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
-
-   WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
-
-   WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
-
-   WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
-   WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
-
-   WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
-
if (is_wptr_shadow_valid)
-   WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow);
+   m->cp_hqd_pq_wptr = wptr_shadow;
 
-   WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+   acquire_queue(kgd, pipe_id, queue_id);
+   gfx_v7_0_mqd_commit(adev, m);
release_queue(kgd);
 
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 6697612..2ecef3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -28,6 +28,7 @@
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_ucode.h"
+#include "gfx_v8_0.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
@@ -251,53 +252,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
m = get_mqd(mqd);
 
valid_wptr = copy_from_user(_wptr, wptr, sizeof(shadow_wptr));
-   acquire_queue(kgd, pipe_id, queue_id);
-
-   WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
-   WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
-   WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
-
-   WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
-   WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
-   WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
-   WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
-   WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
-   WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
-   WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
-   WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
-   WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
- 

[PATCH 11/22] drm/amdkfd: allow split HQD on per-queue granularity v4

2017-03-02 Thread Andres Rodriguez
Update the KGD to KFD interface to allow sharing pipes with queue
granularity instead of pipe granularity.

This allows for more interesting pipe/queue splits.
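
A minimal userspace sketch of the bitmap arithmetic this enables: KFD
receives the complement of the amdgpu queue bitmap, with any bits past the
last physical queue cleared. The queue counts and the amdgpu policy below
are illustrative, not read from hardware.

#include <stdio.h>
#include <stdint.h>

#define NUM_MEC            1
#define NUM_PIPE_PER_MEC   4
#define NUM_QUEUE_PER_PIPE 8

int main(void)
{
    unsigned int total = NUM_MEC * NUM_PIPE_PER_MEC * NUM_QUEUE_PER_PIPE;
    uint64_t amdgpu_mask = 0, kfd_mask, valid;
    unsigned int mec, pipe, queue, bit;

    /* example amdgpu policy: first two queues of every pipe */
    for (mec = 0; mec < NUM_MEC; ++mec)
        for (pipe = 0; pipe < NUM_PIPE_PER_MEC; ++pipe)
            for (queue = 0; queue < 2; ++queue) {
                bit = mec * NUM_PIPE_PER_MEC * NUM_QUEUE_PER_PIPE
                    + pipe * NUM_QUEUE_PER_PIPE + queue;
                amdgpu_mask |= 1ull << bit;
            }

    /* complement, then clear everything past the last valid queue */
    valid = (total >= 64) ? ~0ull : (1ull << total) - 1;
    kfd_mask = ~amdgpu_mask & valid;

    printf("amdgpu: 0x%016llx\n", (unsigned long long)amdgpu_mask);
    printf("kfd:    0x%016llx\n", (unsigned long long)kfd_mask);
    return 0;
}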

v2: fix overflow check for res.queue_mask
v3: fix shift overflow when setting res.queue_mask
v4: fix comment in is_pipeline_enabled()

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  22 -
 drivers/gpu/drm/amd/amdkfd/kfd_device.c|   4 +
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 100 ++---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  10 +--
 .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c|   3 +-
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |   2 +-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h|  17 ++--
 drivers/gpu/drm/radeon/radeon_kfd.c|  21 -
 9 files changed, 126 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 3200ff9..8fc5aa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -95,14 +95,30 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
+   int i;
+   int last_valid_bit;
if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = 0xFF00,
-
-   .first_compute_pipe = 1,
-   .compute_pipe_count = 4 - 1,
+   .num_mec = adev->gfx.mec.num_mec,
+   .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
+   .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
};
 
+   /* this is going to have a few of the MSBs set that we need to
+* clear */
+   bitmap_complement(gpu_resources.queue_bitmap,
+ adev->gfx.mec.queue_bitmap,
+ KGD_MAX_QUEUES);
+
+   /* According to linux/bitmap.h we shouldn't use bitmap_clear if
+* nbits is not compile time constant */
+   last_valid_bit = adev->gfx.mec.num_mec
+   * adev->gfx.mec.num_pipe_per_mec
+   * adev->gfx.mec.num_queue_per_pipe;
+   for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+   clear_bit(i, gpu_resources.queue_bitmap);
+
amdgpu_doorbell_get_kfd_info(adev,
_resources.doorbell_physical_address,
_resources.doorbell_aperture_size,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3f95f7c..88187bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -226,6 +226,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 
kfd->shared_resources = *gpu_resources;
 
+   /* We only use the first MEC */
+   if (kfd->shared_resources.num_mec > 1)
+   kfd->shared_resources.num_mec = 1;
+
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
kfd->device_info->mqd_size_aligned;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c064dea..5f28720 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -63,21 +63,44 @@ enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum 
kfd_queue_type type)
return KFD_MQD_TYPE_CP;
 }
 
-unsigned int get_first_pipe(struct device_queue_manager *dqm)
+static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
+{
+   int i;
+   int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
+   + pipe * dqm->dev->shared_resources.num_queue_per_pipe;
+
+   /* queue is available for KFD usage if bit is 1 */
+   for (i = 0; i <  dqm->dev->shared_resources.num_queue_per_pipe; ++i)
+   if (test_bit(pipe_offset + i,
+ dqm->dev->shared_resources.queue_bitmap))
+   return true;
+   return false;
+}
+
+unsigned int get_mec_num(struct device_queue_manager *dqm)
+{
+   BUG_ON(!dqm || !dqm->dev);
+
+   return dqm->dev->shared_resources.num_mec;
+}
+
+unsigned int get_queues_num(struct device_queue_manager *dqm)
 {
BUG_ON(!dqm || !dqm->dev);
-   return dqm->dev->shared_resources.first_compute_pipe;
+   return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
+   KGD_MAX_QUEUES);
 }
 
-unsigned int get_pipes_num(struct device_queue_manager 

[PATCH 18/22] drm/amdgpu: add framework for HW specific priority settings v3

2017-03-02 Thread Andres Rodriguez
Add an initial framework for changing the HW priorities of rings. The
framework allows requesting priority changes for the lifetime of an
amdgpu_job. After the job completes the priority will decay to the next
lowest priority for which a request is still valid.

A new ring function set_priority() can now be populated to take care of
the HW specific programming sequence for priority changes.
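
The decay rule is easy to model on its own: every in-flight request bumps a
counter at its priority level, and the ring's effective priority is the
highest level that still has a live request. The sketch below is only a
userspace model of that idea; the names and the two-level priority range are
illustrative, not the driver's.

#include <stdio.h>

enum prio { PRIO_NORMAL, PRIO_HIGH, PRIO_MAX };

struct ring_model {
    int num_requests[PRIO_MAX];
    enum prio current;
};

static void recompute(struct ring_model *r)
{
    int p;

    r->current = PRIO_NORMAL;
    for (p = PRIO_MAX - 1; p >= 0; --p) {
        if (r->num_requests[p]) {
            r->current = (enum prio)p; /* decay stops at the highest live request */
            break;
        }
    }
}

static void elevate(struct ring_model *r, enum prio p)
{
    r->num_requests[p]++;
    recompute(r);
}

static void restore(struct ring_model *r, enum prio p)
{
    r->num_requests[p]--;
    recompute(r);
}

int main(void)
{
    struct ring_model ring = { {0}, PRIO_NORMAL };

    elevate(&ring, PRIO_HIGH);  /* first high priority job submitted */
    elevate(&ring, PRIO_HIGH);  /* second one */
    restore(&ring, PRIO_HIGH);  /* first completes: ring stays high */
    printf("after one completion: %d\n", ring.current);
    restore(&ring, PRIO_HIGH);  /* last completes: decays back to normal */
    printf("after both complete:  %d\n", ring.current);
    return 0;
}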

v2: set priority before emitting IB, and take a ref on amdgpu_job
v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_*

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  | 10 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 74 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 11 +
 5 files changed, 97 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f085698..5fec076 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -643,7 +643,7 @@ struct amdgpu_ib {
 
 extern const struct amd_sched_backend_ops amdgpu_sched_ops;
 
-int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, int 
priority,
 struct amdgpu_job **job, struct amdgpu_vm *vm);
 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 struct amdgpu_job **job);
@@ -997,6 +997,8 @@ struct amdgpu_job {
struct amdgpu_sync  sync;
struct amdgpu_ib*ibs;
struct dma_fence*fence; /* the hw fence */
+   struct dma_fence_cb cb;
+   int priority;
uint32_tpreamble_status;
uint32_tnum_ibs;
void*owner;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 605d40e..19ce202 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -186,7 +186,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void 
*data)
}
}
 
-   ret = amdgpu_job_alloc(p->adev, num_ibs, >job, vm);
+   ret = amdgpu_job_alloc(p->adev, num_ibs, p->ctx->priority, >job, vm);
if (ret)
goto free_all_kdata;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 86a1242..66b2e76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -39,7 +39,7 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job)
amdgpu_gpu_reset(job->adev);
 }
 
-int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, int 
priority,
 struct amdgpu_job **job, struct amdgpu_vm *vm)
 {
size_t size = sizeof(struct amdgpu_job);
@@ -55,6 +55,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned 
num_ibs,
 
(*job)->adev = adev;
(*job)->vm = vm;
+   (*job)->priority = priority;
(*job)->ibs = (void *)&(*job)[1];
(*job)->num_ibs = num_ibs;
 
@@ -68,7 +69,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, 
unsigned size,
 {
int r;
 
-   r = amdgpu_job_alloc(adev, 1, job, NULL);
+   r = amdgpu_job_alloc(adev, 1, AMD_SCHED_PRIORITY_NORMAL, job, NULL);
if (r)
return r;
 
@@ -169,6 +170,10 @@ static struct dma_fence *amdgpu_job_run(struct 
amd_sched_job *sched_job)
 
BUG_ON(amdgpu_sync_peek_fence(>sync, NULL));
 
+   r = amdgpu_ring_elevate_priority(job->ring, job->priority, job);
+   if (r)
+   DRM_ERROR("Failed to set job priority (%d)\n", r);
+
trace_amdgpu_sched_run_job(job);
r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, );
if (r)
@@ -177,6 +182,7 @@ static struct dma_fence *amdgpu_job_run(struct 
amd_sched_job *sched_job)
/* if gpu reset, hw fence will be replaced here */
dma_fence_put(job->fence);
job->fence = dma_fence_get(fence);
+
amdgpu_job_free_resources(job);
return fence;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 31c6274..9106ae3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -199,6 +199,78 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
ring->funcs->end_use(ring);
 }
 
+static void amdgpu_ring_restore_priority_cb(struct dma_fence *f,
+   struct dma_fence_cb *cb)
+{
+   int i;
+   struct amdgpu_job *cb_job =
+   container_of(cb, struct amdgpu_job, cb);
+   struct amdgpu_ring *ring = 

[PATCH 20/22] drm/amdgpu: implement ring set_priority for gfx_v8 compute v3

2017-03-02 Thread Andres Rodriguez
Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over
other queues on the same pipe. Multiple queues on a pipe are timesliced
so this gives us full precedence over other queues.

Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the
wave as follows:
0x2: CS_H
0x1: CS_M
0x0: CS_L

The SPI block will then dispatch work according to the policy set by
SPI_ARB_PRIORITY. In the current policy CS_H is higher priority than
gfx.

To prevent CUs from bouncing back and forth between GFX and high priority
compute work, which would introduce further latency, we reserve CUs 2+ for
high priority compute on demand.
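
The reservation itself is mask bookkeeping applied to every CU past the
unreserved ones: the acquiring pipe's bit is shifted into the TYPE_MASK
field and the per-pipe queue masks are collapsed into the single QUEUE_MASK.
A rough standalone model of that construction, with constants copied from
the patch and the register field packing simplified away:

#include <stdio.h>

#define GFX8_CU_NUM                8
#define GFX8_UNRESERVED_CU_NUM     2
#define GFX8_CU_RESERVE_PIPE_SHIFT 7
#define MAX_PIPES                  4

int main(void)
{
    /* pretend a compute ring on pipe 1, queue 0 just acquired priority */
    unsigned int pipe_mask = 1u << 1;
    unsigned int queue_mask_per_pipe[MAX_PIPES] = { 0, 1u << 0, 0, 0 };
    unsigned int type_mask, queue_mask = 0;
    int i;

    /* compute pipe 0 starts at GFX8_CU_RESERVE_PIPE_SHIFT */
    type_mask = pipe_mask << GFX8_CU_RESERVE_PIPE_SHIFT;

    /* HW has a single queue mask register, so collapse the per-pipe masks */
    for (i = 0; i < MAX_PIPES; i++)
        queue_mask |= queue_mask_per_pipe[i];

    printf("type_mask=0x%x queue_mask=0x%x applied to CUs %d..%d\n",
           type_mask, queue_mask, GFX8_UNRESERVED_CU_NUM, GFX8_CU_NUM - 1);
    return 0;
}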

v2: fix srbm_select to ring->queue and use ring->funcs->type
v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_*

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 96 +-
 3 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6fe10cd..a22b859 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -921,6 +921,9 @@ struct amdgpu_gfx {
unsignednum_gfx_rings;
struct amdgpu_ring  compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
unsignednum_compute_rings;
+   spinlock_t  cu_reserve_lock;
+   uint32_tcu_reserve_pipe_mask;
+   uint32_t cu_reserve_queue_mask[AMDGPU_MAX_COMPUTE_RINGS];
struct amdgpu_irq_src   eop_irq;
struct amdgpu_irq_src   priv_reg_irq;
struct amdgpu_irq_src   priv_inst_irq;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 954e3b9..3f3dc79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1712,6 +1712,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
spin_lock_init(>gc_cac_idx_lock);
spin_lock_init(>audio_endpt_idx_lock);
spin_lock_init(>mm_stats.lock);
+   spin_lock_init(>gfx.cu_reserve_lock);
 
INIT_LIST_HEAD(>shadow_list);
mutex_init(>shadow_list_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 5db5bac..38a5099 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -53,7 +53,10 @@
 
 #define GFX8_NUM_GFX_RINGS 1
 #define GFX8_MEC_HPD_SIZE 2048
-
+#define GFX8_CU_RESERVE_RESOURCES 0x45888
+#define GFX8_CU_NUM 8
+#define GFX8_UNRESERVED_CU_NUM 2
+#define GFX8_CU_RESERVE_PIPE_SHIFT 7
 
 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
@@ -6674,6 +6677,96 @@ static void gfx_v8_0_ring_set_wptr_compute(struct 
amdgpu_ring *ring)
WDOORBELL32(ring->doorbell_index, ring->wptr);
 }
 
+static void gfx_v8_0_cu_reserve(struct amdgpu_device *adev,
+   struct amdgpu_ring *ring, bool acquire)
+{
+   int i, resources;
+   int tmp = 0, queue_mask = 0, type_mask = 0;
+   int reserve_res_reg, reserve_en_reg;
+
+   /* gfx_v8_0_cu_reserve only supports compute path */
+   if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+   return;
+
+   spin_lock(>gfx.cu_reserve_lock);
+   if (acquire) {
+   adev->gfx.cu_reserve_pipe_mask |= (1 << ring->pipe);
+   adev->gfx.cu_reserve_queue_mask[ring->pipe] |= (1 << ring->queue);
+   } else {
+   adev->gfx.cu_reserve_pipe_mask &= ~(1 << ring->pipe);
+   adev->gfx.cu_reserve_queue_mask[ring->pipe] &= ~(1 << ring->queue);
+   }
+
+   /* compute pipe 0 starts at GFX8_CU_RESERVE_PIPE_SHIFT */
+   type_mask = (adev->gfx.cu_reserve_pipe_mask << GFX8_CU_RESERVE_PIPE_SHIFT);
+
+   /* HW only has one register for queue mask, so we collapse them */
+   for (i = 0; i < AMDGPU_MAX_COMPUTE_RINGS; i++)
+   queue_mask |= adev->gfx.cu_reserve_queue_mask[i];
+
+   /* leave the first CUs for general processing */
+   for (i = GFX8_UNRESERVED_CU_NUM; i < GFX8_CU_NUM; i++) {
+   reserve_res_reg = mmSPI_RESOURCE_RESERVE_CU_0 + i;
+   reserve_en_reg = mmSPI_RESOURCE_RESERVE_EN_CU_0 + i;
+
+   tmp = REG_SET_FIELD(tmp, SPI_RESOURCE_RESERVE_EN_CU_0,
+   TYPE_MASK, type_mask);
+   tmp = REG_SET_FIELD(tmp, SPI_RESOURCE_RESERVE_EN_CU_0,
+   QUEUE_MASK, queue_mask);
+   if (queue_mask) {
+   resources = GFX8_CU_RESERVE_RESOURCES;
+   tmp = REG_SET_FIELD(tmp, SPI_RESOURCE_RESERVE_EN_CU_0,
+

[PATCH 22/22] drm/amdgpu: workaround tonga HW bug in HQD programming sequence

2017-03-02 Thread Andres Rodriguez
Tonga based asics may experience hangs when an HQD's EOP parameters
are modified.

Work around this HW issue by avoiding writes to these registers for
tonga asics.
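
Structurally the workaround splits the single register walk into ranges
around the EOP RPTR/WPTR window, and only writes that window on non-Tonga
parts. A toy sketch of the control flow; the register numbers below are made
up, the real bounds being the mmCP_HQD_EOP_* registers shown in the diff:

#include <stdio.h>

/* made-up register indices standing in for the contiguous MQD block */
#define REG_FIRST        100
#define REG_EOP_RPTR     140  /* first register of the skipped window */
#define REG_EOP_WPTR_MEM 142  /* last register of the skipped window */
#define REG_LAST         180

int main(void)
{
    int is_tonga = 1;
    int reg, written = 0;

    /* everything up to (and excluding) the EOP RPTR/WPTR window */
    for (reg = REG_FIRST; reg < REG_EOP_RPTR; reg++)
        written++;                /* WREG32(reg, mqd_data[...]) in the driver */

    /* the window itself is only written on non-Tonga parts */
    if (!is_tonga)
        for (reg = REG_EOP_RPTR; reg <= REG_EOP_WPTR_MEM; reg++)
            written++;

    /* and the remainder of the block */
    for (reg = REG_EOP_WPTR_MEM + 1; reg <= REG_LAST; reg++)
        written++;

    printf("programmed %d of %d registers\n",
           written, REG_LAST - REG_FIRST + 1);
    return 0;
}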

Based on the following ROCm commit:
2a0fb8 - drm/amdgpu: Synchronize KFD HQD load protocol with CP scheduler

From the ROCm git repository:
https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver.git

CC: Jay Cornwall 
Suggested-by: Felix Kuehling 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 63bfdf6..a995398 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4938,7 +4938,21 @@ int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 
struct vi_mqd *mqd)
WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
 
/* program all HQD registers */
-   for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
+   for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; 
mqd_reg++)
+   WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+   /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+* This is safe since EOP RPTR==WPTR for any inactive HQD
+* on ASICs that do not support context-save.
+* EOP writes/reads can start anywhere in the ring.
+*/
+   if (adev->asic_type != CHIP_TONGA) {
+   WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
+   WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
+   WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
+   }
+
+   for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; 
mqd_reg++)
WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
/* activate the HQD */
-- 
2.9.3



[PATCH 03/22] drm/amdgpu: detect timeout error when deactivating hqd

2017-03-02 Thread Andres Rodriguez
Handle HQD deactivation timeouts instead of ignoring them.
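
The change follows the usual poll-with-timeout shape: rather than dropping
the result of the deactivate loop, return a timeout error and let the caller
unwind. A tiny standalone sketch of that pattern, with the register read
faked (the real code polls CP_HQD_ACTIVE under the SRBM mutex):

#include <errno.h>
#include <stdio.h>

/* fake HW: the ACTIVE bit deasserts after a few polls */
static int polls_until_idle = 3;
static int read_hqd_active(void) { return polls_until_idle-- > 0; }

static int deactivate_hqd(unsigned int timeout_loops)
{
    unsigned int i;

    for (i = 0; i < timeout_loops; i++)
        if (!read_hqd_active())
            return 0;

    return -ETIMEDOUT;
}

int main(void)
{
    int r = deactivate_hqd(10);

    if (r)
        fprintf(stderr, "failed to deactivate hqd (%d)\n", r);
    else
        printf("hqd deactivated\n");
    return 0;
}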

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 22 --
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 09a..af4b505 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4891,6 +4891,7 @@ static int gfx_v8_0_kiq_queue_init(struct amdgpu_ring 
*ring,
   struct vi_mqd *mqd,
   u64 mqd_gpu_addr)
 {
+   int r = 0;
struct amdgpu_device *adev = ring->adev;
struct amdgpu_kiq *kiq = >gfx.kiq;
uint64_t eop_gpu_addr;
@@ -4912,7 +4913,12 @@ static int gfx_v8_0_kiq_queue_init(struct amdgpu_ring 
*ring,
gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
 
if (is_kiq) {
-   gfx_v8_0_mqd_deactivate(adev);
+   r = gfx_v8_0_mqd_deactivate(adev);
+   if (r) {
+   dev_err(adev->dev, "failed to deactivate ring %s\n", 
ring->name);
+   goto out_unlock;
+   }
+
gfx_v8_0_enable_doorbell(adev, ring->use_doorbell);
gfx_v8_0_mqd_commit(adev, mqd);
}
@@ -4926,6 +4932,12 @@ static int gfx_v8_0_kiq_queue_init(struct amdgpu_ring 
*ring,
gfx_v8_0_map_queue_enable(>ring, ring);
 
return 0;
+
+out_unlock:
+   vi_srbm_select(adev, 0, 0, 0, 0);
+   mutex_unlock(>srbm_mutex);
+
+   return r;
 }
 
 static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev)
@@ -5059,10 +5071,16 @@ static int gfx_v8_0_compute_queue_init(struct 
amdgpu_device *adev,
 
gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
 
-   gfx_v8_0_mqd_deactivate(adev);
+   r = gfx_v8_0_mqd_deactivate(adev);
+   if (r) {
+   dev_err(adev->dev, "failed to deactivate ring %s\n", 
ring->name);
+   goto out_unlock;
+   }
+
gfx_v8_0_enable_doorbell(adev, ring->use_doorbell);
gfx_v8_0_mqd_commit(adev, mqd);
 
+out_unlock:
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(>srbm_mutex);
 
-- 
2.9.3



[PATCH 13/22] drm/amdgpu: allocate queues horizontally across pipes

2017-03-02 Thread Andres Rodriguez
Pipes provide better concurrency than queues; therefore we want to make
sure that apps use queues from different pipes whenever possible.

Optimize for the trivial case where an app will consume rings in order;
we therefore don't want adjacent rings to belong to the same pipe.
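
To make the ordering concrete, here is a throwaway comparison of the two
ring -> (pipe, queue) mappings for a 4-pipe, 2-queue-per-pipe split (the
counts are illustrative):

#include <stdio.h>

#define NUM_PIPES  4
#define NUM_QUEUES 2  /* queues amdgpu owns per pipe, illustrative */

int main(void)
{
    int ring, pipe, queue;

    printf("vertical (old):   ");
    for (ring = 0; ring < NUM_PIPES * NUM_QUEUES; ring++) {
        pipe  = ring / NUM_QUEUES;   /* fill a pipe before moving on */
        queue = ring % NUM_QUEUES;
        printf("r%d->p%dq%d ", ring, pipe, queue);
    }

    printf("\nhorizontal (new): ");
    for (ring = 0; ring < NUM_PIPES * NUM_QUEUES; ring++) {
        pipe  = ring % NUM_PIPES;    /* spread consecutive rings across pipes */
        queue = ring / NUM_PIPES;
        printf("r%d->p%dq%d ", ring, pipe, queue);
    }
    printf("\n");
    return 0;
}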

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 13 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 78 +++-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 83 +--
 3 files changed, 109 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f9df217..377f58a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1646,6 +1646,19 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
return NULL;
 }
 
+static inline bool amdgpu_is_mec_queue_enabled(struct amdgpu_device *adev,
+   int mec, int pipe, int queue)
+{
+   int bit = 0;
+
+   bit += mec * adev->gfx.mec.num_pipe_per_mec
+   * adev->gfx.mec.num_queue_per_pipe;
+   bit += pipe * adev->gfx.mec.num_queue_per_pipe;
+   bit += queue;
+
+   return test_bit(bit, adev->gfx.mec.queue_bitmap);
+}
+
 /*
  * ASICs macro.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 68265b7..3ca5519 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4727,11 +4727,42 @@ static void gfx_v7_0_gpu_early_init(struct 
amdgpu_device *adev)
adev->gfx.config.gb_addr_config = gb_addr_config;
 }
 
+static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+   int mec, int pipe, int queue)
+{
+   int r;
+   unsigned irq_type;
+   struct amdgpu_ring *ring = >gfx.compute_ring[ring_id];
+
+   /* mec0 is me1 */
+   ring->me = mec + 1;
+   ring->pipe = pipe;
+   ring->queue = queue;
+
+   ring->ring_obj = NULL;
+   ring->use_doorbell = true;
+   ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+   sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+   irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+   + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+   + ring->pipe;
+
+   /* type-2 packets are deprecated on MEC, use type-3 instead */
+   r = amdgpu_ring_init(adev, ring, 1024,
+   >gfx.eop_irq, irq_type);
+   if (r)
+   return r;
+
+
+   return 0;
+}
+
 static int gfx_v7_0_sw_init(void *handle)
 {
struct amdgpu_ring *ring;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-   int i, r, ring_id;
+   int i, j, k, r, ring_id;
 
/* EOP Event */
r = amdgpu_irq_add_id(adev, 181, >gfx.eop_irq);
@@ -4779,39 +4810,24 @@ static int gfx_v7_0_sw_init(void *handle)
return r;
}
 
-   /* set up the compute queues */
-   for (i = 0, ring_id = 0; i < AMDGPU_MAX_QUEUES; i++) {
-   unsigned irq_type;
-
-   if (!test_bit(i, adev->gfx.mec.queue_bitmap))
-   continue;
-
-   ring = >gfx.compute_ring[ring_id];
-
-   /* mec0 is me1 */
-   ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
-   / adev->gfx.mec.num_pipe_per_mec)
-   + 1;
-   ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
-   % adev->gfx.mec.num_pipe_per_mec;
-   ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
-
-   ring->ring_obj = NULL;
-   ring->use_doorbell = true;
-   ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
-   sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, 
ring->queue);
+   /* set up the compute queues - allocate horizontally across pipes */
+   ring_id = 0;
+   for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; ++i) {
+   for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+   for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
 
-   irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
-   + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
-   + ring->pipe;
+   if (!amdgpu_is_mec_queue_enabled(adev, i, k, j))
+   continue;
 
-   /* type-2 packets are deprecated on MEC, use type-3 instead */
-   r = amdgpu_ring_init(adev, ring, 1024,
->gfx.eop_irq, irq_type);
-   if (r)
-   return r;
+   r = 

[PATCH 19/22] drm/amdgpu: make amdgpu_job refcounted

2017-03-02 Thread Andres Rodriguez
The job structure is shared between multiple components, gpu_scheduler
and amdgpu. Make each user hold its own reference to simplify resource
freeing.

This will also be useful for adding extra callbacks on job completion.
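
The ownership rule is easiest to see in isolation: every component that can
outlive the submission path takes its own reference, and the job is freed
only when the last reference drops. Below is a userspace model of that
get/put scheme using a plain counter instead of struct kref, with no
locking:

#include <stdio.h>
#include <stdlib.h>

struct job {
    int refcount;
};

static struct job *job_get(struct job *j)
{
    j->refcount++;
    return j;
}

static void job_put(struct job **j)
{
    if (--(*j)->refcount == 0) {
        printf("freeing job\n");
        free(*j);
    }
    *j = NULL;
}

int main(void)
{
    struct job *parser_ref = calloc(1, sizeof(*parser_ref));
    struct job *sched_ref;

    if (!parser_ref)
        return 1;

    parser_ref->refcount = 1;        /* the allocation owns one reference */
    sched_ref = job_get(parser_ref); /* pushing to the scheduler takes another */

    job_put(&parser_ref);            /* parser teardown: job survives */
    job_put(&sched_ref);             /* scheduler free callback: last ref, freed */
    return 0;
}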

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |  4 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 24 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 29 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c |  6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 18 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 23 ++-
 7 files changed, 59 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5fec076..6fe10cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -649,7 +649,8 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, 
unsigned size,
 struct amdgpu_job **job);
 
 void amdgpu_job_free_resources(struct amdgpu_job *job);
-void amdgpu_job_free(struct amdgpu_job *job);
+struct amdgpu_job *amdgpu_job_get(struct amdgpu_job *job);
+void amdgpu_job_put(struct amdgpu_job **job);
 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
  struct amd_sched_entity *entity, void *owner,
  struct dma_fence **f);
@@ -991,6 +992,7 @@ struct amdgpu_cs_parser {
 
 struct amdgpu_job {
struct amd_sched_jobbase;
+   struct kref refcount;
struct amdgpu_device*adev;
struct amdgpu_vm*vm;
struct amdgpu_ring  *ring;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 19ce202..5e8431d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -714,7 +714,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser 
*parser, int error, bo
drm_free_large(parser->chunks[i].kdata);
kfree(parser->chunks);
if (parser->job)
-   amdgpu_job_free(parser->job);
+   amdgpu_job_put(>job);
amdgpu_bo_unref(>uf_entry.robj);
 }
 
@@ -988,11 +988,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
int r;
 
job = p->job;
-   p->job = NULL;
 
r = amd_sched_job_init(>base, >sched, entity, p->filp);
if (r) {
-   amdgpu_job_free(job);
+   amdgpu_job_put();
return r;
}
 
@@ -1004,6 +1003,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_job_free_resources(job);
 
trace_amdgpu_cs_ioctl(job);
+   amdgpu_job_get(job);
amd_sched_entity_push_job(>base);
 
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 66b2e76..9b48e99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -53,6 +53,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned 
num_ibs, int priority,
if (!*job)
return -ENOMEM;
 
+   kref_init(&(*job)->refcount);
(*job)->adev = adev;
(*job)->vm = vm;
(*job)->priority = priority;
@@ -96,13 +97,14 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
 {
struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
 
-   dma_fence_put(job->fence);
-   amdgpu_sync_free(>sync);
-   kfree(job);
+   amdgpu_job_put();
 }
 
-void amdgpu_job_free(struct amdgpu_job *job)
+static void amdgpu_job_free(struct kref *ref)
 {
+   struct amdgpu_job *job;
+   job = container_of(ref, struct amdgpu_job, refcount);
+
amdgpu_job_free_resources(job);
 
dma_fence_put(job->fence);
@@ -110,6 +112,18 @@ void amdgpu_job_free(struct amdgpu_job *job)
kfree(job);
 }
 
+struct amdgpu_job *amdgpu_job_get(struct amdgpu_job *job)
+{
+   kref_get(>refcount);
+   return job;
+}
+
+void amdgpu_job_put(struct amdgpu_job **job)
+{
+   kref_put(&(*job)->refcount, amdgpu_job_free);
+   (*job) = NULL;
+}
+
 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
  struct amd_sched_entity *entity, void *owner,
  struct dma_fence **f)
@@ -128,6 +142,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct 
amdgpu_ring *ring,
job->fence_ctx = entity->fence_context;
*f = dma_fence_get(>base.s_fence->finished);
amdgpu_job_free_resources(job);
+
+   amdgpu_job_get(job);
amd_sched_entity_push_job(>base);
 
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4c6094e..1e6e92a 100644
--- 

[PATCH 08/22] drm/radeon: take ownership of pipe initialization

2017-03-02 Thread Andres Rodriguez
Take ownership of pipe initialization away from KFD.

Note that hpd_eop_gpu_addr was already large enough to accommodate all
pipes.

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/radeon/cik.c| 27 ++-
 drivers/gpu/drm/radeon/radeon_kfd.c | 13 +
 2 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index f6ff41a..82b57ef 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -4595,23 +4595,24 @@ static int cik_cp_compute_resume(struct radeon_device 
*rdev)
/* init the pipes */
mutex_lock(>srbm_mutex);
 
-   eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
+   for (i = 0; i < rdev->mec.num_pipe; ++i) {
+   cik_srbm_select(rdev, 0, i, 0, 0);
 
-   cik_srbm_select(rdev, 0, 0, 0, 0);
-
-   /* write the EOP addr */
-   WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
-   WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
+   eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
+   /* write the EOP addr */
+   WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
+   WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
 
-   /* set the VMID assigned */
-   WREG32(CP_HPD_EOP_VMID, 0);
+   /* set the VMID assigned */
+   WREG32(CP_HPD_EOP_VMID, 0);
 
-   /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-   tmp = RREG32(CP_HPD_EOP_CONTROL);
-   tmp &= ~EOP_SIZE_MASK;
-   tmp |= order_base_2(MEC_HPD_SIZE / 8);
-   WREG32(CP_HPD_EOP_CONTROL, tmp);
+   /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+   tmp = RREG32(CP_HPD_EOP_CONTROL);
+   tmp &= ~EOP_SIZE_MASK;
+   tmp |= order_base_2(MEC_HPD_SIZE / 8);
+   WREG32(CP_HPD_EOP_CONTROL, tmp);
 
+   }
mutex_unlock(>srbm_mutex);
 
/* init the queues.  Just two for now. */
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c 
b/drivers/gpu/drm/radeon/radeon_kfd.c
index 87a9ebb..a06e3b1 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -423,18 +423,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, 
unsigned int pasid,
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
-   uint32_t mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
-   uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
-
-   lock_srbm(kgd, mec, pipe, 0, 0);
-   write_register(kgd, CP_HPD_EOP_BASE_ADDR,
-   lower_32_bits(hpd_gpu_addr >> 8));
-   write_register(kgd, CP_HPD_EOP_BASE_ADDR_HI,
-   upper_32_bits(hpd_gpu_addr >> 8));
-   write_register(kgd, CP_HPD_EOP_VMID, 0);
-   write_register(kgd, CP_HPD_EOP_CONTROL, hpd_size);
-   unlock_srbm(kgd);
-
+   /* nothing to do here */
return 0;
 }
 
-- 
2.9.3



[PATCH 10/22] drm/amdgpu: teach amdgpu how to enable interrupts for any pipe

2017-03-02 Thread Andres Rodriguez
The current implementation is hardcoded to enable ME1/PIPE0 interrupts
only.

This patch allows amdgpu to enable interrupts for any pipe of ME1.

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 48 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 33 
 2 files changed, 34 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index fe46765..68265b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -5039,42 +5039,28 @@ static void 
gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 int me, int pipe,
 enum 
amdgpu_interrupt_state state)
 {
-   u32 mec_int_cntl, mec_int_cntl_reg;
-
-   /*
-* amdgpu controls only pipe 0 of MEC1. That's why this function only
-* handles the setting of interrupts for this specific pipe. All other
-* pipes' interrupts are set by amdkfd.
+   /* Me 0 is for graphics and Me 2 is reserved for HW scheduling
+* So we should only really be configuring ME 1 i.e. MEC0
 */
-
-   if (me == 1) {
-   switch (pipe) {
-   case 0:
-   mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
-   break;
-   default:
-   DRM_DEBUG("invalid pipe %d\n", pipe);
-   return;
-   }
-   } else {
-   DRM_DEBUG("invalid me %d\n", me);
+   if (me != 1) {
+   DRM_ERROR("Ignoring request to enable interrupts for invalid 
me:%d\n", me);
return;
}
 
-   switch (state) {
-   case AMDGPU_IRQ_STATE_DISABLE:
-   mec_int_cntl = RREG32(mec_int_cntl_reg);
-   mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
-   WREG32(mec_int_cntl_reg, mec_int_cntl);
-   break;
-   case AMDGPU_IRQ_STATE_ENABLE:
-   mec_int_cntl = RREG32(mec_int_cntl_reg);
-   mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
-   WREG32(mec_int_cntl_reg, mec_int_cntl);
-   break;
-   default:
-   break;
+   if (pipe >= adev->gfx.mec.num_pipe_per_mec) {
+   DRM_ERROR("Ignoring request to enable interrupts for invalid "
+   "me:%d pipe:%d\n", pipe, me);
+   return;
}
+
+   mutex_lock(>srbm_mutex);
+   cik_srbm_select(adev, me, pipe, 0, 0);
+
+   WREG32_FIELD(CPC_INT_CNTL, TIME_STAMP_INT_ENABLE,
+   state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
+
+   cik_srbm_select(adev, 0, 0, 0, 0);
+   mutex_unlock(>srbm_mutex);
 }
 
 static int gfx_v7_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 1238b3d..861334b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6786,27 +6786,28 @@ static void 
gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 int me, int pipe,
 enum 
amdgpu_interrupt_state state)
 {
-   /*
-* amdgpu controls only pipe 0 of MEC1. That's why this function only
-* handles the setting of interrupts for this specific pipe. All other
-* pipes' interrupts are set by amdkfd.
+   /* Me 0 is for graphics and Me 2 is reserved for HW scheduling
+* So we should only really be configuring ME 1 i.e. MEC0
 */
+   if (me != 1) {
+   DRM_ERROR("Ignoring request to enable interrupts for invalid 
me:%d\n", me);
+   return;
+   }
 
-   if (me == 1) {
-   switch (pipe) {
-   case 0:
-   break;
-   default:
-   DRM_DEBUG("invalid pipe %d\n", pipe);
-   return;
-   }
-   } else {
-   DRM_DEBUG("invalid me %d\n", me);
+   if (pipe >= adev->gfx.mec.num_pipe_per_mec) {
+   DRM_ERROR("Ignoring request to enable interrupts for invalid "
+   "me:%d pipe:%d\n", pipe, me);
return;
}
 
-   WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
-state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
+   mutex_lock(>srbm_mutex);
+   vi_srbm_select(adev, me, pipe, 0, 0);
+
+   WREG32_FIELD(CPC_INT_CNTL, TIME_STAMP_INT_ENABLE,
+   state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
+
+   vi_srbm_select(adev, 0, 0, 0, 0);
+   mutex_unlock(>srbm_mutex);
 }
 
 static int 

[PATCH 07/22] drm/amdgpu: take ownership of per-pipe configuration

2017-03-02 Thread Andres Rodriguez
Make amdgpu the owner of all per-pipe state of the HQDs.

This change will allow us to split the queues between kfd and amdgpu
with a queue granularity instead of pipe granularity.

This patch fixes kfd allocating an HPD_EOP region for its 3 pipes, which
went unused.
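
The new EOP sizing is simply MECs x pipes x HPD size (keeping the existing
factor of 2), with the MEC count now depending on the ASIC. A quick
standalone sketch of that arithmetic using the values from the patch:

#include <stdio.h>

#define GFX7_MEC_HPD_SIZE 2048

enum asic { KAVERI, BONAIRE, HAWAII, KABINI, MULLINS };

static unsigned int mec_hpd_size(enum asic a)
{
    unsigned int num_mec = (a == KAVERI) ? 2 : 1; /* KV has 2 MECs, CI/KB 1 */
    unsigned int num_pipe_per_mec = 4;

    return num_mec * num_pipe_per_mec * GFX7_MEC_HPD_SIZE * 2;
}

int main(void)
{
    printf("KAVERI : %u bytes\n", mec_hpd_size(KAVERI));
    printf("BONAIRE: %u bytes\n", mec_hpd_size(BONAIRE));
    return 0;
}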

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c  | 13 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c  |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  | 28 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 33 +++-
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 45 --
 6 files changed, 49 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b577ec1..15e048c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -776,9 +776,9 @@ struct amdgpu_rlc {
 struct amdgpu_mec {
struct amdgpu_bo*hpd_eop_obj;
u64 hpd_eop_gpu_addr;
-   u32 num_pipe;
u32 num_mec;
-   u32 num_queue;
+   u32 num_pipe_per_mec;
+   u32 num_queue_per_pipe;
 };
 
 struct amdgpu_kiq {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 038b7ea..910f9d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -244,18 +244,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, 
unsigned int pasid,
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
-   struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
-   uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
-   uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
-
-   lock_srbm(kgd, mec, pipe, 0, 0);
-   WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8));
-   WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8));
-   WREG32(mmCP_HPD_EOP_VMID, 0);
-   WREG32(mmCP_HPD_EOP_CONTROL, hpd_size);
-   unlock_srbm(kgd);
-
+   /* amdgpu owns the per-pipe state */
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 2ecef3d..5843368 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -206,6 +206,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, 
unsigned int pasid,
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
+   /* amdgpu owns the per-pipe state */
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 03a4cee..2f1faa4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2806,6 +2806,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
int r;
u32 *hpd;
+   size_t mec_hpd_size;
 
/*
 * KV:2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
@@ -2813,13 +2814,26 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 * Nonetheless, we assign only 1 pipe because all other pipes will
 * be handled by KFD
 */
-   adev->gfx.mec.num_mec = 1;
-   adev->gfx.mec.num_pipe = 1;
-   adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * 
adev->gfx.mec.num_pipe * 8;
+   switch (adev->asic_type) {
+   case CHIP_KAVERI:
+   adev->gfx.mec.num_mec = 2;
+   break;
+   case CHIP_BONAIRE:
+   case CHIP_HAWAII:
+   case CHIP_KABINI:
+   case CHIP_MULLINS:
+   default:
+   adev->gfx.mec.num_mec = 1;
+   break;
+   }
+   adev->gfx.mec.num_pipe_per_mec = 4;
+   adev->gfx.mec.num_queue_per_pipe = 8;
 
+   mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
+   * GFX7_MEC_HPD_SIZE * 2;
if (adev->gfx.mec.hpd_eop_obj == NULL) {
r = amdgpu_bo_create(adev,
-adev->gfx.mec.num_mec * 
adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2,
+mec_hpd_size,
 PAGE_SIZE, true,
 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 >gfx.mec.hpd_eop_obj);
@@ -2849,7 +2863,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
}
 
/* clear memory.  Not sure if this is required or not */
-   memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 
GFX7_MEC_HPD_SIZE * 2);
+   memset(hpd, 0, mec_hpd_size);
 

[PATCH 04/22] drm/amdgpu: remove duplicate definition of cik_mqd

2017-03-02 Thread Andres Rodriguez
The gfx v7 code contains a slightly different version of cik_mqd called
bonaire_mqd. This can introduce subtle bugs if fixes are not applied in
both places.

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 135 ++
 1 file changed, 54 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 8e1e601..c606e0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -27,6 +27,7 @@
 #include "amdgpu_gfx.h"
 #include "cikd.h"
 #include "cik.h"
+#include "cik_structs.h"
 #include "atom.h"
 #include "amdgpu_ucode.h"
 #include "clearstate_ci.h"
@@ -2895,34 +2896,6 @@ struct hqd_registers
u32 cp_mqd_control;
 };
 
-struct bonaire_mqd
-{
-   u32 header;
-   u32 dispatch_initiator;
-   u32 dimensions[3];
-   u32 start_idx[3];
-   u32 num_threads[3];
-   u32 pipeline_stat_enable;
-   u32 perf_counter_enable;
-   u32 pgm[2];
-   u32 tba[2];
-   u32 tma[2];
-   u32 pgm_rsrc[2];
-   u32 vmid;
-   u32 resource_limits;
-   u32 static_thread_mgmt01[2];
-   u32 tmp_ring_size;
-   u32 static_thread_mgmt23[2];
-   u32 restart[3];
-   u32 thread_trace_enable;
-   u32 reserved1;
-   u32 user_data[16];
-   u32 vgtcs_invoke_count[2];
-   struct hqd_registers queue_state;
-   u32 dequeue_cntr;
-   u32 interrupt_queue[64];
-};
-
 static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int me, int 
pipe)
 {
u64 eop_gpu_addr;
@@ -2976,7 +2949,7 @@ static int gfx_v7_0_mqd_deactivate(struct amdgpu_device 
*adev)
 }
 
 static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
-struct bonaire_mqd *mqd,
+struct cik_mqd *mqd,
 uint64_t mqd_gpu_addr,
 struct amdgpu_ring *ring)
 {
@@ -2984,101 +2957,101 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device 
*adev,
u64 wb_gpu_addr;
 
/* init the mqd struct */
-   memset(mqd, 0, sizeof(struct bonaire_mqd));
+   memset(mqd, 0, sizeof(struct cik_mqd));
 
mqd->header = 0xC0310800;
-   mqd->static_thread_mgmt01[0] = 0x;
-   mqd->static_thread_mgmt01[1] = 0x;
-   mqd->static_thread_mgmt23[0] = 0x;
-   mqd->static_thread_mgmt23[1] = 0x;
+   mqd->compute_static_thread_mgmt_se0 = 0x;
+   mqd->compute_static_thread_mgmt_se1 = 0x;
+   mqd->compute_static_thread_mgmt_se2 = 0x;
+   mqd->compute_static_thread_mgmt_se3 = 0x;
 
/* enable doorbell? */
-   mqd->queue_state.cp_hqd_pq_doorbell_control =
+   mqd->cp_hqd_pq_doorbell_control =
RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
if (ring->use_doorbell)
-   mqd->queue_state.cp_hqd_pq_doorbell_control |= 
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+   mqd->cp_hqd_pq_doorbell_control |= 
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
else
-   mqd->queue_state.cp_hqd_pq_doorbell_control &= 
~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+   mqd->cp_hqd_pq_doorbell_control &= 
~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
 
/* set the pointer to the MQD */
-   mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffc;
-   mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
+   mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffc;
+   mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
 
/* set MQD vmid to 0 */
-   mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
-   mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
+   mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
+   mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
 
/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
hqd_gpu_addr = ring->gpu_addr >> 8;
-   mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
-   mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+   mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+   mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
 
/* set up the HQD, this is similar to CP_RB0_CNTL */
-   mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
-   mqd->queue_state.cp_hqd_pq_control &=
+   mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
+   mqd->cp_hqd_pq_control &=
~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
 
-   mqd->queue_state.cp_hqd_pq_control |=
+   mqd->cp_hqd_pq_control |=
order_base_2(ring->ring_size / 8);
-   mqd->queue_state.cp_hqd_pq_control |=
+   mqd->cp_hqd_pq_control |=
(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 

[PATCH 17/22] drm/amdgpu: add parameter to allocate high priority contexts v6

2017-03-02 Thread Andres Rodriguez
Add a new context creation parameter to express a global context priority.

Contexts allocated with AMDGPU_CTX_PRIORITY_HIGH will have their work
scheduled at a higher priority than AMDGPU_CTX_PRIORITY_NORMAL
(default) contexts.
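
From userspace this is one extra field in the existing context-alloc ioctl.
A minimal consumer sketch, assuming a kernel and installed headers that
carry this patch; the render node path and the include path are assumptions,
and as noted below AMDGPU_CTX_PRIORITY_HIGH requires CAP_SYS_ADMIN:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/amdgpu_drm.h>   /* include path depends on header installation */

int main(void)
{
    union drm_amdgpu_ctx args;
    int fd = open("/dev/dri/renderD128", O_RDWR); /* assumed render node */

    if (fd < 0)
        return 1;

    memset(&args, 0, sizeof(args));
    args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
    args.in.priority = AMDGPU_CTX_PRIORITY_HIGH; /* needs CAP_SYS_ADMIN */

    if (ioctl(fd, DRM_IOCTL_AMDGPU_CTX, &args) == 0)
        printf("allocated ctx id %u\n", args.out.alloc.ctx_id);
    else
        perror("DRM_IOCTL_AMDGPU_CTX");

    close(fd);
    return 0;
}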

v2: Instead of using flags, repurpose __pad
v3: Swap enum values of _NORMAL _HIGH for backwards compatibility
v4: Validate usermode priority and store it
v5: Move priority validation into amdgpu_ctx_ioctl(), headline reword
v6: add UAPI note regarding priorities requiring CAP_SYS_ADMIN

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   | 38 +++
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  1 +
 include/uapi/drm/amdgpu_drm.h |  8 +-
 4 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a4c16a8..f085698 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -671,6 +671,7 @@ struct amdgpu_ctx {
spinlock_t  ring_lock;
struct dma_fence**fences;
struct amdgpu_ctx_ring  rings[AMDGPU_MAX_RINGS];
+   int priority;
bool preamble_presented;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 400c66b..8ef6370 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -25,13 +25,22 @@
 #include 
 #include "amdgpu.h"
 
-static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+  int priority,
+  struct amdgpu_ctx *ctx)
 {
unsigned i, j;
int r;
 
+   if (priority < 0 || priority >= AMD_SCHED_MAX_PRIORITY)
+   return -EINVAL;
+
+   if (priority == AMD_SCHED_PRIORITY_HIGH && !capable(CAP_SYS_ADMIN))
+   return -EACCES;
+
memset(ctx, 0, sizeof(*ctx));
ctx->adev = adev;
+   ctx->priority = priority;
kref_init(>refcount);
spin_lock_init(>ring_lock);
ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
@@ -51,7 +60,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct 
amdgpu_ctx *ctx)
struct amdgpu_ring *ring = adev->rings[i];
struct amd_sched_rq *rq;
 
-   rq = >sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+   rq = >sched.sched_rq[priority];
r = amd_sched_entity_init(>sched, >rings[i].entity,
  rq, amdgpu_sched_jobs);
if (r)
@@ -90,6 +99,7 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
+   uint32_t priority,
uint32_t *id)
 {
struct amdgpu_ctx_mgr *mgr = >ctx_mgr;
@@ -107,8 +117,9 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
kfree(ctx);
return r;
}
+
*id = (uint32_t)r;
-   r = amdgpu_ctx_init(adev, ctx);
+   r = amdgpu_ctx_init(adev, priority, ctx);
if (r) {
idr_remove(>ctx_handles, *id);
*id = 0;
@@ -182,11 +193,24 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
return 0;
 }
 
+static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority)
+{
+   switch (amdgpu_priority) {
+   case AMDGPU_CTX_PRIORITY_HIGH:
+   return AMD_SCHED_PRIORITY_HIGH;
+   case AMDGPU_CTX_PRIORITY_NORMAL:
+   return AMD_SCHED_PRIORITY_NORMAL;
+   default:
+   WARN(1, "Invalid context priority %d\n", amdgpu_priority);
+   return AMD_SCHED_PRIORITY_NORMAL;
+   }
+}
+
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 struct drm_file *filp)
 {
int r;
-   uint32_t id;
+   uint32_t id, priority;
 
union drm_amdgpu_ctx *args = data;
struct amdgpu_device *adev = dev->dev_private;
@@ -194,10 +218,14 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 
r = 0;
id = args->in.ctx_id;
+   priority = amdgpu_to_sched_priority(args->in.priority);
+
+   if (priority >= AMD_SCHED_MAX_PRIORITY)
+   return -EINVAL;
 
switch (args->in.op) {
case AMDGPU_CTX_OP_ALLOC_CTX:
-   r = amdgpu_ctx_alloc(adev, fpriv, );
+   r = amdgpu_ctx_alloc(adev, fpriv, priority, );
args->out.alloc.ctx_id = id;
break;
case AMDGPU_CTX_OP_FREE_CTX:
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h 
b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index d8dc681..2e458de 100644
--- 

[PATCH 14/22] drm/amdgpu: new queue policy, take first 2 queues of each pipe

2017-03-02 Thread Andres Rodriguez
Instead of taking the first pipe and giving the rest to kfd, take the
first 2 queues of each pipe.

Effectively, amdgpu and amdkfd own the same number of queues. But
because the queues are spread over multiple pipes the hardware will be
able to better handle concurrent compute workloads.

amdgpu goes from 1 pipe to 4 pipes, i.e. from 1 compute thread to 4
amdkfd goes from 3 pipes to 4 pipes, i.e. from 3 compute threads to 4
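
Both policies hand amdgpu the same number of queues; the difference is
purely in how they are spread across pipes. A toy check of the two
predicates (queue counts are the CI/VI ones, for illustration only):

#include <stdio.h>

#define NUM_MEC            1
#define NUM_PIPE_PER_MEC   4
#define NUM_QUEUE_PER_PIPE 8

static int owned_queues(int new_policy)
{
    int mec, pipe, queue, n = 0;

    for (mec = 0; mec < NUM_MEC; ++mec)
        for (pipe = 0; pipe < NUM_PIPE_PER_MEC; ++pipe)
            for (queue = 0; queue < NUM_QUEUE_PER_PIPE; ++queue) {
                if (new_policy)
                    n += (mec == 0 && queue < 2);  /* first 2 queues of each pipe */
                else
                    n += (mec == 0 && pipe == 0);  /* the whole first pipe */
            }
    return n;
}

int main(void)
{
    printf("old policy: amdgpu owns %d queues, all on 1 pipe\n", owned_queues(0));
    printf("new policy: amdgpu owns %d queues spread over 4 pipes\n", owned_queues(1));
    return 0;
}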

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 3ca5519..b0b0c89 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2818,7 +2818,7 @@ static void gfx_v7_0_compute_queue_acquire(struct 
amdgpu_device *adev)
break;
 
/* policy: amdgpu owns all queues in the first pipe */
-   if (mec == 0 && pipe == 0)
+   if (mec == 0 && queue < 2)
set_bit(i, adev->gfx.mec.queue_bitmap);
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index e86..5db5bac 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1436,7 +1436,7 @@ static void gfx_v8_0_compute_queue_acquire(struct 
amdgpu_device *adev)
break;
 
/* policy: amdgpu owns all queues in the first pipe */
-   if (mec == 0 && pipe == 0)
+   if (mec == 0 && queue < 2)
set_bit(i, adev->gfx.mec.queue_bitmap);
}
 
-- 
2.9.3



[PATCH 06/22] drm/amdgpu: rename rdev to adev

2017-03-02 Thread Andres Rodriguez
Rename straggler instances of r(adeon)dev to a(mdgpu)dev

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 70 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 14 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  |  2 +-
 4 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index dba8a5b..3200ff9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -60,9 +60,9 @@ int amdgpu_amdkfd_init(void)
return ret;
 }
 
-bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev)
 {
-   switch (rdev->asic_type) {
+   switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
@@ -86,16 +86,16 @@ void amdgpu_amdkfd_fini(void)
}
 }
 
-void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
if (kgd2kfd)
-   rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
-   rdev->pdev, kfd2kgd);
+   adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
+   adev->pdev, kfd2kgd);
 }
 
-void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
-   if (rdev->kfd) {
+   if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = 0xFF00,
 
@@ -103,42 +103,42 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev)
.compute_pipe_count = 4 - 1,
};
 
-   amdgpu_doorbell_get_kfd_info(rdev,
+   amdgpu_doorbell_get_kfd_info(adev,
_resources.doorbell_physical_address,
_resources.doorbell_aperture_size,
_resources.doorbell_start_offset);
 
-   kgd2kfd->device_init(rdev->kfd, _resources);
+   kgd2kfd->device_init(adev->kfd, _resources);
}
 }
 
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
 {
-   if (rdev->kfd) {
-   kgd2kfd->device_exit(rdev->kfd);
-   rdev->kfd = NULL;
+   if (adev->kfd) {
+   kgd2kfd->device_exit(adev->kfd);
+   adev->kfd = NULL;
}
 }
 
-void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
const void *ih_ring_entry)
 {
-   if (rdev->kfd)
-   kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
+   if (adev->kfd)
+   kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
 }
 
-void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
 {
-   if (rdev->kfd)
-   kgd2kfd->suspend(rdev->kfd);
+   if (adev->kfd)
+   kgd2kfd->suspend(adev->kfd);
 }
 
-int amdgpu_amdkfd_resume(struct amdgpu_device *rdev)
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
 {
int r = 0;
 
-   if (rdev->kfd)
-   r = kgd2kfd->resume(rdev->kfd);
+   if (adev->kfd)
+   r = kgd2kfd->resume(adev->kfd);
 
return r;
 }
@@ -147,7 +147,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr)
 {
-   struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+   struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
int r;
 
@@ -159,10 +159,10 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
if ((*mem) == NULL)
return -ENOMEM;
 
-   r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
+   r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
 AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 
&(*mem)->bo);
if (r) {
-   dev_err(rdev->dev,
+   dev_err(adev->dev,
"failed to allocate BO for amdkfd (%d)\n", r);
return r;
}
@@ -170,21 +170,21 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
/* map the buffer */
r = amdgpu_bo_reserve((*mem)->bo, true);
if (r) {
-   dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
+   dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
goto allocate_mem_reserve_bo_failed;

[PATCH] Add support for high priority scheduling in amdgpu v2

2017-03-02 Thread Andres Rodriguez
Fixed incorrectly using AMDGPU_CTX_PRIORITY instead of AMD_SCHED_PRIORITY.



[PATCH 09/22] drm/amdgpu: allow split of queues with kfd at queue granularity

2017-03-02 Thread Andres Rodriguez
Previously the queue/pipe split with kfd operated with pipe
granularity. This patch allows amdgpu to take ownership of an arbitrary
set of queues.

It also consolidates the last few magic numbers in the compute
initialization process into mec_init.

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |  7 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   | 83 ++---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   | 79 ++-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h |  1 +
 4 files changed, 133 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 15e048c..f9df217 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -46,6 +46,8 @@
 #include 
 #include 
 
+#include 
+
 #include "amd_shared.h"
 #include "amdgpu_mode.h"
 #include "amdgpu_ih.h"
@@ -773,12 +775,17 @@ struct amdgpu_rlc {
u32 *register_restore;
 };
 
+#define AMDGPU_MAX_QUEUES KGD_MAX_QUEUES
+
 struct amdgpu_mec {
struct amdgpu_bo*hpd_eop_obj;
u64 hpd_eop_gpu_addr;
u32 num_mec;
u32 num_pipe_per_mec;
u32 num_queue_per_pipe;
+
+   /* These are the resources for which amdgpu takes ownership */
+   DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_QUEUES);
 };
 
 struct amdgpu_kiq {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 2f1faa4..fe46765 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -49,7 +49,6 @@
 #include "oss/oss_2_0_sh_mask.h"
 
 #define GFX7_NUM_GFX_RINGS 1
-#define GFX7_NUM_COMPUTE_RINGS 8
 #define GFX7_MEC_HPD_SIZE  2048
 
 
@@ -2802,18 +2801,46 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device 
*adev)
}
 }
 
+static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev)
+{
+   int i, queue, pipe, mec;
+
+   /* policy for amdgpu compute queue ownership */
+   for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
+   queue = i % adev->gfx.mec.num_queue_per_pipe;
+   pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+   % adev->gfx.mec.num_pipe_per_mec;
+   mec = (i / adev->gfx.mec.num_queue_per_pipe)
+   / adev->gfx.mec.num_pipe_per_mec;
+
+   /* we've run out of HW */
+   if (mec >= adev->gfx.mec.num_mec)
+   break;
+
+   /* policy: amdgpu owns all queues in the first pipe */
+   if (mec == 0 && pipe == 0)
+   set_bit(i, adev->gfx.mec.queue_bitmap);
+   }
+
+   /* update the number of active compute rings */
+   adev->gfx.num_compute_rings =
+   bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_QUEUES);
+
+   /* If you hit this case and edited the policy, you probably just
+* need to increase AMDGPU_MAX_COMPUTE_RINGS */
+   WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS);
+   if (adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)
+   adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
int r;
u32 *hpd;
size_t mec_hpd_size;
 
-   /*
-* KV:2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
-* CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
-* Nonetheless, we assign only 1 pipe because all other pipes will
-* be handled by KFD
-*/
+   bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_QUEUES);
+
switch (adev->asic_type) {
case CHIP_KAVERI:
adev->gfx.mec.num_mec = 2;
@@ -2829,6 +2856,10 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
 
+   /* take ownership of the relevant compute queues */
+   gfx_v7_0_compute_queue_acquire(adev);
+
+   /* allocate space for ALL pipes (even the ones we don't own) */
mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
* GFX7_MEC_HPD_SIZE * 2;
if (adev->gfx.mec.hpd_eop_obj == NULL) {
@@ -4504,7 +4535,7 @@ static int gfx_v7_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
-   adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
+   adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
	adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
gfx_v7_0_set_ring_funcs(adev);
@@ -4700,7 +4731,7 @@ static int gfx_v7_0_sw_init(void *handle)
 {
struct amdgpu_ring *ring;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-   int i, r;
+   

[PATCH 01/22] drm/amdgpu: refactor MQD/HQD initialization

2017-03-02 Thread Andres Rodriguez
The MQD programming sequence currently exists in 3 different places.
Refactor it to absorb all the duplicates.

The success path remains mostly identical except for a slightly
different order in the non-kiq case. This shouldn't matter if the HQD
is disabled.

The error handling paths have been updated to deal with the new code
structure.
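
One detail worth spelling out from the hunks below: CP_HPD_EOP_CONTROL encodes
the EOP buffer size as 2^(EOP_SIZE+1) dwords, and the code derives EOP_SIZE as
order_base_2(GFX7_MEC_HPD_SIZE / 8). A minimal sketch checking that arithmetic
for the 2048-byte HPD size (the helper assumes a power-of-two input and is not
the kernel's order_base_2):

#include <assert.h>

/* Power-of-two log2: stand-in for order_base_2() on power-of-two inputs. */
static unsigned int log2_pow2(unsigned int n)
{
	unsigned int order = 0;

	while (n > 1) {
		n >>= 1;
		order++;
	}
	return order;
}

int main(void)
{
	unsigned int hpd_bytes = 2048;                    /* GFX7_MEC_HPD_SIZE */
	unsigned int eop_size = log2_pow2(hpd_bytes / 8); /* 8 */
	unsigned int dwords = 1u << (eop_size + 1);       /* 2^(8+1) = 512 dwords */

	assert(dwords * 4 == hpd_bytes);                  /* 512 dwords == 2048 bytes */
	return 0;
}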

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 447 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 417 +++
 2 files changed, 387 insertions(+), 477 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 1f93545..8e1e601 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -49,6 +49,8 @@
 
 #define GFX7_NUM_GFX_RINGS 1
 #define GFX7_NUM_COMPUTE_RINGS 8
+#define GFX7_MEC_HPD_SIZE  2048
+
 
 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -2799,8 +2801,6 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
}
 }
 
-#define MEC_HPD_SIZE 2048
-
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
int r;
@@ -2818,7 +2818,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 
if (adev->gfx.mec.hpd_eop_obj == NULL) {
r = amdgpu_bo_create(adev,
-adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
+adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2,
 PAGE_SIZE, true,
 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 &adev->gfx.mec.hpd_eop_obj);
@@ -2848,7 +2848,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
}
 
/* clear memory.  Not sure if this is required or not */
-   memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
+   memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2);
 
amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
@@ -2923,247 +2923,282 @@ struct bonaire_mqd
u32 interrupt_queue[64];
 };
 
-/**
- * gfx_v7_0_cp_compute_resume - setup the compute queue registers
- *
- * @adev: amdgpu_device pointer
- *
- * Program the compute queues and test them to make sure they
- * are working.
- * Returns 0 for success, error for failure.
- */
-static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int me, int pipe)
 {
-   int r, i, j;
-   u32 tmp;
-   bool use_doorbell = true;
-   u64 hqd_gpu_addr;
-   u64 mqd_gpu_addr;
u64 eop_gpu_addr;
-   u64 wb_gpu_addr;
-   u32 *buf;
-   struct bonaire_mqd *mqd;
-   struct amdgpu_ring *ring;
-
-   /* fix up chicken bits */
-   tmp = RREG32(mmCP_CPF_DEBUG);
-   tmp |= (1 << 23);
-   WREG32(mmCP_CPF_DEBUG, tmp);
+   u32 tmp;
+   size_t eop_offset = ((me - 1) * adev->gfx.mec.num_pipe + pipe) * GFX7_MEC_HPD_SIZE * 2;
 
-   /* init the pipes */
	mutex_lock(&adev->srbm_mutex);
-   for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
-   int me = (i < 4) ? 1 : 2;
-   int pipe = (i < 4) ? i : (i - 4);
+   eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
 
-   eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
+   cik_srbm_select(adev, me, pipe, 0, 0);
 
-   cik_srbm_select(adev, me, pipe, 0, 0);
+   /* write the EOP addr */
+   WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
+   WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
 
-   /* write the EOP addr */
-   WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
-   WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
+   /* set the VMID assigned */
+   WREG32(mmCP_HPD_EOP_VMID, 0);
 
-   /* set the VMID assigned */
-   WREG32(mmCP_HPD_EOP_VMID, 0);
+   /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+   tmp = RREG32(mmCP_HPD_EOP_CONTROL);
+   tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
+   tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
+   WREG32(mmCP_HPD_EOP_CONTROL, tmp);
 
-   /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-   tmp = RREG32(mmCP_HPD_EOP_CONTROL);
-   tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
-   tmp |= order_base_2(MEC_HPD_SIZE / 8);
-   WREG32(mmCP_HPD_EOP_CONTROL, tmp);
-   }
cik_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
+}
 
-   /* init the queues.  Just two 

[PATCH xf86-video-amdgpu 1/3] Call drmmode_crtc_scanout_create in drmmode_crtc_shadow_allocate as well

2017-03-02 Thread Michel Dänzer
From: Michel Dänzer 

Calling drmmode_crtc_scanout_allocate in drmmode_crtc_shadow_allocate
resulted in drmmode_crtc_scanout_create called from
drmmode_crtc_shadow_create passing an uninitialized pitch value to
drmmode_create_bo_pixmap.
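
A minimal sketch of that hazard, outside the Xorg code and with made-up names:
when the allocate path returns early without writing its pitch out-parameter,
the caller hands whatever value was already in the variable to the next step.

#include <stdio.h>

/* Hypothetical stand-in for the allocate path: on the early-return branch
 * the out-parameter is never written. */
static int alloc_scanout(int already_allocated, int *pitch)
{
	if (already_allocated)
		return 0;	/* *pitch left untouched */

	*pitch = 4096;
	return 0;
}

int main(void)
{
	int pitch = -1;		/* stands in for uninitialized stack contents */

	alloc_scanout(1, &pitch);
	/* A subsequent "create pixmap" step would now consume a bogus pitch. */
	printf("pitch passed on: %d\n", pitch);
	return 0;
}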

Fixes issues such as failure to allocate the scanout pixmap or visual
corruption and GPUVM faults when attempting to use rotation with Xorg
<1.19.

Bugzilla: https://bugs.freedesktop.org/99916
Fixes: 5f7123808833 ("Pass pitch from drmmode_crtc_scanout_allocate to drmmode_create_bo_pixmap")
(Ported from radeon commit 987a34adb319923ad36e2b47a26837248f187c3e)

Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.c | 27 +--
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 4c35657b..c7c9f179 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -1047,14 +1047,20 @@ static void drmmode_show_cursor(xf86CrtcPtr crtc)
 info->cursor_w, info->cursor_h);
 }
 
-static void *drmmode_crtc_shadow_allocate(xf86CrtcPtr crtc, int width,
- int height)
+/* Xorg expects a non-NULL return value from drmmode_crtc_shadow_allocate, and
+ * passes that back to drmmode_crtc_shadow_create as its data argument; it
+ * doesn't use it for anything else.
+ */
+static void *
+drmmode_crtc_shadow_allocate(xf86CrtcPtr crtc, int width, int height)
 {
drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
-   int pitch;
 
-   return drmmode_crtc_scanout_allocate(crtc, &drmmode_crtc->rotate,
-width, height, &pitch);
+   if (!drmmode_crtc_scanout_create(crtc, &drmmode_crtc->rotate, width,
+height))
+   return NULL;
+
+   return (void*)~0UL;
 }
 
 static PixmapPtr
@@ -1062,11 +1068,12 @@ drmmode_crtc_shadow_create(xf86CrtcPtr crtc, void *data, int width, int height)
 {
drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
 
-   /* Xorg passes in the return value of drmmode_crtc_shadow_allocate
-* for data, but that's redundant for drmmode_crtc_scanout_create.
-*/
-   return drmmode_crtc_scanout_create(crtc, &drmmode_crtc->rotate, width,
-  height);
+   if (!data) {
+   drmmode_crtc_scanout_create(crtc, &drmmode_crtc->rotate, width,
+   height);
+   }
+
+   return drmmode_crtc->rotate.pixmap;
 }
 
 static void
-- 
2.11.0



[PATCH xf86-video-amdgpu 2/3] Fold drmmode_crtc_scanout_allocate into drmmode_crtc_scanout_create

2017-03-02 Thread Michel Dänzer
From: Michel Dänzer 

Not used anywhere else anymore.

(Ported from radeon commit ae921a3150f69c38b5b3c88a9e37d54fdf0d5093)

Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.c | 71 ---
 1 file changed, 22 insertions(+), 49 deletions(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index c7c9f179..b3c850c5 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -494,71 +494,39 @@ drmmode_scanout_free(ScrnInfoPtr scrn)
drmmode_crtc_scanout_free(xf86_config->crtc[c]->driver_private);
 }
 
-static void *
-drmmode_crtc_scanout_allocate(xf86CrtcPtr crtc,
- struct drmmode_scanout *scanout,
- int width, int height, int *pitch)
+static PixmapPtr
+drmmode_crtc_scanout_create(xf86CrtcPtr crtc, struct drmmode_scanout *scanout,
+   int width, int height)
 {
ScrnInfoPtr pScrn = crtc->scrn;
AMDGPUEntPtr pAMDGPUEnt = AMDGPUEntPriv(pScrn);
drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
drmmode_ptr drmmode = drmmode_crtc->drmmode;
-   int ret;
union gbm_bo_handle bo_handle;
+   int pitch;
 
-   if (scanout->bo) {
+   if (scanout->pixmap) {
if (scanout->width == width && scanout->height == height)
-   return scanout->bo;
+   return scanout->pixmap;
 
drmmode_crtc_scanout_destroy(drmmode, scanout);
}
 
scanout->bo = amdgpu_alloc_pixmap_bo(pScrn, width, height,
 pScrn->depth, 0,
-pScrn->bitsPerPixel, pitch);
+pScrn->bitsPerPixel, &pitch);
if (!scanout->bo) {
xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
-  "Failed to allocate rotation buffer memory\n");
-   return NULL;
+  "Failed to allocate scanout buffer memory\n");
+   goto error;
}
 
bo_handle = gbm_bo_get_handle(scanout->bo->bo.gbm);
-   ret = drmModeAddFB(pAMDGPUEnt->fd, width, height, pScrn->depth,
-  pScrn->bitsPerPixel, *pitch,
-  bo_handle.u32, &scanout->fb_id);
-   if (ret) {
-   ErrorF("failed to add rotate fb\n");
-   amdgpu_bo_unref(&scanout->bo);
-   scanout->bo = NULL;
-   return NULL;
-   }
-
-   scanout->width = width;
-   scanout->height = height;
-   return scanout->bo;
-}
-
-static PixmapPtr
-drmmode_crtc_scanout_create(xf86CrtcPtr crtc,
-   struct drmmode_scanout *scanout,
-   int width, int height)
-{
-   ScrnInfoPtr pScrn = crtc->scrn;
-   drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
-   drmmode_ptr drmmode = drmmode_crtc->drmmode;
-   int pitch;
-
-   if (scanout->pixmap) {
-   if (scanout->width == width && scanout->height == height)
-   return scanout->pixmap;
-
-   drmmode_crtc_scanout_destroy(drmmode, scanout);
-   }
-
-   if (!scanout->bo) {
-   if (!drmmode_crtc_scanout_allocate(crtc, scanout, width, height,
-  &pitch))
-   return NULL;
+   if (drmModeAddFB(pAMDGPUEnt->fd, width, height, pScrn->depth,
+pScrn->bitsPerPixel, pitch,
+bo_handle.u32, &scanout->fb_id) != 0) {
+   ErrorF("failed to add scanout fb\n");
+   goto error;
}
 
scanout->pixmap = drmmode_create_bo_pixmap(pScrn,
@@ -566,12 +534,17 @@ drmmode_crtc_scanout_create(xf86CrtcPtr crtc,
 pScrn->depth,
 pScrn->bitsPerPixel,
 pitch, scanout->bo);
-   if (scanout->pixmap == NULL) {
+   if (scanout->pixmap) {
+   scanout->width = width;
+   scanout->height = height;
+   } else {
xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
-  "Couldn't allocate shadow pixmap for rotated CRTC\n");
+  "Couldn't allocate scanout pixmap for CRTC\n");
+error:
+   drmmode_crtc_scanout_destroy(drmmode, scanout);
}
-   return scanout->pixmap;
 
+   return scanout->pixmap;
 }
 
 static void
-- 
2.11.0



[PATCH xf86-video-amdgpu 3/3] Handle rotation in the driver also with Xorg 1.12-1.18

2017-03-02 Thread Michel Dänzer
From: Michel Dänzer 

We cannot use the HW cursor in that case, but in turn we get more
efficient and less teary updates of rotated outputs.

(Ported from radeon commit f2bc882f1c1082bed9f496cfab6c8f07a76bc122)

Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index b3c850c5..67dd61b6 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -589,22 +589,20 @@ drmmode_can_use_hw_cursor(xf86CrtcPtr crtc)
 
 #if XF86_CRTC_VERSION >= 4
 
+#if XF86_CRTC_VERSION < 7
+#define XF86DriverTransformOutput TRUE
+#define XF86DriverTransformNone FALSE
+#endif
+
 static Bool
 drmmode_handle_transform(xf86CrtcPtr crtc)
 {
Bool ret;
 
-#if XF86_CRTC_VERSION >= 7
if (crtc->transformPresent || crtc->rotation != RR_Rotate_0)
crtc->driverIsPerformingTransform = XF86DriverTransformOutput;
else
crtc->driverIsPerformingTransform = XF86DriverTransformNone;
-#else
-   AMDGPUInfoPtr info = AMDGPUPTR(crtc->scrn);
-
-   crtc->driverIsPerformingTransform = crtc->transformPresent ||
-   (info->tear_free && crtc->rotation != RR_Rotate_0);
-#endif
 
ret = xf86CrtcRotate(crtc);
 
-- 
2.11.0
