[PATCH 14/21] drm/amdgpu: new queue policy, take first 2 queues of each pipe

2017-03-06 Thread Andres Rodriguez
Instead of taking the first pipe and giving the rest to kfd, take the
first 2 queues of each pipe.

Effectively, amdgpu and amdkfd each own the same number of queues as
before. But because the queues are now spread over multiple pipes, the
hardware will be able to better handle concurrent compute workloads.

amdgpu goes from 1 pipe to 4 pipes, i.e. from 1 compute thread to 4
amdkfd goes from 3 pipes to 4 pipes, i.e. from 3 compute threads to 4

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 3ca5519..b0b0c89 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2811,21 +2811,21 @@ static void gfx_v7_0_compute_queue_acquire(struct 
amdgpu_device *adev)
pipe = (i / adev->gfx.mec.num_queue_per_pipe)
% adev->gfx.mec.num_pipe_per_mec;
mec = (i / adev->gfx.mec.num_queue_per_pipe)
/ adev->gfx.mec.num_pipe_per_mec;
 
/* we've run out of HW */
if (mec > adev->gfx.mec.num_mec)
break;
 
/* policy: amdgpu owns all queues in the first pipe */
-   if (mec == 0 && pipe == 0)
+   if (mec == 0 && queue < 2)
set_bit(i, adev->gfx.mec.queue_bitmap);
}
 
/* update the number of active compute rings */
adev->gfx.num_compute_rings =
bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_QUEUES);
 
/* If you hit this case and edited the policy, you probably just
 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index e86..5db5bac 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1429,21 +1429,21 @@ static void gfx_v8_0_compute_queue_acquire(struct 
amdgpu_device *adev)
pipe = (i / adev->gfx.mec.num_queue_per_pipe)
% adev->gfx.mec.num_pipe_per_mec;
mec = (i / adev->gfx.mec.num_queue_per_pipe)
/ adev->gfx.mec.num_pipe_per_mec;
 
/* we've run out of HW */
if (mec > adev->gfx.mec.num_mec)
break;
 
/* policy: amdgpu owns all queues in the first pipe */
-   if (mec == 0 && pipe == 0)
+   if (mec == 0 && queue < 2)
set_bit(i, adev->gfx.mec.queue_bitmap);
}
 
/* update the number of active compute rings */
adev->gfx.num_compute_rings =
bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_QUEUES);
 
/* If you hit this case and edited the policy, you probably just
 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 09/21] drm/amdgpu: allow split of queues with kfd at queue granularity

2017-03-06 Thread Andres Rodriguez
Previously the queue/pipe split with kfd operated with pipe
granularity. This patch allows amdgpu to take ownership of an arbitrary
set of queues.

It also consolidates the last few magic numbers in the compute
initialization process into mec_init.

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |  7 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   | 83 ++---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   | 79 ++-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h |  1 +
 4 files changed, 133 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 15e048c..f9df217 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -39,20 +39,22 @@
 #include 
 #include 
 #include 
 #include 
 #include 
 
 #include 
 #include 
 #include 
 
+#include 
+
 #include "amd_shared.h"
 #include "amdgpu_mode.h"
 #include "amdgpu_ih.h"
 #include "amdgpu_irq.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_ttm.h"
 #include "amdgpu_gds.h"
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
 #include "amdgpu_vm.h"
@@ -766,26 +768,31 @@ struct amdgpu_rlc {
u32 reg_list_format_start;
u32 reg_list_format_separate_start;
u32 starting_offsets_start;
u32 reg_list_format_size_bytes;
u32 reg_list_size_bytes;
 
u32 *register_list_format;
u32 *register_restore;
 };
 
+#define AMDGPU_MAX_QUEUES KGD_MAX_QUEUES
+
 struct amdgpu_mec {
struct amdgpu_bo*hpd_eop_obj;
u64 hpd_eop_gpu_addr;
u32 num_mec;
u32 num_pipe_per_mec;
u32 num_queue_per_pipe;
+
+   /* These are the resources for which amdgpu takes ownership */
+   DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_QUEUES);
 };
 
 struct amdgpu_kiq {
u64 eop_gpu_addr;
struct amdgpu_bo*eop_obj;
struct amdgpu_ring  ring;
struct amdgpu_irq_src   irq;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 2f1faa4..fe46765 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -42,21 +42,20 @@
 #include "gca/gfx_7_2_enum.h"
 #include "gca/gfx_7_2_sh_mask.h"
 
 #include "gmc/gmc_7_0_d.h"
 #include "gmc/gmc_7_0_sh_mask.h"
 
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 
 #define GFX7_NUM_GFX_RINGS 1
-#define GFX7_NUM_COMPUTE_RINGS 8
 #define GFX7_MEC_HPD_SIZE  2048
 
 
 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
 
 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
 MODULE_FIRMWARE("radeon/bonaire_me.bin");
 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
@@ -2795,47 +2794,79 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device 
*adev)
if (unlikely(r != 0))
dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", 
r);
amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
amdgpu_bo_unref(>gfx.mec.hpd_eop_obj);
adev->gfx.mec.hpd_eop_obj = NULL;
}
 }
 
+static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev)
+{
+   int i, queue, pipe, mec;
+
+   /* policy for amdgpu compute queue ownership */
+   for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
+   queue = i % adev->gfx.mec.num_queue_per_pipe;
+   pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+   % adev->gfx.mec.num_pipe_per_mec;
+   mec = (i / adev->gfx.mec.num_queue_per_pipe)
+   / adev->gfx.mec.num_pipe_per_mec;
+
+   /* we've run out of HW */
+   if (mec > adev->gfx.mec.num_mec)
+   break;
+
+   /* policy: amdgpu owns all queues in the first pipe */
+   if (mec == 0 && pipe == 0)
+   set_bit(i, adev->gfx.mec.queue_bitmap);
+   }
+
+   /* update the number of active compute rings */
+   adev->gfx.num_compute_rings =
+   bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_QUEUES);
+
+   /* If you hit this case and edited the policy, you probably just
+* need to increase AMDGPU_MAX_COMPUTE_RINGS */
+   WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS);
+   if (adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)
+   adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
int r;
u32 *hpd;
size_t mec_hpd_size;
 

[PATCH 04/21] drm/amdgpu: remove duplicate definition of cik_mqd

2017-03-06 Thread Andres Rodriguez
gfx_v7_0.c contains a slightly different version of cik_mqd called
bonaire_mqd. This can introduce subtle bugs if fixes are not applied in
both places.

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 135 ++
 1 file changed, 54 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 8e1e601..c606e0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -20,20 +20,21 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
 #include 
 #include "drmP.h"
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "amdgpu_gfx.h"
 #include "cikd.h"
 #include "cik.h"
+#include "cik_structs.h"
 #include "atom.h"
 #include "amdgpu_ucode.h"
 #include "clearstate_ci.h"
 
 #include "dce/dce_8_0_d.h"
 #include "dce/dce_8_0_sh_mask.h"
 
 #include "bif/bif_4_1_d.h"
 #include "bif/bif_4_1_sh_mask.h"
 
@@ -2888,48 +2889,20 @@ struct hqd_registers
u32 cp_hqd_msg_type;
u32 cp_hqd_atomic0_preop_lo;
u32 cp_hqd_atomic0_preop_hi;
u32 cp_hqd_atomic1_preop_lo;
u32 cp_hqd_atomic1_preop_hi;
u32 cp_hqd_hq_scheduler0;
u32 cp_hqd_hq_scheduler1;
u32 cp_mqd_control;
 };
 
-struct bonaire_mqd
-{
-   u32 header;
-   u32 dispatch_initiator;
-   u32 dimensions[3];
-   u32 start_idx[3];
-   u32 num_threads[3];
-   u32 pipeline_stat_enable;
-   u32 perf_counter_enable;
-   u32 pgm[2];
-   u32 tba[2];
-   u32 tma[2];
-   u32 pgm_rsrc[2];
-   u32 vmid;
-   u32 resource_limits;
-   u32 static_thread_mgmt01[2];
-   u32 tmp_ring_size;
-   u32 static_thread_mgmt23[2];
-   u32 restart[3];
-   u32 thread_trace_enable;
-   u32 reserved1;
-   u32 user_data[16];
-   u32 vgtcs_invoke_count[2];
-   struct hqd_registers queue_state;
-   u32 dequeue_cntr;
-   u32 interrupt_queue[64];
-};
-
 static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int me, int 
pipe)
 {
u64 eop_gpu_addr;
u32 tmp;
size_t eop_offset = me * pipe * GFX7_MEC_HPD_SIZE * 2;
 
mutex_lock(>srbm_mutex);
eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
 
cik_srbm_select(adev, me, pipe, 0, 0);
@@ -2969,162 +2942,162 @@ static int gfx_v7_0_mqd_deactivate(struct 
amdgpu_device *adev)
 
WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
WREG32(mmCP_HQD_PQ_RPTR, 0);
WREG32(mmCP_HQD_PQ_WPTR, 0);
}
 
return 0;
 }
 
 static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
-struct bonaire_mqd *mqd,
+struct cik_mqd *mqd,
 uint64_t mqd_gpu_addr,
 struct amdgpu_ring *ring)
 {
u64 hqd_gpu_addr;
u64 wb_gpu_addr;
 
/* init the mqd struct */
-   memset(mqd, 0, sizeof(struct bonaire_mqd));
+   memset(mqd, 0, sizeof(struct cik_mqd));
 
mqd->header = 0xC0310800;
-   mqd->static_thread_mgmt01[0] = 0x;
-   mqd->static_thread_mgmt01[1] = 0x;
-   mqd->static_thread_mgmt23[0] = 0x;
-   mqd->static_thread_mgmt23[1] = 0x;
+   mqd->compute_static_thread_mgmt_se0 = 0x;
+   mqd->compute_static_thread_mgmt_se1 = 0x;
+   mqd->compute_static_thread_mgmt_se2 = 0x;
+   mqd->compute_static_thread_mgmt_se3 = 0x;
 
/* enable doorbell? */
-   mqd->queue_state.cp_hqd_pq_doorbell_control =
+   mqd->cp_hqd_pq_doorbell_control =
RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
if (ring->use_doorbell)
-   mqd->queue_state.cp_hqd_pq_doorbell_control |= 
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+   mqd->cp_hqd_pq_doorbell_control |= 
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
else
-   mqd->queue_state.cp_hqd_pq_doorbell_control &= 
~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+   mqd->cp_hqd_pq_doorbell_control &= 
~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
 
/* set the pointer to the MQD */
-   mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffc;
-   mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
+   mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffc;
+   mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
 
/* set MQD vmid to 0 */
-   mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
-   mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
+   mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
+   mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
 
/* set the pointer to the HQD, this is similar 

[PATCH 03/21] drm/amdgpu: detect timeout error when deactivating hqd

2017-03-06 Thread Andres Rodriguez
Handle HQD deactivation timeouts instead of ignoring them.

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 22 --
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 09a..af4b505 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4884,20 +4884,21 @@ static int gfx_v8_0_mqd_commit(struct amdgpu_device 
*adev, struct vi_mqd *mqd)
/* activate the queue */
WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
 
return 0;
 }
 
 static int gfx_v8_0_kiq_queue_init(struct amdgpu_ring *ring,
   struct vi_mqd *mqd,
   u64 mqd_gpu_addr)
 {
+   int r = 0;
struct amdgpu_device *adev = ring->adev;
struct amdgpu_kiq *kiq = >gfx.kiq;
uint64_t eop_gpu_addr;
bool is_kiq = false;
 
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
is_kiq = true;
 
if (is_kiq) {
eop_gpu_addr = kiq->eop_gpu_addr;
@@ -4905,34 +4906,45 @@ static int gfx_v8_0_kiq_queue_init(struct amdgpu_ring 
*ring,
} else
eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
ring->queue * GFX8_MEC_HPD_SIZE;
 
mutex_lock(>srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 
gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
 
if (is_kiq) {
-   gfx_v8_0_mqd_deactivate(adev);
+   r = gfx_v8_0_mqd_deactivate(adev);
+   if (r) {
+   dev_err(adev->dev, "failed to deactivate ring %s\n", 
ring->name);
+   goto out_unlock;
+   }
+
gfx_v8_0_enable_doorbell(adev, ring->use_doorbell);
gfx_v8_0_mqd_commit(adev, mqd);
}
 
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(>srbm_mutex);
 
if (is_kiq)
gfx_v8_0_kiq_enable(ring);
else
gfx_v8_0_map_queue_enable(>ring, ring);
 
return 0;
+
+out_unlock:
+   vi_srbm_select(adev, 0, 0, 0, 0);
+   mutex_unlock(>srbm_mutex);
+
+   return r;
 }
 
 static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev)
 {
struct amdgpu_ring *ring = NULL;
int i;
 
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = >gfx.compute_ring[i];
amdgpu_bo_free_kernel(>mqd_obj, NULL, NULL);
@@ -5052,24 +5064,30 @@ static int gfx_v8_0_compute_queue_init(struct 
amdgpu_device *adev,
eop_gpu_addr >>= 8;
 
/* init the mqd struct */
memset(mqd, 0, sizeof(struct vi_mqd));
 
mutex_lock(>srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 
gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
 
-   gfx_v8_0_mqd_deactivate(adev);
+   r = gfx_v8_0_mqd_deactivate(adev);
+   if (r) {
+   dev_err(adev->dev, "failed to deactivate ring %s\n", 
ring->name);
+   goto out_unlock;
+   }
+
gfx_v8_0_enable_doorbell(adev, ring->use_doorbell);
gfx_v8_0_mqd_commit(adev, mqd);
 
+out_unlock:
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(>srbm_mutex);
 
amdgpu_bo_kunmap(ring->mqd_obj);
 out_unreserve:
amdgpu_bo_unreserve(ring->mqd_obj);
 out:
return r;
 }
 
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 06/21] drm/amdgpu: rename rdev to adev

2017-03-06 Thread Andres Rodriguez
Rename straggler instances of r(adeon)dev to a(mdgpu)dev

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 70 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 14 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  |  2 +-
 4 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index dba8a5b..3200ff9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -53,23 +53,23 @@ int amdgpu_amdkfd_init(void)
if (ret)
kgd2kfd = NULL;
 
 #else
ret = -ENOENT;
 #endif
 
return ret;
 }
 
-bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev)
 {
-   switch (rdev->asic_type) {
+   switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
break;
 #endif
case CHIP_CARRIZO:
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
default:
return false;
@@ -79,119 +79,119 @@ bool amdgpu_amdkfd_load_interface(struct amdgpu_device 
*rdev)
 }
 
 void amdgpu_amdkfd_fini(void)
 {
if (kgd2kfd) {
kgd2kfd->exit();
symbol_put(kgd2kfd_init);
}
 }
 
-void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
if (kgd2kfd)
-   rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
-   rdev->pdev, kfd2kgd);
+   adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
+   adev->pdev, kfd2kgd);
 }
 
-void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
-   if (rdev->kfd) {
+   if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = 0xFF00,
 
.first_compute_pipe = 1,
.compute_pipe_count = 4 - 1,
};
 
-   amdgpu_doorbell_get_kfd_info(rdev,
+   amdgpu_doorbell_get_kfd_info(adev,
_resources.doorbell_physical_address,
_resources.doorbell_aperture_size,
_resources.doorbell_start_offset);
 
-   kgd2kfd->device_init(rdev->kfd, _resources);
+   kgd2kfd->device_init(adev->kfd, _resources);
}
 }
 
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
 {
-   if (rdev->kfd) {
-   kgd2kfd->device_exit(rdev->kfd);
-   rdev->kfd = NULL;
+   if (adev->kfd) {
+   kgd2kfd->device_exit(adev->kfd);
+   adev->kfd = NULL;
}
 }
 
-void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
const void *ih_ring_entry)
 {
-   if (rdev->kfd)
-   kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
+   if (adev->kfd)
+   kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
 }
 
-void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
 {
-   if (rdev->kfd)
-   kgd2kfd->suspend(rdev->kfd);
+   if (adev->kfd)
+   kgd2kfd->suspend(adev->kfd);
 }
 
-int amdgpu_amdkfd_resume(struct amdgpu_device *rdev)
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
 {
int r = 0;
 
-   if (rdev->kfd)
-   r = kgd2kfd->resume(rdev->kfd);
+   if (adev->kfd)
+   r = kgd2kfd->resume(adev->kfd);
 
return r;
 }
 
 int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr)
 {
-   struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+   struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
int r;
 
BUG_ON(kgd == NULL);
BUG_ON(gpu_addr == NULL);
BUG_ON(cpu_ptr == NULL);
 
*mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
if ((*mem) == NULL)
return -ENOMEM;
 
-   r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
+   r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
 AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, 

[PATCH 02/21] drm/amdgpu: doorbell registers need only be set once v2

2017-03-06 Thread Andres Rodriguez
The CP_MEC_DOORBELL_RANGE_* and CP_PQ_STATUS.DOORBELL_ENABLE registers
are not HQD specific.

They only need to be set once if at least 1 pipe requested doorbell
support.

v2: move doorbell_enable to amdgpu_gfx instead of amdgpu_device

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 3 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c1b9135..b577ec1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -892,20 +892,23 @@ struct amdgpu_gfx {
/* gfx status */
uint32_tgfx_current_status;
/* ce ram size*/
unsignedce_ram_size;
struct amdgpu_cu_info   cu_info;
const struct amdgpu_gfx_funcs   *funcs;
 
/* reset mask */
uint32_tgrbm_soft_reset;
uint32_tsrbm_soft_reset;
+
+   /* doorbell */
+   booldoorbell_enabled;
 };
 
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  unsigned size, struct amdgpu_ib *ib);
 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
struct dma_fence *f);
 int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
   struct amdgpu_ib *ibs, struct amdgpu_job *job,
   struct dma_fence **f);
 int amdgpu_ib_pool_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 1c8589a..09a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4797,35 +4797,37 @@ static int gfx_v8_0_mqd_deactivate(struct amdgpu_device 
*adev)
WREG32(mmCP_HQD_PQ_WPTR, 0);
}
 
return 0;
 }
 
 static void gfx_v8_0_enable_doorbell(struct amdgpu_device *adev, bool enable)
 {
uint32_t tmp;
 
-   if (!enable)
+   if (!enable || adev->gfx.doorbell_enabled)
return;
 
if ((adev->asic_type == CHIP_CARRIZO) ||
(adev->asic_type == CHIP_FIJI) ||
(adev->asic_type == CHIP_STONEY) ||
(adev->asic_type == CHIP_POLARIS11) ||
(adev->asic_type == CHIP_POLARIS10)) {
WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 
<< 2);
}
 
tmp = RREG32(mmCP_PQ_STATUS);
tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
WREG32(mmCP_PQ_STATUS, tmp);
+
+   adev->gfx.doorbell_enabled = true;
 }
 
 static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd)
 {
uint32_t tmp;
 
/* disable wptr polling */
tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
@@ -5109,20 +5111,22 @@ static int gfx_v8_0_cp_compute_resume(struct 
amdgpu_device *adev)
ring->ready = false;
}
 
return 0;
 }
 
 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
 {
int r;
 
+   adev->gfx.doorbell_enabled = false;
+
if (!(adev->flags & AMD_IS_APU))
gfx_v8_0_enable_gui_idle_interrupt(adev, false);
 
if (!adev->pp_enabled) {
if (!adev->firmware.smu_load) {
/* legacy firmware loading */
r = gfx_v8_0_cp_gfx_load_microcode(adev);
if (r)
return r;
 
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 08/21] drm/radeon: take ownership of pipe initialization

2017-03-06 Thread Andres Rodriguez
Take ownership of pipe initialization away from KFD.

Note that hpd_eop_gpu_addr was already large enough to accommodate all
pipes.

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/radeon/cik.c| 27 ++-
 drivers/gpu/drm/radeon/radeon_kfd.c | 13 +
 2 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index f6ff41a..82b57ef 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -4588,37 +4588,38 @@ static int cik_cp_compute_resume(struct radeon_device 
*rdev)
return r;
 
/* fix up chicken bits */
tmp = RREG32(CP_CPF_DEBUG);
tmp |= (1 << 23);
WREG32(CP_CPF_DEBUG, tmp);
 
/* init the pipes */
mutex_lock(>srbm_mutex);
 
-   eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
+   for (i = 0; i < rdev->mec.num_pipe; ++i) {
+   cik_srbm_select(rdev, 0, i, 0, 0);
 
-   cik_srbm_select(rdev, 0, 0, 0, 0);
-
-   /* write the EOP addr */
-   WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
-   WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
+   eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 
2) ;
+   /* write the EOP addr */
+   WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
+   WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 
8);
 
-   /* set the VMID assigned */
-   WREG32(CP_HPD_EOP_VMID, 0);
+   /* set the VMID assigned */
+   WREG32(CP_HPD_EOP_VMID, 0);
 
-   /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-   tmp = RREG32(CP_HPD_EOP_CONTROL);
-   tmp &= ~EOP_SIZE_MASK;
-   tmp |= order_base_2(MEC_HPD_SIZE / 8);
-   WREG32(CP_HPD_EOP_CONTROL, tmp);
+   /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+   tmp = RREG32(CP_HPD_EOP_CONTROL);
+   tmp &= ~EOP_SIZE_MASK;
+   tmp |= order_base_2(MEC_HPD_SIZE / 8);
+   WREG32(CP_HPD_EOP_CONTROL, tmp);
 
+   }
mutex_unlock(>srbm_mutex);
 
/* init the queues.  Just two for now. */
for (i = 0; i < 2; i++) {
if (i == 0)
idx = CAYMAN_RING_TYPE_CP1_INDEX;
else
idx = CAYMAN_RING_TYPE_CP2_INDEX;
 
if (rdev->ring[idx].mqd_obj == NULL) {
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c 
b/drivers/gpu/drm/radeon/radeon_kfd.c
index 87a9ebb..a06e3b1 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -416,32 +416,21 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev 
*kgd, unsigned int pasid,
/* Mapping vmid to pasid also for IH block */
write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t),
pasid_mapping);
 
return 0;
 }
 
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
-   uint32_t mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
-   uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
-
-   lock_srbm(kgd, mec, pipe, 0, 0);
-   write_register(kgd, CP_HPD_EOP_BASE_ADDR,
-   lower_32_bits(hpd_gpu_addr >> 8));
-   write_register(kgd, CP_HPD_EOP_BASE_ADDR_HI,
-   upper_32_bits(hpd_gpu_addr >> 8));
-   write_register(kgd, CP_HPD_EOP_VMID, 0);
-   write_register(kgd, CP_HPD_EOP_CONTROL, hpd_size);
-   unlock_srbm(kgd);
-
+   /* nothing to do here */
return 0;
 }
 
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
uint32_t mec;
uint32_t pipe;
 
mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
pipe = (pipe_id % CIK_PIPE_PER_MEC);
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 21/21] drm/amdgpu: workaround tonga HW bug in HQD programming sequence

2017-03-06 Thread Andres Rodriguez
Tonga based asics may experience hangs when an HQD's EOP parameters
are modified.

Work around this HW issue by avoiding writes to these registers for
tonga asics.

Based on the following ROCm commit:
2a0fb8 - drm/amdgpu: Synchronize KFD HQD load protocol with CP scheduler

From the ROCm git repository:
https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver.git

CC: Jay Cornwall 
Suggested-by: Felix Kuehling 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index ab19de3..4e271c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4931,21 +4931,35 @@ int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 
struct vi_mqd *mqd)
 
/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
mqd_data = >cp_mqd_base_addr_lo;
 
/* disable wptr polling */
tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
 
/* program all HQD registers */
-   for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
+   for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; 
mqd_reg++)
+   WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+   /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+* This is safe since EOP RPTR==WPTR for any inactive HQD
+* on ASICs that do not support context-save.
+* EOP writes/reads can start anywhere in the ring.
+*/
+   if (adev->asic_type != CHIP_TONGA) {
+   WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
+   WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
+   WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
+   }
+
+   for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; 
mqd_reg++)
WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
/* activate the HQD */
for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; 
mqd_reg++)
WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
return 0;
 }
 
 static int gfx_v8_0_kiq_queue_init(struct amdgpu_ring *ring,
-- 
2.9.3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 11/21] drm/amdkfd: allow split HQD on per-queue granularity v4

2017-03-06 Thread Andres Rodriguez
Update the KGD to KFD interface to allow sharing pipes with queue
granularity instead of pipe granularity.

This allows for more interesting pipe/queue splits.

v2: fix overflow check for res.queue_mask
v3: fix shift overflow when setting res.queue_mask
v4: fix comment in is_pipeline_enabled()

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  22 -
 drivers/gpu/drm/amd/amdkfd/kfd_device.c|   4 +
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 100 ++---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  10 +--
 .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c|   3 +-
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |   2 +-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h|  17 ++--
 drivers/gpu/drm/radeon/radeon_kfd.c|  21 -
 9 files changed, 126 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 3200ff9..8fc5aa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -88,28 +88,44 @@ void amdgpu_amdkfd_fini(void)
 
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
if (kgd2kfd)
adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
adev->pdev, kfd2kgd);
 }
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
+   int i;
+   int last_valid_bit;
if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = 0xFF00,
-
-   .first_compute_pipe = 1,
-   .compute_pipe_count = 4 - 1,
+   .num_mec = adev->gfx.mec.num_mec,
+   .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
+   .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
};
 
+   /* this is going to have a few of the MSBs set that we need to
+* clear */
+   bitmap_complement(gpu_resources.queue_bitmap,
+ adev->gfx.mec.queue_bitmap,
+ KGD_MAX_QUEUES);
+
+   /* According to linux/bitmap.h we shouldn't use bitmap_clear if
+* nbits is not compile time constant */
+   last_valid_bit = adev->gfx.mec.num_mec
+   * adev->gfx.mec.num_pipe_per_mec
+   * adev->gfx.mec.num_queue_per_pipe;
+   for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+   clear_bit(i, gpu_resources.queue_bitmap);
+
amdgpu_doorbell_get_kfd_info(adev,
_resources.doorbell_physical_address,
_resources.doorbell_aperture_size,
_resources.doorbell_start_offset);
 
kgd2kfd->device_init(adev->kfd, _resources);
}
 }
 
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3f95f7c..88187bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -219,20 +219,24 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int 
pasid,
return AMD_IOMMU_INV_PRI_RSP_INVALID;
 }
 
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
 const struct kgd2kfd_shared_resources *gpu_resources)
 {
unsigned int size;
 
kfd->shared_resources = *gpu_resources;
 
+   /* We only use the first MEC */
+   if (kfd->shared_resources.num_mec > 1)
+   kfd->shared_resources.num_mec = 1;
+
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
kfd->device_info->mqd_size_aligned;
 
/*
 * calculate max size of runlist packet.
 * There can be only 2 packets at once
 */
size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) +
max_num_of_queues_per_device *
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c064dea..5f28720 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -56,35 +56,58 @@ static void deallocate_sdma_queue(struct 
device_queue_manager *dqm,
unsigned int sdma_queue_id);
 
 static inline
 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
 {
if (type == KFD_QUEUE_TYPE_SDMA)

[PATCH 16/21] drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v4

2017-03-06 Thread Andres Rodriguez
Use an LRU policy to map usermode rings to HW compute queues.

Most compute clients use one queue, and usually the first queue
available. This results in poor pipe/queue work distribution when
multiple compute apps are running. In most cases pipe 0 queue 0 is
the only queue that gets used.

In order to better distribute work across multiple HW queues, we adopt
a policy to map the usermode ring ids to the LRU HW queue.

This fixes a large majority of multi-app compute workloads sharing the
same HW queue, even though 7 other queues are available.

v2: use ring->funcs->type instead of ring->hw_ip
v3: remove amdgpu_queue_mapper_funcs
v4: change ring_lru_list_lock to spinlock, grab only once in lru_get()

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 38 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  | 63 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |  4 ++
 5 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 734d941..88c3176 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1508,20 +1508,23 @@ struct amdgpu_device {
struct kfd_dev  *kfd;
 
struct amdgpu_virt  virt;
 
/* link all shadow bo */
struct list_headshadow_list;
struct mutexshadow_list_lock;
/* link all gtt */
spinlock_t  gtt_list_lock;
struct list_headgtt_list;
+   /* keep an lru list of rings by HW IP */
+   struct list_headring_lru_list;
+   spinlock_t  ring_lru_list_lock;
 
/* record hw reset is performed */
bool has_hw_reset;
 
 };
 
 static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
 {
return container_of(bdev, struct amdgpu_device, mman.bdev);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6abb238..c706805 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1712,20 +1712,23 @@ int amdgpu_device_init(struct amdgpu_device *adev,
spin_lock_init(>gc_cac_idx_lock);
spin_lock_init(>audio_endpt_idx_lock);
spin_lock_init(>mm_stats.lock);
 
INIT_LIST_HEAD(>shadow_list);
mutex_init(>shadow_list_lock);
 
INIT_LIST_HEAD(>gtt_list);
spin_lock_init(>gtt_list_lock);
 
+   INIT_LIST_HEAD(>ring_lru_list);
+   spin_lock_init(>ring_lru_list_lock);
+
if (adev->asic_type >= CHIP_BONAIRE) {
adev->rmmio_base = pci_resource_start(adev->pdev, 5);
adev->rmmio_size = pci_resource_len(adev->pdev, 5);
} else {
adev->rmmio_base = pci_resource_start(adev->pdev, 2);
adev->rmmio_size = pci_resource_len(adev->pdev, 2);
}
 
adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
if (adev->rmmio == NULL) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
index 3e9ac80..054d750 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
@@ -84,20 +84,54 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
break;
default:
*out_ring = NULL;
DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
return -EINVAL;
}
 
return amdgpu_update_cached_map(mapper, ring, *out_ring);
 }
 
+static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip)
+{
+	/* translate an AMDGPU_HW_IP_* id into its AMDGPU_RING_TYPE_* peer */
+	if (hw_ip == AMDGPU_HW_IP_GFX)
+		return AMDGPU_RING_TYPE_GFX;
+	if (hw_ip == AMDGPU_HW_IP_COMPUTE)
+		return AMDGPU_RING_TYPE_COMPUTE;
+	if (hw_ip == AMDGPU_HW_IP_DMA)
+		return AMDGPU_RING_TYPE_SDMA;
+	if (hw_ip == AMDGPU_HW_IP_UVD)
+		return AMDGPU_RING_TYPE_UVD;
+	if (hw_ip == AMDGPU_HW_IP_VCE)
+		return AMDGPU_RING_TYPE_VCE;
+
+	/* every other id is reported and mapped to -1 */
+	DRM_ERROR("Invalid HW IP specified %d\n", hw_ip);
+	return -1;
+}
+
+static int amdgpu_lru_map(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mapper *mapper,
+			  int user_ring,
+			  struct amdgpu_ring **out_ring)
+{
+	/* ask the ring LRU for a ring of the mapper's HW IP type */
+	int err = amdgpu_ring_lru_get(adev,
+				      amdgpu_hw_ip_to_ring_type(mapper->hw_ip),
+				      out_ring);
+
+	/* only cache the user_ring -> HW ring mapping when a ring was found */
+	if (!err)
+		err = amdgpu_update_cached_map(mapper, user_ring, *out_ring);
+	return err;
+}
+
 /**
  * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr 

[PATCH 10/21] drm/amdgpu: teach amdgpu how to enable interrupts for any pipe

2017-03-06 Thread Andres Rodriguez
The current implementation is hardcoded to enable ME1/PIPE0 interrupts
only.

This patch allows amdgpu to enable interrupts for any pipe of ME1.

Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 48 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 33 
 2 files changed, 34 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index fe46765..68265b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -5032,56 +5032,42 @@ static void gfx_v7_0_set_gfx_eop_interrupt_state(struct 
amdgpu_device *adev,
break;
default:
break;
}
 }
 
 static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 int me, int pipe,
 enum amdgpu_interrupt_state state)
 {
-   u32 mec_int_cntl, mec_int_cntl_reg;
-
-   /*
-* amdgpu controls only pipe 0 of MEC1. That's why this function only
-* handles the setting of interrupts for this specific pipe. All other
-* pipes' interrupts are set by amdkfd.
+   /* Me 0 is for graphics and Me 2 is reserved for HW scheduling
+* So we should only really be configuring ME 1 i.e. MEC0
 */
-
-   if (me == 1) {
-   switch (pipe) {
-   case 0:
-   mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
-   break;
-   default:
-   DRM_DEBUG("invalid pipe %d\n", pipe);
-   return;
-   }
-   } else {
-   DRM_DEBUG("invalid me %d\n", me);
+   if (me != 1) {
+   DRM_ERROR("Ignoring request to enable interrupts for invalid me:%d\n", me);
return;
}
 
-   switch (state) {
-   case AMDGPU_IRQ_STATE_DISABLE:
-   mec_int_cntl = RREG32(mec_int_cntl_reg);
-   mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
-   WREG32(mec_int_cntl_reg, mec_int_cntl);
-   break;
-   case AMDGPU_IRQ_STATE_ENABLE:
-   mec_int_cntl = RREG32(mec_int_cntl_reg);
-   mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
-   WREG32(mec_int_cntl_reg, mec_int_cntl);
-   break;
-   default:
-   break;
+   if (pipe >= adev->gfx.mec.num_pipe_per_mec) {
+   DRM_ERROR("Ignoring request to enable interrupts for invalid "
+   "me:%d pipe:%d\n", me, pipe);
+   return;
}
+
+   mutex_lock(&adev->srbm_mutex);
+   cik_srbm_select(adev, me, pipe, 0, 0);
+
+   WREG32_FIELD(CPC_INT_CNTL, TIME_STAMP_INT_ENABLE,
+   state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
+
+   cik_srbm_select(adev, 0, 0, 0, 0);
+   mutex_unlock(&adev->srbm_mutex);
 }
 
 static int gfx_v7_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
 struct amdgpu_irq_src *src,
 unsigned type,
 enum amdgpu_interrupt_state state)
 {
u32 cp_int_cntl;
 
switch (state) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 1238b3d..861334b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6779,41 +6779,42 @@ static void gfx_v8_0_set_gfx_eop_interrupt_state(struct 
amdgpu_device *adev,
 enum amdgpu_interrupt_state 
state)
 {
WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
 }
 
 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device 
*adev,
 int me, int pipe,
 enum 
amdgpu_interrupt_state state)
 {
-   /*
-* amdgpu controls only pipe 0 of MEC1. That's why this function only
-* handles the setting of interrupts for this specific pipe. All other
-* pipes' interrupts are set by amdkfd.
+   /* Me 0 is for graphics and Me 2 is reserved for HW scheduling
+* So we should only really be configuring ME 1 i.e. MEC0
 */
+   if (me != 1) {
+   DRM_ERROR("Ignoring request to enable interrupts for invalid 
me:%d\n", me);
+   return;
+   }
 
-   if (me == 1) {
-   switch (pipe) {
-   case 0:
-   break;
-   default:
-   DRM_DEBUG("invalid pipe %d\n", pipe);
-   return;
-   }
-   } else {
-   

[PATCH 20/21] drm/amdgpu: condense mqd programming sequence

2017-03-06 Thread Andres Rodriguez
The MQD structure matches the reg layout. Take advantage of this to
simplify HQD programming.

Note that the ACTIVE field still needs to be programmed last.

Suggested-by: Felix Kuehling 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 44 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 83 +--
 2 files changed, 22 insertions(+), 105 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index b0b0c89..36994bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -3108,61 +3108,39 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device 
*adev,
mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
 
/* activate the queue */
mqd->cp_hqd_active = 1;
 }
 
 int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
 {
-   u32 tmp;
+   uint32_t tmp;
+   uint32_t mqd_reg;
+   uint32_t *mqd_data;
+
+   /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */
+   mqd_data = &mqd->cp_mqd_base_addr_lo;
 
/* disable wptr polling */
tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
 
-   /* program MQD field to HW */
-   WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
-   WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-   WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
-   WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
-   WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
-   WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
-   WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
-   WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
-   WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo);
-   WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->cp_hqd_pq_rptr_report_addr_hi);
-   WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
-   WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
-   WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
-
-   WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control);
-   WREG32(mmCP_HQD_IB_BASE_ADDR, mqd->cp_hqd_ib_base_addr_lo);
-   WREG32(mmCP_HQD_IB_BASE_ADDR_HI, mqd->cp_hqd_ib_base_addr_hi);
-   WREG32(mmCP_HQD_IB_RPTR, mqd->cp_hqd_ib_rptr);
-   WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
-   WREG32(mmCP_HQD_SEMA_CMD, mqd->cp_hqd_sema_cmd);
-   WREG32(mmCP_HQD_MSG_TYPE, mqd->cp_hqd_msg_type);
-   WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, mqd->cp_hqd_atomic0_preop_lo);
-   WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, mqd->cp_hqd_atomic0_preop_hi);
-   WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, mqd->cp_hqd_atomic1_preop_lo);
-   WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, mqd->cp_hqd_atomic1_preop_hi);
-   WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-   WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum);
-   WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority);
-   WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority);
-   WREG32(mmCP_HQD_IQ_RPTR, mqd->cp_hqd_iq_rptr);
+   /* program all HQD registers */
+   for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
+   WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
/* activate the HQD */
-   WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
+   for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
+   WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
return 0;
 }
 
 static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
 {
int r;
u64 mqd_gpu_addr;
struct cik_mqd *mqd;
struct amdgpu_ring *ring = >gfx.compute_ring[ring_id];
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 88ac682..ab19de3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4919,99 +4919,38 @@ static void gfx_v8_0_enable_doorbell(struct 
amdgpu_device *adev, bool enable)
tmp = RREG32(mmCP_PQ_STATUS);
tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
WREG32(mmCP_PQ_STATUS, tmp);
 
adev->gfx.doorbell_enabled = true;
 }
 
 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd)
 {
uint32_t tmp;
+   uint32_t mqd_reg;
+   uint32_t *mqd_data;
+
+   /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
+   mqd_data = >cp_mqd_base_addr_lo;
 
/* disable wptr polling */
tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);

[PATCH 13/21] drm/amdgpu: allocate queues horizontally across pipes

2017-03-06 Thread Andres Rodriguez
Pipes provide better concurrency than queues, therefore we want to make
sure that apps use queues from different pipes whenever possible.

Optimize for the trivial case where an app will consume rings in order,
therefore we don't want adjacent rings to belong to the same pipe.

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 13 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 78 +++-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 83 +--
 3 files changed, 109 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f9df217..377f58a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1639,20 +1639,33 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
for (i = 0; i < adev->sdma.num_instances; i++)
if (>sdma.instance[i].ring == ring)
break;
 
if (i < AMDGPU_MAX_SDMA_INSTANCES)
return >sdma.instance[i];
else
return NULL;
 }
 
+static inline bool amdgpu_is_mec_queue_enabled(struct amdgpu_device *adev,
+						int mec, int pipe, int queue)
+{
+	/* queue_bitmap is indexed by MEC first, then pipe, then queue */
+	int pipe_idx, queue_idx;
+
+	pipe_idx = mec * adev->gfx.mec.num_pipe_per_mec + pipe;
+	queue_idx = pipe_idx * adev->gfx.mec.num_queue_per_pipe + queue;
+
+	/* report whether the bit for this (mec, pipe, queue) is set */
+	return test_bit(queue_idx, adev->gfx.mec.queue_bitmap);
+}
+
 /*
  * ASICs macro.
  */
 #define amdgpu_asic_set_vga_state(adev, state) 
(adev)->asic_funcs->set_vga_state((adev), (state))
 #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
 #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
 #define amdgpu_asic_set_uvd_clocks(adev, v, d) 
(adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
 #define amdgpu_asic_set_vce_clocks(adev, ev, ec) 
(adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
 #define amdgpu_get_pcie_lanes(adev) (adev)->asic_funcs->get_pcie_lanes((adev))
 #define amdgpu_set_pcie_lanes(adev, l) 
(adev)->asic_funcs->set_pcie_lanes((adev), (l))
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 68265b7..3ca5519 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4720,25 +4720,56 @@ static void gfx_v7_0_gpu_early_init(struct 
amdgpu_device *adev)
case 2:
gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
break;
case 4:
gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
break;
}
adev->gfx.config.gb_addr_config = gb_addr_config;
 }
 
+static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+					int mec, int pipe, int queue)
+{
+	int r;
+	unsigned irq_type;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	/* the EOP irq type is offset from MEC1 pipe 0 by (mec, pipe) */
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	r = amdgpu_ring_init(adev, ring, 1024,
+			&adev->gfx.eop_irq, irq_type);
+	if (r)
+		return r;
+
+	return 0;
+}
+
 static int gfx_v7_0_sw_init(void *handle)
 {
struct amdgpu_ring *ring;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-   int i, r, ring_id;
+   int i, j, k, r, ring_id;
 
/* EOP Event */
r = amdgpu_irq_add_id(adev, 181, >gfx.eop_irq);
if (r)
return r;
 
/* Privileged reg */
r = amdgpu_irq_add_id(adev, 184, >gfx.priv_reg_irq);
if (r)
return r;
@@ -4772,53 +4803,38 @@ static int gfx_v7_0_sw_init(void *handle)
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = >gfx.gfx_ring[i];
ring->ring_obj = NULL;
sprintf(ring->name, "gfx");
r = amdgpu_ring_init(adev, ring, 1024,
 >gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
if (r)
return r;
}
 
-   /* set up the compute queues */
-   for (i = 0, ring_id = 0; i < AMDGPU_MAX_QUEUES; i++) {
-   unsigned irq_type;
-
-   if (!test_bit(i, 

[PATCH 17/21] drm/amdgpu: add parameter to allocate high priority contexts v7

2017-03-06 Thread Andres Rodriguez
Add a new context creation parameter to express a global context priority.

Contexts allocated with AMDGPU_CTX_PRIORITY_HIGH will receive higher
priority to schedule their work than AMDGPU_CTX_PRIORITY_NORMAL
(default) contexts.

v2: Instead of using flags, repurpose __pad
v3: Swap enum values of _NORMAL _HIGH for backwards compatibility
v4: Validate usermode priority and store it
v5: Move priority validation into amdgpu_ctx_ioctl(), headline reword
v6: add UAPI note regarding priorities requiring CAP_SYS_ADMIN
v7: remove ctx->priority

Reviewed-by: Emil Velikov 
Reviewed-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   | 36 ---
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  1 +
 include/uapi/drm/amdgpu_drm.h |  8 +-
 3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 3ddc8db..1b841ad3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -18,25 +18,33 @@
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  * Authors: monk liu 
  */
 
 #include 
 #include "amdgpu.h"
 
-static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+  enum amd_sched_priority priority,
+  struct amdgpu_ctx *ctx)
 {
unsigned i, j;
int r;
 
+   if (priority < 0 || priority >= AMD_SCHED_MAX_PRIORITY)
+   return -EINVAL;
+
+   if (priority == AMD_SCHED_PRIORITY_HIGH && !capable(CAP_SYS_ADMIN))
+   return -EACCES;
+
memset(ctx, 0, sizeof(*ctx));
ctx->adev = adev;
kref_init(>refcount);
spin_lock_init(>ring_lock);
ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
  sizeof(struct dma_fence*), GFP_KERNEL);
if (!ctx->fences)
return -ENOMEM;
 
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -44,21 +52,21 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, 
struct amdgpu_ctx *ctx)
ctx->rings[i].fences = >fences[amdgpu_sched_jobs * i];
}
 
ctx->reset_counter = atomic_read(>gpu_reset_counter);
 
/* create context entity for each ring */
for (i = 0; i < adev->num_rings; i++) {
struct amdgpu_ring *ring = adev->rings[i];
struct amd_sched_rq *rq;
 
-   rq = >sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+   rq = >sched.sched_rq[priority];
r = amd_sched_entity_init(>sched, >rings[i].entity,
  rq, amdgpu_sched_jobs);
if (r)
goto failed;
}
 
r = amdgpu_queue_mgr_init(adev, >queue_mgr);
if (r)
goto failed;
 
@@ -89,39 +97,41 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 
for (i = 0; i < adev->num_rings; i++)
amd_sched_entity_fini(>rings[i]->sched,
  >rings[i].entity);
 
amdgpu_queue_mgr_fini(adev, >queue_mgr);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
+   enum amd_sched_priority priority,
uint32_t *id)
 {
struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
struct amdgpu_ctx *ctx;
int r;
 
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
 
mutex_lock(&mgr->lock);
r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
if (r < 0) {
mutex_unlock(&mgr->lock);
kfree(ctx);
return r;
}
+
*id = (uint32_t)r;
-   r = amdgpu_ctx_init(adev, ctx);
+   r = amdgpu_ctx_init(adev, priority, ctx);
if (r) {
idr_remove(&mgr->ctx_handles, *id);
*id = 0;
kfree(ctx);
}
mutex_unlock(&mgr->lock);
return r;
 }
 
 static void amdgpu_ctx_do_release(struct kref *ref)
@@ -181,36 +191,54 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
if (ctx->reset_counter == reset_counter)
out->state.reset_status = AMDGPU_CTX_NO_RESET;
else
out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
ctx->reset_counter = reset_counter;
 
mutex_unlock(>lock);
return 0;
 }
 
+static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority)
+{
+   switch (amdgpu_priority) {
+   case AMDGPU_CTX_PRIORITY_HIGH:
+

[PATCH 19/21] drm/amdgpu: implement ring set_priority for gfx_v8 compute v4

2017-03-06 Thread Andres Rodriguez
Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over
other queues on the same pipe. Multiple queues on a pipe are timesliced
so this gives us full precedence over other queues.

Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the
wave as follows:
0x2: CS_H
0x1: CS_M
0x0: CS_L

The SPI block will then dispatch work according to the policy set by
SPI_ARB_PRIORITY. In the current policy CS_H is higher priority than
gfx.

In order to prevent getting stuck in loops of CUs bouncing between GFX
and high priority compute and introducing further latency, we reserve
CUs 2+ for high priority compute on-demand.

v2: fix srbm_select to ring->queue and use ring->funcs->type
v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_*
v4: switch int to enum amd_sched_priority

Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 

fix
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 96 +-
 3 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 88c3176..8a3c07c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -910,20 +910,23 @@ struct amdgpu_gfx {
uint32_tme_feature_version;
uint32_tce_feature_version;
uint32_tpfp_feature_version;
uint32_trlc_feature_version;
uint32_tmec_feature_version;
uint32_tmec2_feature_version;
struct amdgpu_ring  gfx_ring[AMDGPU_MAX_GFX_RINGS];
unsignednum_gfx_rings;
struct amdgpu_ring  compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
unsignednum_compute_rings;
+   spinlock_t  cu_reserve_lock;
+   uint32_tcu_reserve_pipe_mask;
+   uint32_t
cu_reserve_queue_mask[AMDGPU_MAX_COMPUTE_RINGS];
struct amdgpu_irq_src   eop_irq;
struct amdgpu_irq_src   priv_reg_irq;
struct amdgpu_irq_src   priv_inst_irq;
/* gfx status */
uint32_tgfx_current_status;
/* ce ram size*/
unsignedce_ram_size;
struct amdgpu_cu_info   cu_info;
const struct amdgpu_gfx_funcs   *funcs;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c706805..2e6f293 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1705,20 +1705,21 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* Registers mapping */
/* TODO: block userspace mapping of io register */
spin_lock_init(>mmio_idx_lock);
spin_lock_init(>smc_idx_lock);
spin_lock_init(>pcie_idx_lock);
spin_lock_init(>uvd_ctx_idx_lock);
spin_lock_init(>didt_idx_lock);
spin_lock_init(>gc_cac_idx_lock);
spin_lock_init(>audio_endpt_idx_lock);
spin_lock_init(>mm_stats.lock);
+   spin_lock_init(>gfx.cu_reserve_lock);
 
INIT_LIST_HEAD(>shadow_list);
mutex_init(>shadow_list_lock);
 
INIT_LIST_HEAD(>gtt_list);
spin_lock_init(>gtt_list_lock);
 
INIT_LIST_HEAD(>ring_lru_list);
spin_lock_init(>ring_lru_list_lock);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 5db5bac..88ac682 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -46,21 +46,24 @@
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_enum.h"
 
 #include "dce/dce_10_0_d.h"
 #include "dce/dce_10_0_sh_mask.h"
 
 #include "smu/smu_7_1_3_d.h"
 
 #define GFX8_NUM_GFX_RINGS 1
 #define GFX8_MEC_HPD_SIZE 2048
-
+#define GFX8_CU_RESERVE_RESOURCES 0x45888
+#define GFX8_CU_NUM 8
+#define GFX8_UNRESERVED_CU_NUM 2
+#define GFX8_CU_RESERVE_PIPE_SHIFT 7
 
 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
 
 #define ARRAY_MODE(x)  ((x) << 
GB_TILE_MODE0__ARRAY_MODE__SHIFT)
 #define PIPE_CONFIG(x) ((x) << 
GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
 #define TILE_SPLIT(x)  ((x) << 
GB_TILE_MODE0__TILE_SPLIT__SHIFT)
 #define MICRO_TILE_MODE_NEW(x) ((x) << 
GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
@@ -6667,20 +6670,110 @@ static u32 gfx_v8_0_ring_get_wptr_compute(struct 

[PATCH 01/21] drm/amdgpu: refactor MQD/HQD initialization

2017-03-06 Thread Andres Rodriguez
The MQD programming sequence currently exists in 3 different places.
Refactor it to absorb all the duplicates.

The success path remains mostly identical except for a slightly
different order in the non-kiq case. This shouldn't matter if the HQD
is disabled.

The error handling paths have been updated to deal with the new code
structure.

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 447 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 417 +++
 2 files changed, 387 insertions(+), 477 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 1f93545..8e1e601 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -42,20 +42,22 @@
 #include "gca/gfx_7_2_sh_mask.h"
 
 #include "gmc/gmc_7_0_d.h"
 #include "gmc/gmc_7_0_sh_mask.h"
 
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 
 #define GFX7_NUM_GFX_RINGS 1
 #define GFX7_NUM_COMPUTE_RINGS 8
+#define GFX7_MEC_HPD_SIZE  2048
+
 
 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
 
 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
 MODULE_FIRMWARE("radeon/bonaire_me.bin");
 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
@@ -2792,40 +2794,38 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device 
*adev)
if (unlikely(r != 0))
dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", 
r);
amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
amdgpu_bo_unref(>gfx.mec.hpd_eop_obj);
adev->gfx.mec.hpd_eop_obj = NULL;
}
 }
 
-#define MEC_HPD_SIZE 2048
-
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
int r;
u32 *hpd;
 
/*
 * KV:2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
 * Nonetheless, we assign only 1 pipe because all other pipes will
 * be handled by KFD
 */
adev->gfx.mec.num_mec = 1;
adev->gfx.mec.num_pipe = 1;
adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * 
adev->gfx.mec.num_pipe * 8;
 
if (adev->gfx.mec.hpd_eop_obj == NULL) {
r = amdgpu_bo_create(adev,
-adev->gfx.mec.num_mec 
*adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
+adev->gfx.mec.num_mec * 
adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2,
 PAGE_SIZE, true,
 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 >gfx.mec.hpd_eop_obj);
if (r) {
dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", 
r);
return r;
}
}
 
r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
@@ -2841,21 +2841,21 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
return r;
}
r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **));
if (r) {
dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
gfx_v7_0_mec_fini(adev);
return r;
}
 
/* clear memory.  Not sure if this is required or not */
-   memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * 
MEC_HPD_SIZE * 2);
+   memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 
GFX7_MEC_HPD_SIZE * 2);
 
amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
return 0;
 }
 
 struct hqd_registers
 {
u32 cp_mqd_base_addr;
@@ -2916,261 +2916,296 @@ struct bonaire_mqd
u32 restart[3];
u32 thread_trace_enable;
u32 reserved1;
u32 user_data[16];
u32 vgtcs_invoke_count[2];
struct hqd_registers queue_state;
u32 dequeue_cntr;
u32 interrupt_queue[64];
 };
 
-/**
- * gfx_v7_0_cp_compute_resume - setup the compute queue registers
- *
- * @adev: amdgpu_device pointer
- *
- * Program the compute queues and test them to make sure they
- * are working.
- * Returns 0 for success, error for failure.
- */
-static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int me, int 
pipe)
 {
-   int r, i, j;
-   u32 tmp;
-   bool use_doorbell = true;
-   u64 hqd_gpu_addr;
-   u64 mqd_gpu_addr;
 

[PATCH 07/21] drm/amdgpu: take ownership of per-pipe configuration

2017-03-06 Thread Andres Rodriguez
Make amdgpu the owner of all per-pipe state of the HQDs.

This change will allow us to split the queues between kfd and amdgpu
with a queue granularity instead of pipe granularity.

This patch fixes kfd allocating an HPD_EOP region for its 3 pipes which
goes unused.

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c  | 13 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c  |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  | 28 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 33 +++-
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 45 --
 6 files changed, 49 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b577ec1..15e048c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -769,23 +769,23 @@ struct amdgpu_rlc {
u32 reg_list_format_size_bytes;
u32 reg_list_size_bytes;
 
u32 *register_list_format;
u32 *register_restore;
 };
 
 struct amdgpu_mec {
struct amdgpu_bo*hpd_eop_obj;
u64 hpd_eop_gpu_addr;
-   u32 num_pipe;
u32 num_mec;
-   u32 num_queue;
+   u32 num_pipe_per_mec;
+   u32 num_queue_per_pipe;
 };
 
 struct amdgpu_kiq {
u64 eop_gpu_addr;
struct amdgpu_bo*eop_obj;
struct amdgpu_ring  ring;
struct amdgpu_irq_src   irq;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 038b7ea..910f9d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -237,32 +237,21 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev 
*kgd, unsigned int pasid,
 
/* Mapping vmid to pasid also for IH block */
WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
 
return 0;
 }
 
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
-   struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
-   uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
-   uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
-
-   lock_srbm(kgd, mec, pipe, 0, 0);
-   WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8));
-   WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8));
-   WREG32(mmCP_HPD_EOP_VMID, 0);
-   WREG32(mmCP_HPD_EOP_CONTROL, hpd_size);
-   unlock_srbm(kgd);
-
+   /* amdgpu owns the per-pipe state */
return 0;
 }
 
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
 
mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 2ecef3d..5843368 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -199,20 +199,21 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev 
*kgd, unsigned int pasid,
 
/* Mapping vmid to pasid also for IH block */
WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
 
return 0;
 }
 
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
+   /* amdgpu owns the per-pipe state */
return 0;
 }
 
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
 
mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 03a4cee..2f1faa4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2799,34 +2799,48 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device 
*adev)
 
amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
adev->gfx.mec.hpd_eop_obj = NULL;
}
 }
 
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
int r;
u32 *hpd;
+   size_t mec_hpd_size;
 
/*
 * KV:2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
 * Nonetheless, we assign only 1 pipe because all other pipes will
 * be handled by KFD
 */
-   adev->gfx.mec.num_mec = 1;
-   adev->gfx.mec.num_pipe = 1;
-   

[PATCH 05/21] drm/amdgpu: unify MQD programming sequence for kfd and amdgpu

2017-03-06 Thread Andres Rodriguez
Use the same gfx_*_mqd_commit function for kfd and amdgpu codepaths.

This removes the last duplicates of this programming sequence.

Reviewed-by: Edward O'Callaghan 
Acked-by: Christian König 
Signed-off-by: Andres Rodriguez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 51 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 49 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 38 -
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h |  5 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 44 ++-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h |  5 +++
 6 files changed, 97 insertions(+), 95 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 1a0a5f7..038b7ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -22,20 +22,21 @@
 
 #include 
 #include 
 #include 
 #include 
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "cikd.h"
 #include "cik_sdma.h"
 #include "amdgpu_ucode.h"
+#include "gfx_v7_0.h"
 #include "gca/gfx_7_2_d.h"
 #include "gca/gfx_7_2_enum.h"
 #include "gca/gfx_7_2_sh_mask.h"
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 #include "gmc/gmc_7_1_d.h"
 #include "gmc/gmc_7_1_sh_mask.h"
 #include "cik_structs.h"
 
 #define CIK_PIPE_PER_MEC   (4)
@@ -302,69 +303,25 @@ static inline struct cik_sdma_rlc_registers 
*get_sdma_mqd(void *mqd)
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t wptr_shadow, is_wptr_shadow_valid;
struct cik_mqd *m;
 
m = get_mqd(mqd);
 
is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
-
-   acquire_queue(kgd, pipe_id, queue_id);
-   WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
-   WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
-   WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
-
-   WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
-   WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
-   WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
-
-   WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
-   WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
-   WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
-
-   WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
-
-   WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
-   WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
-   WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
-
-   WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo);
-   WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi);
-   WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo);
-   WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi);
-
-   WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
-   WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-   m->cp_hqd_pq_rptr_report_addr_hi);
-
-   WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
-
-   WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo);
-   WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi);
-
-   WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
-
-   WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
-
-   WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
-
-   WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
-   WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
-
-   WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
-
if (is_wptr_shadow_valid)
-   WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow);
+   m->cp_hqd_pq_wptr = wptr_shadow;
 
-   WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+   acquire_queue(kgd, pipe_id, queue_id);
+   gfx_v7_0_mqd_commit(adev, m);
release_queue(kgd);
 
return 0;
 }
 
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
uint32_t sdma_base_addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 6697612..2ecef3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -21,20 +21,21 @@
  */
 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_ucode.h"
+#include "gfx_v8_0.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
 #include "oss/oss_3_0_sh_mask.h"
 

Re: [PATCH libdrm 3/3] amdgpu: rework and remove amdgpu_get_auth()

2017-03-06 Thread Emil Velikov
On 7 March 2017 at 00:45, Emil Velikov  wrote:

> I have another ~20 patch series that builds on top ;-)
>
Correction - those are xf86-video-amdgpu ones independent of this series.

Pardon for the noise.
Emil
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH libdrm 3/3] amdgpu: rework and remove amdgpu_get_auth()

2017-03-06 Thread Emil Velikov
On 22 January 2017 at 18:48, Emil Velikov  wrote:
> All one needs is to establish if dev->fd is the flink (primary/card)
> node, rather than use DRM_IOCTL_GET_CLIENT to query the auth status.
>
> The latter is [somewhat] deprecated and incorrect. We need to know [and
> store] the primary node FD, since we're going to use it [at a later
> stage] for buffer import/export sharing.
>
> Cc: amd-gfx@lists.freedesktop.org
> Signed-off-by: Emil Velikov 
> ---
> Again not 100% sure but things look quite fishy as-is... The
> conditionals might be off.
>
> Note: original code [and this one] do not consider if flink_fd is
> already set, thus as we dup we'll leak it.
> ---
>  amdgpu/amdgpu_device.c | 43 ++-
>  1 file changed, 2 insertions(+), 41 deletions(-)
>
> diff --git a/amdgpu/amdgpu_device.c b/amdgpu/amdgpu_device.c
> index f4ede031..6f04d936 100644
> --- a/amdgpu/amdgpu_device.c
> +++ b/amdgpu/amdgpu_device.c
> @@ -101,34 +101,6 @@ static int fd_compare(void *key1, void *key2)
> return result;
>  }
>
> -/**
> -* Get the authenticated form fd,
> -*
> -* \param   fd   - \c [in]  File descriptor for AMD GPU device
> -* \param   auth - \c [out] Pointer to output the fd is authenticated or not
> -*  A render node fd, output auth = 0
> -*  A legacy fd, get the authenticated for 
> compatibility root
> -*
> -* \return   0 on success\n
> -*  >0 - AMD specific error code\n
> -*  <0 - Negative POSIX Error code
> -*/
> -static int amdgpu_get_auth(int fd, int *auth)
> -{
> -   int r = 0;
> -   drm_client_t client = {};
> -
> -   if (drmGetNodeTypeFromFd(fd) == DRM_NODE_RENDER)
> -   *auth = 0;
> -   else {
> -   client.idx = 0;
> -   r = drmIoctl(fd, DRM_IOCTL_GET_CLIENT, &client);
> -   if (!r)
> -   *auth = client.auth;
> -   }
> -   return r;
> -}
> -
>  static void amdgpu_device_free_internal(amdgpu_device_handle dev)
>  {
> amdgpu_vamgr_deinit(dev->vamgr);
> @@ -175,8 +147,6 @@ int amdgpu_device_initialize(int fd,
> struct amdgpu_device *dev;
> drmVersionPtr version;
> int r;
> -   int flag_auth = 0;
> -   int flag_authexist=0;
> uint32_t accel_working = 0;
> uint64_t start, max;
>
> @@ -185,19 +155,10 @@ int amdgpu_device_initialize(int fd,
> pthread_mutex_lock(&fd_mutex);
> if (!fd_tab)
> fd_tab = util_hash_table_create(fd_hash, fd_compare);
> -   r = amdgpu_get_auth(fd, &flag_auth);
> -   if (r) {
> -   pthread_mutex_unlock(&fd_mutex);
> -   return r;
> -   }
> dev = util_hash_table_get(fd_tab, UINT_TO_PTR(fd));
> if (dev) {
> -   r = amdgpu_get_auth(dev->fd, &flag_authexist);
> -   if (r) {
> -   pthread_mutex_unlock(&fd_mutex);
> -   return r;
> -   }
> -   if ((flag_auth) && (!flag_authexist)) {
> +   if (drmGetNodeTypeFromFd(fd) == DRM_NODE_RENDER &&
> +   drmGetNodeTypeFromFd(dev->fd) == DRM_NODE_PRIMARY) {
> dev->flink_fd = dup(fd);
> }
> *major_version = dev->major_version;
> --

Seems like this and 1/3 has fallen through the cracks. Note that 2/3
is wrong, as pointed by Nicolai.
Can we get these moving in some shape or form - I have another ~20
patch series that builds on top ;-)

Thanks
Emil
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: refine vce2.0 dpm sequence

2017-03-06 Thread Alex Deucher
On Sun, Mar 5, 2017 at 10:37 PM, Rex Zhu  wrote:
> Change-Id: I48a305f144f032b1b8d1ceda1653f004a56c9e77


Missing your signed-off-by.  With that fixed:
Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> index adaa9ca..716c22d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> @@ -1315,11 +1315,11 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device 
> *adev, bool enable)
> /* XXX select vce level based on ring/task */
> adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
> mutex_unlock(&adev->pm.mutex);
> -   amdgpu_pm_compute_clocks(adev);
> -   amdgpu_set_powergating_state(adev, 
> AMD_IP_BLOCK_TYPE_VCE,
> -   AMD_PG_STATE_UNGATE);
> amdgpu_set_clockgating_state(adev, 
> AMD_IP_BLOCK_TYPE_VCE,
> AMD_CG_STATE_UNGATE);
> +   amdgpu_set_powergating_state(adev, 
> AMD_IP_BLOCK_TYPE_VCE,
> +   AMD_PG_STATE_UNGATE);
> +   amdgpu_pm_compute_clocks(adev);
> } else {
> amdgpu_set_powergating_state(adev, 
> AMD_IP_BLOCK_TYPE_VCE,
> AMD_PG_STATE_GATE);
> --
> 1.9.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH xf86-video-amdgpu v2 7/9] Call drmmode_set_desired_modes from a WindowExposures hook

2017-03-06 Thread Alex Deucher
On Sun, Mar 5, 2017 at 8:47 PM, Michel Dänzer  wrote:
> From: Michel Dänzer 
>
> This is the earliest opportunity where the root window contents are
> guaranteed to be initialized, and prevents drmmode_set_mode_major from
> getting called before drmmode_set_desired_modes via AMDGPUUnblank ->
> drmmode_crtc_dpms. Also, in contrast to the BlockHandler hook, this is
> called when running Xorg with -pogo.
>
> Fixes intermittently showing garbage on server startup or after server
> reset.
>
> As a bonus, this avoids trouble due to higher layers (e.g. the tigervnc
> Xorg module) calling AMDGPUBlockHandler_oneshot repeatedly even after
> we set pScreen->BlockHandler = AMDGPUBlockHandler_KMS.
>
> Bugzilla: https://bugs.freedesktop.org/99457
> (Ported from radeon commits 0a12bf1085505017068dfdfd31d23133e51b45b9 and
> f0e7948e1c0e984fc27f235f365639e9cf628291)
>
> Signed-off-by: Michel Dänzer 
> ---
>
> v2: Squash radeon commit f0e7948e1c0e984fc27f235f365639e9cf628291 to fix
> compile error against xserver < 1.16.99.901
>
> Any volunteers for reviewing this series?

Series is:
Reviewed-by: Alex Deucher 


>
>  src/amdgpu_drv.h |  1 +
>  src/amdgpu_kms.c | 40 
>  2 files changed, 29 insertions(+), 12 deletions(-)
>
> diff --git a/src/amdgpu_drv.h b/src/amdgpu_drv.h
> index 2aaafe438..ae5b6f94c 100644
> --- a/src/amdgpu_drv.h
> +++ b/src/amdgpu_drv.h
> @@ -230,6 +230,7 @@ typedef struct {
>
> CreateScreenResourcesProcPtr CreateScreenResources;
> CreateWindowProcPtr CreateWindow;
> +   WindowExposuresProcPtr WindowExposures;
>
> Bool IsSecondary;
>
> diff --git a/src/amdgpu_kms.c b/src/amdgpu_kms.c
> index bafcb9bbb..ce1ae43a4 100644
> --- a/src/amdgpu_kms.c
> +++ b/src/amdgpu_kms.c
> @@ -1039,17 +1039,6 @@ static void 
> AMDGPUBlockHandler_KMS(BLOCKHANDLER_ARGS_DECL)
>  #endif
>  }
>
> -static void AMDGPUBlockHandler_oneshot(BLOCKHANDLER_ARGS_DECL)
> -{
> -   SCREEN_PTR(arg);
> -   ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
> -   AMDGPUInfoPtr info = AMDGPUPTR(pScrn);
> -
> -   AMDGPUBlockHandler_KMS(BLOCKHANDLER_ARGS);
> -
> -   drmmode_set_desired_modes(pScrn, &info->drmmode, TRUE);
> -}
> -
>  /* This is called by AMDGPUPreInit to set up the default visual */
>  static Bool AMDGPUPreInitVisual(ScrnInfoPtr pScrn)
>  {
> @@ -1266,6 +1255,31 @@ static Bool AMDGPUCreateWindow_oneshot(WindowPtr pWin)
> return ret;
>  }
>
> +/* When the root window is mapped, set the initial modes */
> +static void AMDGPUWindowExposures_oneshot(WindowPtr pWin, RegionPtr pRegion
> +#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,16,99,901,0)
> + , RegionPtr pBSRegion
> +#endif
> +   )
> +{
> +   ScreenPtr pScreen = pWin->drawable.pScreen;
> +   ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
> +   AMDGPUInfoPtr info = AMDGPUPTR(pScrn);
> +
> +   if (pWin != pScreen->root)
> +   ErrorF("%s called for non-root window %p\n", __func__, pWin);
> +
> +   pScreen->WindowExposures = info->WindowExposures;
> +#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,16,99,901,0)
> +   pScreen->WindowExposures(pWin, pRegion, pBSRegion);
> +#else
> +   pScreen->WindowExposures(pWin, pRegion);
> +#endif
> +
> +   amdgpu_glamor_finish(pScrn);
> +   drmmode_set_desired_modes(pScrn, &info->drmmode, TRUE);
> +}
> +
>  Bool AMDGPUPreInit_KMS(ScrnInfoPtr pScrn, int flags)
>  {
> AMDGPUInfoPtr info;
> @@ -1824,6 +1838,8 @@ Bool AMDGPUScreenInit_KMS(SCREEN_INIT_ARGS_DECL)
> info->CreateWindow = pScreen->CreateWindow;
> pScreen->CreateWindow = AMDGPUCreateWindow_oneshot;
> }
> +   info->WindowExposures = pScreen->WindowExposures;
> +   pScreen->WindowExposures = AMDGPUWindowExposures_oneshot;
>
> /* Provide SaveScreen & wrap BlockHandler and CloseScreen */
> /* Wrap CloseScreen */
> @@ -1831,7 +1847,7 @@ Bool AMDGPUScreenInit_KMS(SCREEN_INIT_ARGS_DECL)
> pScreen->CloseScreen = AMDGPUCloseScreen_KMS;
> pScreen->SaveScreen = AMDGPUSaveScreen_KMS;
> info->BlockHandler = pScreen->BlockHandler;
> -   pScreen->BlockHandler = AMDGPUBlockHandler_oneshot;
> +   pScreen->BlockHandler = AMDGPUBlockHandler_KMS;
>
> info->CreateScreenResources = pScreen->CreateScreenResources;
> pScreen->CreateScreenResources = AMDGPUCreateScreenResources_KMS;
> --
> 2.11.0
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH xf86-video-amdgpu 2/2] amdgpu_probe: Do not close server managed drm fds

2017-03-06 Thread Alex Deucher
On Mon, Mar 6, 2017 at 5:02 AM, Michel Dänzer  wrote:
> From: Hans De Goede 
>
> This fixes the xserver only seeing AMD/ATI devices supported by the amdgpu
> driver, as by the time xf86-video-ati gets a chance to probe them, the
> fd has been closed.
>
> This fixes e.g. Xorg not seeing the dGPU on a Lenovo Thinkpad E465 laptop
> with a CARRIZO iGPU and a HAINAN dGPU.
>
> Signed-off-by: Hans de Goede 
>
> v2: Rebased on top of new patch 1.
> Signed-off-by: Michel Dänzer 

Series is:
Reviewed-by: Alex Deucher 

> ---
>  src/amdgpu_probe.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/src/amdgpu_probe.c b/src/amdgpu_probe.c
> index fc93ac6a2..fb62cb811 100644
> --- a/src/amdgpu_probe.c
> +++ b/src/amdgpu_probe.c
> @@ -166,7 +166,7 @@ static Bool amdgpu_open_drm_master(ScrnInfoPtr pScrn, 
> AMDGPUEntPtr pAMDGPUEnt,
> if (err != 0) {
> xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
>"[drm] failed to set drm interface version.\n");
> -   drmClose(pAMDGPUEnt->fd);
> +   amdgpu_kernel_close_fd(pAMDGPUEnt);
> return FALSE;
> }
>
> @@ -254,7 +254,7 @@ static Bool amdgpu_get_scrninfo(int entity_num, struct 
> pci_device *pci_dev)
> return TRUE;
>
>  error_amdgpu:
> -   drmClose(pAMDGPUEnt->fd);
> +   amdgpu_kernel_close_fd(pAMDGPUEnt);
>  error_fd:
> free(pPriv->ptr);
>  error:
> @@ -349,6 +349,7 @@ amdgpu_platform_probe(DriverPtr pDriver,
>
> pPriv->ptr = xnfcalloc(sizeof(AMDGPUEntRec), 1);
> pAMDGPUEnt = pPriv->ptr;
> +   pAMDGPUEnt->platform_dev = dev;
> pAMDGPUEnt->fd = amdgpu_kernel_open_fd(pScrn, busid, dev);
> if (pAMDGPUEnt->fd < 0)
> goto error_fd;
> @@ -367,7 +368,6 @@ amdgpu_platform_probe(DriverPtr pDriver,
> pAMDGPUEnt = pPriv->ptr;
> pAMDGPUEnt->fd_ref++;
> }
> -   pAMDGPUEnt->platform_dev = dev;
>
> xf86SetEntityInstanceForScreen(pScrn, pEnt->index,
>xf86GetNumEntityInstances(pEnt->
> @@ -379,7 +379,7 @@ amdgpu_platform_probe(DriverPtr pDriver,
> return TRUE;
>
>  error_amdgpu:
> -   drmClose(pAMDGPUEnt->fd);
> +   amdgpu_kernel_close_fd(pAMDGPUEnt);
>  error_fd:
> free(pPriv->ptr);
>  error:
> --
> 2.11.0
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH xf86-video-amdgpu] glamor: Don't flush in BlockHandler with Xorg >= 1.19

2017-03-06 Thread Alex Deucher
On Mon, Mar 6, 2017 at 4:49 AM, Michel Dänzer  wrote:
> From: Michel Dänzer 
>
> This was only necessary with older versions for driving the FBO cache
> expiry mechanism.
>
> Signed-off-by: Michel Dänzer 

Reviewed-by: Alex Deucher 

> ---
>  src/amdgpu_kms.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/src/amdgpu_kms.c b/src/amdgpu_kms.c
> index d48ad480a..31c2e86f2 100644
> --- a/src/amdgpu_kms.c
> +++ b/src/amdgpu_kms.c
> @@ -1035,8 +1035,10 @@ static void 
> AMDGPUBlockHandler_KMS(BLOCKHANDLER_ARGS_DECL)
> }
> }
>
> +#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,19,0,0,0)
> if (info->use_glamor)
> amdgpu_glamor_flush(pScrn);
> +#endif
>
>  #ifdef AMDGPU_PIXMAP_SHARING
> amdgpu_dirty_update(pScrn);
> --
> 2.11.0
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH xf86-video-amdgpu 3/3] Only define transform_region for XF86_CRTC_VERSION >= 4

2017-03-06 Thread Alex Deucher
On Mon, Mar 6, 2017 at 4:33 AM, Michel Dänzer  wrote:
> From: Michel Dänzer 
>
> Not used with older versions of Xorg. Fixes warning in that case:
>
> ../../src/amdgpu_kms.c:328:1: warning: ‘transform_region’ defined but not 
> used [-Wunused-function]
>  transform_region(RegionPtr region, struct pict_f_transform *transform,
>  ^~~~
>
> Signed-off-by: Michel Dänzer 

For the series:
Reviewed-by: Alex Deucher 

> ---
>  src/amdgpu_kms.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/src/amdgpu_kms.c b/src/amdgpu_kms.c
> index bfc353096..d48ad480a 100644
> --- a/src/amdgpu_kms.c
> +++ b/src/amdgpu_kms.c
> @@ -324,6 +324,8 @@ amdgpu_scanout_extents_intersect(xf86CrtcPtr xf86_crtc, 
> BoxPtr extents)
> return (extents->x1 < extents->x2 && extents->y1 < extents->y2);
>  }
>
> +#if XF86_CRTC_VERSION >= 4
> +
>  static RegionPtr
>  transform_region(RegionPtr region, struct pict_f_transform *transform,
>  int w, int h)
> @@ -362,6 +364,8 @@ transform_region(RegionPtr region, struct 
> pict_f_transform *transform,
> return transformed;
>  }
>
> +#endif
> +
>  static void
>  amdgpu_sync_scanout_pixmaps(xf86CrtcPtr xf86_crtc, RegionPtr new_region,
> int scanout_id)
> --
> 2.11.0
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix coding style and printing in amdgpu_doorbell_init

2017-03-06 Thread Edward O'Callaghan
Reviewed-by: Edward O'Callaghan 

On 03/07/2017 12:54 AM, Christian König wrote:
> From: Christian König 
> 
> Based on commit "drm/radeon: remove useless and potentially wrong message".
> 
> The size of the info printing is incorrect and the PCI subsystems prints
> the same info on boot anyway.
> 
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 -
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index bf31aaf..fd03072 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -381,12 +381,11 @@ static int amdgpu_doorbell_init(struct amdgpu_device 
> *adev)
>   if (adev->doorbell.num_doorbells == 0)
>   return -EINVAL;
>  
> - adev->doorbell.ptr = ioremap(adev->doorbell.base, 
> adev->doorbell.num_doorbells * sizeof(u32));
> - if (adev->doorbell.ptr == NULL) {
> + adev->doorbell.ptr = ioremap(adev->doorbell.base,
> +  adev->doorbell.num_doorbells *
> +  sizeof(u32));
> + if (adev->doorbell.ptr == NULL)
>   return -ENOMEM;
> - }
> - DRM_INFO("doorbell mmio base: 0x%08X\n", (uint32_t)adev->doorbell.base);
> - DRM_INFO("doorbell mmio size: %u\n", (unsigned)adev->doorbell.size);
>  
>   return 0;
>  }
> 



signature.asc
Description: OpenPGP digital signature
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH xf86-video-amdgpu 1/2] Refactor amdgpu_kernel_close_fd helper

2017-03-06 Thread Hans de Goede

Hi,

On 06-03-17 11:02, Michel Dänzer wrote:

From: Michel Dänzer 

Preparation for the following change.

Assign pAMDGPUEnt->fd = -1 instead of 0 when we're not using the file
descriptor anymore.

Signed-off-by: Michel Dänzer 


Thank you for picking my original patch for this up, I still had
it on my to-do list but it got buried under other to-do list
items.

Both patches look good to me:

Reviewed-by: Hans de Goede 

Regards,

Hans




---
 src/amdgpu_kms.c   |  7 +--
 src/amdgpu_probe.c | 10 ++
 src/amdgpu_probe.h |  2 ++
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/amdgpu_kms.c b/src/amdgpu_kms.c
index 31c2e86f2..92bf5fadf 100644
--- a/src/amdgpu_kms.c
+++ b/src/amdgpu_kms.c
@@ -132,12 +132,7 @@ static void AMDGPUFreeRec(ScrnInfoPtr pScrn)
pAMDGPUEnt->fd_ref--;
if (!pAMDGPUEnt->fd_ref) {
amdgpu_device_deinitialize(pAMDGPUEnt->pDev);
-#ifdef XF86_PDEV_SERVER_FD
-   if (!(pAMDGPUEnt->platform_dev &&
- pAMDGPUEnt->platform_dev->flags & 
XF86_PDEV_SERVER_FD))
-#endif
-   drmClose(pAMDGPUEnt->fd);
-   pAMDGPUEnt->fd = 0;
+   amdgpu_kernel_close_fd(pAMDGPUEnt);
}
}

diff --git a/src/amdgpu_probe.c b/src/amdgpu_probe.c
index 94da7f623..fc93ac6a2 100644
--- a/src/amdgpu_probe.c
+++ b/src/amdgpu_probe.c
@@ -134,6 +134,16 @@ static int amdgpu_kernel_open_fd(ScrnInfoPtr pScrn, char 
*busid,
return fd;
 }

+void amdgpu_kernel_close_fd(AMDGPUEntPtr pAMDGPUEnt)
+{
+#ifdef XF86_PDEV_SERVER_FD
+   if (!(pAMDGPUEnt->platform_dev &&
+ pAMDGPUEnt->platform_dev->flags & XF86_PDEV_SERVER_FD))
+#endif
+   drmClose(pAMDGPUEnt->fd);
+   pAMDGPUEnt->fd = -1;
+}
+
 static Bool amdgpu_open_drm_master(ScrnInfoPtr pScrn, AMDGPUEntPtr pAMDGPUEnt,
   char *busid)
 {
diff --git a/src/amdgpu_probe.h b/src/amdgpu_probe.h
index 0f43233c3..9e4a9a451 100644
--- a/src/amdgpu_probe.h
+++ b/src/amdgpu_probe.h
@@ -71,6 +71,8 @@ typedef struct {
char *render_node;
 } AMDGPUEntRec, *AMDGPUEntPtr;

+extern void amdgpu_kernel_close_fd(AMDGPUEntPtr pAMDGPUEnt);
+
 extern const OptionInfoRec *AMDGPUOptionsWeak(void);

 extern Bool AMDGPUPreInit_KMS(ScrnInfoPtr, int);


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 4/5] drm/amdgpu: fix printing the doorbell BAR info

2017-03-06 Thread Andy Shevchenko
On Mon, Mar 6, 2017 at 1:40 PM, Christian König  wrote:
> From: Christian König 
>
> The address is 64bit, not 32bit.
>
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index bf31aaf..a470869 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -385,7 +385,7 @@ static int amdgpu_doorbell_init(struct amdgpu_device 
> *adev)
> if (adev->doorbell.ptr == NULL) {
> return -ENOMEM;
> }


> -   DRM_INFO("doorbell mmio base: 0x%08X\n", 
> (uint32_t)adev->doorbell.base);
> +   DRM_INFO("doorbell mmio base: 0x%llX\n", 
> (uint64_t)adev->doorbell.base);
> DRM_INFO("doorbell mmio size: %u\n", (unsigned)adev->doorbell.size);

It seems I already sent a patch to remove those altogether, but if you wish
to keep them, please convert to %pap and remove the explicit casting.

-- 
With Best Regards,
Andy Shevchenko
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


linux-4.11-rc1/drivers/gpu/drm/amd/amdgpu/vi.c: 3 bugs

2017-03-06 Thread David Binderman

Hello there,
1.

[linux-4.11-rc1/drivers/gpu/drm/amd/amdgpu/vi.c:1041] -> 
[linux-4.11-rc1/drivers/gpu/drm/amd/amdgpu/vi.c:1037]: (style) Same expression 
on both sides of '|'.

Maybe the macro AMD_CG_SUPPORT_GFX_MGLS is used twice ?

2.

[linux-4.11-rc1/drivers/gpu/drm/amd/amdgpu/vi.c:1070] -> 
[linux-4.11-rc1/drivers/gpu/drm/amd/amdgpu/vi.c:1066]: (style) Same expression 
on both sides of '|'.

Duplicate.

In the same file:

[linux-4.11-rc1/drivers/gpu/drm/amd/amdgpu/vi.c:792]: (style) Variable 'r' is 
assigned a value that is never used.

Source code is

r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS);

return 0;

Regards

David Binderman
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/5] PCI: add resizeable BAR infrastructure v2

2017-03-06 Thread Andy Shevchenko
On Mon, Mar 6, 2017 at 1:40 PM, Christian König  wrote:
> From: Christian König 
>
> Just the defines and helper functions to read the possible sizes of a BAR and
> update it's size.
>
> See 
> https://pcisig.com/sites/default/files/specification_documents/ECN_Resizable-BAR_24Apr2008.pdf.
>
> v2: provide read helper as well

The commit message leaves out an explanation of when this API might
be useful and how it fits into the managed resources model.

>  /**
> + * pci_rbar_get_sizes - get possible sizes for BAR

Why not simple pci_rbar_get_possible_sizes() ?

> +u32 pci_rbar_get_sizes(struct pci_dev *pdev, int bar)
> +{
> +   int pos, nbars;
> +   u32 ctrl, cap;
> +   int i;
> +
> +   pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
> +   if (!pos)

> +   return 0x0;

return 0;

> +
> +   pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
> +   nbars = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> 
> PCI_REBAR_CTRL_NBAR_SHIFT;
> +

> +   for (i = 0; i < nbars; ++i, pos += 8) {

8 is defined somewhere in the spec? (Yes, I understand that is just 64
bits shift)

> +   int bar_idx;
> +
> +   pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
> +   bar_idx = (ctrl & PCI_REBAR_CTRL_BAR_IDX_MASK) >>
> +   PCI_REBAR_CTRL_BAR_IDX_SHIFT;
> +   if (bar_idx != bar)
> +   continue;
> +
> +   pci_read_config_dword(pdev, pos + PCI_REBAR_CAP, &cap);
> +   return (cap & PCI_REBAR_CTRL_SIZES_MASK) >>
> +   PCI_REBAR_CTRL_SIZES_SHIFT;
> +   }
> +

> +   return 0x0;

return 0;

> +/**
> + * pci_rbar_get_size - get the current size of a BAR

pci_rbar_get_current_size() ?

> +/**
> + * pci_rbar_set_size - set a new size for a BAR
> + * @dev: PCI device
> + * @bar: BAR to set size to

> + * @size: new size as defined in the spec.

 * @size: bitmasked value of new size (bit 0=1MB, ..., bit 19=512G)

?

It will briefly get a clue without reading either spec or long description.

> + *
> + * Set the new size of a BAR as defined in the spec (0=1MB, 19=512GB).
> + * Returns true if resizing was successful, false otherwise.
> + */

> +bool pci_rbar_set_size(struct pci_dev *pdev, int bar, int size)

I would return int and error code. It would be better in the future
and seems in alignment with above.

> +{
> +   int pos, nbars;
> +   u32 ctrl;
> +   int i;

All ints are unsigned?

-- 
With Best Regards,
Andy Shevchenko
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 5/5] drm/amdgpu: resize VRAM BAR for CPU access

2017-03-06 Thread Andy Shevchenko
On Mon, Mar 6, 2017 at 1:40 PM, Christian König  wrote:
> From: Christian König 
>
> Try to resize BAR0 to let CPU access all of VRAM.

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -616,6 +616,35 @@ void amdgpu_gtt_location(struct amdgpu_device *adev, 
> struct amdgpu_mc *mc)

> +void amdgpu_resize_bar0(struct amdgpu_device *adev)
> +{
> +   u32 size = max(ilog2(adev->mc.real_vram_size - 1) + 1, 20) - 20;

Too complicated.

unsigned long = fls_long(real_vram_size | BIT(20));

And the result is not a size, right? It's a logarithm from size.

> +   int r;
> +
> +   r = pci_resize_resource(adev->pdev, 0, size);

> +

Redundant line.

> +   if (r == -ENOTSUPP) {
> +   /* The hardware don't support the extension. */
> +   return;
> +
> +   } else if (r == -ENOSPC) {
> +   DRM_INFO("Not enoigh PCI address space for a large BAR.");
> +   } else if (r) {
> +   DRM_ERROR("Problem resizing BAR0 (%d).", r);
> +   }
> +
> +   /* Reinit the doorbell mapping, it is most likely moved as well */
> +   amdgpu_doorbell_fini(adev);

> +   BUG_ON(amdgpu_doorbell_init(adev));

No way to recover?!

> +}
> +

-- 
With Best Regards,
Andy Shevchenko
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: fix coding style and printing in amdgpu_doorbell_init

2017-03-06 Thread Christian König
From: Christian König 

Based on commit "drm/radeon: remove useless and potentially wrong message".

The size of the info printing is incorrect and the PCI subsystems prints
the same info on boot anyway.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bf31aaf..fd03072 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -381,12 +381,11 @@ static int amdgpu_doorbell_init(struct amdgpu_device 
*adev)
if (adev->doorbell.num_doorbells == 0)
return -EINVAL;
 
-   adev->doorbell.ptr = ioremap(adev->doorbell.base, 
adev->doorbell.num_doorbells * sizeof(u32));
-   if (adev->doorbell.ptr == NULL) {
+   adev->doorbell.ptr = ioremap(adev->doorbell.base,
+adev->doorbell.num_doorbells *
+sizeof(u32));
+   if (adev->doorbell.ptr == NULL)
return -ENOMEM;
-   }
-   DRM_INFO("doorbell mmio base: 0x%08X\n", (uint32_t)adev->doorbell.base);
-   DRM_INFO("doorbell mmio size: %u\n", (unsigned)adev->doorbell.size);
 
return 0;
 }
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/5] PCI: add resizeable BAR infrastructure v2

2017-03-06 Thread Christian König

Sorry, I hit enter too soon.

This set of patches tries to implement support for resizeable BARs 
including an example of how the AMD GFX device driver can make use of it 
to gain full CPU access to the VRAM on the hardware.


Patch #1 is just the second version of the basic RBAR support I sent
out more than a year ago.


Patch #2 adds functionality to resize a single resource of a device by
only touching parts of the PCIe tree which we can be sure are safe to
modify.


Patch #3 adds a quirk for AMD Kaveri/Kabini APUs which adds another 64GB 
BAR on bootup to make sure we have enough address space assigned to the 
root hub.


Patches #4 and #5 then use the new functionality to resize the BAR of
recent AMD GPUs to allow the CPU full access to the memory behind it.


Please comment and review.

Thanks,
Christian.

Am 06.03.2017 um 12:40 schrieb Christian König:

From: Christian König 

Just the defines and helper functions to read the possible sizes of a BAR and
update its size.

See 
https://pcisig.com/sites/default/files/specification_documents/ECN_Resizable-BAR_24Apr2008.pdf.

v2: provide read helper as well

Signed-off-by: Christian König 
---
  drivers/pci/pci.c | 115 ++
  include/linux/pci.h   |   3 ++
  include/uapi/linux/pci_regs.h |   7 +++
  3 files changed, 125 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index ba34907..9658aa7 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2944,6 +2944,121 @@ bool pci_acs_path_enabled(struct pci_dev *start,
  }
  
  /**

+ * pci_rbar_get_sizes - get possible sizes for BAR
+ * @dev: PCI device
+ * @bar: BAR to query
+ *
+ * Get the possible sizes of a resizeable BAR as bitmask defined in the spec
+ * (bit 0=1MB, bit 19=512GB). Returns 0 if BAR isn't resizeable.
+ */
+u32 pci_rbar_get_sizes(struct pci_dev *pdev, int bar)
+{
+   int pos, nbars;
+   u32 ctrl, cap;
+   int i;
+
+   pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
+   if (!pos)
+   return 0x0;
+
+   pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+   nbars = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT;
+
+   for (i = 0; i < nbars; ++i, pos += 8) {
+   int bar_idx;
+
+   pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+   bar_idx = (ctrl & PCI_REBAR_CTRL_BAR_IDX_MASK) >>
+   PCI_REBAR_CTRL_BAR_IDX_SHIFT;
+   if (bar_idx != bar)
+   continue;
+
+   pci_read_config_dword(pdev, pos + PCI_REBAR_CAP, &cap);
+   return (cap & PCI_REBAR_CTRL_SIZES_MASK) >>
+   PCI_REBAR_CTRL_SIZES_SHIFT;
+   }
+
+   return 0x0;
+}
+
+/**
+ * pci_rbar_get_size - get the current size of a BAR
+ * @dev: PCI device
+ * @bar: BAR to set size to
+ *
+ * Read the size of a BAR from the resizeable BAR config.
+ * Returns size if found or negativ error code.
+ */
+int pci_rbar_get_size(struct pci_dev *pdev, int bar)
+{
+   int pos, nbars;
+   u32 ctrl;
+   int i;
+
+   pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
+   if (!pos)
+   return -ENOTSUPP;
+
+   pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+   nbars = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT;
+
+   for (i = 0; i < nbars; ++i, pos += 8) {
+   int bar_idx;
+
+   pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+   bar_idx = (ctrl & PCI_REBAR_CTRL_BAR_IDX_MASK) >>
+   PCI_REBAR_CTRL_BAR_IDX_SHIFT;
+   if (bar_idx != bar)
+   continue;
+
+   return (ctrl & PCI_REBAR_CTRL_BAR_SIZE_MASK) >>
+   PCI_REBAR_CTRL_BAR_SIZE_SHIFT;
+   }
+
+   return -ENOENT;
+}
+
+/**
+ * pci_rbar_set_size - set a new size for a BAR
+ * @dev: PCI device
+ * @bar: BAR to set size to
+ * @size: new size as defined in the spec.
+ *
+ * Set the new size of a BAR as defined in the spec (0=1MB, 19=512GB).
+ * Returns true if resizing was successful, false otherwise.
+ */
+bool pci_rbar_set_size(struct pci_dev *pdev, int bar, int size)
+{
+   int pos, nbars;
+   u32 ctrl;
+   int i;
+
+   pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
+   if (!pos)
+   return false;
+
+   pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+   nbars = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT;
+
+   for (i = 0; i < nbars; ++i, pos += 8) {
+   int bar_idx;
+
+   pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+   bar_idx = (ctrl & PCI_REBAR_CTRL_BAR_IDX_MASK) >>
+   PCI_REBAR_CTRL_BAR_IDX_SHIFT;
+   if (bar_idx != bar)
+   continue;
+
+   ctrl &= 

[PATCH 4/5] drm/amdgpu: fix printing the doorbell BAR info

2017-03-06 Thread Christian König
From: Christian König 

The address is 64bit, not 32bit.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bf31aaf..a470869 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -385,7 +385,7 @@ static int amdgpu_doorbell_init(struct amdgpu_device *adev)
if (adev->doorbell.ptr == NULL) {
return -ENOMEM;
}
-   DRM_INFO("doorbell mmio base: 0x%08X\n", (uint32_t)adev->doorbell.base);
+   DRM_INFO("doorbell mmio base: 0x%llX\n", (uint64_t)adev->doorbell.base);
DRM_INFO("doorbell mmio size: %u\n", (unsigned)adev->doorbell.size);
 
return 0;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/5] PCI: add resizeable BAR infrastructure v2

2017-03-06 Thread Christian König
From: Christian König 

Just the defines and helper functions to read the possible sizes of a BAR and
update its size.

See 
https://pcisig.com/sites/default/files/specification_documents/ECN_Resizable-BAR_24Apr2008.pdf.

v2: provide read helper as well

Signed-off-by: Christian König 
---
 drivers/pci/pci.c | 115 ++
 include/linux/pci.h   |   3 ++
 include/uapi/linux/pci_regs.h |   7 +++
 3 files changed, 125 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index ba34907..9658aa7 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2944,6 +2944,121 @@ bool pci_acs_path_enabled(struct pci_dev *start,
 }
 
 /**
+ * pci_rbar_get_sizes - get possible sizes for BAR
+ * @dev: PCI device
+ * @bar: BAR to query
+ *
+ * Get the possible sizes of a resizeable BAR as bitmask defined in the spec
+ * (bit 0=1MB, bit 19=512GB). Returns 0 if BAR isn't resizeable.
+ */
+u32 pci_rbar_get_sizes(struct pci_dev *pdev, int bar)
+{
+   int pos, nbars;
+   u32 ctrl, cap;
+   int i;
+
+   pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
+   if (!pos)
+   return 0x0;
+
+   pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+   nbars = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT;
+
+   for (i = 0; i < nbars; ++i, pos += 8) {
+   int bar_idx;
+
+   pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+   bar_idx = (ctrl & PCI_REBAR_CTRL_BAR_IDX_MASK) >>
+   PCI_REBAR_CTRL_BAR_IDX_SHIFT;
+   if (bar_idx != bar)
+   continue;
+
+   pci_read_config_dword(pdev, pos + PCI_REBAR_CAP, &cap);
+   return (cap & PCI_REBAR_CTRL_SIZES_MASK) >>
+   PCI_REBAR_CTRL_SIZES_SHIFT;
+   }
+
+   return 0x0;
+}
+
+/**
+ * pci_rbar_get_size - get the current size of a BAR
+ * @dev: PCI device
+ * @bar: BAR to set size to
+ *
+ * Read the size of a BAR from the resizeable BAR config.
+ * Returns size if found or negativ error code.
+ */
+int pci_rbar_get_size(struct pci_dev *pdev, int bar)
+{
+   int pos, nbars;
+   u32 ctrl;
+   int i;
+
+   pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
+   if (!pos)
+   return -ENOTSUPP;
+
+   pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+   nbars = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT;
+
+   for (i = 0; i < nbars; ++i, pos += 8) {
+   int bar_idx;
+
+   pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+   bar_idx = (ctrl & PCI_REBAR_CTRL_BAR_IDX_MASK) >>
+   PCI_REBAR_CTRL_BAR_IDX_SHIFT;
+   if (bar_idx != bar)
+   continue;
+
+   return (ctrl & PCI_REBAR_CTRL_BAR_SIZE_MASK) >>
+   PCI_REBAR_CTRL_BAR_SIZE_SHIFT;
+   }
+
+   return -ENOENT;
+}
+
+/**
+ * pci_rbar_set_size - set a new size for a BAR
+ * @dev: PCI device
+ * @bar: BAR to set size to
+ * @size: new size as defined in the spec.
+ *
+ * Set the new size of a BAR as defined in the spec (0=1MB, 19=512GB).
+ * Returns true if resizing was successful, false otherwise.
+ */
+bool pci_rbar_set_size(struct pci_dev *pdev, int bar, int size)
+{
+   int pos, nbars;
+   u32 ctrl;
+   int i;
+
+   pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
+   if (!pos)
+   return false;
+
+   pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+   nbars = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT;
+
+   for (i = 0; i < nbars; ++i, pos += 8) {
+   int bar_idx;
+
+   pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
+   bar_idx = (ctrl & PCI_REBAR_CTRL_BAR_IDX_MASK) >>
+   PCI_REBAR_CTRL_BAR_IDX_SHIFT;
+   if (bar_idx != bar)
+   continue;
+
+   ctrl &= ~PCI_REBAR_CTRL_BAR_SIZE_MASK;
+   ctrl |= size << PCI_REBAR_CTRL_BAR_SIZE_SHIFT;
+   pci_write_config_dword(pdev, pos + PCI_REBAR_CTRL, ctrl);
+   return true;
+   }
+
+   return false;
+}
+
+/**
  * pci_swizzle_interrupt_pin - swizzle INTx for device behind bridge
  * @dev: the PCI device
  * @pin: the INTx pin (1=INTA, 2=INTB, 3=INTC, 4=INTD)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index a38772a..9f26ca4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1946,6 +1946,9 @@ void pci_request_acs(void);
 bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags);
 bool pci_acs_path_enabled(struct pci_dev *start,
  struct pci_dev *end, u16 acs_flags);
+u32 pci_rbar_get_sizes(struct pci_dev *pdev, int bar);
+int pci_rbar_get_size(struct pci_dev *pdev, int bar);
+bool 

[PATCH 2/5] PCI: add functionality for resizing resources

2017-03-06 Thread Christian König
From: Christian König 

This allows device drivers to request resizing their BARs.

The function only tries to reprogram the windows of the bridge directly above
the requesting device and only the BAR of the same type (usually mem, 64bit,
prefetchable). This is done to make sure not to disturb other drivers by
changing the BARs of their devices.

If reprogramming the bridge BAR fails the old status is restored and -ENOSPC
returned to the calling device driver.

Signed-off-by: Christian König 
---
 drivers/pci/setup-bus.c | 61 +
 drivers/pci/setup-res.c | 45 
 include/linux/pci.h |  2 ++
 3 files changed, 108 insertions(+)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index f30ca75..cfab2c7 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1923,6 +1923,67 @@ void pci_assign_unassigned_bridge_resources(struct 
pci_dev *bridge)
 }
 EXPORT_SYMBOL_GPL(pci_assign_unassigned_bridge_resources);
 
+int pci_reassign_bridge_resources(struct pci_dev *bridge, unsigned long type)
+{
+   const unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+   IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
+
+   struct resource saved;
+   LIST_HEAD(add_list);
+   LIST_HEAD(fail_head);
+   struct pci_dev_resource *fail_res;
+   unsigned i;
+   int ret = 0;
+
+   /* Release all children from the matching bridge resource */
+   for (i = PCI_BRIDGE_RESOURCES; i < PCI_BRIDGE_RESOURCE_END; ++i) {
+   struct resource *res = &bridge->resource[i];
+
+   if ((res->flags & type_mask) != (type & type_mask))
+   continue;
+
+   saved = *res;
+   if (res->parent) {
+   release_child_resources(res);
+   release_resource(res);
+   }
+   res->start = 0;
+   res->end = 0;
+   break;
+   }
+
+   if (i == PCI_BRIDGE_RESOURCE_END)
+   return -ENOENT;
+
+   __pci_bus_size_bridges(bridge->subordinate, &add_list);
+   __pci_bridge_assign_resources(bridge, &add_list, &fail_head);
+   BUG_ON(!list_empty(&add_list));
+
+   /* restore size and flags */
+   list_for_each_entry(fail_res, &fail_head, list) {
+   struct resource *res = fail_res->res;
+
+   res->start = fail_res->start;
+   res->end = fail_res->end;
+   res->flags = fail_res->flags;
+   }
+
+   /* Revert to the old configuration */
+   if (!list_empty(&fail_head)) {
+   struct resource *res = &bridge->resource[i];
+
+   res->start = saved.start;
+   res->end = saved.end;
+   res->flags = saved.flags;
+
+   pci_claim_resource(bridge, i);
+   ret = -ENOSPC;
+   }
+
+   free_list(&fail_head);
+   return ret;
+}
+
 void pci_assign_unassigned_bus_resources(struct pci_bus *bus)
 {
struct pci_dev *dev;
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 9526e34..d03e6f1 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -363,6 +363,51 @@ int pci_reassign_resource(struct pci_dev *dev, int resno, 
resource_size_t addsiz
return 0;
 }
 
+int pci_resize_resource(struct pci_dev *dev, int resno, int size)
+{
+   struct resource *res = dev->resource + resno;
+   u32 sizes = pci_rbar_get_sizes(dev, resno);
+   int old = pci_rbar_get_size(dev, resno);
+   u64 bytes = 1ULL << (size + 20);
+   int ret = 0;
+
+   if (!sizes)
+   return -ENOTSUPP;
+
+   if (!(sizes & (1 << size)))
+   return -EINVAL;
+
+   if (old < 0)
+   return old;
+
+   /* Make sure the resource isn't assigned before making it larger. */
+   if (resource_size(res) < bytes && res->parent) {
+   release_resource(res);
+   res->end = resource_size(res) - 1;
+   res->start = 0;
+   if (resno < PCI_BRIDGE_RESOURCES)
+   pci_update_resource(dev, resno);
+   }
+
+   if (pci_rbar_set_size(dev, resno, size))
+   res->end = res->start + bytes - 1;
+   else
+   return -EIO;
+
+   ret = pci_reassign_bridge_resources(dev->bus->self, res->flags);
+   if (ret) {
+   pci_rbar_set_size(dev, resno, old);
+   res->end = res->start + (1ULL << (old + 20)) - 1;
+
+   pci_assign_unassigned_bus_resources(dev->bus);
+   pci_setup_bridge(dev->bus);
+   }
+
+   pci_reenable_device(dev->bus->self);
+   return ret;
+}
+EXPORT_SYMBOL(pci_resize_resource);
+
 int pci_enable_resources(struct pci_dev *dev, int mask)
 {
u16 cmd, old_cmd;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9f26ca4..c85d8d7 100644
--- a/include/linux/pci.h
+++ 

[PATCH 5/5] drm/amdgpu: resize VRAM BAR for CPU access

2017-03-06 Thread Christian König
From: Christian König 

Try to resize BAR0 to let CPU access all of VRAM.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 29 +
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c  |  8 +---
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c  |  8 +---
 4 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8a5f8cb..1e888d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1754,6 +1754,7 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device 
*adev, struct ttm_tt *ttm,
 struct ttm_mem_reg *mem);
 void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, 
u64 base);
 void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc);
+void amdgpu_resize_bar0(struct amdgpu_device *adev);
 void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size);
 int amdgpu_ttm_init(struct amdgpu_device *adev);
 void amdgpu_ttm_fini(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a470869..f038195 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -616,6 +616,35 @@ void amdgpu_gtt_location(struct amdgpu_device *adev, 
struct amdgpu_mc *mc)
mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end);
 }
 
+/**
+ * amdgpu_resize_bar0 - try to resize BAR0
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Try to resize BAR0 to make all VRAM CPU accessible.
+ */
+void amdgpu_resize_bar0(struct amdgpu_device *adev)
+{
+   u32 size = max(ilog2(adev->mc.real_vram_size - 1) + 1, 20) - 20;
+   int r;
+
+   r = pci_resize_resource(adev->pdev, 0, size);
+
+   if (r == -ENOTSUPP) {
+   /* The hardware don't support the extension. */
+   return;
+
+   } else if (r == -ENOSPC) {
+   DRM_INFO("Not enoigh PCI address space for a large BAR.");
+   } else if (r) {
+   DRM_ERROR("Problem resizing BAR0 (%d).", r);
+   }
+
+   /* Reinit the doorbell mapping, it is most likely moved as well */
+   amdgpu_doorbell_fini(adev);
+   BUG_ON(amdgpu_doorbell_init(adev));
+}
+
 /*
  * GPU helpers function.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 552bf6b..cd5828c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -367,13 +367,15 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
break;
}
adev->mc.vram_width = numchan * chansize;
-   /* Could aper size report 0 ? */
-   adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
-   adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
/* size in MB on si */
adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
 
+   if (!(adev->flags & AMD_IS_APU))
+   amdgpu_resize_bar0(adev);
+   adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
+   adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
+
 #ifdef CONFIG_X86_64
if (adev->flags & AMD_IS_APU) {
adev->mc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index f2bd016..e277130 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -459,13 +459,15 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
break;
}
adev->mc.vram_width = numchan * chansize;
-   /* Could aper size report 0 ? */
-   adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
-   adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
/* size in MB on si */
adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
 
+   if (!(adev->flags & AMD_IS_APU))
+   amdgpu_resize_bar0(adev);
+   adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
+   adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
+
 #ifdef CONFIG_X86_64
if (adev->flags & AMD_IS_APU) {
adev->mc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/5] x86/PCI: Enable a 64bit BAR on AMD Family 15h (Models 30h-3fh) Processors

2017-03-06 Thread Christian König
From: Christian König 

Most BIOSes don't enable this for compatibility reasons.

Manually enable a 64bit BAR of 64GB size so that we have
enough room for PCI devices.

Signed-off-by: Christian König 
---
 arch/x86/pci/fixup.c | 53 
 1 file changed, 53 insertions(+)

diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 6d52b94..bff5242 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -571,3 +571,56 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, 
pci_invalid_bar);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_invalid_bar);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_invalid_bar);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_invalid_bar);
+
+static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
+{
+   const uint64_t size = 64ULL * 1024 * 1024 * 1024;
+   uint32_t base, limit, high;
+   struct resource *res;
+   unsigned i;
+   int r;
+
+   for (i = 0; i < 8; ++i) {
+
+   pci_read_config_dword(dev, 0x80 + i * 0x8, &base);
+   pci_read_config_dword(dev, 0x180 + i * 0x4, &high);
+
+   /* Is this slot free? */
+   if ((base & 0x3) == 0x0)
+   break;
+
+   base >>= 8;
+   base |= high << 24;
+
+   /* Abort if a slot already configures a 64bit BAR. */
+   if (base > 0x10000)
+   return;
+
+   }
+
+   if (i == 8)
+   return;
+
+   res = kzalloc(sizeof(*res), GFP_KERNEL);
+   res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_MEM_64 |
+   IORESOURCE_WINDOW;
+   res->name = dev->bus->name;
+   r = allocate_resource(&iomem_resource, res, size, 0x100000000,
+ 0xfd00000000, size, NULL, NULL);
+   if (r) {
+   kfree(res);
+   return;
+   }
+
+   base = ((res->start >> 8) & 0xffffff00) | 0x3;
+   limit = ((res->end + 1) >> 8) & 0xffffff00;
+   high = ((res->start >> 40) & 0xff) |
+   ((((res->end + 1) >> 40) & 0xff) << 16);
+
+   pci_write_config_dword(dev, 0x180 + i * 0x4, high);
+   pci_write_config_dword(dev, 0x84 + i * 0x8, limit);
+   pci_write_config_dword(dev, 0x80 + i * 0x8, base);
+
+   pci_bus_add_resource(dev->bus, res, 0);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu 2/2] amdgpu_probe: Do not close server managed drm fds

2017-03-06 Thread Michel Dänzer
From: Hans De Goede 

This fixes the xserver only seeing AMD/ATI devices supported by the amdgpu
driver, as by the time xf86-video-ati gets a chance to probe them, the
fd has been closed.

This fixes e.g. Xorg not seeing the dGPU on a Lenovo Thinkpad E465 laptop
with a CARRIZO iGPU and a HAINAN dGPU.

Signed-off-by: Hans de Goede 

v2: Rebased on top of new patch 1.
Signed-off-by: Michel Dänzer 
---
 src/amdgpu_probe.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/amdgpu_probe.c b/src/amdgpu_probe.c
index fc93ac6a2..fb62cb811 100644
--- a/src/amdgpu_probe.c
+++ b/src/amdgpu_probe.c
@@ -166,7 +166,7 @@ static Bool amdgpu_open_drm_master(ScrnInfoPtr pScrn, 
AMDGPUEntPtr pAMDGPUEnt,
if (err != 0) {
xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
   "[drm] failed to set drm interface version.\n");
-   drmClose(pAMDGPUEnt->fd);
+   amdgpu_kernel_close_fd(pAMDGPUEnt);
return FALSE;
}
 
@@ -254,7 +254,7 @@ static Bool amdgpu_get_scrninfo(int entity_num, struct 
pci_device *pci_dev)
return TRUE;
 
 error_amdgpu:
-   drmClose(pAMDGPUEnt->fd);
+   amdgpu_kernel_close_fd(pAMDGPUEnt);
 error_fd:
free(pPriv->ptr);
 error:
@@ -349,6 +349,7 @@ amdgpu_platform_probe(DriverPtr pDriver,
 
pPriv->ptr = xnfcalloc(sizeof(AMDGPUEntRec), 1);
pAMDGPUEnt = pPriv->ptr;
+   pAMDGPUEnt->platform_dev = dev;
pAMDGPUEnt->fd = amdgpu_kernel_open_fd(pScrn, busid, dev);
if (pAMDGPUEnt->fd < 0)
goto error_fd;
@@ -367,7 +368,6 @@ amdgpu_platform_probe(DriverPtr pDriver,
pAMDGPUEnt = pPriv->ptr;
pAMDGPUEnt->fd_ref++;
}
-   pAMDGPUEnt->platform_dev = dev;
 
xf86SetEntityInstanceForScreen(pScrn, pEnt->index,
   xf86GetNumEntityInstances(pEnt->
@@ -379,7 +379,7 @@ amdgpu_platform_probe(DriverPtr pDriver,
return TRUE;
 
 error_amdgpu:
-   drmClose(pAMDGPUEnt->fd);
+   amdgpu_kernel_close_fd(pAMDGPUEnt);
 error_fd:
free(pPriv->ptr);
 error:
-- 
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu 1/2] Refactor amdgpu_kernel_close_fd helper

2017-03-06 Thread Michel Dänzer
From: Michel Dänzer 

Preparation for the following change.

Assign pAMDGPUEnt->fd = -1 instead of 0 when we're not using the file
descriptor anymore.

Signed-off-by: Michel Dänzer 
---
 src/amdgpu_kms.c   |  7 +--
 src/amdgpu_probe.c | 10 ++
 src/amdgpu_probe.h |  2 ++
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/amdgpu_kms.c b/src/amdgpu_kms.c
index 31c2e86f2..92bf5fadf 100644
--- a/src/amdgpu_kms.c
+++ b/src/amdgpu_kms.c
@@ -132,12 +132,7 @@ static void AMDGPUFreeRec(ScrnInfoPtr pScrn)
pAMDGPUEnt->fd_ref--;
if (!pAMDGPUEnt->fd_ref) {
amdgpu_device_deinitialize(pAMDGPUEnt->pDev);
-#ifdef XF86_PDEV_SERVER_FD
-   if (!(pAMDGPUEnt->platform_dev &&
- pAMDGPUEnt->platform_dev->flags & 
XF86_PDEV_SERVER_FD))
-#endif
-   drmClose(pAMDGPUEnt->fd);
-   pAMDGPUEnt->fd = 0;
+   amdgpu_kernel_close_fd(pAMDGPUEnt);
}
}
 
diff --git a/src/amdgpu_probe.c b/src/amdgpu_probe.c
index 94da7f623..fc93ac6a2 100644
--- a/src/amdgpu_probe.c
+++ b/src/amdgpu_probe.c
@@ -134,6 +134,16 @@ static int amdgpu_kernel_open_fd(ScrnInfoPtr pScrn, char 
*busid,
return fd;
 }
 
+void amdgpu_kernel_close_fd(AMDGPUEntPtr pAMDGPUEnt)
+{
+#ifdef XF86_PDEV_SERVER_FD
+   if (!(pAMDGPUEnt->platform_dev &&
+ pAMDGPUEnt->platform_dev->flags & XF86_PDEV_SERVER_FD))
+#endif
+   drmClose(pAMDGPUEnt->fd);
+   pAMDGPUEnt->fd = -1;
+}
+
 static Bool amdgpu_open_drm_master(ScrnInfoPtr pScrn, AMDGPUEntPtr pAMDGPUEnt,
   char *busid)
 {
diff --git a/src/amdgpu_probe.h b/src/amdgpu_probe.h
index 0f43233c3..9e4a9a451 100644
--- a/src/amdgpu_probe.h
+++ b/src/amdgpu_probe.h
@@ -71,6 +71,8 @@ typedef struct {
char *render_node;
 } AMDGPUEntRec, *AMDGPUEntPtr;
 
+extern void amdgpu_kernel_close_fd(AMDGPUEntPtr pAMDGPUEnt);
+
 extern const OptionInfoRec *AMDGPUOptionsWeak(void);
 
 extern Bool AMDGPUPreInit_KMS(ScrnInfoPtr, int);
-- 
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu] glamor: Don't flush in BlockHandler with Xorg >= 1.19

2017-03-06 Thread Michel Dänzer
From: Michel Dänzer 

This was only necessary with older versions for driving the FBO cache
expiry mechanism.

Signed-off-by: Michel Dänzer 
---
 src/amdgpu_kms.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/amdgpu_kms.c b/src/amdgpu_kms.c
index d48ad480a..31c2e86f2 100644
--- a/src/amdgpu_kms.c
+++ b/src/amdgpu_kms.c
@@ -1035,8 +1035,10 @@ static void 
AMDGPUBlockHandler_KMS(BLOCKHANDLER_ARGS_DECL)
}
}
 
+#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,19,0,0,0)
if (info->use_glamor)
amdgpu_glamor_flush(pScrn);
+#endif
 
 #ifdef AMDGPU_PIXMAP_SHARING
amdgpu_dirty_update(pScrn);
-- 
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu 2/3] Use local implementation of RegionDuplicate for older xserver

2017-03-06 Thread Michel Dänzer
From: Michel Dänzer 

It was only added in xserver 1.15. Fixes build against older xserver.

Reported-by: Pali Rohár 
(Ported from radeon commit 80cc892ee1ce54fad3cb7dd11bd9df18c359136f)

Signed-off-by: Michel Dänzer 
---
 configure.ac |  6 ++
 src/amdgpu_drv.h | 19 +++
 2 files changed, 25 insertions(+)

diff --git a/configure.ac b/configure.ac
index 11dc28544..00249a599 100644
--- a/configure.ac
+++ b/configure.ac
@@ -152,6 +152,12 @@ else
 fi
 AM_CONDITIONAL(GLAMOR, test x$GLAMOR != xno)
 
+AC_CHECK_DECL(RegionDuplicate,
+ [AC_DEFINE(HAVE_REGIONDUPLICATE, 1,
+ [Have RegionDuplicate API])], [],
+ [#include 
+  #include ])
+
 AC_CHECK_DECL(fbGlyphs,
  [AC_DEFINE(HAVE_FBGLYPHS, 1, [Have fbGlyphs API])], [],
  [#include 
diff --git a/src/amdgpu_drv.h b/src/amdgpu_drv.h
index ae5b6f94c..0700daecb 100644
--- a/src/amdgpu_drv.h
+++ b/src/amdgpu_drv.h
@@ -89,6 +89,25 @@
 
 struct _SyncFence;
 
+#ifndef HAVE_REGIONDUPLICATE
+
+static inline RegionPtr
+RegionDuplicate(RegionPtr pOld)
+{
+   RegionPtr pNew;
+
+   pNew = RegionCreate(&pOld->extents, 0);
+   if (!pNew)
+   return NULL;
+   if (!RegionCopy(pNew, pOld)) {
+   RegionDestroy(pNew);
+   return NULL;
+   }
+   return pNew;
+}
+
+#endif
+
 #ifndef MAX
 #define MAX(a,b) ((a)>(b)?(a):(b))
 #endif
-- 
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu 0/3] Fixes for building against older Xorg

2017-03-06 Thread Michel Dänzer
From: Michel Dänzer 

This fixes all errors down to Xorg 1.10.

One warning remains, see below. This could be silenced by casting the
return value of amdgpu_get_marketing_name to (char*), but that's ugly
and doesn't seem worth it.

../../src/amdgpu_kms.c: In function ‘AMDGPUPreInitChipType_KMS’:
../../src/amdgpu_kms.c:1174:17: warning: assignment discards ‘const’ qualifier 
from pointer target type [-Wdiscarded-qualifiers]
  pScrn->chipset = amdgpu_get_marketing_name(pAMDGPUEnt->pDev);
 ^

Michel Dänzer (3):
  Don't use pScrn->is_gpu in AMDGPUCreateScreenResources_KMS
  Use local implementation of RegionDuplicate for older xserver
  Only define transform_region for XF86_CRTC_VERSION >= 4

 configure.ac |  6 ++
 src/amdgpu_drv.h | 19 +++
 src/amdgpu_kms.c |  6 +-
 3 files changed, 30 insertions(+), 1 deletion(-)

-- 
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu 1/3] Don't use pScrn->is_gpu in AMDGPUCreateScreenResources_KMS

2017-03-06 Thread Michel Dänzer
From: Michel Dänzer 

Looks like this snuck in accidentally.

Brings us back in line with the radeon driver, and fixes the build
against older versions of xserver which didn't have the is_gpu field
yet.

Fixes: 6bab8fabb37e ("Remove info->dri2.drm_fd and info->drmmode->fd")
Signed-off-by: Michel Dänzer 
---
 src/amdgpu_kms.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amdgpu_kms.c b/src/amdgpu_kms.c
index c5205e25a..bfc353096 100644
--- a/src/amdgpu_kms.c
+++ b/src/amdgpu_kms.c
@@ -248,7 +248,7 @@ static Bool AMDGPUCreateScreenResources_KMS(ScreenPtr 
pScreen)
}
}
 
-   if (!drmmode_set_desired_modes(pScrn, &info->drmmode, pScrn->is_gpu))
+   if (!drmmode_set_desired_modes(pScrn, &info->drmmode, FALSE))
return FALSE;
 
drmmode_uevent_init(pScrn, &info->drmmode);
-- 
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu 3/3] Only define transform_region for XF86_CRTC_VERSION >= 4

2017-03-06 Thread Michel Dänzer
From: Michel Dänzer 

Not used with older versions of Xorg. Fixes warning in that case:

../../src/amdgpu_kms.c:328:1: warning: ‘transform_region’ defined but not used 
[-Wunused-function]
 transform_region(RegionPtr region, struct pict_f_transform *transform,
 ^~~~

Signed-off-by: Michel Dänzer 
---
 src/amdgpu_kms.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/amdgpu_kms.c b/src/amdgpu_kms.c
index bfc353096..d48ad480a 100644
--- a/src/amdgpu_kms.c
+++ b/src/amdgpu_kms.c
@@ -324,6 +324,8 @@ amdgpu_scanout_extents_intersect(xf86CrtcPtr xf86_crtc, 
BoxPtr extents)
return (extents->x1 < extents->x2 && extents->y1 < extents->y2);
 }
 
+#if XF86_CRTC_VERSION >= 4
+
 static RegionPtr
 transform_region(RegionPtr region, struct pict_f_transform *transform,
 int w, int h)
@@ -362,6 +364,8 @@ transform_region(RegionPtr region, struct pict_f_transform 
*transform,
return transformed;
 }
 
+#endif
+
 static void
 amdgpu_sync_scanout_pixmaps(xf86CrtcPtr xf86_crtc, RegionPtr new_region,
int scanout_id)
-- 
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 1/2] drm/amdgpu: print full bios version in dmesg.

2017-03-06 Thread Rex Zhu
v2: fix merge error.

Change-Id: I68be9cf0afce98a1fdb11e32eb883bddda8e040c
Signed-off-by: Rex Zhu 
Reviewed-by: Xiaojie Yuan 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/atom.c | 22 --
 drivers/gpu/drm/amd/amdgpu/atom.h |  1 +
 2 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c 
b/drivers/gpu/drm/amd/amdgpu/atom.c
index d1444aa..81c60a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -1300,8 +1300,7 @@ struct atom_context *amdgpu_atom_parse(struct card_info 
*card, void *bios)
struct atom_context *ctx =
kzalloc(sizeof(struct atom_context), GFP_KERNEL);
char *str;
-   char name[512];
-   int i;
+   u16 idx;
 
if (!ctx)
return NULL;
@@ -1339,18 +1338,13 @@ struct atom_context *amdgpu_atom_parse(struct card_info 
*card, void *bios)
return NULL;
}
 
-   str = CSTR(CU16(base + ATOM_ROM_MSG_PTR));
-   while (*str && ((*str == '\n') || (*str == '\r')))
-   str++;
-   /* name string isn't always 0 terminated */
-   for (i = 0; i < 511; i++) {
-   name[i] = str[i];
-   if (name[i] < '.' || name[i] > 'z') {
-   name[i] = 0;
-   break;
-   }
-   }
-   pr_info("ATOM BIOS: %s\n", name);
+   idx = CU16(ATOM_ROM_PART_NUMBER_PTR);
+   if (idx == 0)
+   idx = 0x80;
+
+   str = CSTR(idx);
+   if (*str != '\0')
+   pr_info("ATOM BIOS: %s\n", str);
 
return ctx;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h 
b/drivers/gpu/drm/amd/amdgpu/atom.h
index 49daf6d..baa2438 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.h
+++ b/drivers/gpu/drm/amd/amdgpu/atom.h
@@ -32,6 +32,7 @@
 #define ATOM_ATI_MAGIC_PTR 0x30
 #define ATOM_ATI_MAGIC " 761295520"
 #define ATOM_ROM_TABLE_PTR 0x48
+#define ATOM_ROM_PART_NUMBER_PTR   0x6E
 
 #define ATOM_ROM_MAGIC "ATOM"
 #define ATOM_ROM_MAGIC_PTR 4
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu/gfx8: move CE meta data structure to vi_structs.h

2017-03-06 Thread Christian König

Am 06.03.2017 um 08:32 schrieb Xiangliang Yu:

Because different HWs have different definitions for CE & DE meta
data, follow the mqd design and move the structures to vi_structs.h.

Also change the prefix from amdgpu to vi, as the structures are only
for the VI family.

Signed-off-by: Xiangliang Yu 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c|  16 ++---
  drivers/gpu/drm/amd/amdgpu/vi.h  | 112 ---
  drivers/gpu/drm/amd/include/vi_structs.h | 106 +
  3 files changed, 114 insertions(+), 120 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index d706f44..8123036 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -7284,15 +7284,15 @@ static void gfx_v8_0_ring_emit_ce_meta_init(struct 
amdgpu_ring *ring, uint64_t c
uint64_t ce_payload_addr;
int cnt_ce;
static union {
-   struct amdgpu_ce_ib_state regular;
-   struct amdgpu_ce_ib_state_chained_ib chained;
+   struct vi_ce_ib_state regular;
+   struct vi_ce_ib_state_chained_ib chained;
} ce_payload = {};
  
  	if (ring->adev->virt.chained_ib_support) {

-   ce_payload_addr = csa_addr + offsetof(struct 
amdgpu_gfx_meta_data_chained_ib, ce_payload);
+   ce_payload_addr = csa_addr + offsetof(struct 
vi_gfx_meta_data_chained_ib, ce_payload);
cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
} else {
-   ce_payload_addr = csa_addr + offsetof(struct 
amdgpu_gfx_meta_data, ce_payload);
+   ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, 
ce_payload);
cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
}
  
@@ -7311,20 +7311,20 @@ static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t c

uint64_t de_payload_addr, gds_addr;
int cnt_de;
static union {
-   struct amdgpu_de_ib_state regular;
-   struct amdgpu_de_ib_state_chained_ib chained;
+   struct vi_de_ib_state regular;
+   struct vi_de_ib_state_chained_ib chained;
} de_payload = {};
  
  	gds_addr = csa_addr + 4096;

if (ring->adev->virt.chained_ib_support) {
de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
-   de_payload_addr = csa_addr + offsetof(struct 
amdgpu_gfx_meta_data_chained_ib, de_payload);
+   de_payload_addr = csa_addr + offsetof(struct 
vi_gfx_meta_data_chained_ib, de_payload);
cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
} else {
de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
-   de_payload_addr = csa_addr + offsetof(struct 
amdgpu_gfx_meta_data, de_payload);
+   de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, 
de_payload);
cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
}
  
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h

index 719587b..575d7ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.h
+++ b/drivers/gpu/drm/amd/amdgpu/vi.h
@@ -28,116 +28,4 @@ void vi_srbm_select(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 queue, u32 vmid);
  int vi_set_ip_blocks(struct amdgpu_device *adev);
  
-struct amdgpu_ce_ib_state

-{
-   uint32_t ce_ib_completion_status;
-   uint32_t ce_constegnine_count;
-   uint32_t ce_ibOffset_ib1;
-   uint32_t ce_ibOffset_ib2;
-}; /* Total of 4 DWORD */
-
-struct amdgpu_de_ib_state
-{
-   uint32_t ib_completion_status;
-   uint32_t de_constEngine_count;
-   uint32_t ib_offset_ib1;
-   uint32_t ib_offset_ib2;
-   uint32_t preamble_begin_ib1;
-   uint32_t preamble_begin_ib2;
-   uint32_t preamble_end_ib1;
-   uint32_t preamble_end_ib2;
-   uint32_t draw_indirect_baseLo;
-   uint32_t draw_indirect_baseHi;
-   uint32_t disp_indirect_baseLo;
-   uint32_t disp_indirect_baseHi;
-   uint32_t gds_backup_addrlo;
-   uint32_t gds_backup_addrhi;
-   uint32_t index_base_addrlo;
-   uint32_t index_base_addrhi;
-   uint32_t sample_cntl;
-}; /* Total of 17 DWORD */
-
-struct amdgpu_ce_ib_state_chained_ib
-{
-   /* section of non chained ib part */
-   uint32_tce_ib_completion_status;
-   uint32_tce_constegnine_count;
-   uint32_tce_ibOffset_ib1;
-   uint32_tce_ibOffset_ib2;
-
-   /* section of chained ib */
-   uint32_tce_chainib_addrlo_ib1;
-   uint32_t