[PATCH v2 1/2] drm/amdgpu: Remove the sriov checking and add firmware checking

2018-08-16 Thread Emily Deng
Refine the patch 1, and the lock about invalidate_lock.

Unify bare metal and sriov, and add firmware checking for
reg write and reg wait unify command.

Signed-off-by: Emily Deng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 59 -
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 53e9e2a..f172e92 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -274,6 +274,8 @@ struct amdgpu_gfx {
uint32_trlc_srls_feature_version;
uint32_tmec_feature_version;
uint32_tmec2_feature_version;
+   boolmec_fw_write_wait;
+   boolme_fw_write_wait;
struct amdgpu_ring  gfx_ring[AMDGPU_MAX_GFX_RINGS];
unsignednum_gfx_rings;
struct amdgpu_ring  compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 4e1e1a0..0cba430 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -482,6 +482,59 @@ static void gfx_v9_0_init_rlc_ext_microcode(struct 
amdgpu_device *adev)

le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
 }
 
+static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+   adev->gfx.me_fw_write_wait = false;
+   adev->gfx.mec_fw_write_wait = false;
+
+   switch (adev->asic_type) {
+   case CHIP_VEGA10:
+   if ((adev->gfx.me_fw_version >= 0x009c) &&
+   (adev->gfx.me_feature_version >= 42) &&
+   (adev->gfx.pfp_fw_version >=  0x00b1) &&
+   (adev->gfx.pfp_feature_version >= 42))
+   adev->gfx.me_fw_write_wait = true;
+
+   if ((adev->gfx.mec_fw_version >=  0x0193) &&
+   (adev->gfx.mec_feature_version >= 42))
+   adev->gfx.mec_fw_write_wait = true;
+   break;
+   case CHIP_VEGA12:
+   if ((adev->gfx.me_fw_version >= 0x009c) &&
+   (adev->gfx.me_feature_version >= 44) &&
+   (adev->gfx.pfp_fw_version >=  0x00b2) &&
+   (adev->gfx.pfp_feature_version >= 44))
+   adev->gfx.me_fw_write_wait = true;
+
+   if ((adev->gfx.mec_fw_version >=  0x0196) &&
+   (adev->gfx.mec_feature_version >= 44))
+   adev->gfx.mec_fw_write_wait = true;
+   break;
+   case CHIP_VEGA20:
+   if ((adev->gfx.me_fw_version >= 0x009c) &&
+   (adev->gfx.me_feature_version >= 44) &&
+   (adev->gfx.pfp_fw_version >=  0x00b2) &&
+   (adev->gfx.pfp_feature_version >= 44))
+   adev->gfx.me_fw_write_wait = true;
+
+   if ((adev->gfx.mec_fw_version >=  0x0197) &&
+   (adev->gfx.mec_feature_version >= 44))
+   adev->gfx.mec_fw_write_wait = true;
+   break;
+   case CHIP_RAVEN:
+   if ((adev->gfx.me_fw_version >= 0x009c) &&
+   (adev->gfx.me_feature_version >= 42) &&
+   (adev->gfx.pfp_fw_version >=  0x00b1) &&
+   (adev->gfx.pfp_feature_version >= 42))
+   adev->gfx.me_fw_write_wait = true;
+
+   if ((adev->gfx.mec_fw_version >=  0x0192) &&
+   (adev->gfx.mec_feature_version >= 42))
+   adev->gfx.mec_fw_write_wait = true;
+   break;
+   }
+}
+
 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 {
const char *chip_name;
@@ -716,6 +769,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device 
*adev)
}
 
 out:
+   gfx_v9_0_check_fw_write_wait(adev);
if (err) {
dev_err(adev->dev,
"gfx9: Failed to load firmware \"%s\"\n",
@@ -4353,8 +4407,11 @@ static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct 
amdgpu_ring *ring,
  uint32_t ref, uint32_t mask)
 {
int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+   struct amdgpu_device *adev = ring->adev;
+   bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
+   adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
 
-   if (amdgpu_sriov_vf(ring->adev))
+   if (fw_version_ok)
gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
  ref, mask, 0x20);
else
-- 
2.7.4

___
amd-gfx 

[PATCH v2 2/2] drm/amdgpu: use kiq to do invalidate tlb

2018-08-16 Thread Emily Deng
To avoid the tlb flush not interrupted by world switch, use kiq and one
command to do tlb invalidate.

v2:
Refine the invalidate lock position.

Signed-off-by: Emily Deng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  3 --
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 74 +---
 3 files changed, 71 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6265b88..19ef7711 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -212,6 +212,10 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_LAST
 };
 
+#define MAX_KIQ_REG_WAIT   5000 /* in usecs, 5ms */
+#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
+#define MAX_KIQ_REG_TRY 20
+
 int amdgpu_device_ip_set_clockgating_state(void *dev,
   enum amd_ip_block_type block_type,
   enum amd_clockgating_state state);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 21adb1b6..3885636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -22,9 +22,6 @@
  */
 
 #include "amdgpu.h"
-#define MAX_KIQ_REG_WAIT   5000 /* in usecs, 5ms */
-#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
-#define MAX_KIQ_REG_TRY 20
 
 uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index ed467de..0bf8439 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -311,6 +311,58 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int 
vmid)
return req;
 }
 
+signed long  amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+   signed long r, cnt = 0;
+   unsigned long flags;
+   uint32_t seq;
+   struct amdgpu_kiq *kiq = >gfx.kiq;
+   struct amdgpu_ring *ring = >ring;
+
+   if (!ring->ready)
+   return -EINVAL;
+
+   spin_lock_irqsave(>ring_lock, flags);
+
+   amdgpu_ring_alloc(ring, 32);
+   amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
+   ref, mask);
+   amdgpu_fence_emit_polling(ring, );
+   amdgpu_ring_commit(ring);
+   spin_unlock_irqrestore(>ring_lock, flags);
+
+   r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+   /* don't wait anymore for gpu reset case because this way may
+* block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+* is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+* never return if we keep waiting in virt_kiq_rreg, which cause
+* gpu_recover() hang there.
+*
+* also don't wait anymore for IRQ context
+* */
+   if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+   goto failed_kiq;
+
+   might_sleep();
+
+   while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+   msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+   r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+   }
+
+   if (cnt > MAX_KIQ_REG_TRY)
+   goto failed_kiq;
+
+   return 0;
+
+failed_kiq:
+   pr_err("failed to invalidate tlb with kiq\n");
+   return r;
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -332,13 +384,19 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev,
/* Use register 17 for GART */
const unsigned eng = 17;
unsigned i, j;
-
-   spin_lock(>gmc.invalidate_lock);
+   int r;
 
for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
struct amdgpu_vmhub *hub = >vmhub[i];
u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
 
+   r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + 
eng,
+   hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
+   if (!r)
+   continue;
+
+   spin_lock(>gmc.invalidate_lock);
+
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
 
/* Busy wait for ACK.*/
@@ -349,8 +407,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev,
break;
cpu_relax();
}
-   if (j < 100)
+   if (j < 100) {
+   spin_unlock(>gmc.invalidate_lock);
continue;
+   }
 
/* Wait for ACK with a delay.*/
for (j = 0; j < adev->usec_timeout; j++) {
@@ -360,13 +420,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct 

[PATCH] drm/amdgpu: Remove duplicate code in gfx_v8_0.c

2018-08-16 Thread Rex Zhu
There are no any logical changes here.

1. if kcq can be enabled via kiq, we don't need to
   do kiq ring test.
2. amdgpu_ring_test_ring function can be used to
   sync the ring complete, remove the duplicate code.

Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 80 ++-
 1 file changed, 13 insertions(+), 67 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 282dba6..680f9a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4604,7 +4604,6 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
 {
struct amdgpu_ring *kiq_ring = >gfx.kiq.ring;
-   uint32_t scratch, tmp = 0;
uint64_t queue_mask = 0;
int r, i;
 
@@ -4623,17 +4622,10 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device 
*adev)
queue_mask |= (1ull << i);
}
 
-   r = amdgpu_gfx_scratch_get(adev, );
-   if (r) {
-   DRM_ERROR("Failed to get scratch reg (%d).\n", r);
-   return r;
-   }
-   WREG32(scratch, 0xCAFEDEAD);
-
-   r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
+   kiq_ring->ready = true;
+   r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
if (r) {
DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-   amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
/* set resources */
@@ -4665,25 +4657,12 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device 
*adev)
amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}
-   /* write to scratch for completion */
-   amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
-   amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
-   amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
-   amdgpu_ring_commit(kiq_ring);
 
-   for (i = 0; i < adev->usec_timeout; i++) {
-   tmp = RREG32(scratch);
-   if (tmp == 0xDEADBEEF)
-   break;
-   DRM_UDELAY(1);
-   }
-   if (i >= adev->usec_timeout) {
-   DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
- scratch, tmp);
-   r = -EINVAL;
+   r = amdgpu_ring_test_ring(kiq_ring);
+   if (r) {
+   DRM_ERROR("KCQ enable failed\n");
+   kiq_ring->ready = false;
}
-   amdgpu_gfx_scratch_free(adev, scratch);
-
return r;
 }
 
@@ -5014,15 +4993,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device 
*adev)
if (r)
goto done;
 
-   /* Test KIQ */
-   ring = >gfx.kiq.ring;
-   ring->ready = true;
-   r = amdgpu_ring_test_ring(ring);
-   if (r) {
-   ring->ready = false;
-   goto done;
-   }
-
/* Test KCQs */
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = >gfx.compute_ring[i];
@@ -5092,23 +5062,11 @@ static int gfx_v8_0_hw_init(void *handle)
 
 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct 
amdgpu_ring *ring)
 {
-   struct amdgpu_device *adev = kiq_ring->adev;
-   uint32_t scratch, tmp = 0;
-   int r, i;
-
-   r = amdgpu_gfx_scratch_get(adev, );
-   if (r) {
-   DRM_ERROR("Failed to get scratch reg (%d).\n", r);
-   return r;
-   }
-   WREG32(scratch, 0xCAFEDEAD);
+   int r;
 
-   r = amdgpu_ring_alloc(kiq_ring, 10);
-   if (r) {
+   r = amdgpu_ring_alloc(kiq_ring, 7);
+   if (r)
DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-   amdgpu_gfx_scratch_free(adev, scratch);
-   return r;
-   }
 
/* unmap queues */
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
@@ -5121,23 +5079,11 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_ring 
*kiq_ring,struct amdgpu_ring
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
-   /* write to scratch for completion */
-   amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
-   amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
-   amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
-   amdgpu_ring_commit(kiq_ring);
 
-   for (i = 0; i < adev->usec_timeout; i++) {
-   tmp = RREG32(scratch);
-   if (tmp == 0xDEADBEEF)
-   break;
-   DRM_UDELAY(1);
-   }
-   if (i >= adev->usec_timeout) {
-   DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", 
scratch, tmp);
-   r = -EINVAL;
-   }
-   

Re: [PATCH] drm/amd/powerplay: set correct base for THM/NBIF/MP1 IP

2018-08-16 Thread Huang Rui
On Fri, Aug 17, 2018 at 10:36:32AM +0800, Evan Quan wrote:
> Set correct address base for vega20.
> 
> Change-Id: I7435980e2ca156ee2b443a97899d40aaba4876cb
> Signed-off-by: Evan Quan 

The patch subject prefix should be "drm/amdgpu:"
With that fixed, patch is
Reviewed-by: Huang Rui 

> ---
>  drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c 
> b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
> index 52778de93ab0..2d4473557b0d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
> @@ -38,6 +38,7 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
>   adev->reg_offset[ATHUB_HWIP][i] = (uint32_t 
> *)(&(ATHUB_BASE.instance[i]));
>   adev->reg_offset[NBIO_HWIP][i] = (uint32_t 
> *)(&(NBIO_BASE.instance[i]));
>   adev->reg_offset[MP0_HWIP][i] = (uint32_t 
> *)(&(MP0_BASE.instance[i]));
> + adev->reg_offset[MP1_HWIP][i] = (uint32_t 
> *)(&(MP1_BASE.instance[i]));
>   adev->reg_offset[UVD_HWIP][i] = (uint32_t 
> *)(&(UVD_BASE.instance[i]));
>   adev->reg_offset[VCE_HWIP][i] = (uint32_t 
> *)(&(VCE_BASE.instance[i]));
>   adev->reg_offset[DF_HWIP][i] = (uint32_t 
> *)(&(DF_BASE.instance[i]));
> @@ -46,6 +47,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
>   adev->reg_offset[SDMA0_HWIP][i] = (uint32_t 
> *)(&(SDMA0_BASE.instance[i]));
>   adev->reg_offset[SDMA1_HWIP][i] = (uint32_t 
> *)(&(SDMA1_BASE.instance[i]));
>   adev->reg_offset[SMUIO_HWIP][i] = (uint32_t 
> *)(&(SMUIO_BASE.instance[i]));
> + adev->reg_offset[NBIF_HWIP][i] = (uint32_t 
> *)(&(NBIO_BASE.instance[i]));
> + adev->reg_offset[THM_HWIP][i] = (uint32_t 
> *)(&(THM_BASE.instance[i]));
>   }
>   return 0;
>  }
> -- 
> 2.18.0
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 libdrm] amdgpu: Eliminate void* arithmetic in amdgpu_find_bo_by_cpu_mapping

2018-08-16 Thread Zhang, Jerry
 Reviewed-by: Junwei Zhang 

Regards,
Jerry(Junwei Zhang)


From: amd-gfx  on behalf of Michel 
Dänzer 
Sent: Thursday, August 16, 2018 9:54:29 PM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH v2 libdrm] amdgpu: Eliminate void* arithmetic in 
amdgpu_find_bo_by_cpu_mapping

From: Michel Dänzer 

Arithmetic using void* pointers isn't defined by the C standard, only as
a GCC extension. Avoids compiler warnings:

../../amdgpu/amdgpu_bo.c: In function ‘amdgpu_find_bo_by_cpu_mapping’:
../../amdgpu/amdgpu_bo.c:554:48: warning: pointer of type ‘void *’ used in 
arithmetic [-Wpointer-arith]
   if (cpu >= bo->cpu_ptr && cpu < (bo->cpu_ptr + bo->alloc_size))
^
../../amdgpu/amdgpu_bo.c:561:23: warning: pointer of type ‘void *’ used in 
subtraction [-Wpointer-arith]
   *offset_in_bo = cpu - bo->cpu_ptr;
   ^

v2: Use uintptr_t instead of char*, don't change function signature
(Junwei Zhang)

Fixes: 4d454424e1f2 ("amdgpu: add a function to find bo by cpu mapping
 (v2)")
Signed-off-by: Michel Dänzer 
---
 amdgpu/amdgpu_bo.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
index 86d1c143..8efd014e 100644
--- a/amdgpu/amdgpu_bo.c
+++ b/amdgpu/amdgpu_bo.c
@@ -551,14 +551,15 @@ int amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle 
dev,
bo = handle_table_lookup(>bo_handles, i);
if (!bo || !bo->cpu_ptr || size > bo->alloc_size)
continue;
-   if (cpu >= bo->cpu_ptr && cpu < (bo->cpu_ptr + bo->alloc_size))
+   if (cpu >= bo->cpu_ptr &&
+   cpu < (void*)((uintptr_t)bo->cpu_ptr + bo->alloc_size))
break;
}

if (i < dev->bo_handles.max_key) {
atomic_inc(>refcount);
*buf_handle = bo;
-   *offset_in_bo = cpu - bo->cpu_ptr;
+   *offset_in_bo = (uintptr_t)cpu - (uintptr_t)bo->cpu_ptr;
} else {
*buf_handle = NULL;
*offset_in_bo = 0;
--
2.18.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/powerplay: set correct base for THM/NBIF/MP1 IP

2018-08-16 Thread Deucher, Alexander
Reviewed-by: Alex Deucher 


From: Evan Quan 
Sent: Thursday, August 16, 2018 10:36:32 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander; Xu, Feifei; Quan, Evan
Subject: [PATCH] drm/amd/powerplay: set correct base for THM/NBIF/MP1 IP

Set correct address base for vega20.

Change-Id: I7435980e2ca156ee2b443a97899d40aaba4876cb
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
index 52778de93ab0..2d4473557b0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -38,6 +38,7 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
 adev->reg_offset[ATHUB_HWIP][i] = (uint32_t 
*)(&(ATHUB_BASE.instance[i]));
 adev->reg_offset[NBIO_HWIP][i] = (uint32_t 
*)(&(NBIO_BASE.instance[i]));
 adev->reg_offset[MP0_HWIP][i] = (uint32_t 
*)(&(MP0_BASE.instance[i]));
+   adev->reg_offset[MP1_HWIP][i] = (uint32_t 
*)(&(MP1_BASE.instance[i]));
 adev->reg_offset[UVD_HWIP][i] = (uint32_t 
*)(&(UVD_BASE.instance[i]));
 adev->reg_offset[VCE_HWIP][i] = (uint32_t 
*)(&(VCE_BASE.instance[i]));
 adev->reg_offset[DF_HWIP][i] = (uint32_t 
*)(&(DF_BASE.instance[i]));
@@ -46,6 +47,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
 adev->reg_offset[SDMA0_HWIP][i] = (uint32_t 
*)(&(SDMA0_BASE.instance[i]));
 adev->reg_offset[SDMA1_HWIP][i] = (uint32_t 
*)(&(SDMA1_BASE.instance[i]));
 adev->reg_offset[SMUIO_HWIP][i] = (uint32_t 
*)(&(SMUIO_BASE.instance[i]));
+   adev->reg_offset[NBIF_HWIP][i] = (uint32_t 
*)(&(NBIO_BASE.instance[i]));
+   adev->reg_offset[THM_HWIP][i] = (uint32_t 
*)(&(THM_BASE.instance[i]));
 }
 return 0;
 }
--
2.18.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb

2018-08-16 Thread Deng, Emily
>-Original Message-
>From: amd-gfx  On Behalf Of Deng,
>Emily
>Sent: Friday, August 17, 2018 10:19 AM
>To: Koenig, Christian ; amd-
>g...@lists.freedesktop.org
>Subject: RE: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb
>
>>-Original Message-
>>From: Christian König 
>>Sent: Thursday, August 16, 2018 9:24 PM
>>To: Deng, Emily ; amd-gfx@lists.freedesktop.org
>>Subject: Re: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb
>>
>>Am 16.08.2018 um 15:05 schrieb Emily Deng:
>>> To avoid the tlb flush not interrupted by world switch, use kiq and
>>> one command to do tlb invalidate.
>>>
>>> Signed-off-by: Emily Deng 
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  4 +++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  3 --
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 60
>>
>>>   3 files changed, 64 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 6265b88..19ef7711 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -212,6 +212,10 @@ enum amdgpu_kiq_irq {
>>> AMDGPU_CP_KIQ_IRQ_LAST
>>>   };
>>>
>>> +#define MAX_KIQ_REG_WAIT   5000 /* in usecs, 5ms */
>>> +#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
>>> +#define MAX_KIQ_REG_TRY 20
>>> +
>>>   int amdgpu_device_ip_set_clockgating_state(void *dev,
>>>enum amd_ip_block_type block_type,
>>>enum amd_clockgating_state state);
>>diff --git
>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>> index 21adb1b6..3885636 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>> @@ -22,9 +22,6 @@
>>>*/
>>>
>>>   #include "amdgpu.h"
>>> -#define MAX_KIQ_REG_WAIT   5000 /* in usecs, 5ms */
>>> -#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
>>> -#define MAX_KIQ_REG_TRY 20
>>>
>>>   uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
>>>   {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> index ed467de..6c164bd 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> @@ -311,6 +311,58 @@ static uint32_t
>>gmc_v9_0_get_invalidate_req(unsigned int vmid)
>>> return req;
>>>   }
>>>
>>> +signed long  amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
>>> + uint32_t reg0, uint32_t reg1,
>>> + uint32_t ref, uint32_t mask) {
>>> +   signed long r, cnt = 0;
>>> +   unsigned long flags;
>>> +   uint32_t seq;
>>> +   struct amdgpu_kiq *kiq = >gfx.kiq;
>>> +   struct amdgpu_ring *ring = >ring;
>>> +
>>> +   if (!ring->ready)
>>> +   return -EINVAL;
>>> +
>>> +   spin_lock_irqsave(>ring_lock, flags);
>>> +
>>> +   amdgpu_ring_alloc(ring, 32);
>>> +   amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
>>> +   ref, mask);
>>> +   amdgpu_fence_emit_polling(ring, );
>>> +   amdgpu_ring_commit(ring);
>>> +   spin_unlock_irqrestore(>ring_lock, flags);
>>> +
>>> +   r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
>>> +
>>> +   /* don't wait anymore for gpu reset case because this way may
>>> +* block gpu_recover() routine forever, e.g. this virt_kiq_rreg
>>> +* is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
>>> +* never return if we keep waiting in virt_kiq_rreg, which cause
>>> +* gpu_recover() hang there.
>>> +*
>>> +* also don't wait anymore for IRQ context
>>> +* */
>>> +   if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
>>> +   goto failed_kiq;
>>> +
>>> +   might_sleep();
>>> +
>>> +   while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
>>> +   msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
>>> +   r = amdgpu_fence_wait_polling(ring, seq,
>>MAX_KIQ_REG_WAIT);
>>> +   }
>>> +
>>> +   if (cnt > MAX_KIQ_REG_TRY)
>>> +   goto failed_kiq;
>>> +
>>> +   return 0;
>>> +
>>> +failed_kiq:
>>> +   pr_err("failed to invalidate tlb with kiq\n");
>>> +   return r;
>>> +}
>>> +
>>>   /*
>>>* GART
>>>* VMID 0 is the physical GPU addresses as used by the kernel.
>>> @@ -332,6 +384,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct
>>amdgpu_device *adev,
>>> /* Use register 17 for GART */
>>> const unsigned eng = 17;
>>> unsigned i, j;
>>> +   int r;
>>>
>>> spin_lock(>gmc.invalidate_lock);
>>>
>>> @@ -339,6 +392,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct
>>amdgpu_device *adev,
>>> struct amdgpu_vmhub *hub = >vmhub[i];
>>> u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
>>>
>>> +   spin_unlock(>gmc.invalidate_lock);
>>
>>Well just remove the lock altogether. Taking it and dropping it again

[PATCH] drm/amd/powerplay: set correct base for THM/NBIF/MP1 IP

2018-08-16 Thread Evan Quan
Set correct address base for vega20.

Change-Id: I7435980e2ca156ee2b443a97899d40aaba4876cb
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
index 52778de93ab0..2d4473557b0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -38,6 +38,7 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
adev->reg_offset[ATHUB_HWIP][i] = (uint32_t 
*)(&(ATHUB_BASE.instance[i]));
adev->reg_offset[NBIO_HWIP][i] = (uint32_t 
*)(&(NBIO_BASE.instance[i]));
adev->reg_offset[MP0_HWIP][i] = (uint32_t 
*)(&(MP0_BASE.instance[i]));
+   adev->reg_offset[MP1_HWIP][i] = (uint32_t 
*)(&(MP1_BASE.instance[i]));
adev->reg_offset[UVD_HWIP][i] = (uint32_t 
*)(&(UVD_BASE.instance[i]));
adev->reg_offset[VCE_HWIP][i] = (uint32_t 
*)(&(VCE_BASE.instance[i]));
adev->reg_offset[DF_HWIP][i] = (uint32_t 
*)(&(DF_BASE.instance[i]));
@@ -46,6 +47,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
adev->reg_offset[SDMA0_HWIP][i] = (uint32_t 
*)(&(SDMA0_BASE.instance[i]));
adev->reg_offset[SDMA1_HWIP][i] = (uint32_t 
*)(&(SDMA1_BASE.instance[i]));
adev->reg_offset[SMUIO_HWIP][i] = (uint32_t 
*)(&(SMUIO_BASE.instance[i]));
+   adev->reg_offset[NBIF_HWIP][i] = (uint32_t 
*)(&(NBIO_BASE.instance[i]));
+   adev->reg_offset[THM_HWIP][i] = (uint32_t 
*)(&(THM_BASE.instance[i]));
}
return 0;
 }
-- 
2.18.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb

2018-08-16 Thread Deng, Emily
>-Original Message-
>From: Christian König 
>Sent: Thursday, August 16, 2018 9:24 PM
>To: Deng, Emily ; amd-gfx@lists.freedesktop.org
>Subject: Re: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb
>
>Am 16.08.2018 um 15:05 schrieb Emily Deng:
>> To avoid the tlb flush not interrupted by world switch, use kiq and
>> one command to do tlb invalidate.
>>
>> Signed-off-by: Emily Deng 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  4 +++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  3 --
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 60
>
>>   3 files changed, 64 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 6265b88..19ef7711 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -212,6 +212,10 @@ enum amdgpu_kiq_irq {
>>  AMDGPU_CP_KIQ_IRQ_LAST
>>   };
>>
>> +#define MAX_KIQ_REG_WAIT   5000 /* in usecs, 5ms */
>> +#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
>> +#define MAX_KIQ_REG_TRY 20
>> +
>>   int amdgpu_device_ip_set_clockgating_state(void *dev,
>> enum amd_ip_block_type block_type,
>> enum amd_clockgating_state state);
>diff --git
>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> index 21adb1b6..3885636 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> @@ -22,9 +22,6 @@
>>*/
>>
>>   #include "amdgpu.h"
>> -#define MAX_KIQ_REG_WAIT5000 /* in usecs, 5ms */
>> -#define MAX_KIQ_REG_BAILOUT_INTERVAL5 /* in msecs, 5ms */
>> -#define MAX_KIQ_REG_TRY 20
>>
>>   uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
>>   {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index ed467de..6c164bd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -311,6 +311,58 @@ static uint32_t
>gmc_v9_0_get_invalidate_req(unsigned int vmid)
>>  return req;
>>   }
>>
>> +signed long  amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
>> +  uint32_t reg0, uint32_t reg1,
>> +  uint32_t ref, uint32_t mask)
>> +{
>> +signed long r, cnt = 0;
>> +unsigned long flags;
>> +uint32_t seq;
>> +struct amdgpu_kiq *kiq = >gfx.kiq;
>> +struct amdgpu_ring *ring = >ring;
>> +
>> +if (!ring->ready)
>> +return -EINVAL;
>> +
>> +spin_lock_irqsave(>ring_lock, flags);
>> +
>> +amdgpu_ring_alloc(ring, 32);
>> +amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
>> +ref, mask);
>> +amdgpu_fence_emit_polling(ring, );
>> +amdgpu_ring_commit(ring);
>> +spin_unlock_irqrestore(>ring_lock, flags);
>> +
>> +r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
>> +
>> +/* don't wait anymore for gpu reset case because this way may
>> + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
>> + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
>> + * never return if we keep waiting in virt_kiq_rreg, which cause
>> + * gpu_recover() hang there.
>> + *
>> + * also don't wait anymore for IRQ context
>> + * */
>> +if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
>> +goto failed_kiq;
>> +
>> +might_sleep();
>> +
>> +while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
>> +msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
>> +r = amdgpu_fence_wait_polling(ring, seq,
>MAX_KIQ_REG_WAIT);
>> +}
>> +
>> +if (cnt > MAX_KIQ_REG_TRY)
>> +goto failed_kiq;
>> +
>> +return 0;
>> +
>> +failed_kiq:
>> +pr_err("failed to invalidate tlb with kiq\n");
>> +return r;
>> +}
>> +
>>   /*
>>* GART
>>* VMID 0 is the physical GPU addresses as used by the kernel.
>> @@ -332,6 +384,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct
>amdgpu_device *adev,
>>  /* Use register 17 for GART */
>>  const unsigned eng = 17;
>>  unsigned i, j;
>> +int r;
>>
>>  spin_lock(>gmc.invalidate_lock);
>>
>> @@ -339,6 +392,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct
>amdgpu_device *adev,
>>  struct amdgpu_vmhub *hub = >vmhub[i];
>>  u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
>>
>> +spin_unlock(>gmc.invalidate_lock);
>
>Well just remove the lock altogether. Taking it and dropping it again
>immediately doesn't do anything useful.
>
>> +r = amdgpu_kiq_reg_write_reg_wait(adev, hub-
>>vm_inv_eng0_req + eng,
>> +hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
>> +spin_lock(>gmc.invalidate_lock);
>> +if (!r)
>> +continue;
>> +
>>  

RE: [PATCH 1/2] drm/amdgpu: Remove the sriov checking and add firmware checking

2018-08-16 Thread Deng, Emily
>-Original Message-
>From: Christian König 
>Sent: Thursday, August 16, 2018 9:17 PM
>To: Deng, Emily ; amd-gfx@lists.freedesktop.org
>Subject: Re: [PATCH 1/2] drm/amdgpu: Remove the sriov checking and add
>firmware checking
>
>Am 16.08.2018 um 15:05 schrieb Emily Deng:
>> Unify bare metal and sriov, and add firmware checking for reg write
>> and reg wait unify command.
>>
>> Signed-off-by: Emily Deng 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  2 ++
>>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 47
>-
>>   2 files changed, 48 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> index 53e9e2a..f172e92 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> @@ -274,6 +274,8 @@ struct amdgpu_gfx {
>>  uint32_trlc_srls_feature_version;
>>  uint32_tmec_feature_version;
>>  uint32_tmec2_feature_version;
>> +boolmec_fw_write_wait;
>> +boolme_fw_write_wait;
>>  struct amdgpu_ring  gfx_ring[AMDGPU_MAX_GFX_RINGS];
>>  unsignednum_gfx_rings;
>>  struct amdgpu_ring
>   compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index 76d979e..76e8973 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -482,6 +482,47 @@ static void gfx_v9_0_init_rlc_ext_microcode(struct
>amdgpu_device *adev)
>>  le32_to_cpu(rlc_hdr-
>>reg_list_format_direct_reg_list_length);
>>   }
>>
>> +static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
>> +{
>> +adev->gfx.me_fw_write_wait = false;
>> +adev->gfx.mec_fw_write_wait = false;
>> +
>> +switch (adev->asic_type) {
>> +case CHIP_VEGA10:
>> +if ((adev->gfx.me_fw_version >= 0x009c) && (adev-
>>gfx.me_feature_version >= 42) &&
>> +(adev->gfx.pfp_fw_version >=  0x00b1) && (adev-
>>gfx.pfp_feature_version >= 42))
>> +adev->gfx.me_fw_write_wait = true;
>
>Well that looks like the correct solution to me. Only a nit pick left to fix, 
>the
>coding style looks a bit off.
>
>For example the "if" should be indented so that "(adev.." in both lines is on 
>the
>same column:
>
>if ((adev ) &&
>     (adev
>
>And the "adev->gfx.me_fw_write_wait = true;" is to far to the right.
>
Thanks, will modify as your suggestion.
>With that fixed the patch is Acked-by: Christian König
>.
>
>Regards,
>Christian.
>
>> +
>> +if ((adev->gfx.mec_fw_version >=  0x0193) && (adev-
>>gfx.mec_feature_version >= 42))
>> +adev->gfx.mec_fw_write_wait = true;
>> +break;
>> +case CHIP_VEGA12:
>> +if ((adev->gfx.me_fw_version >= 0x009c) && (adev-
>>gfx.me_feature_version >= 44) &&
>> +(adev->gfx.pfp_fw_version >=  0x00b2) && (adev-
>>gfx.pfp_feature_version >= 44))
>> +adev->gfx.me_fw_write_wait = true;
>> +
>> +if ((adev->gfx.mec_fw_version >=  0x0196) && (adev-
>>gfx.mec_feature_version >= 44))
>> +adev->gfx.mec_fw_write_wait = true;
>> +break;
>> +case CHIP_VEGA20:
>> +if ((adev->gfx.me_fw_version >= 0x009c) && (adev-
>>gfx.me_feature_version >= 44) &&
>> +(adev->gfx.pfp_fw_version >=  0x00b2) && (adev-
>>gfx.pfp_feature_version >= 44))
>> +adev->gfx.me_fw_write_wait = true;
>> +
>> +if ((adev->gfx.mec_fw_version >=  0x0197) && (adev-
>>gfx.mec_feature_version >= 44))
>> +adev->gfx.mec_fw_write_wait = true;
>> +break;
>> +case CHIP_RAVEN:
>> +if ((adev->gfx.me_fw_version >= 0x009c) && (adev-
>>gfx.me_feature_version >= 42) &&
>> +(adev->gfx.pfp_fw_version >=  0x00b1) && (adev-
>>gfx.pfp_feature_version >= 42))
>> +adev->gfx.me_fw_write_wait = true;
>> +
>> +if ((adev->gfx.mec_fw_version >=  0x0192) && (adev-
>>gfx.mec_feature_version >= 42))
>> +adev->gfx.mec_fw_write_wait = true;
>> +break;
>> +}
>> +}
>> +
>>   static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
>>   {
>>  const char *chip_name;
>> @@ -716,6 +757,7 @@ static int gfx_v9_0_init_microcode(struct
>amdgpu_device *adev)
>>  }
>>
>>   out:
>> +gfx_v9_0_check_fw_write_wait(adev);
>>  if (err) {
>>  dev_err(adev->dev,
>>  "gfx9: Failed to load firmware \"%s\"\n", @@ -4348,8
>+4390,11 @@
>> static void 

[PATCH] drm/amdgpu/vega20: add missing IP reg offsets

2018-08-16 Thread Alex Deucher
Add additional IP block offsets.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
index 52778de93ab0..c231ae6f79e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -38,6 +38,7 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
adev->reg_offset[ATHUB_HWIP][i] = (uint32_t 
*)(&(ATHUB_BASE.instance[i]));
adev->reg_offset[NBIO_HWIP][i] = (uint32_t 
*)(&(NBIO_BASE.instance[i]));
adev->reg_offset[MP0_HWIP][i] = (uint32_t 
*)(&(MP0_BASE.instance[i]));
+   adev->reg_offset[MP1_HWIP][i] = (uint32_t 
*)(&(MP1_BASE.instance[i]));
adev->reg_offset[UVD_HWIP][i] = (uint32_t 
*)(&(UVD_BASE.instance[i]));
adev->reg_offset[VCE_HWIP][i] = (uint32_t 
*)(&(VCE_BASE.instance[i]));
adev->reg_offset[DF_HWIP][i] = (uint32_t 
*)(&(DF_BASE.instance[i]));
@@ -46,6 +47,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
adev->reg_offset[SDMA0_HWIP][i] = (uint32_t 
*)(&(SDMA0_BASE.instance[i]));
adev->reg_offset[SDMA1_HWIP][i] = (uint32_t 
*)(&(SDMA1_BASE.instance[i]));
adev->reg_offset[SMUIO_HWIP][i] = (uint32_t 
*)(&(SMUIO_BASE.instance[i]));
+   adev->reg_offset[THM_HWIP][i] = (uint32_t 
*)(&(THM_BASE.instance[i]));
+   adev->reg_offset[CLK_HWIP][i] = (uint32_t 
*)(&(CLK_BASE.instance[i]));
}
return 0;
 }
-- 
2.13.6

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] Revert "drm/amdgpu/display: Replace CONFIG_DRM_AMD_DC_DCN1_0 with CONFIG_X86"

2018-08-16 Thread Alex Deucher
On Thu, Aug 16, 2018 at 3:45 PM  wrote:
>
> From: "Leo (Sunpeng) Li" 
>
> This reverts commit 8624c3c4dbfe24fc6740687236a2e196f5f4bfb0.
>
> We need CONFIG_DRM_AMD_DC_DCN1_0 to guard code that is using fp math.

MIssing your SOB.  WIth that fixed, series is:
Acked-by: Alex Deucher 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] [v3] drm: amd: dc: don't use FP math when Kcov is enabled

2018-08-16 Thread Arnd Bergmann
On Thu, Aug 16, 2018 at 9:56 PM Leo Li  wrote:
> On 2018-08-11 11:54 AM, Arnd Bergmann wrote:
> >
> > I tried implementing the two functions in KCOV: __sanitizer_cov_trace_cmpd
> > and __sanitizer_cov_trace_cmpf, but that fails to build on architectures
> > that do not support any floating-point functions, or would require making
> > that code x86 specific as well.  I also looked at what it would take to
>
> Hi Arnd,
>
> Is there a reason why we can't make __sanitizer_cov_trace_cmpd and
> __sanitizer_cov_trace_cmpf X86 dependent?
>
> I sent out two patches to disable DCN1, but would prefer implementing
> these two functions as opposed to disabling a component.

I think it should be possible to implement them, perhaps not even hard
to do it in an architecture independent way. I tried this at some point
and couldn't figure it out, but I suppose it would fix the problem nicely.

This would assume that the two functions can only ever be called
from a context that already has access to the fpu, which I think is
the case here.

 Arnd
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amd/display: Don't build DCN1 when kcov is enabled

2018-08-16 Thread Arnd Bergmann
On Thu, Aug 16, 2018 at 9:45 PM  wrote:
>
> From: "Leo (Sunpeng) Li" 
>
> DCN1 contains code that utilizes fp math. When
> CONFIG_KCOV_INSTRUMENT_ALL and CONFIG_KCOV_ENABLE_COMPARISONS are
> enabled, build errors are found. See this earlier patch for details:
>
> https://lists.freedesktop.org/archives/dri-devel/2018-August/186131.html
>
> As a short term solution, disable CONFIG_DRM_AMD_DC_DCN1_0 when
> KCOV_INSTRUMENT_ALL and KCOV_ENABLE_COMPARISONS are enabled. In
> addition, make it a fully derived config, taking into account
> CONFIG_X86.
>
> Signed-off-by: Leo (Sunpeng) Li 

Looks good to me,

Acked-by: Arnd Bergmann 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH xf86-video-ati 5/5] Remove drmmode_crtc_private_rec::present_vblank_* related code

2018-08-16 Thread Alex Deucher
On Thu, Aug 16, 2018 at 12:20 PM Michel Dänzer  wrote:
>
> From: Michel Dänzer 
>
> Not needed anymore with the more robust mechanisms for preventing nested
> drmHandleEvent calls introduced in the previous changes.
>
> (Ported from amdgpu commit 85cd8eef0cbed7b409b07f58d76dacd34aa3ddea)
>
> Signed-off-by: Michel Dänzer 

Series is:
Acked-by: Alex Deucher 

> ---
>  src/drmmode_display.h |  8 
>  src/radeon_kms.c  |  9 -
>  src/radeon_present.c  | 34 +++---
>  3 files changed, 3 insertions(+), 48 deletions(-)
>
> diff --git a/src/drmmode_display.h b/src/drmmode_display.h
> index a039bf8fb..bc66eda65 100644
> --- a/src/drmmode_display.h
> +++ b/src/drmmode_display.h
> @@ -111,14 +111,6 @@ typedef struct {
>  struct drmmode_fb *flip_pending;
>  /* The FB currently being scanned out by this CRTC, if any */
>  struct drmmode_fb *fb;
> -
> -#ifdef HAVE_PRESENT_H
> -/* Deferred processing of Present vblank event */
> -uint64_t present_vblank_event_id;
> -uint64_t present_vblank_usec;
> -unsigned present_vblank_msc;
> -Bool present_flip_expected;
> -#endif
>  } drmmode_crtc_private_rec, *drmmode_crtc_private_ptr;
>
>  typedef struct {
> diff --git a/src/radeon_kms.c b/src/radeon_kms.c
> index 809d24469..a24776811 100644
> --- a/src/radeon_kms.c
> +++ b/src/radeon_kms.c
> @@ -541,15 +541,6 @@ radeon_scanout_flip_handler(xf86CrtcPtr crtc, uint32_t 
> msc, uint64_t usec,
>  drmmode_fb_reference(pRADEONEnt->fd, _crtc->fb,
>  drmmode_crtc->flip_pending);
>  radeon_scanout_flip_abort(crtc, event_data);
> -
> -#ifdef HAVE_PRESENT_H
> -if (drmmode_crtc->present_vblank_event_id) {
> -   present_event_notify(drmmode_crtc->present_vblank_event_id,
> -drmmode_crtc->present_vblank_usec,
> -drmmode_crtc->present_vblank_msc);
> -   drmmode_crtc->present_vblank_event_id = 0;
> -}
> -#endif
>  }
>
>
> diff --git a/src/radeon_present.c b/src/radeon_present.c
> index ffc14a0e8..d0a0c68ca 100644
> --- a/src/radeon_present.c
> +++ b/src/radeon_present.c
> @@ -52,7 +52,6 @@ static present_screen_info_rec radeon_present_screen_info;
>
>  struct radeon_present_vblank_event {
>  uint64_t event_id;
> -Bool vblank_for_flip;
>  Bool unflip;
>  };
>
> @@ -120,26 +119,9 @@ static void
>  radeon_present_vblank_handler(xf86CrtcPtr crtc, unsigned int msc,
>   uint64_t usec, void *data)
>  {
> -drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
>  struct radeon_present_vblank_event *event = data;
>
> -if (event->vblank_for_flip &&
> -   drmmode_crtc->tear_free &&
> -   drmmode_crtc->scanout_update_pending) {
> -   if (drmmode_crtc->present_vblank_event_id != 0) {
> -   xf86DrvMsg(crtc->scrn->scrnIndex, X_WARNING,
> -  "Need to handle previously deferred vblank event\n");
> -   present_event_notify(drmmode_crtc->present_vblank_event_id,
> -drmmode_crtc->present_vblank_usec,
> -drmmode_crtc->present_vblank_msc);
> -   }
> -
> -   drmmode_crtc->present_vblank_event_id = event->event_id;
> -   drmmode_crtc->present_vblank_msc = msc;
> -   drmmode_crtc->present_vblank_usec = usec;
> -} else
> -   present_event_notify(event->event_id, usec, msc);
> -
> +present_event_notify(event->event_id, usec, msc);
>  free(event);
>  }
>
> @@ -162,7 +144,6 @@ static int
>  radeon_present_queue_vblank(RRCrtcPtr crtc, uint64_t event_id, uint64_t msc)
>  {
>  xf86CrtcPtr xf86_crtc = crtc->devPrivate;
> -drmmode_crtc_private_ptr drmmode_crtc = xf86_crtc->driver_private;
>  ScreenPtr screen = crtc->pScreen;
>  struct radeon_present_vblank_event *event;
>  uintptr_t drm_queue_seq;
> @@ -171,8 +152,6 @@ radeon_present_queue_vblank(RRCrtcPtr crtc, uint64_t 
> event_id, uint64_t msc)
>  if (!event)
> return BadAlloc;
>  event->event_id = event_id;
> -event->vblank_for_flip = drmmode_crtc->present_flip_expected;
> -drmmode_crtc->present_flip_expected = FALSE;
>
>  drm_queue_seq = radeon_drm_queue_alloc(xf86_crtc,
>RADEON_DRM_QUEUE_CLIENT_DEFAULT,
> @@ -272,7 +251,6 @@ radeon_present_check_flip(RRCrtcPtr crtc, WindowPtr 
> window, PixmapPtr pixmap,
>   Bool sync_flip)
>  {
>  xf86CrtcPtr xf86_crtc = crtc->devPrivate;
> -drmmode_crtc_private_ptr drmmode_crtc = xf86_crtc->driver_private;
>  ScreenPtr screen = window->drawable.pScreen;
>  ScrnInfoPtr scrn = xf86_crtc->scrn;
>  xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn);
> @@ -281,8 +259,6 @@ radeon_present_check_flip(RRCrtcPtr crtc, WindowPtr 
> window, PixmapPtr pixmap,
>  int num_crtcs_on;
>  int i;
>
> -drmmode_crtc->present_flip_expected = FALSE;
> -
>  if (!scrn->vtSema)
> return 

[PATCH] drm/amdgpu/display: disable eDP fast boot optimization on DCE8

2018-08-16 Thread Alex Deucher
Seems to cause blank screens.

Bug: https://bugs.freedesktop.org/show_bug.cgi?id=106940
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 350ee3e3e34d..2e3f85ceeaa9 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -1562,7 +1562,13 @@ void dce110_enable_accelerated_mode(struct dc *dc, 
struct dc_state *context)
bool can_eDP_fast_boot_optimize = false;
 
if (edp_link) {
-   can_eDP_fast_boot_optimize =
+   /* this seems to cause blank screens on DCE8 */
+   if ((dc->ctx->dce_version == DCE_VERSION_8_0) ||
+   (dc->ctx->dce_version == DCE_VERSION_8_1) ||
+   (dc->ctx->dce_version == DCE_VERSION_8_3))
+   can_eDP_fast_boot_optimize = false;
+   else
+   can_eDP_fast_boot_optimize =

edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc);
}
 
-- 
2.13.6

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amd/display: Don't build DCN1 when kcov is enabled

2018-08-16 Thread sunpeng.li
From: "Leo (Sunpeng) Li" 

DCN1 contains code that utilizes fp math. When
CONFIG_KCOV_INSTRUMENT_ALL and CONFIG_KCOV_ENABLE_COMPARISONS are
enabled, build errors are found. See this earlier patch for details:

https://lists.freedesktop.org/archives/dri-devel/2018-August/186131.html

As a short term solution, disable CONFIG_DRM_AMD_DC_DCN1_0 when
KCOV_INSTRUMENT_ALL and KCOV_ENABLE_COMPARISONS are enabled. In
addition, make it a fully derived config, taking into account
CONFIG_X86.

Signed-off-by: Leo (Sunpeng) Li 
---
 drivers/gpu/drm/amd/display/Kconfig | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/Kconfig 
b/drivers/gpu/drm/amd/display/Kconfig
index 4c35625..ed654a7 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -4,18 +4,16 @@ menu "Display Engine Configuration"
 config DRM_AMD_DC
bool "AMD DC - Enable new display engine"
default y
+   select DRM_AMD_DC_DCN1_0 if X86 && !(KCOV_INSTRUMENT_ALL && 
KCOV_ENABLE_COMPARISONS)
help
  Choose this option if you want to use the new display engine
  support for AMDGPU. This adds required support for Vega and
  Raven ASICs.
 
 config DRM_AMD_DC_DCN1_0
-   bool "DCN 1.0 Raven family"
-   depends on DRM_AMD_DC && X86
-   default y
+   def_bool n
help
- Choose this option if you want to have
- RV family for display engine
+ RV family support for display engine
 
 config DEBUG_KERNEL_DC
bool "Enable kgdb break in DC"
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix amdgpu_amdkfd_remove_eviction_fence

2018-08-16 Thread Christian König

Am 16.08.2018 um 20:43 schrieb Felix Kuehling:


On 2018-08-16 02:26 PM, Christian König wrote:

Am 16.08.2018 um 20:23 schrieb Felix Kuehling:

On 2018-08-16 02:18 PM, Christian König wrote:

Am 16.08.2018 um 18:50 schrieb Felix Kuehling:

On 2018-08-16 02:43 AM, Christian König wrote:
[SNIP]

I mean it could be that in the worst case we race and stop a KFD
process for no good reason.

Right. For a more practical example, a KFD BO can get evicted just
before the application decides to unmap it. The preemption happens
asynchronously, handled by an SDMA job in the GPU scheduler. That job
will have an amdgpu_sync object with the eviction fence in it.

While that SDMA job is pending or in progress, the application decides
to unmap the BO. That removes the eviction fence from that BO's
reservation. But it can't remove the fence from all the sync objects
that were previously created and are still in flight. So the
preemption
will be triggered, and the fence will eventually signal when the KFD
preemption is complete.

I don't think that's something we can prevent. The worst case is
that a
preemption happens unnecessarily if an eviction gets triggered just
before removing the fence. But removing the fence will prevent future
evictions of the BO from triggering a KFD process preemption.
That's the
best we can do.

It's true that you can't drop the SDMA job which wants to evict the
BO, but at this time the fence signaling is already underway and not
stoppable anymore.

Replacing the fence with a new one would just be much more cleaner and
fix quite a bunch of corner cases where the KFD process would be
preempted without good reason.

Replacing the fence cleanly probably also involves a preemption, so you
don't gain anything.

Mhm, why that?

My idea would be to create a new fence, replace the old one with the
new one and then manually signal the old one.

So why should there be a preemption triggered here?

Right maybe you can do this without triggering a preemption, but it
would require a bunch of new code. Currently the only thing that
replaces an old eviction fence with a new one is a preemption+restore.

You'll need to replace the fence in all BOs belonging to the process.
Since removing a fence is something you don't want to do, that really
means adding a new fence, and leaving the old fence in place until it
signals. In the mean time you have two eviction fences active for this
process, and if either one gets triggered, you'll get a preemption. Only
after all BOs have the new eviction fence, you can disarm the old
eviction fence and signal it (without triggering a preemption).

The way I see it, this adds a bunch of CPU overhead (depending on the
number of BOs in the process), and increases the likelihood of
unnecessary preemptions, because it takes that much longer for the
operation to complete.


Yeah, the CPU overhead is indeed a really good point.


Talking about overhead, imagine a process cleaning up at process
termination, which unmaps and frees all BOs. Each BO that gets freed
replaces the eviction fence on all remaining BOs. If you have N BOs,
you'll end up creating N new eviction fences in the process. The
overhead scales with O(N²) of the number of BOs in the process.


Well as I said that is the only case where I think replacing fences is 
unproblematic.


E.g. when a BO is freed we already have a mechanism to copy all fences 
to a local reservation object (to prevent that any new fences can be 
added to the BO if it shares it's reservation object).


At this time it should be really easy to filter out BOs you don't want 
to wait for when the BO is destructed.


But anyway, when you can live with the possibility that a stale fence 
triggers a process preemption than there isn't much point in cleaning 
this up.


Regards,
Christian.



Regards,
   Felix


Christian.


Regards,
    Felix


It's probably quite a bit of more CPU overhead of doing so, but I
think that this would still be the more fail prove option.

Regards,
Christian.



Regards,
     Felix


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix amdgpu_amdkfd_remove_eviction_fence

2018-08-16 Thread Felix Kuehling


On 2018-08-16 02:26 PM, Christian König wrote:
> Am 16.08.2018 um 20:23 schrieb Felix Kuehling:
>> On 2018-08-16 02:18 PM, Christian König wrote:
>>> Am 16.08.2018 um 18:50 schrieb Felix Kuehling:
 On 2018-08-16 02:43 AM, Christian König wrote:
 [SNIP]
> I mean it could be that in the worst case we race and stop a KFD
> process for no good reason.
 Right. For a more practical example, a KFD BO can get evicted just
 before the application decides to unmap it. The preemption happens
 asynchronously, handled by an SDMA job in the GPU scheduler. That job
 will have an amdgpu_sync object with the eviction fence in it.

 While that SDMA job is pending or in progress, the application decides
 to unmap the BO. That removes the eviction fence from that BO's
 reservation. But it can't remove the fence from all the sync objects
 that were previously created and are still in flight. So the
 preemption
 will be triggered, and the fence will eventually signal when the KFD
 preemption is complete.

 I don't think that's something we can prevent. The worst case is
 that a
 preemption happens unnecessarily if an eviction gets triggered just
 before removing the fence. But removing the fence will prevent future
 evictions of the BO from triggering a KFD process preemption.
 That's the
 best we can do.
>>> It's true that you can't drop the SDMA job which wants to evict the
>>> BO, but at this time the fence signaling is already underway and not
>>> stoppable anymore.
>>>
>>> Replacing the fence with a new one would just be much more cleaner and
>>> fix quite a bunch of corner cases where the KFD process would be
>>> preempted without good reason.
>> Replacing the fence cleanly probably also involves a preemption, so you
>> don't gain anything.
>
> Mhm, why that?
>
> My idea would be to create a new fence, replace the old one with the
> new one and then manually signal the old one.
>
> So why should there be a preemption triggered here?

Right maybe you can do this without triggering a preemption, but it
would require a bunch of new code. Currently the only thing that
replaces an old eviction fence with a new one is a preemption+restore.

You'll need to replace the fence in all BOs belonging to the process.
Since removing a fence is something you don't want to do, that really
means adding a new fence, and leaving the old fence in place until it
signals. In the mean time you have two eviction fences active for this
process, and if either one gets triggered, you'll get a preemption. Only
after all BOs have the new eviction fence, you can disarm the old
eviction fence and signal it (without triggering a preemption).

The way I see it, this adds a bunch of CPU overhead (depending on the
number of BOs in the process), and increases the likelihood of
unnecessary preemptions, because it takes that much longer for the
operation to complete.

Talking about overhead, imagine a process cleaning up at process
termination, which unmaps and frees all BOs. Each BO that gets freed
replaces the eviction fence on all remaining BOs. If you have N BOs,
you'll end up creating N new eviction fences in the process. The
overhead scales with O(N²) of the number of BOs in the process.

Regards,
  Felix

>
> Christian.
>
>>
>> Regards,
>>    Felix
>>
>>> It's probably quite a bit of more CPU overhead of doing so, but I
>>> think that this would still be the more fail prove option.
>>>
>>> Regards,
>>> Christian.
>>>
>>>
 Regards,
     Felix

>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix amdgpu_amdkfd_remove_eviction_fence

2018-08-16 Thread Christian König

Am 16.08.2018 um 20:23 schrieb Felix Kuehling:

On 2018-08-16 02:18 PM, Christian König wrote:

Am 16.08.2018 um 18:50 schrieb Felix Kuehling:

On 2018-08-16 02:43 AM, Christian König wrote:
[SNIP]

I mean it could be that in the worst case we race and stop a KFD
process for no good reason.

Right. For a more practical example, a KFD BO can get evicted just
before the application decides to unmap it. The preemption happens
asynchronously, handled by an SDMA job in the GPU scheduler. That job
will have an amdgpu_sync object with the eviction fence in it.

While that SDMA job is pending or in progress, the application decides
to unmap the BO. That removes the eviction fence from that BO's
reservation. But it can't remove the fence from all the sync objects
that were previously created and are still in flight. So the preemption
will be triggered, and the fence will eventually signal when the KFD
preemption is complete.

I don't think that's something we can prevent. The worst case is that a
preemption happens unnecessarily if an eviction gets triggered just
before removing the fence. But removing the fence will prevent future
evictions of the BO from triggering a KFD process preemption. That's the
best we can do.

It's true that you can't drop the SDMA job which wants to evict the
BO, but at this time the fence signaling is already underway and not
stoppable anymore.

Replacing the fence with a new one would just be much more cleaner and
fix quite a bunch of corner cases where the KFD process would be
preempted without good reason.

Replacing the fence cleanly probably also involves a preemption, so you
don't gain anything.


Mhm, why that?

My idea would be to create a new fence, replace the old one with the new 
one and then manually signal the old one.


So why should there be a preemption triggered here?

Christian.



Regards,
   Felix


It's probably quite a bit of more CPU overhead of doing so, but I
think that this would still be the more fail prove option.

Regards,
Christian.



Regards,
    Felix



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix amdgpu_amdkfd_remove_eviction_fence

2018-08-16 Thread Felix Kuehling
On 2018-08-16 02:18 PM, Christian König wrote:
> Am 16.08.2018 um 18:50 schrieb Felix Kuehling:
>> On 2018-08-16 02:43 AM, Christian König wrote:
>> [SNIP]
>>> I mean it could be that in the worst case we race and stop a KFD
>>> process for no good reason.
>> Right. For a more practical example, a KFD BO can get evicted just
>> before the application decides to unmap it. The preemption happens
>> asynchronously, handled by an SDMA job in the GPU scheduler. That job
>> will have an amdgpu_sync object with the eviction fence in it.
>>
>> While that SDMA job is pending or in progress, the application decides
>> to unmap the BO. That removes the eviction fence from that BO's
>> reservation. But it can't remove the fence from all the sync objects
>> that were previously created and are still in flight. So the preemption
>> will be triggered, and the fence will eventually signal when the KFD
>> preemption is complete.
>>
>> I don't think that's something we can prevent. The worst case is that a
>> preemption happens unnecessarily if an eviction gets triggered just
>> before removing the fence. But removing the fence will prevent future
>> evictions of the BO from triggering a KFD process preemption. That's the
>> best we can do.
>
> It's true that you can't drop the SDMA job which wants to evict the
> BO, but at this time the fence signaling is already underway and not
> stoppable anymore.
>
> Replacing the fence with a new one would just be much more cleaner and
> fix quite a bunch of corner cases where the KFD process would be
> preempted without good reason.

Replacing the fence cleanly probably also involves a preemption, so you
don't gain anything.

Regards,
  Felix

>
> It's probably quite a bit of more CPU overhead of doing so, but I
> think that this would still be the more fail prove option.
>
> Regards,
> Christian.
>
>
>>
>> Regards,
>>    Felix
>>
>

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 libdrm] amdgpu: Eliminate void* arithmetic in amdgpu_find_bo_by_cpu_mapping

2018-08-16 Thread Christian König

Am 16.08.2018 um 15:54 schrieb Michel Dänzer:

From: Michel Dänzer 

Arithmetic using void* pointers isn't defined by the C standard, only as
a GCC extension. Avoids compiler warnings:

../../amdgpu/amdgpu_bo.c: In function ‘amdgpu_find_bo_by_cpu_mapping’:
../../amdgpu/amdgpu_bo.c:554:48: warning: pointer of type ‘void *’ used in 
arithmetic [-Wpointer-arith]
if (cpu >= bo->cpu_ptr && cpu < (bo->cpu_ptr + bo->alloc_size))
 ^
../../amdgpu/amdgpu_bo.c:561:23: warning: pointer of type ‘void *’ used in 
subtraction [-Wpointer-arith]
*offset_in_bo = cpu - bo->cpu_ptr;
^

v2: Use uintptr_t instead of char*, don't change function signature
 (Junwei Zhang)

Fixes: 4d454424e1f2 ("amdgpu: add a function to find bo by cpu mapping
  (v2)")
Signed-off-by: Michel Dänzer 


Reviewed-by: Christian König 


---
  amdgpu/amdgpu_bo.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
index 86d1c143..8efd014e 100644
--- a/amdgpu/amdgpu_bo.c
+++ b/amdgpu/amdgpu_bo.c
@@ -551,14 +551,15 @@ int amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle 
dev,
bo = handle_table_lookup(>bo_handles, i);
if (!bo || !bo->cpu_ptr || size > bo->alloc_size)
continue;
-   if (cpu >= bo->cpu_ptr && cpu < (bo->cpu_ptr + bo->alloc_size))
+   if (cpu >= bo->cpu_ptr &&
+   cpu < (void*)((uintptr_t)bo->cpu_ptr + bo->alloc_size))
break;
}
  
  	if (i < dev->bo_handles.max_key) {

atomic_inc(>refcount);
*buf_handle = bo;
-   *offset_in_bo = cpu - bo->cpu_ptr;
+   *offset_in_bo = (uintptr_t)cpu - (uintptr_t)bo->cpu_ptr;
} else {
*buf_handle = NULL;
*offset_in_bo = 0;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix amdgpu_amdkfd_remove_eviction_fence

2018-08-16 Thread Christian König

Am 16.08.2018 um 18:50 schrieb Felix Kuehling:

On 2018-08-16 02:43 AM, Christian König wrote:
[SNIP]

I mean it could be that in the worst case we race and stop a KFD
process for no good reason.

Right. For a more practical example, a KFD BO can get evicted just
before the application decides to unmap it. The preemption happens
asynchronously, handled by an SDMA job in the GPU scheduler. That job
will have an amdgpu_sync object with the eviction fence in it.

While that SDMA job is pending or in progress, the application decides
to unmap the BO. That removes the eviction fence from that BO's
reservation. But it can't remove the fence from all the sync objects
that were previously created and are still in flight. So the preemption
will be triggered, and the fence will eventually signal when the KFD
preemption is complete.

I don't think that's something we can prevent. The worst case is that a
preemption happens unnecessarily if an eviction gets triggered just
before removing the fence. But removing the fence will prevent future
evictions of the BO from triggering a KFD process preemption. That's the
best we can do.


It's true that you can't drop the SDMA job which wants to evict the BO, 
but at this time the fence signaling is already underway and not 
stoppable anymore.


Replacing the fence with a new one would just be much more cleaner and 
fix quite a bunch of corner cases where the KFD process would be 
preempted without good reason.


It's probably quite a bit of more CPU overhead of doing so, but I think 
that this would still be the more fail prove option.


Regards,
Christian.




Regards,
   Felix



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[pull] amdgpu drm-next-4.19

2018-08-16 Thread Alex Deucher
Hi Dave,

Fixes for 4.19:
- Add VCN PSP FW loading for RV (this is required on upcoming parts)
- Fix scheduler setup ordering for VCE and UVD
- Few misc display fixes

The following changes since commit 557ce95051c8eff67af48612ab350d8408aa0541:

  Merge branch 'drm-next-4.19' of git://people.freedesktop.org/~agd5f/linux 
into drm-next (2018-08-10 11:43:02 +1000)

are available in the git repository at:

  git://people.freedesktop.org/~agd5f/linux drm-next-4.19

for you to fetch changes up to c9533d1bca3393fbdfe5b24ccb4cfe6a1a02d794:

  drm/amdgpu: Use kvmalloc for allocating UVD/VCE/VCN BO backup memory 
(2018-08-16 12:59:11 -0500)


Charlene Liu (1):
  drm/amd/display: fix single link DVI has no display

Emily Deng (2):
  drm/amdgpu/uvd: UVD entity initialization relys on ring initialization
  drm/amdgpu/vce: VCE entity initialization relies on ring initializtion

James Zhu (2):
  drm/amdgpu:add tmr mc address into amdgpu_firmware_info
  drm/amdgpu: update tmr mc address

Jerry (Fangzhi) Zuo (1):
  drm/amd/display: Fix warning observed in mode change on Vega

Likun Gao (3):
  drm/amdgpu:add new firmware id for VCN
  drm/amdgpu:add VCN support in PSP driver
  drm/amdgpu:add VCN booting with firmware loaded by PSP

Michel Dänzer (1):
  drm/amdgpu: Use kvmalloc for allocating UVD/VCE/VCN BO backup memory

Mikita Lipski (3):
  drm/amd/display: Allow clock sharing b/w HDMI and DVI
  drm/amd/display: Check if clock source in use before disabling
  drm/amd/display: Pass connector id when executing VBIOS CT

Nicholas Kazlauskas (1):
  drm/amd/display: Guard against null crtc in CRC IRQ

 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c|  5 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h  |  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c| 38 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c| 33 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c| 23 +-
 drivers/gpu/drm/amd/amdgpu/psp_v10_0.c |  3 ++
 drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c  |  4 ++
 drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c  |  4 ++
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c  |  2 +
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c  |  5 +++
 drivers/gpu/drm/amd/amdgpu/vce_v2_0.c  |  2 +
 drivers/gpu/drm/amd/amdgpu/vce_v3_0.c  |  2 +
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c  | 10 -
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c  | 40 +
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c  | 10 -
 drivers/gpu/drm/amd/display/dc/core/dc_link.c  |  2 +
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c  | 50 +++---
 .../gpu/drm/amd/display/dc/dce/dce_link_encoder.c  |  4 +-
 .../amd/display/dc/dce110/dce110_hw_sequencer.c|  4 +-
 .../display/dc/dce120/dce120_timing_generator.c|  2 +-
 drivers/gpu/drm/amd/display/dc/inc/resource.h  |  5 +++
 23 files changed, 189 insertions(+), 65 deletions(-)
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix amdgpu_amdkfd_remove_eviction_fence

2018-08-16 Thread Felix Kuehling
On 2018-08-16 02:43 AM, Christian König wrote:
> Am 15.08.2018 um 20:49 schrieb Felix Kuehling:
>> On 2018-08-15 02:27 PM, Christian König wrote:
>>> Am 15.08.2018 um 20:17 schrieb Felix Kuehling:
 On 2018-08-15 03:02 AM, Christian König wrote:
> Hi Felix,
>
> yeah, you pretty much nailed it.
>
> The problem is that the array itself is RCU protected. This means
> that
> you can only copy the whole structure when you want to update it.
>
> The exception is reservation_object_add_shared() which only works
> because we replace an either signaled fence or a fence within the
> same
> context but a later sequence number.
>
> This also explains why this is only a band aid and the whole approach
> of removing fences doesn't work in general. At any time somebody
> could
> have taken an RCU reference to the old array, created a copy of it
> and
> is now still using this one.
>
> The only 100% correct solution would be to mark the existing fence as
> signaled and replace it everywhere else.
 Depends what you're trying to achieve. I think the problem you see is,
 that some reader may still be using the old reservation_object_list
 copy
 with the fence still in it. But, the remaining lifetime of the
 reservation_object_list copy is limited. If we wanted to be sure no
 more
 copies with the old fence exist, all we'd need to do is call
 synchronize_rcu. Maybe we need to do that before releasing the fence
 references, or release the fence reference in an RCU callback to be
 safe.
>>> The assumption that the fence would die with the array is what is
>>> incorrect here.
>> I'm making no such assumption. The point is not to destroy the fence.
>> Only to remove the fence reference from the reservation object. That is,
>> we want any BO with this reservation object to stop checking or waiting
>> for our eviction fence.
>
> Maybe I'm overthinking this, but let me explain the point once more.
>
> See reservation_object_wait_timeout_rcu() for an example of the
> problem I see here:
>>     seq = read_seqcount_begin(>seq);
>>     rcu_read_lock();
> 
>>     if (!dma_fence_get_rcu(lfence))
>>     goto unlock_retry;
> ...
>>     rcu_read_unlock();
> ...
>>     if (read_seqcount_retry(>seq, seq)) {
>>     dma_fence_put(fence);
>>     goto retry;
>>     }
> ...
>>     ret = dma_fence_wait_timeout(fence, intr, ret);
>
> In other words the RCU mechanism guarantees that we also wait for
> newly added fences, but it does not guarantee that we don't wait for a
> fence which is already removed.

I understand that.

>
>>> The lifetime of RCUed array object is limit, but there is absolutely
>>> no guarantee that somebody didn't made a copy of the fences.
>>>
>>> E.g. somebody could have called reservation_object_get_fences_rcu(),
>>> reservation_object_copy_fences() or a concurrent
>>> reservation_object_wait_timeout_rcu() is underway.
>> That's fine. Then there will be additional fence references. When we
>> remove a fence from a reservation object, we don't know and don't care
>> who else is holding a reference to the fence anyway. This is no
>> different.
>
> So the KFD implementation doesn't care that the fence is removed from
> a BO but somebody could still start to wait on it because of the BO?
>
> I mean it could be that in the worst case we race and stop a KFD
> process for no good reason.

Right. For a more practical example, a KFD BO can get evicted just
before the application decides to unmap it. The preemption happens
asynchronously, handled by an SDMA job in the GPU scheduler. That job
will have an amdgpu_sync object with the eviction fence in it.

While that SDMA job is pending or in progress, the application decides
to unmap the BO. That removes the eviction fence from that BO's
reservation. But it can't remove the fence from all the sync objects
that were previously created and are still in flight. So the preemption
will be triggered, and the fence will eventually signal when the KFD
preemption is complete.

I don't think that's something we can prevent. The worst case is that a
preemption happens unnecessarily if an eviction gets triggered just
before removing the fence. But removing the fence will prevent future
evictions of the BO from triggering a KFD process preemption. That's the
best we can do.

Regards,
  Felix

>
> Regards,
> Christian.
>
>>
>> Regards,
>>    Felix
>>
>>> That's also the reason why fences live for much longer than their
>>> signaling, e.g. somebody can have a reference to the fence object even
>>> hours after it is signaled.
>>>
>>> Regards,
>>> Christian.
>>>
 Regards,
     Felix

> Going to fix the copy error I made with the sequence number and
> send it out again.
>
> Regards,
> Christian.
>
> 

Re: [PATCH xf86-video-ati] Use correct FB handle in radeon_do_pageflip

2018-08-16 Thread Alex Deucher
On Thu, Aug 16, 2018 at 12:25 PM Michel Dänzer  wrote:
>
> From: Michel Dänzer 
>
> We were always using the handle of the client provided FB, which
> prevented RandR transforms from working, and could result in a black
> screen.
>
> Fixes: 740f0850f1e4 "Store FB for each CRTC in drmmode_flipdata_rec"
> (Ported from amdgpu commit f6cd72e64e85896b6d155bee0930e59771dcb701)
>
> Signed-off-by: Michel Dänzer 

Reviewed-by: Alex Deucher 

> ---
>  src/drmmode_display.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/drmmode_display.c b/src/drmmode_display.c
> index 0b92b70c6..68d6254d7 100644
> --- a/src/drmmode_display.c
> +++ b/src/drmmode_display.c
> @@ -3386,7 +3386,7 @@ Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr 
> client,
> if (crtc == ref_crtc) {
> if (drmmode_page_flip_target_absolute(pRADEONEnt,
>   drmmode_crtc,
> - fb->handle,
> + 
> flipdata->fb[i]->handle,
>   flip_flags,
>   drm_queue_seq,
>   target_msc) != 
> 0)
> @@ -3394,7 +3394,7 @@ Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr 
> client,
> } else {
> if (drmmode_page_flip_target_relative(pRADEONEnt,
>   drmmode_crtc,
> - fb->handle,
> + 
> flipdata->fb[i]->handle,
>   flip_flags,
>   drm_queue_seq, 
> 0) != 0)
> goto flip_error;
> --
> 2.18.0
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-ati] Use correct FB handle in radeon_do_pageflip

2018-08-16 Thread Michel Dänzer
From: Michel Dänzer 

We were always using the handle of the client provided FB, which
prevented RandR transforms from working, and could result in a black
screen.

Fixes: 740f0850f1e4 "Store FB for each CRTC in drmmode_flipdata_rec"
(Ported from amdgpu commit f6cd72e64e85896b6d155bee0930e59771dcb701)

Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 0b92b70c6..68d6254d7 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -3386,7 +3386,7 @@ Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr 
client,
if (crtc == ref_crtc) {
if (drmmode_page_flip_target_absolute(pRADEONEnt,
  drmmode_crtc,
- fb->handle,
+ 
flipdata->fb[i]->handle,
  flip_flags,
  drm_queue_seq,
  target_msc) != 0)
@@ -3394,7 +3394,7 @@ Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr 
client,
} else {
if (drmmode_page_flip_target_relative(pRADEONEnt,
  drmmode_crtc,
- fb->handle,
+ 
flipdata->fb[i]->handle,
  flip_flags,
  drm_queue_seq, 0) 
!= 0)
goto flip_error;
-- 
2.18.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-ati 5/5] Remove drmmode_crtc_private_rec::present_vblank_* related code

2018-08-16 Thread Michel Dänzer
From: Michel Dänzer 

Not needed anymore with the more robust mechanisms for preventing nested
drmHandleEvent calls introduced in the previous changes.

(Ported from amdgpu commit 85cd8eef0cbed7b409b07f58d76dacd34aa3ddea)

Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.h |  8 
 src/radeon_kms.c  |  9 -
 src/radeon_present.c  | 34 +++---
 3 files changed, 3 insertions(+), 48 deletions(-)

diff --git a/src/drmmode_display.h b/src/drmmode_display.h
index a039bf8fb..bc66eda65 100644
--- a/src/drmmode_display.h
+++ b/src/drmmode_display.h
@@ -111,14 +111,6 @@ typedef struct {
 struct drmmode_fb *flip_pending;
 /* The FB currently being scanned out by this CRTC, if any */
 struct drmmode_fb *fb;
-
-#ifdef HAVE_PRESENT_H
-/* Deferred processing of Present vblank event */
-uint64_t present_vblank_event_id;
-uint64_t present_vblank_usec;
-unsigned present_vblank_msc;
-Bool present_flip_expected;
-#endif
 } drmmode_crtc_private_rec, *drmmode_crtc_private_ptr;
 
 typedef struct {
diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index 809d24469..a24776811 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -541,15 +541,6 @@ radeon_scanout_flip_handler(xf86CrtcPtr crtc, uint32_t 
msc, uint64_t usec,
 drmmode_fb_reference(pRADEONEnt->fd, _crtc->fb,
 drmmode_crtc->flip_pending);
 radeon_scanout_flip_abort(crtc, event_data);
-
-#ifdef HAVE_PRESENT_H
-if (drmmode_crtc->present_vblank_event_id) {
-   present_event_notify(drmmode_crtc->present_vblank_event_id,
-drmmode_crtc->present_vblank_usec,
-drmmode_crtc->present_vblank_msc);
-   drmmode_crtc->present_vblank_event_id = 0;
-}
-#endif
 }
 
 
diff --git a/src/radeon_present.c b/src/radeon_present.c
index ffc14a0e8..d0a0c68ca 100644
--- a/src/radeon_present.c
+++ b/src/radeon_present.c
@@ -52,7 +52,6 @@ static present_screen_info_rec radeon_present_screen_info;
 
 struct radeon_present_vblank_event {
 uint64_t event_id;
-Bool vblank_for_flip;
 Bool unflip;
 };
 
@@ -120,26 +119,9 @@ static void
 radeon_present_vblank_handler(xf86CrtcPtr crtc, unsigned int msc,
  uint64_t usec, void *data)
 {
-drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
 struct radeon_present_vblank_event *event = data;
 
-if (event->vblank_for_flip &&
-   drmmode_crtc->tear_free &&
-   drmmode_crtc->scanout_update_pending) {
-   if (drmmode_crtc->present_vblank_event_id != 0) {
-   xf86DrvMsg(crtc->scrn->scrnIndex, X_WARNING,
-  "Need to handle previously deferred vblank event\n");
-   present_event_notify(drmmode_crtc->present_vblank_event_id,
-drmmode_crtc->present_vblank_usec,
-drmmode_crtc->present_vblank_msc);
-   }
-
-   drmmode_crtc->present_vblank_event_id = event->event_id;
-   drmmode_crtc->present_vblank_msc = msc;
-   drmmode_crtc->present_vblank_usec = usec;
-} else
-   present_event_notify(event->event_id, usec, msc);
-
+present_event_notify(event->event_id, usec, msc);
 free(event);
 }
 
@@ -162,7 +144,6 @@ static int
 radeon_present_queue_vblank(RRCrtcPtr crtc, uint64_t event_id, uint64_t msc)
 {
 xf86CrtcPtr xf86_crtc = crtc->devPrivate;
-drmmode_crtc_private_ptr drmmode_crtc = xf86_crtc->driver_private;
 ScreenPtr screen = crtc->pScreen;
 struct radeon_present_vblank_event *event;
 uintptr_t drm_queue_seq;
@@ -171,8 +152,6 @@ radeon_present_queue_vblank(RRCrtcPtr crtc, uint64_t 
event_id, uint64_t msc)
 if (!event)
return BadAlloc;
 event->event_id = event_id;
-event->vblank_for_flip = drmmode_crtc->present_flip_expected;
-drmmode_crtc->present_flip_expected = FALSE;
 
 drm_queue_seq = radeon_drm_queue_alloc(xf86_crtc,
   RADEON_DRM_QUEUE_CLIENT_DEFAULT,
@@ -272,7 +251,6 @@ radeon_present_check_flip(RRCrtcPtr crtc, WindowPtr window, 
PixmapPtr pixmap,
  Bool sync_flip)
 {
 xf86CrtcPtr xf86_crtc = crtc->devPrivate;
-drmmode_crtc_private_ptr drmmode_crtc = xf86_crtc->driver_private;
 ScreenPtr screen = window->drawable.pScreen;
 ScrnInfoPtr scrn = xf86_crtc->scrn;
 xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn);
@@ -281,8 +259,6 @@ radeon_present_check_flip(RRCrtcPtr crtc, WindowPtr window, 
PixmapPtr pixmap,
 int num_crtcs_on;
 int i;
 
-drmmode_crtc->present_flip_expected = FALSE;
-
 if (!scrn->vtSema)
return FALSE;
 
@@ -313,7 +289,6 @@ radeon_present_check_flip(RRCrtcPtr crtc, WindowPtr window, 
PixmapPtr pixmap,
 if (num_crtcs_on == 0)
return FALSE;
 
-drmmode_crtc->present_flip_expected = TRUE;
 return TRUE;
 }
 
@@ -354,7 +329,6 @@ radeon_present_flip(RRCrtcPtr crtc, uint64_t event_id, 
uint64_t target_msc,
   

[PATCH xf86-video-ati 1/5] Move DRM event queue related initialization to radeon_drm_queue_init

2018-08-16 Thread Michel Dänzer
From: Michel Dänzer 

And make radeon_drm_queue_handler not directly accessible outside of
radeon_drm_queue.c.

(Ported from amdgpu commit 0148283984c77f7a6e97026edc3093497547e0a4)

Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.c  |  4 
 src/radeon_dri2.c  | 14 --
 src/radeon_drm_queue.c | 11 +--
 src/radeon_drm_queue.h |  5 +
 src/radeon_kms.c   |  2 +-
 5 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 0b92b70c6..2e8f91d6e 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -2742,10 +2742,6 @@ Bool drmmode_pre_init(ScrnInfoPtr pScrn, drmmode_ptr 
drmmode, int cpp)
 
xf86InitialConfiguration(pScrn, TRUE);
 
-   drmmode->event_context.version = 2;
-   drmmode->event_context.vblank_handler = radeon_drm_queue_handler;
-   drmmode->event_context.page_flip_handler = radeon_drm_queue_handler;
-
pRADEONEnt->has_page_flip_target = 
drmmode_probe_page_flip_target(pRADEONEnt);
 
drmModeFreeResources(mode_res);
diff --git a/src/radeon_dri2.c b/src/radeon_dri2.c
index c36e06f22..4d12fc09a 100644
--- a/src/radeon_dri2.c
+++ b/src/radeon_dri2.c
@@ -968,13 +968,15 @@ CARD32 radeon_dri2_deferred_event(OsTimerPtr timer, 
CARD32 now, pointer data)
 
 scrn = crtc->scrn;
 pRADEONEnt = RADEONEntPriv(scrn);
+drmmode_crtc = event_info->crtc->driver_private;
 ret = drmmode_get_current_ust(pRADEONEnt->fd, _now);
 if (ret) {
xf86DrvMsg(scrn->scrnIndex, X_ERROR,
   "%s cannot get current time\n", __func__);
if (event_info->drm_queue_seq)
-   radeon_drm_queue_handler(pRADEONEnt->fd, 0, 0, 0,
-(void*)event_info->drm_queue_seq);
+   drmmode_crtc->drmmode->event_context.
+   vblank_handler(pRADEONEnt->fd, 0, 0, 0,
+  (void*)event_info->drm_queue_seq);
else
radeon_dri2_frame_event_handler(crtc, 0, 0, data);
return 0;
@@ -983,15 +985,15 @@ CARD32 radeon_dri2_deferred_event(OsTimerPtr timer, 
CARD32 now, pointer data)
  * calculate the frame number from current time
  * that would come from CRTC if it were running
  */
-drmmode_crtc = event_info->crtc->driver_private;
 delta_t = drm_now - (CARD64)drmmode_crtc->dpms_last_ust;
 delta_seq = delta_t * drmmode_crtc->dpms_last_fps;
 delta_seq /= 100;
 frame = (CARD64)drmmode_crtc->dpms_last_seq + delta_seq;
 if (event_info->drm_queue_seq)
-   radeon_drm_queue_handler(pRADEONEnt->fd, frame, drm_now / 100,
-drm_now % 100,
-(void*)event_info->drm_queue_seq);
+   drmmode_crtc->drmmode->event_context.
+   vblank_handler(pRADEONEnt->fd, frame, drm_now / 100,
+  drm_now % 100,
+  (void*)event_info->drm_queue_seq);
 else
radeon_dri2_frame_event_handler(crtc, frame, drm_now, data);
 return 0;
diff --git a/src/radeon_drm_queue.c b/src/radeon_drm_queue.c
index ac775f86a..bff010fa3 100644
--- a/src/radeon_drm_queue.c
+++ b/src/radeon_drm_queue.c
@@ -57,7 +57,7 @@ static uintptr_t radeon_drm_queue_seq;
 /*
  * Handle a DRM event
  */
-void
+static void
 radeon_drm_queue_handler(int fd, unsigned int frame, unsigned int sec,
 unsigned int usec, void *user_ptr)
 {
@@ -181,8 +181,15 @@ radeon_drm_abort_id(uint64_t id)
  * Initialize the DRM event queue
  */
 void
-radeon_drm_queue_init()
+radeon_drm_queue_init(ScrnInfoPtr scrn)
 {
+RADEONInfoPtr info = RADEONPTR(scrn);
+drmmode_ptr drmmode = >drmmode;
+
+drmmode->event_context.version = 2;
+drmmode->event_context.vblank_handler = radeon_drm_queue_handler;
+drmmode->event_context.page_flip_handler = radeon_drm_queue_handler;
+
 if (radeon_drm_queue_refcnt++)
return;
 
diff --git a/src/radeon_drm_queue.h b/src/radeon_drm_queue.h
index c3e2076db..b6aab37c5 100644
--- a/src/radeon_drm_queue.h
+++ b/src/radeon_drm_queue.h
@@ -40,9 +40,6 @@ typedef void (*radeon_drm_handler_proc)(xf86CrtcPtr crtc, 
uint32_t seq,
uint64_t usec, void *data);
 typedef void (*radeon_drm_abort_proc)(xf86CrtcPtr crtc, void *data);
 
-void radeon_drm_queue_handler(int fd, unsigned int frame,
- unsigned int tv_sec, unsigned int tv_usec,
- void *user_ptr);
 uintptr_t radeon_drm_queue_alloc(xf86CrtcPtr crtc, ClientPtr client,
 uint64_t id, void *data,
 radeon_drm_handler_proc handler,
@@ -50,7 +47,7 @@ uintptr_t radeon_drm_queue_alloc(xf86CrtcPtr crtc, ClientPtr 
client,
 void radeon_drm_abort_client(ClientPtr client);
 void radeon_drm_abort_entry(uintptr_t seq);
 void radeon_drm_abort_id(uint64_t id);
-void radeon_drm_queue_init();
+void 

[PATCH xf86-video-ati 4/5] Add radeon_drm_wait_pending_flip function

2018-08-16 Thread Michel Dänzer
From: Michel Dänzer 

Replacing the drmmode_crtc_wait_pending_event macro.

(Ported from amdgpu commit 6029794e8a35417faf825491a89b85f713c77fc1)

Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.c  | 18 --
 src/drmmode_display.h  |  4 
 src/radeon_drm_queue.c | 41 +++--
 src/radeon_drm_queue.h |  1 +
 4 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index c022b3e4f..a55cd08a0 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -324,6 +324,9 @@ drmmode_do_crtc_dpms(xf86CrtcPtr crtc, int mode)
nominal_frame_rate /= pix_in_frame;
drmmode_crtc->dpms_last_fps = nominal_frame_rate;
}
+
+   drmmode_crtc->dpms_mode = mode;
+   radeon_drm_queue_handle_deferred(crtc);
} else if (drmmode_crtc->dpms_mode != DPMSModeOn && mode == DPMSModeOn) 
{
/*
 * Off->On transition: calculate and accumulate the
@@ -341,8 +344,9 @@ drmmode_do_crtc_dpms(xf86CrtcPtr crtc, int mode)
drmmode_crtc->interpolated_vblanks += delta_seq;
 
}
+
+   drmmode_crtc->dpms_mode = DPMSModeOn;
}
-   drmmode_crtc->dpms_mode = mode;
 }
 
 static void
@@ -972,6 +976,7 @@ done:
}
}
 
+   radeon_drm_queue_handle_deferred(crtc);
return ret;
 }
 
@@ -1763,11 +1768,6 @@ drmmode_output_set_tear_free(RADEONEntPtr pRADEONEnt,
drmmode_output->tear_free = tear_free;
 
if (crtc) {
-   /* Wait for pending flips before drmmode_set_mode_major calls
-* drmmode_crtc_update_tear_free, to prevent a nested
-* drmHandleEvent call, which would hang
-*/
-   radeon_drm_wait_pending_flip(crtc);
drmmode_set_mode_major(crtc, >mode, crtc->rotation,
   crtc->x, crtc->y);
}
@@ -3278,6 +3278,7 @@ Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr 
client,
drmmode_crtc_private_ptr drmmode_crtc = config->crtc[0]->driver_private;
uint32_t flip_flags = flip_sync == FLIP_ASYNC ? 
DRM_MODE_PAGE_FLIP_ASYNC : 0;
drmmode_flipdata_ptr flipdata;
+   Bool handle_deferred = FALSE;
uintptr_t drm_queue_seq = 0;
struct drmmode_fb *fb;
int i = 0;
@@ -3360,6 +3361,7 @@ Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr 
client,
 
if (drmmode_crtc->scanout_update_pending) {
radeon_drm_wait_pending_flip(crtc);
+   handle_deferred = TRUE;

radeon_drm_abort_entry(drmmode_crtc->scanout_update_pending);
drmmode_crtc->scanout_update_pending = 0;
}
@@ -3395,6 +3397,8 @@ Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr 
client,
drm_queue_seq = 0;
}
 
+   if (handle_deferred)
+   radeon_drm_queue_handle_deferred(ref_crtc);
if (flipdata->flip_count > 0)
return TRUE;
 
@@ -3414,5 +3418,7 @@ error:
 
xf86DrvMsg(scrn->scrnIndex, X_WARNING, "Page flip failed: %s\n",
   strerror(errno));
+   if (handle_deferred)
+   radeon_drm_queue_handle_deferred(ref_crtc);
return FALSE;
 }
diff --git a/src/drmmode_display.h b/src/drmmode_display.h
index 46449c8e3..a039bf8fb 100644
--- a/src/drmmode_display.h
+++ b/src/drmmode_display.h
@@ -103,6 +103,10 @@ typedef struct {
  * modeset)
  */
 Bool need_modeset;
+/* For keeping track of nested calls to drm_wait_pending_flip /
+ * drm_queue_handle_deferred
+ */
+int wait_flip_nesting_level;
 /* A flip to this FB is pending for this CRTC */
 struct drmmode_fb *flip_pending;
 /* The FB currently being scanned out by this CRTC, if any */
diff --git a/src/radeon_drm_queue.c b/src/radeon_drm_queue.c
index 3d2f4d157..857278fdd 100644
--- a/src/radeon_drm_queue.c
+++ b/src/radeon_drm_queue.c
@@ -117,6 +117,30 @@ radeon_drm_vblank_handler(int fd, unsigned int frame, 
unsigned int sec,
 user_ptr);
 }
 
+/*
+ * Handle deferred DRM vblank events
+ *
+ * This function must be called after radeon_drm_wait_pending_flip, once
+ * it's safe to attempt queueing a flip again
+ */
+void
+radeon_drm_queue_handle_deferred(xf86CrtcPtr crtc)
+{
+drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
+struct radeon_drm_queue_entry *e, *tmp;
+
+if (drmmode_crtc->wait_flip_nesting_level == 0 ||
+   --drmmode_crtc->wait_flip_nesting_level > 0)
+   return;
+
+xorg_list_for_each_entry_safe(e, tmp, _drm_vblank_signalled, list) {
+   drmmode_crtc_private_ptr drmmode_crtc = e->crtc->driver_private;
+
+   if (drmmode_crtc->wait_flip_nesting_level == 0)
+   

[PATCH xf86-video-ati 3/5] Add radeon_drm_handle_event wrapper for drmHandleEvent

2018-08-16 Thread Michel Dänzer
From: Michel Dänzer 

Instead of processing DRM events directly from drmHandleEvent's
callbacks, there are three phases:

1. drmHandleEvent is called, and signalled events are re-queued to
   _signalled lists from its callbacks.
2. Signalled page flip completion events are processed.
3. Signalled vblank events are processed.

This should make sure that we never call drmHandleEvent from one of its
callbacks, which would usually result in blocking forever.

(Ported from amdgpu commit 739181c8d3334ff14b5a607895dfdeb29b0d9020)

Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.c  |  13 ++---
 src/radeon_drm_queue.c | 110 -
 src/radeon_drm_queue.h |   1 +
 src/radeon_present.c   |   2 +-
 4 files changed, 96 insertions(+), 30 deletions(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 59fe9b7fc..c022b3e4f 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -2587,9 +2587,8 @@ static void
 drm_wakeup_handler(pointer data, int err, pointer p)
 #endif
 {
-   ScrnInfoPtr scrn = data;
-   RADEONEntPtr pRADEONEnt = RADEONEntPriv(scrn);
-   RADEONInfoPtr info = RADEONPTR(scrn);
+   drmmode_ptr drmmode = data;
+   RADEONEntPtr pRADEONEnt = RADEONEntPriv(drmmode->scrn);

 #if !HAVE_NOTIFY_FD
fd_set *read_mask = p;
@@ -2597,7 +2596,7 @@ drm_wakeup_handler(pointer data, int err, pointer p)
if (err >= 0 && FD_ISSET(pRADEONEnt->fd, read_mask))
 #endif
{
-   drmHandleEvent(pRADEONEnt->fd, >drmmode.event_context);
+   radeon_drm_handle_event(pRADEONEnt->fd, 
>event_context);
}
 }
 
@@ -2747,11 +2746,13 @@ void drmmode_init(ScrnInfoPtr pScrn, drmmode_ptr 
drmmode)
info->drmmode_inited = TRUE;
if (pRADEONEnt->fd_wakeup_registered != serverGeneration) {
 #if HAVE_NOTIFY_FD
-   SetNotifyFd(pRADEONEnt->fd, drm_notify_fd, X_NOTIFY_READ, 
pScrn);
+   SetNotifyFd(pRADEONEnt->fd, drm_notify_fd, X_NOTIFY_READ,
+   >drmmode);
 #else
AddGeneralSocket(pRADEONEnt->fd);
RegisterBlockAndWakeupHandlers((BlockHandlerProcPtr)NoopDDA,
-   drm_wakeup_handler, pScrn);
+  drm_wakeup_handler,
+  >drmmode);
 #endif
pRADEONEnt->fd_wakeup_registered = serverGeneration;
pRADEONEnt->fd_wakeup_ref = 1;
diff --git a/src/radeon_drm_queue.c b/src/radeon_drm_queue.c
index 69474be24..3d2f4d157 100644
--- a/src/radeon_drm_queue.c
+++ b/src/radeon_drm_queue.c
@@ -40,6 +40,7 @@
 
 struct radeon_drm_queue_entry {
 struct xorg_list list;
+uint64_t usec;
 uint64_t id;
 uintptr_t seq;
 void *data;
@@ -47,36 +48,73 @@ struct radeon_drm_queue_entry {
 xf86CrtcPtr crtc;
 radeon_drm_handler_proc handler;
 radeon_drm_abort_proc abort;
+unsigned int frame;
 };
 
 static int radeon_drm_queue_refcnt;
 static struct xorg_list radeon_drm_queue;
+static struct xorg_list radeon_drm_flip_signalled;
+static struct xorg_list radeon_drm_vblank_signalled;
 static uintptr_t radeon_drm_queue_seq;
 
 
 /*
- * Handle a DRM event
+ * Process a DRM event
  */
 static void
-radeon_drm_queue_handler(int fd, unsigned int frame, unsigned int sec,
-unsigned int usec, void *user_ptr)
+radeon_drm_queue_handle_one(struct radeon_drm_queue_entry *e)
 {
-   uintptr_t seq = (uintptr_t)user_ptr;
-   struct radeon_drm_queue_entry *e, *tmp;
-
-   xorg_list_for_each_entry_safe(e, tmp, _drm_queue, list) {
-   if (e->seq == seq) {
-   xorg_list_del(>list);
-   if (e->handler)
-   e->handler(e->crtc, frame,
-  (uint64_t)sec * 100 + usec,
-  e->data);
-   else
-   e->abort(e->crtc, e->data);
-   free(e);
-   break;
-   }
+xorg_list_del(>list);
+if (e->handler) {
+   e->handler(e->crtc, e->frame, e->usec, e->data);
+} else
+   e->abort(e->crtc, e->data);
+free(e);
+}
+
+static void
+radeon_drm_queue_handler(struct xorg_list *signalled, unsigned int frame,
+unsigned int sec, unsigned int usec, void *user_ptr)
+{
+uintptr_t seq = (uintptr_t)user_ptr;
+struct radeon_drm_queue_entry *e, *tmp;
+
+xorg_list_for_each_entry_safe(e, tmp, _drm_queue, list) {
+   if (e->seq == seq) {
+   if (!e->handler) {
+   e->abort(e->crtc, e->data);
+   break;
+   }
+
+   xorg_list_del(>list);
+   e->usec = (uint64_t)sec * 100 + usec;
+   e->frame = frame;
+   xorg_list_append(>list, signalled);
+   break;
}
+}
+}
+
+/*
+ * Signal a DRM page flip 

[PATCH xf86-video-ati 2/5] Add radeon_drm_wait_pending_flip function

2018-08-16 Thread Michel Dänzer
From: Michel Dänzer 

Replacing the drmmode_crtc_wait_pending_event macro.

(Ported from amdgpu commit 6029794e8a35417faf825491a89b85f713c77fc1)

Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.c  | 21 -
 src/radeon_drm_queue.c | 13 +
 src/radeon_drm_queue.h |  1 +
 3 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 2e8f91d6e..59fe9b7fc 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -100,13 +100,6 @@ RADEONZaphodStringMatches(ScrnInfoPtr pScrn, const char 
*s, char *output_name)
 }
 
 
-/* Wait for the boolean condition to be FALSE */
-#define drmmode_crtc_wait_pending_event(drmmode_crtc, fd, condition) \
-   do {} while ((condition) && \
-drmHandleEvent(fd, _crtc->drmmode->event_context) \
-> 0);
-
-
 static PixmapPtr drmmode_create_bo_pixmap(ScrnInfoPtr pScrn,
  int width, int height,
  int depth, int bpp,
@@ -306,8 +299,7 @@ drmmode_do_crtc_dpms(xf86CrtcPtr crtc, int mode)
if (drmmode_crtc->dpms_mode == DPMSModeOn && mode != DPMSModeOn) {
uint32_t seq;
 
-   drmmode_crtc_wait_pending_event(drmmode_crtc, pRADEONEnt->fd,
-   drmmode_crtc->flip_pending);
+   radeon_drm_wait_pending_flip(crtc);
 
/*
 * On->Off transition: record the last vblank time,
@@ -918,8 +910,7 @@ drmmode_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr 
mode,
goto done;
}
 
-   drmmode_crtc_wait_pending_event(drmmode_crtc, pRADEONEnt->fd,
-   drmmode_crtc->flip_pending);
+   radeon_drm_wait_pending_flip(crtc);
 
if (!drmmode_set_mode(crtc, fb, mode, x, y))
goto done;
@@ -1772,14 +1763,11 @@ drmmode_output_set_tear_free(RADEONEntPtr pRADEONEnt,
drmmode_output->tear_free = tear_free;
 
if (crtc) {
-   drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
-
/* Wait for pending flips before drmmode_set_mode_major calls
 * drmmode_crtc_update_tear_free, to prevent a nested
 * drmHandleEvent call, which would hang
 */
-   drmmode_crtc_wait_pending_event(drmmode_crtc, pRADEONEnt->fd,
-   drmmode_crtc->flip_pending);
+   radeon_drm_wait_pending_flip(crtc);
drmmode_set_mode_major(crtc, >mode, crtc->rotation,
   crtc->x, crtc->y);
}
@@ -3370,8 +3358,7 @@ Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr 
client,
radeon_cs_flush_indirect(crtc->scrn);
 
if (drmmode_crtc->scanout_update_pending) {
-   drmmode_crtc_wait_pending_event(drmmode_crtc, 
pRADEONEnt->fd,
-   
drmmode_crtc->flip_pending);
+   radeon_drm_wait_pending_flip(crtc);

radeon_drm_abort_entry(drmmode_crtc->scanout_update_pending);
drmmode_crtc->scanout_update_pending = 0;
}
diff --git a/src/radeon_drm_queue.c b/src/radeon_drm_queue.c
index bff010fa3..69474be24 100644
--- a/src/radeon_drm_queue.c
+++ b/src/radeon_drm_queue.c
@@ -177,6 +177,19 @@ radeon_drm_abort_id(uint64_t id)
 }
 }
 
+/*
+ * Wait for pending page flip on given CRTC to complete
+ */
+void radeon_drm_wait_pending_flip(xf86CrtcPtr crtc)
+{
+drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
+RADEONEntPtr pRADEONEnt = RADEONEntPriv(crtc->scrn);
+drmmode_ptr drmmode = drmmode_crtc->drmmode;
+
+while (drmmode_crtc->flip_pending &&
+  drmHandleEvent(pRADEONEnt->fd, >event_context) > 0);
+}
+
 /*
  * Initialize the DRM event queue
  */
diff --git a/src/radeon_drm_queue.h b/src/radeon_drm_queue.h
index b6aab37c5..96f88bd35 100644
--- a/src/radeon_drm_queue.h
+++ b/src/radeon_drm_queue.h
@@ -47,6 +47,7 @@ uintptr_t radeon_drm_queue_alloc(xf86CrtcPtr crtc, ClientPtr 
client,
 void radeon_drm_abort_client(ClientPtr client);
 void radeon_drm_abort_entry(uintptr_t seq);
 void radeon_drm_abort_id(uint64_t id);
+void radeon_drm_wait_pending_flip(xf86CrtcPtr crtc);
 void radeon_drm_queue_init(ScrnInfoPtr scrn);
 void radeon_drm_queue_close(ScrnInfoPtr scrn);
 
-- 
2.18.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/5] drm/nouveau+amdgpu: add DP CEC-Tunneling-over-AUX

2018-08-16 Thread Alex Deucher
On Thu, Aug 16, 2018 at 6:56 AM Hans Verkuil  wrote:
>
> From: Hans Verkuil 
>
> Now that the DisplayPort CEC-Tunneling-over-AUX drm+i915 support
> has been merged in the mainline kernel it is time to roll this
> out to nouveau and amdgpu as well.
>
> I combined both in the same patch series since both depend on the
> same first patch, the comments in this cover letter apply to both
> and the implementation is also very similar (and simple).
>
> As mentioned, the first patch is required for this: it adds checks that
> the drm_dp_cec functions are called with a working aux implementation.
> These checks weren't necessary for the i915, but nouveau and amdgpu
> require them.
>
> The next two patches update a comment in drm_dp_cec.c and fix a bug
> in sideband AUX handling that I found while researching CEC Tunneling
> over an MST hub. It's there to prevent others from stumbling over the
> same bug in the future.
>
> The fourth patch adds support for CEC to the nouveau driver.
>
> The last patch adds support for CEC to the amdgpu driver. However, there
> appear to be two classes of amdgpu hardware: as a discrete GPU or
> integrated. I only have a discrete GPU, so I can't test the integrated
> GPU support and I only implemented this for the discrete GPU case.
>
> If someone has the integrated GPU and wants to get this working and is
> willing to do some testing, then please contact me. It shouldn't be
> difficult. You will likely have to buy a working DP-to-HDMI adapter.
> See https://hverkuil.home.xs4all.nl/cec-status.txt for a (sadly very
> short) list of working adapters.

Actually you added support for APUs as well.  In amdgpu, there are two
sets of modesetting code, an older less featured version
(amd/amdgpu/dce*.c) and the newer more featured code (amd/display/*).
Newer asics (vega and raven) are only supported by DC.  Older asics
are supported by both.  Eventually we'd like to remove the older
modesetting code.  I'm not really a CEC expert, but the patches look
pretty straight forward to me.  Series is:
Acked-by: Alex Deucher 

>
> Note that I may be completely off-base regarding what atombios_dp.c
> does, it's the first time I ever looked at amdgpu code.
>
> Two notes on CEC-Tunneling-over-AUX:
>
> 1) You need to be very careful about which USB-C/DP/mini-DP to HDMI
>adapters you buy. Only a few support this feature correctly today.
>Known chipsets that support this are Parade PS175 & PS176 and
>MegaChips 2900. Unfortunately almost all Parade-based adapters
>do not hook up the HDMI CEC pin to the chip, making them useless
>for this. The Parade reference design was only recently changed
>to hook up this pin, so perhaps this situation will change for
>new Parade-based adapters.
>
>Adapters based on the new MegaChips 2900 fare much better: it
>appears that their reference design *does* hook up the CEC pin.
>Club3D has adapters using this device for USB-C, DP and mini-DP
>to HDMI, and they all work fine.
>
>If anyone finds other adapters that work besides those I list
>in https://hverkuil.home.xs4all.nl/cec-status.txt, please let
>me know and I'll add them to the list.
>
>Linux is the first OS that supports this feature, so I am
>not surprised that HW support for this has been poor. Hopefully
>this will change going forward. BTW, note the irony that CEC is
>now supported for DP-to-HDMI adapters, but not for the native
>HDMI ports on NVIDIA/AMD/Intel GPUs.
>
> 2) CEC-Tunneling does not work (yet?) if there is an MST hub in
>between. I'm still researching this but this might be a limitation
>of MST.
>
> Regards,
>
> Hans
>
> Hans Verkuil (5):
>   drm_dp_cec: check that aux has a transfer function
>   drm_dp_cec: add note about good MegaChips 2900 CEC support
>   drm_dp_mst_topology: fix broken
> drm_dp_sideband_parse_remote_dpcd_read()
>   drm/nouveau: add DisplayPort CEC-Tunneling-over-AUX support
>   drm/amdgpu: add DisplayPort CEC-Tunneling-over-AUX support
>
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  | 13 +++--
>  .../display/amdgpu_dm/amdgpu_dm_mst_types.c|  2 ++
>  drivers/gpu/drm/drm_dp_cec.c   | 18 +-
>  drivers/gpu/drm/drm_dp_mst_topology.c  |  1 +
>  drivers/gpu/drm/nouveau/nouveau_connector.c| 17 +++--
>  5 files changed, 46 insertions(+), 5 deletions(-)
>
> --
> 2.18.0
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH xf86-video-amdgpu] Use correct FB handle in amdgpu_do_pageflip

2018-08-16 Thread Alex Deucher
On Thu, Aug 16, 2018 at 10:36 AM Michel Dänzer  wrote:
>
> From: Michel Dänzer 
>
> We were always using the handle of the client provided FB, which
> prevented RandR transforms from working, and could result in a black
> screen.
>
> Fixes: 9b6782c821e0 "Store FB for each CRTC in drmmode_flipdata_rec"
> Signed-off-by: Michel Dänzer 

Good catch!
Reviewed-by: Alex Deucher 

> ---
>  src/drmmode_display.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/drmmode_display.c b/src/drmmode_display.c
> index e58e15d7b..be0e6b875 100644
> --- a/src/drmmode_display.c
> +++ b/src/drmmode_display.c
> @@ -3974,7 +3974,7 @@ Bool amdgpu_do_pageflip(ScrnInfoPtr scrn, ClientPtr 
> client,
> if (crtc == ref_crtc) {
> if (drmmode_page_flip_target_absolute(pAMDGPUEnt,
>   drmmode_crtc,
> - fb->handle,
> + 
> flipdata->fb[i]->handle,
>   flip_flags,
>   drm_queue_seq,
>   target_msc) != 
> 0)
> @@ -3982,7 +3982,7 @@ Bool amdgpu_do_pageflip(ScrnInfoPtr scrn, ClientPtr 
> client,
> } else {
> if (drmmode_page_flip_target_relative(pAMDGPUEnt,
>   drmmode_crtc,
> - fb->handle,
> + 
> flipdata->fb[i]->handle,
>   flip_flags,
>   drm_queue_seq, 
> 0) != 0)
> goto flip_error;
> --
> 2.18.0
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu] Store FB for each CRTC in drmmode_flipdata_rec

2018-08-16 Thread Michel Dänzer
On 2018-08-10 09:06 AM, Johannes Hirte wrote:
> On 2018 Jul 27, Michel Dänzer wrote:
>> From: Michel Dänzer 
>>
>> We were only storing the FB provided by the client, but on CRTCs with
>> TearFree enabled, we use a separate FB. This could cause
>> drmmode_flip_handler to fail to clear drmmode_crtc->flip_pending, which
>> could result in a hang when waiting for the pending flip to complete. We
>> were trying to avoid that by always clearing drmmode_crtc->flip_pending
>> when TearFree is enabled, but that wasn't reliable, because
>> drmmode_crtc->tear_free can already be FALSE at this point when
>> disabling TearFree.
>>
>> Now that we're keeping track of each CRTC's flip FB separately,
>> drmmode_flip_handler can reliably clear flip_pending, and we no longer
>> need the TearFree hack.
>>
>> Signed-off-by: Michel Dänzer 
> 
> Since this change I get a black screen when login into KDE Plasma. I
> have to switch to linux console and back for getting the X11 screen.
> Additional the Xorg.log is spammed with:
> 
> [   189.744] (WW) AMDGPU(0): get vblank counter failed: Invalid argument
> [   189.828] (WW) AMDGPU(0): flip queue failed in amdgpu_scanout_flip: Device 
> or resource busy, TearFree inactive until next modeset
> [   189.828] (WW) AMDGPU(0): drmmode_wait_vblank failed for scanout update: 
> Invalid argument
> [   189.828] (WW) AMDGPU(0): drmmode_wait_vblank failed for scanout update: 
> Invalid argument
> 
> The "flip queue failed" message appears only once, the other two are
> much more often.
> 
> System is a Carrizo A10-8700B, kernel 4.17.13 + this patch:
> https://bugzilla.kernel.org/attachment.cgi?id=276173

Does https://patchwork.freedesktop.org/patch/244860/ fix it?


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-amdgpu] Use correct FB handle in amdgpu_do_pageflip

2018-08-16 Thread Michel Dänzer
From: Michel Dänzer 

We were always using the handle of the client provided FB, which
prevented RandR transforms from working, and could result in a black
screen.

Fixes: 9b6782c821e0 "Store FB for each CRTC in drmmode_flipdata_rec"
Signed-off-by: Michel Dänzer 
---
 src/drmmode_display.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index e58e15d7b..be0e6b875 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -3974,7 +3974,7 @@ Bool amdgpu_do_pageflip(ScrnInfoPtr scrn, ClientPtr 
client,
if (crtc == ref_crtc) {
if (drmmode_page_flip_target_absolute(pAMDGPUEnt,
  drmmode_crtc,
- fb->handle,
+ 
flipdata->fb[i]->handle,
  flip_flags,
  drm_queue_seq,
  target_msc) != 0)
@@ -3982,7 +3982,7 @@ Bool amdgpu_do_pageflip(ScrnInfoPtr scrn, ClientPtr 
client,
} else {
if (drmmode_page_flip_target_relative(pAMDGPUEnt,
  drmmode_crtc,
- fb->handle,
+ 
flipdata->fb[i]->handle,
  flip_flags,
  drm_queue_seq, 0) 
!= 0)
goto flip_error;
-- 
2.18.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH libdrm 2/2] amdgpu: Use char* pointers in amdgpu_find_bo_by_cpu_mapping

2018-08-16 Thread Michel Dänzer
On 2018-08-15 03:07 AM, Zhang, Jerry (Junwei) wrote:
> On 08/14/2018 05:58 PM, Michel Dänzer wrote:
>> From: Michel Dänzer 
>>
>> Arithmetic using void* pointers isn't defined by the C standard, only as
>> a GCC extension. Avoids compiler warnings:
>>
>> ../../amdgpu/amdgpu_bo.c: In function ‘amdgpu_find_bo_by_cpu_mapping’:
>> ../../amdgpu/amdgpu_bo.c:554:48: warning: pointer of type ‘void *’
>> used in arithmetic [-Wpointer-arith]
>>     if (cpu >= bo->cpu_ptr && cpu < (bo->cpu_ptr + bo->alloc_size))
>>  ^
>> ../../amdgpu/amdgpu_bo.c:561:23: warning: pointer of type ‘void *’
>> used in subtraction [-Wpointer-arith]
>>     *offset_in_bo = cpu - bo->cpu_ptr;
>>     ^
>>
>> Fixes: 4d454424e1f2 ("amdgpu: add a function to find bo by cpu mapping
>>   (v2)")
>> Signed-off-by: Michel Dänzer 
>> ---
>>   amdgpu/amdgpu.h    | 2 +-
>>   amdgpu/amdgpu_bo.c | 7 ---
>>   2 files changed, 5 insertions(+), 4 deletions(-)
>>
>> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
>> index a8c353c6..f2bdeb95 100644
>> --- a/amdgpu/amdgpu.h
>> +++ b/amdgpu/amdgpu.h
>> @@ -695,7 +695,7 @@ int
>> amdgpu_create_bo_from_user_mem(amdgpu_device_handle dev,
>>    *
>>   */
>>   int amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle dev,
>> -  void *cpu,
>> +  char *cpu,
> 
> Shall we cast the cpu pointer when do arithmetic and keep the arguments
> as other cpu ponter?
> 
> e.g.
> @@ -565,14 +565,15 @@ int
> amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle dev,
>     bo = handle_table_lookup(>bo_handles, i);
>     if (!bo || !bo->cpu_ptr || size > bo->alloc_size)
>     continue;
> -   if (cpu >= bo->cpu_ptr && cpu < (bo->cpu_ptr +
> bo->alloc_size))
> +   if (cpu >= bo->cpu_ptr &&
> +  (uint64_t)cpu < ((uint64_t)bo->cpu_ptr +
> bo->alloc_size))
>     break;
>     }
> 
>     if (i < dev->bo_handles.max_key) {
>     atomic_inc(>refcount);
>     *buf_handle = bo;
> -   *offset_in_bo = cpu - bo->cpu_ptr;
> +   *offset_in_bo = (uint64_t)cpu - (uint64_t)bo->cpu_ptr;
>     } else {
>     *buf_handle = NULL;
>     *offset_in_bo = 0;

Thanks for the suggestion, I sent out a v2 patch along those lines
(though slightly different).


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 libdrm] amdgpu: Eliminate void* arithmetic in amdgpu_find_bo_by_cpu_mapping

2018-08-16 Thread Michel Dänzer
From: Michel Dänzer 

Arithmetic using void* pointers isn't defined by the C standard, only as
a GCC extension. Avoids compiler warnings:

../../amdgpu/amdgpu_bo.c: In function ‘amdgpu_find_bo_by_cpu_mapping’:
../../amdgpu/amdgpu_bo.c:554:48: warning: pointer of type ‘void *’ used in 
arithmetic [-Wpointer-arith]
   if (cpu >= bo->cpu_ptr && cpu < (bo->cpu_ptr + bo->alloc_size))
^
../../amdgpu/amdgpu_bo.c:561:23: warning: pointer of type ‘void *’ used in 
subtraction [-Wpointer-arith]
   *offset_in_bo = cpu - bo->cpu_ptr;
   ^

v2: Use uintptr_t instead of char*, don't change function signature
(Junwei Zhang)

Fixes: 4d454424e1f2 ("amdgpu: add a function to find bo by cpu mapping
 (v2)")
Signed-off-by: Michel Dänzer 
---
 amdgpu/amdgpu_bo.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
index 86d1c143..8efd014e 100644
--- a/amdgpu/amdgpu_bo.c
+++ b/amdgpu/amdgpu_bo.c
@@ -551,14 +551,15 @@ int amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle 
dev,
bo = handle_table_lookup(>bo_handles, i);
if (!bo || !bo->cpu_ptr || size > bo->alloc_size)
continue;
-   if (cpu >= bo->cpu_ptr && cpu < (bo->cpu_ptr + bo->alloc_size))
+   if (cpu >= bo->cpu_ptr &&
+   cpu < (void*)((uintptr_t)bo->cpu_ptr + bo->alloc_size))
break;
}
 
if (i < dev->bo_handles.max_key) {
atomic_inc(>refcount);
*buf_handle = bo;
-   *offset_in_bo = cpu - bo->cpu_ptr;
+   *offset_in_bo = (uintptr_t)cpu - (uintptr_t)bo->cpu_ptr;
} else {
*buf_handle = NULL;
*offset_in_bo = 0;
-- 
2.18.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu/display: add support for LVDS (v5)

2018-08-16 Thread Alex Deucher
This adds support for LVDS displays.

v2: add support for spread spectrum, sink detect
v3: clean up enable_lvds_output
v4: fix up link_detect
v5: remove assert on 888 format

Bug: https://bugs.freedesktop.org/show_bug.cgi?id=105880
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  |  2 +
 drivers/gpu/drm/amd/display/dc/core/dc_link.c  | 45 ++
 .../gpu/drm/amd/display/dc/dce/dce_clock_source.c  | 10 +
 .../gpu/drm/amd/display/dc/dce/dce_clock_source.h  |  2 +
 .../gpu/drm/amd/display/dc/dce/dce_link_encoder.c  | 34 
 .../gpu/drm/amd/display/dc/dce/dce_link_encoder.h  |  6 +++
 .../drm/amd/display/dc/dce/dce_stream_encoder.c| 24 
 .../gpu/drm/amd/display/dc/inc/hw/link_encoder.h   |  3 ++
 .../gpu/drm/amd/display/dc/inc/hw/stream_encoder.h |  4 ++
 drivers/gpu/drm/amd/display/include/signal_types.h |  5 +++
 10 files changed, 135 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 1828e4382b24..818b5ec32f37 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3360,6 +3360,8 @@ static int to_drm_connector_type(enum signal_type st)
return DRM_MODE_CONNECTOR_HDMIA;
case SIGNAL_TYPE_EDP:
return DRM_MODE_CONNECTOR_eDP;
+   case SIGNAL_TYPE_LVDS:
+   return DRM_MODE_CONNECTOR_LVDS;
case SIGNAL_TYPE_RGB:
return DRM_MODE_CONNECTOR_VGA;
case SIGNAL_TYPE_DISPLAY_PORT:
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 981f7cbd31cc..0f044fd5baf4 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -203,6 +203,11 @@ static bool detect_sink(struct dc_link *link, enum 
dc_connection_type *type)
uint32_t is_hpd_high = 0;
struct gpio *hpd_pin;
 
+   if (link->connector_signal == SIGNAL_TYPE_LVDS) {
+   *type = dc_connection_single;
+   return true;
+   }
+
/* todo: may need to lock gpio access */
hpd_pin = get_hpd_gpio(link->ctx->dc_bios, link->link_id, 
link->ctx->gpio_service);
if (hpd_pin == NULL)
@@ -616,6 +621,10 @@ bool dc_link_detect(struct dc_link *link, enum 
dc_detect_reason reason)
link->local_sink)
return true;
 
+   if (link->connector_signal == SIGNAL_TYPE_LVDS &&
+   link->local_sink)
+   return true;
+
prev_sink = link->local_sink;
if (prev_sink != NULL) {
dc_sink_retain(prev_sink);
@@ -649,6 +658,12 @@ bool dc_link_detect(struct dc_link *link, enum 
dc_detect_reason reason)
break;
}
 
+   case SIGNAL_TYPE_LVDS: {
+   sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C;
+   sink_caps.signal = SIGNAL_TYPE_LVDS;
+   break;
+   }
+
case SIGNAL_TYPE_EDP: {
detect_edp_sink_caps(link);
sink_caps.transaction_type =
@@ -1087,6 +1102,9 @@ static bool construct(
dal_irq_get_rx_source(hpd_gpio);
}
break;
+   case CONNECTOR_ID_LVDS:
+   link->connector_signal = SIGNAL_TYPE_LVDS;
+   break;
default:
DC_LOG_WARNING("Unsupported Connector type:%d!\n", 
link->link_id.id);
goto create_fail;
@@ -1920,6 +1938,24 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx)
dal_ddc_service_read_scdc_data(link->ddc);
 }
 
+static void enable_link_lvds(struct pipe_ctx *pipe_ctx)
+{
+   struct dc_stream_state *stream = pipe_ctx->stream;
+   struct dc_link *link = stream->sink->link;
+
+   if (stream->phy_pix_clk == 0)
+   stream->phy_pix_clk = stream->timing.pix_clk_khz;
+
+   memset(>sink->link->cur_link_settings, 0,
+   sizeof(struct dc_link_settings));
+
+   link->link_enc->funcs->enable_lvds_output(
+   link->link_enc,
+   pipe_ctx->clock_source->id,
+   stream->phy_pix_clk);
+
+}
+
 /enable_link***/
 static enum dc_status enable_link(
struct dc_state *state,
@@ -1943,6 +1979,10 @@ static enum dc_status enable_link(
enable_link_hdmi(pipe_ctx);
status = DC_OK;
break;
+   case SIGNAL_TYPE_LVDS:
+   enable_link_lvds(pipe_ctx);
+   status = DC_OK;
+   break;
case SIGNAL_TYPE_VIRTUAL:
status = DC_OK;
break;
@@ -2492,6 +2532,11 @@ void 

Re: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb

2018-08-16 Thread Christian König

Am 16.08.2018 um 15:05 schrieb Emily Deng:

To avoid the tlb flush not interrupted by world switch, use kiq and one
command to do tlb invalidate.

Signed-off-by: Emily Deng 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  4 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  3 --
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 60 
  3 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6265b88..19ef7711 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -212,6 +212,10 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_LAST
  };
  
+#define MAX_KIQ_REG_WAIT   5000 /* in usecs, 5ms */

+#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
+#define MAX_KIQ_REG_TRY 20
+
  int amdgpu_device_ip_set_clockgating_state(void *dev,
   enum amd_ip_block_type block_type,
   enum amd_clockgating_state state);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 21adb1b6..3885636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -22,9 +22,6 @@
   */
  
  #include "amdgpu.h"

-#define MAX_KIQ_REG_WAIT   5000 /* in usecs, 5ms */
-#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
-#define MAX_KIQ_REG_TRY 20
  
  uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)

  {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index ed467de..6c164bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -311,6 +311,58 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int 
vmid)
return req;
  }
  
+signed long  amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,

+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+   signed long r, cnt = 0;
+   unsigned long flags;
+   uint32_t seq;
+   struct amdgpu_kiq *kiq = >gfx.kiq;
+   struct amdgpu_ring *ring = >ring;
+
+   if (!ring->ready)
+   return -EINVAL;
+
+   spin_lock_irqsave(>ring_lock, flags);
+
+   amdgpu_ring_alloc(ring, 32);
+   amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
+   ref, mask);
+   amdgpu_fence_emit_polling(ring, );
+   amdgpu_ring_commit(ring);
+   spin_unlock_irqrestore(>ring_lock, flags);
+
+   r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+   /* don't wait anymore for gpu reset case because this way may
+* block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+* is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+* never return if we keep waiting in virt_kiq_rreg, which cause
+* gpu_recover() hang there.
+*
+* also don't wait anymore for IRQ context
+* */
+   if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+   goto failed_kiq;
+
+   might_sleep();
+
+   while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+   msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+   r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+   }
+
+   if (cnt > MAX_KIQ_REG_TRY)
+   goto failed_kiq;
+
+   return 0;
+
+failed_kiq:
+   pr_err("failed to invalidate tlb with kiq\n");
+   return r;
+}
+
  /*
   * GART
   * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -332,6 +384,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev,
/* Use register 17 for GART */
const unsigned eng = 17;
unsigned i, j;
+   int r;
  
  	spin_lock(>gmc.invalidate_lock);
  
@@ -339,6 +392,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,

struct amdgpu_vmhub *hub = >vmhub[i];
u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
  
+		spin_unlock(>gmc.invalidate_lock);


Well just remove the lock altogether. Taking it and dropping it again 
immediately doesn't do anything useful.



+   r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + 
eng,
+   hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
+   spin_lock(>gmc.invalidate_lock);
+   if (!r)
+   continue;
+
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
  
  		/* Busy wait for ACK.*/
Since you now use the KIQ to wait for the TLB flush I think we can now 
remove the MMIO implementation.


Regards,
Christian.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: Remove the sriov checking and add firmware checking

2018-08-16 Thread Christian König

Am 16.08.2018 um 15:05 schrieb Emily Deng:

Unify bare metal and sriov, and add firmware checking for
reg write and reg wait unify command.

Signed-off-by: Emily Deng 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  2 ++
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 47 -
  2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 53e9e2a..f172e92 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -274,6 +274,8 @@ struct amdgpu_gfx {
uint32_trlc_srls_feature_version;
uint32_tmec_feature_version;
uint32_tmec2_feature_version;
+   boolmec_fw_write_wait;
+   boolme_fw_write_wait;
struct amdgpu_ring  gfx_ring[AMDGPU_MAX_GFX_RINGS];
unsignednum_gfx_rings;
struct amdgpu_ring  compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 76d979e..76e8973 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -482,6 +482,47 @@ static void gfx_v9_0_init_rlc_ext_microcode(struct 
amdgpu_device *adev)

le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
  }
  
+static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)

+{
+   adev->gfx.me_fw_write_wait = false;
+   adev->gfx.mec_fw_write_wait = false;
+
+   switch (adev->asic_type) {
+   case CHIP_VEGA10:
+   if ((adev->gfx.me_fw_version >= 0x009c) && 
(adev->gfx.me_feature_version >= 42) &&
+   (adev->gfx.pfp_fw_version >=  0x00b1) && 
(adev->gfx.pfp_feature_version >= 42))
+   adev->gfx.me_fw_write_wait = true;


Well that looks like the correct solution to me. Only a nit pick left to 
fix, the coding style looks a bit off.


For example the "if" should be indented so that "(adev.." in both lines 
is on the same column:


if ((adev ) &&
    (adev

And the "adev->gfx.me_fw_write_wait = true;" is to far to the right.

With that fixed the patch is Acked-by: Christian König 
.


Regards,
Christian.


+
+   if ((adev->gfx.mec_fw_version >=  0x0193) && 
(adev->gfx.mec_feature_version >= 42))
+   adev->gfx.mec_fw_write_wait = true;
+   break;
+   case CHIP_VEGA12:
+   if ((adev->gfx.me_fw_version >= 0x009c) && 
(adev->gfx.me_feature_version >= 44) &&
+   (adev->gfx.pfp_fw_version >=  0x00b2) && 
(adev->gfx.pfp_feature_version >= 44))
+   adev->gfx.me_fw_write_wait = true;
+
+   if ((adev->gfx.mec_fw_version >=  0x0196) && 
(adev->gfx.mec_feature_version >= 44))
+   adev->gfx.mec_fw_write_wait = true;
+   break;
+   case CHIP_VEGA20:
+   if ((adev->gfx.me_fw_version >= 0x009c) && 
(adev->gfx.me_feature_version >= 44) &&
+   (adev->gfx.pfp_fw_version >=  0x00b2) && 
(adev->gfx.pfp_feature_version >= 44))
+   adev->gfx.me_fw_write_wait = true;
+
+   if ((adev->gfx.mec_fw_version >=  0x0197) && 
(adev->gfx.mec_feature_version >= 44))
+   adev->gfx.mec_fw_write_wait = true;
+   break;
+   case CHIP_RAVEN:
+   if ((adev->gfx.me_fw_version >= 0x009c) && 
(adev->gfx.me_feature_version >= 42) &&
+   (adev->gfx.pfp_fw_version >=  0x00b1) && 
(adev->gfx.pfp_feature_version >= 42))
+   adev->gfx.me_fw_write_wait = true;
+
+   if ((adev->gfx.mec_fw_version >=  0x0192) && 
(adev->gfx.mec_feature_version >= 42))
+   adev->gfx.mec_fw_write_wait = true;
+   break;
+   }
+}
+
  static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
  {
const char *chip_name;
@@ -716,6 +757,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device 
*adev)
}
  
  out:

+   gfx_v9_0_check_fw_write_wait(adev);
if (err) {
dev_err(adev->dev,
"gfx9: Failed to load firmware \"%s\"\n",
@@ -4348,8 +4390,11 @@ static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct 
amdgpu_ring *ring,
  uint32_t ref, uint32_t mask)
  {
int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+   struct amdgpu_device *adev = ring->adev;
+   bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
+   adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
  
-	if 

[PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb

2018-08-16 Thread Emily Deng
To avoid the tlb flush not interrupted by world switch, use kiq and one
command to do tlb invalidate.

Signed-off-by: Emily Deng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  4 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  3 --
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 60 
 3 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6265b88..19ef7711 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -212,6 +212,10 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_LAST
 };
 
+#define MAX_KIQ_REG_WAIT   5000 /* in usecs, 5ms */
+#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
+#define MAX_KIQ_REG_TRY 20
+
 int amdgpu_device_ip_set_clockgating_state(void *dev,
   enum amd_ip_block_type block_type,
   enum amd_clockgating_state state);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 21adb1b6..3885636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -22,9 +22,6 @@
  */
 
 #include "amdgpu.h"
-#define MAX_KIQ_REG_WAIT   5000 /* in usecs, 5ms */
-#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
-#define MAX_KIQ_REG_TRY 20
 
 uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index ed467de..6c164bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -311,6 +311,58 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int 
vmid)
return req;
 }
 
+signed long  amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+   signed long r, cnt = 0;
+   unsigned long flags;
+   uint32_t seq;
+   struct amdgpu_kiq *kiq = >gfx.kiq;
+   struct amdgpu_ring *ring = >ring;
+
+   if (!ring->ready)
+   return -EINVAL;
+
+   spin_lock_irqsave(>ring_lock, flags);
+
+   amdgpu_ring_alloc(ring, 32);
+   amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
+   ref, mask);
+   amdgpu_fence_emit_polling(ring, );
+   amdgpu_ring_commit(ring);
+   spin_unlock_irqrestore(>ring_lock, flags);
+
+   r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+   /* don't wait anymore for gpu reset case because this way may
+* block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+* is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+* never return if we keep waiting in virt_kiq_rreg, which cause
+* gpu_recover() hang there.
+*
+* also don't wait anymore for IRQ context
+* */
+   if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+   goto failed_kiq;
+
+   might_sleep();
+
+   while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+   msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+   r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+   }
+
+   if (cnt > MAX_KIQ_REG_TRY)
+   goto failed_kiq;
+
+   return 0;
+
+failed_kiq:
+   pr_err("failed to invalidate tlb with kiq\n");
+   return r;
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -332,6 +384,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev,
/* Use register 17 for GART */
const unsigned eng = 17;
unsigned i, j;
+   int r;
 
spin_lock(>gmc.invalidate_lock);
 
@@ -339,6 +392,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev,
struct amdgpu_vmhub *hub = >vmhub[i];
u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
 
+   spin_unlock(>gmc.invalidate_lock);
+   r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + 
eng,
+   hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
+   spin_lock(>gmc.invalidate_lock);
+   if (!r)
+   continue;
+
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
 
/* Busy wait for ACK.*/
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: Remove the sriov checking and add firmware checking

2018-08-16 Thread Emily Deng
Unify bare metal and sriov, and add firmware checking for
reg write and reg wait unify command.

Signed-off-by: Emily Deng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 47 -
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 53e9e2a..f172e92 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -274,6 +274,8 @@ struct amdgpu_gfx {
uint32_trlc_srls_feature_version;
uint32_tmec_feature_version;
uint32_tmec2_feature_version;
+   boolmec_fw_write_wait;
+   boolme_fw_write_wait;
struct amdgpu_ring  gfx_ring[AMDGPU_MAX_GFX_RINGS];
unsignednum_gfx_rings;
struct amdgpu_ring  compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 76d979e..76e8973 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -482,6 +482,47 @@ static void gfx_v9_0_init_rlc_ext_microcode(struct 
amdgpu_device *adev)

le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
 }
 
+static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+   adev->gfx.me_fw_write_wait = false;
+   adev->gfx.mec_fw_write_wait = false;
+
+   switch (adev->asic_type) {
+   case CHIP_VEGA10:
+   if ((adev->gfx.me_fw_version >= 0x009c) && 
(adev->gfx.me_feature_version >= 42) &&
+   (adev->gfx.pfp_fw_version >=  0x00b1) && 
(adev->gfx.pfp_feature_version >= 42))
+   adev->gfx.me_fw_write_wait = true;
+
+   if ((adev->gfx.mec_fw_version >=  0x0193) && 
(adev->gfx.mec_feature_version >= 42))
+   adev->gfx.mec_fw_write_wait = true;
+   break;
+   case CHIP_VEGA12:
+   if ((adev->gfx.me_fw_version >= 0x009c) && 
(adev->gfx.me_feature_version >= 44) &&
+   (adev->gfx.pfp_fw_version >=  0x00b2) && 
(adev->gfx.pfp_feature_version >= 44))
+   adev->gfx.me_fw_write_wait = true;
+
+   if ((adev->gfx.mec_fw_version >=  0x0196) && 
(adev->gfx.mec_feature_version >= 44))
+   adev->gfx.mec_fw_write_wait = true;
+   break;
+   case CHIP_VEGA20:
+   if ((adev->gfx.me_fw_version >= 0x009c) && 
(adev->gfx.me_feature_version >= 44) &&
+   (adev->gfx.pfp_fw_version >=  0x00b2) && 
(adev->gfx.pfp_feature_version >= 44))
+   adev->gfx.me_fw_write_wait = true;
+
+   if ((adev->gfx.mec_fw_version >=  0x0197) && 
(adev->gfx.mec_feature_version >= 44))
+   adev->gfx.mec_fw_write_wait = true;
+   break;
+   case CHIP_RAVEN:
+   if ((adev->gfx.me_fw_version >= 0x009c) && 
(adev->gfx.me_feature_version >= 42) &&
+   (adev->gfx.pfp_fw_version >=  0x00b1) && 
(adev->gfx.pfp_feature_version >= 42))
+   adev->gfx.me_fw_write_wait = true;
+
+   if ((adev->gfx.mec_fw_version >=  0x0192) && 
(adev->gfx.mec_feature_version >= 42))
+   adev->gfx.mec_fw_write_wait = true;
+   break;
+   }
+}
+
 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 {
const char *chip_name;
@@ -716,6 +757,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device 
*adev)
}
 
 out:
+   gfx_v9_0_check_fw_write_wait(adev);
if (err) {
dev_err(adev->dev,
"gfx9: Failed to load firmware \"%s\"\n",
@@ -4348,8 +4390,11 @@ static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct 
amdgpu_ring *ring,
  uint32_t ref, uint32_t mask)
 {
int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+   struct amdgpu_device *adev = ring->adev;
+   bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
+   adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
 
-   if (amdgpu_sriov_vf(ring->adev))
+   if (fw_version_ok)
gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
  ref, mask, 0x20);
else
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: remove fulll access for suspend phase1

2018-08-16 Thread Alex Deucher
On Thu, Aug 16, 2018 at 4:26 AM Yintian Tao  wrote:
>
> There is no need for gpu full access for suspend phase1
> because under virtualization there is no hw register access
> for dce block.
>
> Change-Id: Ie1154e2065182ba968732af87f866f11141a102b
> Signed-off-by: Yintian Tao 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 --
>  1 file changed, 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index d84ac23..5da20b1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1968,9 +1968,6 @@ static int amdgpu_device_ip_suspend_phase1(struct 
> amdgpu_device *adev)
>  {
> int i, r;
>
> -   if (amdgpu_sriov_vf(adev))
> -   amdgpu_virt_request_full_gpu(adev, false);
> -
> for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
> if (!adev->ip_blocks[i].status.valid)
> continue;
> @@ -1995,9 +1992,6 @@ static int amdgpu_device_ip_suspend_phase1(struct 
> amdgpu_device *adev)
> }
> }
>
> -   if (amdgpu_sriov_vf(adev))
> -   amdgpu_virt_release_full_gpu(adev, false);
> -
> return 0;
>  }
>
> --
> 2.7.4
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/5] drm_dp_cec: add note about good MegaChips 2900 CEC support

2018-08-16 Thread Hans Verkuil
From: Hans Verkuil 

A big problem with DP CEC-Tunneling-over-AUX is that it is tricky
to find adapters with a chipset that supports this AND where the
manufacturer actually connected the HDMI CEC line to the chipset.

Add a mention of the MegaChips 2900 chipset which seems to support
this feature well.

Signed-off-by: Hans Verkuil 
---
 drivers/gpu/drm/drm_dp_cec.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_dp_cec.c b/drivers/gpu/drm/drm_dp_cec.c
index 1407b13a8d5d..8a718f85079a 100644
--- a/drivers/gpu/drm/drm_dp_cec.c
+++ b/drivers/gpu/drm/drm_dp_cec.c
@@ -16,7 +16,9 @@
  * here. Quite a few active (mini-)DP-to-HDMI or USB-C-to-HDMI adapters
  * have a converter chip that supports CEC-Tunneling-over-AUX (usually the
  * Parade PS176), but they do not wire up the CEC pin, thus making CEC
- * useless.
+ * useless. Note that MegaChips 2900-based adapters appear to have good
+ * support for CEC tunneling. Those adapters that I have tested using
+ * this chipset all have the CEC line connected.
  *
  * Sadly there is no way for this driver to know this. What happens is
  * that a /dev/cecX device is created that is isolated and unable to see
-- 
2.18.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 4/5] drm/nouveau: add DisplayPort CEC-Tunneling-over-AUX support

2018-08-16 Thread Hans Verkuil
From: Hans Verkuil 

Add DisplayPort CEC-Tunneling-over-AUX support to nouveau.

Signed-off-by: Hans Verkuil 
---
 drivers/gpu/drm/nouveau/nouveau_connector.c | 17 +++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c 
b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 51932c72334e..eb4f766b5958 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -400,8 +400,10 @@ nouveau_connector_destroy(struct drm_connector *connector)
kfree(nv_connector->edid);
drm_connector_unregister(connector);
drm_connector_cleanup(connector);
-   if (nv_connector->aux.transfer)
+   if (nv_connector->aux.transfer) {
+   drm_dp_cec_unregister_connector(_connector->aux);
drm_dp_aux_unregister(_connector->aux);
+   }
kfree(connector);
 }
 
@@ -608,6 +610,7 @@ nouveau_connector_detect(struct drm_connector *connector, 
bool force)
 
nouveau_connector_set_encoder(connector, nv_encoder);
conn_status = connector_status_connected;
+   drm_dp_cec_set_edid(_connector->aux, nv_connector->edid);
goto out;
}
 
@@ -1108,11 +,14 @@ nouveau_connector_hotplug(struct nvif_notify *notify)
 
if (rep->mask & NVIF_NOTIFY_CONN_V0_IRQ) {
NV_DEBUG(drm, "service %s\n", name);
+   drm_dp_cec_irq(_connector->aux);
if ((nv_encoder = find_encoder(connector, DCB_OUTPUT_DP)))
nv50_mstm_service(nv_encoder->dp.mstm);
} else {
bool plugged = (rep->mask != NVIF_NOTIFY_CONN_V0_UNPLUG);
 
+   if (!plugged)
+   drm_dp_cec_unset_edid(_connector->aux);
NV_DEBUG(drm, "%splugged %s\n", plugged ? "" : "un", name);
if ((nv_encoder = find_encoder(connector, DCB_OUTPUT_DP))) {
if (!plugged)
@@ -1302,7 +1308,6 @@ nouveau_connector_create(struct drm_device *dev, int 
index)
kfree(nv_connector);
return ERR_PTR(ret);
}
-
funcs = _connector_funcs;
break;
default:
@@ -1356,6 +1361,14 @@ nouveau_connector_create(struct drm_device *dev, int 
index)
break;
}
 
+   switch (type) {
+   case DRM_MODE_CONNECTOR_DisplayPort:
+   case DRM_MODE_CONNECTOR_eDP:
+   drm_dp_cec_register_connector(_connector->aux,
+ connector->name, dev->dev);
+   break;
+   }
+
ret = nvif_notify_init(>disp.object, nouveau_connector_hotplug,
   true, NV04_DISP_NTFY_CONN,
   &(struct nvif_notify_conn_req_v0) {
-- 
2.18.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/5] drm_dp_mst_topology: fix broken drm_dp_sideband_parse_remote_dpcd_read()

2018-08-16 Thread Hans Verkuil
From: Hans Verkuil 

When parsing the reply of a DP_REMOTE_DPCD_READ DPCD command the
result is wrong due to a missing idx increment.

This was never noticed since DP_REMOTE_DPCD_READ is currently not
used, but if you enable it, then it is all wrong.

Signed-off-by: Hans Verkuil 
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c 
b/drivers/gpu/drm/drm_dp_mst_topology.c
index 7780567aa669..5ff1d79b86c4 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -439,6 +439,7 @@ static bool drm_dp_sideband_parse_remote_dpcd_read(struct 
drm_dp_sideband_msg_rx
if (idx > raw->curlen)
goto fail_len;
repmsg->u.remote_dpcd_read_ack.num_bytes = raw->msg[idx];
+   idx++;
if (idx > raw->curlen)
goto fail_len;
 
-- 
2.18.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 5/5] drm/amdgpu: add DisplayPort CEC-Tunneling-over-AUX support

2018-08-16 Thread Hans Verkuil
From: Hans Verkuil 

Add DisplayPort CEC-Tunneling-over-AUX support to amdgpu.

Signed-off-by: Hans Verkuil 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c   | 13 +++--
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c |  2 ++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 34f34823bab5..77898c95bef6 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -898,6 +898,7 @@ amdgpu_dm_update_connector_after_detect(struct 
amdgpu_dm_connector *aconnector)
aconnector->dc_sink = sink;
if (sink->dc_edid.length == 0) {
aconnector->edid = NULL;
+   drm_dp_cec_unset_edid(>dm_dp_aux.aux);
} else {
aconnector->edid =
(struct edid *) sink->dc_edid.raw_edid;
@@ -905,10 +906,13 @@ amdgpu_dm_update_connector_after_detect(struct 
amdgpu_dm_connector *aconnector)
 
drm_connector_update_edid_property(connector,
aconnector->edid);
+   drm_dp_cec_set_edid(>dm_dp_aux.aux,
+   aconnector->edid);
}
amdgpu_dm_add_sink_to_freesync_module(connector, 
aconnector->edid);
 
} else {
+   drm_dp_cec_unset_edid(>dm_dp_aux.aux);
amdgpu_dm_remove_sink_from_freesync_module(connector);
drm_connector_update_edid_property(connector, NULL);
aconnector->num_modes = 0;
@@ -1059,12 +1063,16 @@ static void handle_hpd_rx_irq(void *param)
drm_kms_helper_hotplug_event(dev);
}
}
+
if ((dc_link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) ||
-   (dc_link->type == dc_connection_mst_branch))
+   (dc_link->type == dc_connection_mst_branch)) {
dm_handle_hpd_rx_irq(aconnector);
+   }
 
-   if (dc_link->type != dc_connection_mst_branch)
+   if (dc_link->type != dc_connection_mst_branch) {
+   drm_dp_cec_irq(>dm_dp_aux.aux);
mutex_unlock(>hpd_lock);
+   }
 }
 
 static void register_hpd_handlers(struct amdgpu_device *adev)
@@ -2732,6 +2740,7 @@ static void amdgpu_dm_connector_destroy(struct 
drm_connector *connector)
dm->backlight_dev = NULL;
}
 #endif
+   drm_dp_cec_unregister_connector(>dm_dp_aux.aux);
drm_connector_unregister(connector);
drm_connector_cleanup(connector);
kfree(connector);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 9a300732ba37..18a3a6e5ffa0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -496,6 +496,8 @@ void amdgpu_dm_initialize_dp_connector(struct 
amdgpu_display_manager *dm,
aconnector->dm_dp_aux.ddc_service = aconnector->dc_link->ddc;
 
drm_dp_aux_register(>dm_dp_aux.aux);
+   drm_dp_cec_register_connector(>dm_dp_aux.aux,
+ aconnector->base.name, dm->adev->dev);
aconnector->mst_mgr.cbs = _mst_cbs;
drm_dp_mst_topology_mgr_init(
>mst_mgr,
-- 
2.18.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 0/5] drm/nouveau+amdgpu: add DP CEC-Tunneling-over-AUX

2018-08-16 Thread Hans Verkuil
From: Hans Verkuil 

Now that the DisplayPort CEC-Tunneling-over-AUX drm+i915 support
has been merged in the mainline kernel it is time to roll this
out to nouveau and amdgpu as well.

I combined both in the same patch series since both depend on the
same first patch, the comments in this cover letter apply to both
and the implementation is also very similar (and simple).

As mentioned, the first patch is required for this: it adds checks that
the drm_dp_cec functions are called with a working aux implementation.
These checks weren't necessary for the i915, but nouveau and amdgpu
require them.

The next two patches update a comment in drm_dp_cec.c and fix a bug
in sideband AUX handling that I found while researching CEC Tunneling
over an MST hub. It's there to prevent others from stumbling over the
same bug in the future.

The fourth patch adds support for CEC to the nouveau driver.

The last patch adds support for CEC to the amdgpu driver. However, there
appear to be two classes of amdgpu hardware: as a discrete GPU or
integrated. I only have a discrete GPU, so I can't test the integrated
GPU support and I only implemented this for the discrete GPU case.

If someone has the integrated GPU and wants to get this working and is
willing to do some testing, then please contact me. It shouldn't be
difficult. You will likely have to buy a working DP-to-HDMI adapter.
See https://hverkuil.home.xs4all.nl/cec-status.txt for a (sadly very
short) list of working adapters.

Note that I may be completely off-base regarding what atombios_dp.c
does, it's the first time I ever looked at amdgpu code.

Two notes on CEC-Tunneling-over-AUX:

1) You need to be very careful about which USB-C/DP/mini-DP to HDMI
   adapters you buy. Only a few support this feature correctly today.
   Known chipsets that support this are Parade PS175 & PS176 and
   MegaChips 2900. Unfortunately almost all Parade-based adapters
   do not hook up the HDMI CEC pin to the chip, making them useless
   for this. The Parade reference design was only recently changed
   to hook up this pin, so perhaps this situation will change for
   new Parade-based adapters.

   Adapters based on the new MegaChips 2900 fare much better: it
   appears that their reference design *does* hook up the CEC pin.
   Club3D has adapters using this device for USB-C, DP and mini-DP
   to HDMI, and they all work fine.

   If anyone finds other adapters that work besides those I list
   in https://hverkuil.home.xs4all.nl/cec-status.txt, please let
   me know and I'll add them to the list.

   Linux is the first OS that supports this feature, so I am
   not surprised that HW support for this has been poor. Hopefully
   this will change going forward. BTW, note the irony that CEC is
   now supported for DP-to-HDMI adapters, but not for the native
   HDMI ports on NVIDIA/AMD/Intel GPUs.

2) CEC-Tunneling does not work (yet?) if there is an MST hub in
   between. I'm still researching this but this might be a limitation 
   of MST.

Regards,

Hans

Hans Verkuil (5):
  drm_dp_cec: check that aux has a transfer function
  drm_dp_cec: add note about good MegaChips 2900 CEC support
  drm_dp_mst_topology: fix broken
drm_dp_sideband_parse_remote_dpcd_read()
  drm/nouveau: add DisplayPort CEC-Tunneling-over-AUX support
  drm/amdgpu: add DisplayPort CEC-Tunneling-over-AUX support

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  | 13 +++--
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c|  2 ++
 drivers/gpu/drm/drm_dp_cec.c   | 18 +-
 drivers/gpu/drm/drm_dp_mst_topology.c  |  1 +
 drivers/gpu/drm/nouveau/nouveau_connector.c| 17 +++--
 5 files changed, 46 insertions(+), 5 deletions(-)

-- 
2.18.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/5] drm_dp_cec: check that aux has a transfer function

2018-08-16 Thread Hans Verkuil
From: Hans Verkuil 

If aux->transfer == NULL, then just return without doing
anything. In that case the function is likely called for
a non-(e)DP connector.

This never happened for the i915 driver, but the nouveau and amdgpu
drivers need this check.

The alternative would be to add this check in those drivers before
every drm_dp_cec call, but it makes sense to check it in the
drm_dp_cec functions to prevent a kernel oops.

Signed-off-by: Hans Verkuil 
---
 drivers/gpu/drm/drm_dp_cec.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/drm_dp_cec.c b/drivers/gpu/drm/drm_dp_cec.c
index 988513346e9c..1407b13a8d5d 100644
--- a/drivers/gpu/drm/drm_dp_cec.c
+++ b/drivers/gpu/drm/drm_dp_cec.c
@@ -238,6 +238,10 @@ void drm_dp_cec_irq(struct drm_dp_aux *aux)
u8 cec_irq;
int ret;
 
+   /* No transfer function was set, so not a DP connector */
+   if (!aux->transfer)
+   return;
+
mutex_lock(>cec.lock);
if (!aux->cec.adap)
goto unlock;
@@ -293,6 +297,10 @@ void drm_dp_cec_set_edid(struct drm_dp_aux *aux, const 
struct edid *edid)
unsigned int num_las = 1;
u8 cap;
 
+   /* No transfer function was set, so not a DP connector */
+   if (!aux->transfer)
+   return;
+
 #ifndef CONFIG_MEDIA_CEC_RC
/*
 * CEC_CAP_RC is part of CEC_CAP_DEFAULTS, but it is stripped by
@@ -361,6 +369,10 @@ EXPORT_SYMBOL(drm_dp_cec_set_edid);
  */
 void drm_dp_cec_unset_edid(struct drm_dp_aux *aux)
 {
+   /* No transfer function was set, so not a DP connector */
+   if (!aux->transfer)
+   return;
+
cancel_delayed_work_sync(>cec.unregister_work);
 
mutex_lock(>cec.lock);
@@ -404,6 +416,8 @@ void drm_dp_cec_register_connector(struct drm_dp_aux *aux, 
const char *name,
   struct device *parent)
 {
WARN_ON(aux->cec.adap);
+   if (WARN_ON(!aux->transfer))
+   return;
aux->cec.name = name;
aux->cec.parent = parent;
INIT_DELAYED_WORK(>cec.unregister_work,
-- 
2.18.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: rework ctx entity creation

2018-08-16 Thread zhoucm1



On 2018年08月16日 16:11, Christian König wrote:

Am 16.08.2018 um 04:07 schrieb zhoucm1:



On 2018年08月15日 18:59, Christian König wrote:

Use a fixed number of entities for each hardware IP.

The number of compute entities is reduced to four, SDMA keeps it two
entities and all other engines just expose one entity.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 291 


  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  30 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  36 ++--
  3 files changed, 190 insertions(+), 167 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index 0a6cd1202ee5..987b7f256463 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -27,8 +27,29 @@
  #include "amdgpu.h"
  #include "amdgpu_sched.h"
  -#define to_amdgpu_ctx_ring(e)    \
-    container_of((e), struct amdgpu_ctx_ring, entity)
+#define to_amdgpu_ctx_entity(e)    \
+    container_of((e), struct amdgpu_ctx_entity, entity)
+
+const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
+    [AMDGPU_HW_IP_GFX]    =    1,
+    [AMDGPU_HW_IP_COMPUTE]    =    4,

Could you explain why reduct to four? otherwise it looks good to me.


Currently we change the priority of the compute queues on the fly, but 
the idea is that we will have fixed high priority and low priority 
compute queues in the future.
Yeah, I see that, feel free to add my RB: Reviewed-by: Chunming Zhou 



Regards,
David Zhou


We could as well say we have only 2 or 3 if the closed stack is fine 
with that.


Regards,
Christian.



Thanks,
David Zhou

+    [AMDGPU_HW_IP_DMA]    =    2,
+    [AMDGPU_HW_IP_UVD]    =    1,
+    [AMDGPU_HW_IP_VCE]    =    1,
+    [AMDGPU_HW_IP_UVD_ENC]    =    1,
+    [AMDGPU_HW_IP_VCN_DEC]    =    1,
+    [AMDGPU_HW_IP_VCN_ENC]    =    1,
+};
+
+static int amdgput_ctx_total_num_entities(void)
+{
+    unsigned i, num_entities = 0;
+
+    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+    num_entities += amdgpu_ctx_num_entities[i];
+
+    return num_entities;
+}
    static int amdgpu_ctx_priority_permit(struct drm_file *filp,
    enum drm_sched_priority priority)
@@ -51,9 +72,8 @@ static int amdgpu_ctx_init(struct amdgpu_device 
*adev,

 struct drm_file *filp,
 struct amdgpu_ctx *ctx)
  {
-    struct drm_sched_rq *sdma_rqs[AMDGPU_MAX_RINGS];
-    struct drm_sched_rq *comp_rqs[AMDGPU_MAX_RINGS];
-    unsigned i, j, num_sdma_rqs, num_comp_rqs;
+    unsigned num_entities = amdgput_ctx_total_num_entities();
+    unsigned i, j;
  int r;
    if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
@@ -65,19 +85,33 @@ static int amdgpu_ctx_init(struct amdgpu_device 
*adev,

    memset(ctx, 0, sizeof(*ctx));
  ctx->adev = adev;
-    kref_init(>refcount);
-    spin_lock_init(>ring_lock);
-    ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
+
+    ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
    sizeof(struct dma_fence*), GFP_KERNEL);
  if (!ctx->fences)
  return -ENOMEM;
  -    mutex_init(>lock);
+    ctx->entities[0] = kcalloc(num_entities,
+   sizeof(struct amdgpu_ctx_entity),
+   GFP_KERNEL);
+    if (!ctx->entities[0]) {
+    r = -ENOMEM;
+    goto error_free_fences;
+    }
  -    for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-    ctx->rings[i].sequence = 1;
-    ctx->rings[i].fences = >fences[amdgpu_sched_jobs * i];
+    for (i = 0; i < num_entities; ++i) {
+    struct amdgpu_ctx_entity *entity = >entities[0][i];
+
+    entity->sequence = 1;
+    entity->fences = >fences[amdgpu_sched_jobs * i];
  }
+    for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
+    ctx->entities[i] = ctx->entities[i - 1] +
+    amdgpu_ctx_num_entities[i - 1];
+
+    kref_init(>refcount);
+    spin_lock_init(>ring_lock);
+    mutex_init(>lock);
    ctx->reset_counter = atomic_read(>gpu_reset_counter);
  ctx->reset_counter_query = ctx->reset_counter;
@@ -85,50 +119,70 @@ static int amdgpu_ctx_init(struct amdgpu_device 
*adev,

  ctx->init_priority = priority;
  ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
  -    num_sdma_rqs = 0;
-    num_comp_rqs = 0;
-    for (i = 0; i < adev->num_rings; i++) {
-    struct amdgpu_ring *ring = adev->rings[i];
-    struct drm_sched_rq *rq;
-
-    rq = >sched.sched_rq[priority];
-    if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA)
-    sdma_rqs[num_sdma_rqs++] = rq;
-    else if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
-    comp_rqs[num_comp_rqs++] = rq;
-    }
-
-    /* create context entity for each ring */
-    for (i = 0; i < adev->num_rings; i++) {
-    struct amdgpu_ring *ring = adev->rings[i];
+    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+    struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+    struct drm_sched_rq 

[PATCH] drm/amdgpu: remove fulll access for suspend phase1

2018-08-16 Thread Yintian Tao
There is no need for gpu full access for suspend phase1
because under virtualization there is no hw register access
for dce block.

Change-Id: Ie1154e2065182ba968732af87f866f11141a102b
Signed-off-by: Yintian Tao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d84ac23..5da20b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1968,9 +1968,6 @@ static int amdgpu_device_ip_suspend_phase1(struct 
amdgpu_device *adev)
 {
int i, r;
 
-   if (amdgpu_sriov_vf(adev))
-   amdgpu_virt_request_full_gpu(adev, false);
-
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
continue;
@@ -1995,9 +1992,6 @@ static int amdgpu_device_ip_suspend_phase1(struct 
amdgpu_device *adev)
}
}
 
-   if (amdgpu_sriov_vf(adev))
-   amdgpu_virt_release_full_gpu(adev, false);
-
return 0;
 }
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: rework ctx entity creation

2018-08-16 Thread Christian König

Am 16.08.2018 um 04:07 schrieb zhoucm1:



On 2018年08月15日 18:59, Christian König wrote:

Use a fixed number of entities for each hardware IP.

The number of compute entities is reduced to four, SDMA keeps it two
entities and all other engines just expose one entity.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 291 


  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  30 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  36 ++--
  3 files changed, 190 insertions(+), 167 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index 0a6cd1202ee5..987b7f256463 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -27,8 +27,29 @@
  #include "amdgpu.h"
  #include "amdgpu_sched.h"
  -#define to_amdgpu_ctx_ring(e)    \
-    container_of((e), struct amdgpu_ctx_ring, entity)
+#define to_amdgpu_ctx_entity(e)    \
+    container_of((e), struct amdgpu_ctx_entity, entity)
+
+const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
+    [AMDGPU_HW_IP_GFX]    =    1,
+    [AMDGPU_HW_IP_COMPUTE]    =    4,

Could you explain why reduct to four? otherwise it looks good to me.


Currently we change the priority of the compute queues on the fly, but 
the idea is that we will have fixed high priority and low priority 
compute queues in the future.


We could as well say we have only 2 or 3 if the closed stack is fine 
with that.


Regards,
Christian.



Thanks,
David Zhou

+    [AMDGPU_HW_IP_DMA]    =    2,
+    [AMDGPU_HW_IP_UVD]    =    1,
+    [AMDGPU_HW_IP_VCE]    =    1,
+    [AMDGPU_HW_IP_UVD_ENC]    =    1,
+    [AMDGPU_HW_IP_VCN_DEC]    =    1,
+    [AMDGPU_HW_IP_VCN_ENC]    =    1,
+};
+
+static int amdgput_ctx_total_num_entities(void)
+{
+    unsigned i, num_entities = 0;
+
+    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+    num_entities += amdgpu_ctx_num_entities[i];
+
+    return num_entities;
+}
    static int amdgpu_ctx_priority_permit(struct drm_file *filp,
    enum drm_sched_priority priority)
@@ -51,9 +72,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 struct drm_file *filp,
 struct amdgpu_ctx *ctx)
  {
-    struct drm_sched_rq *sdma_rqs[AMDGPU_MAX_RINGS];
-    struct drm_sched_rq *comp_rqs[AMDGPU_MAX_RINGS];
-    unsigned i, j, num_sdma_rqs, num_comp_rqs;
+    unsigned num_entities = amdgput_ctx_total_num_entities();
+    unsigned i, j;
  int r;
    if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
@@ -65,19 +85,33 @@ static int amdgpu_ctx_init(struct amdgpu_device 
*adev,

    memset(ctx, 0, sizeof(*ctx));
  ctx->adev = adev;
-    kref_init(>refcount);
-    spin_lock_init(>ring_lock);
-    ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
+
+    ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
    sizeof(struct dma_fence*), GFP_KERNEL);
  if (!ctx->fences)
  return -ENOMEM;
  -    mutex_init(>lock);
+    ctx->entities[0] = kcalloc(num_entities,
+   sizeof(struct amdgpu_ctx_entity),
+   GFP_KERNEL);
+    if (!ctx->entities[0]) {
+    r = -ENOMEM;
+    goto error_free_fences;
+    }
  -    for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-    ctx->rings[i].sequence = 1;
-    ctx->rings[i].fences = >fences[amdgpu_sched_jobs * i];
+    for (i = 0; i < num_entities; ++i) {
+    struct amdgpu_ctx_entity *entity = >entities[0][i];
+
+    entity->sequence = 1;
+    entity->fences = >fences[amdgpu_sched_jobs * i];
  }
+    for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
+    ctx->entities[i] = ctx->entities[i - 1] +
+    amdgpu_ctx_num_entities[i - 1];
+
+    kref_init(>refcount);
+    spin_lock_init(>ring_lock);
+    mutex_init(>lock);
    ctx->reset_counter = atomic_read(>gpu_reset_counter);
  ctx->reset_counter_query = ctx->reset_counter;
@@ -85,50 +119,70 @@ static int amdgpu_ctx_init(struct amdgpu_device 
*adev,

  ctx->init_priority = priority;
  ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
  -    num_sdma_rqs = 0;
-    num_comp_rqs = 0;
-    for (i = 0; i < adev->num_rings; i++) {
-    struct amdgpu_ring *ring = adev->rings[i];
-    struct drm_sched_rq *rq;
-
-    rq = >sched.sched_rq[priority];
-    if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA)
-    sdma_rqs[num_sdma_rqs++] = rq;
-    else if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
-    comp_rqs[num_comp_rqs++] = rq;
-    }
-
-    /* create context entity for each ring */
-    for (i = 0; i < adev->num_rings; i++) {
-    struct amdgpu_ring *ring = adev->rings[i];
+    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+    struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+    struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
+    unsigned num_rings;
+
+    switch (i) {
+    case AMDGPU_HW_IP_GFX:
+    rings[0] = 

[PATCH] drm/amdpgu: access register without KIQ

2018-08-16 Thread Yintian Tao
there is no need to access register such as mmSMC_IND_INDEX_11
and mmSMC_IND_DATA_11 through KIQ because they are VF-copy.

Change-Id: I88302dc5945e0b0fa2e6411081fb798aab4fdb5e
Signed-off-by: Yintian Tao 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 29fbb91..5a30bf7 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -112,8 +112,8 @@ static u32 vi_smc_rreg(struct amdgpu_device *adev, u32 reg)
u32 r;
 
spin_lock_irqsave(>smc_idx_lock, flags);
-   WREG32(mmSMC_IND_INDEX_11, (reg));
-   r = RREG32(mmSMC_IND_DATA_11);
+   WREG32_NO_KIQ(mmSMC_IND_INDEX_11, (reg));
+   r = RREG32_NO_KIQ(mmSMC_IND_DATA_11);
spin_unlock_irqrestore(>smc_idx_lock, flags);
return r;
 }
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use kiq to do invalidate tlb

2018-08-16 Thread Deng, Emily
Thanks all, I am modifying the patch, and testing.

Best wishes
Emily Deng



>-Original Message-
>From: Zhu, Rex
>Sent: Thursday, August 16, 2018 2:25 PM
>To: 'Alex Deucher' 
>Cc: Deng, Emily ; 'amd-gfx list' g...@lists.freedesktop.org>
>Subject: RE: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use kiq to do
>invalidate tlb
>
>
>
>> -Original Message-
>> From: Zhu, Rex
>> Sent: Thursday, August 16, 2018 1:41 PM
>> To: 'Alex Deucher' 
>> Cc: Deng, Emily ; amd-gfx list > g...@lists.freedesktop.org>
>> Subject: RE: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use kiq
>> to do invalidate tlb
>>
>>
>>
>> > -Original Message-
>> > From: Alex Deucher 
>> > Sent: Wednesday, August 15, 2018 10:14 PM
>> > To: Zhu, Rex 
>> > Cc: Deng, Emily ; amd-gfx list > > g...@lists.freedesktop.org>
>> > Subject: Re: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use
>> > kiq to do invalidate tlb
>> >
>> > On Wed, Aug 15, 2018 at 9:56 AM Zhu, Rex  wrote:
>> > >
>> > >
>> > >
>> > > > -Original Message-
>> > > > From: amd-gfx  On Behalf
>> > > > Of Emily Deng
>> > > > Sent: Wednesday, August 15, 2018 5:48 PM
>> > > > To: amd-gfx@lists.freedesktop.org
>> > > > Cc: Deng, Emily 
>> > > > Subject: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use
>> > > > kiq to do invalidate tlb
>> > > >
>> > > > To avoid the tlb flush not interrupted by world switch, use kiq
>> > > > and one command to do tlb invalidate.
>> > > >
>> > > > v2:
>> > > > Add firmware version checking.
>> > > >
>> > > > v3:
>> > > > Refine the code, and move the firmware checking into
>> > > > gfx_v9_0_ring_emit_reg_write_reg_wait.
>> > > >
>> > > > SWDEV-161497
>> > >
>> > > The "SWDEV-161497" is meanless.
>> > > you can describe the issue or just remove the bug number.
>> > >
>> > > >
>> > > > Signed-off-by: Emily Deng 
>> > > > ---
>> > > >  drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  4 +++
>> > > >  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  3 --
>> > > >  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 15 +++-
>> > > >  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 61
>> > > > 
>> > > >  4 files changed, 79 insertions(+), 4 deletions(-)
>> > > >
>> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> > > > index 07924d4..67b584b 100644
>> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> > > > @@ -210,6 +210,10 @@ enum amdgpu_kiq_irq {
>> > > >   AMDGPU_CP_KIQ_IRQ_LAST
>> > > >  };
>> > > >
>> > > > +#define MAX_KIQ_REG_WAIT   5000 /* in usecs, 5ms */
>> > > > +#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
>> > > > +#define MAX_KIQ_REG_TRY 20
>> > > > +
>> > > >  int amdgpu_device_ip_set_clockgating_state(void *dev,
>> > > >  enum amd_ip_block_type
>> > > > block_type,
>> > > >  enum
>> > > > amd_clockgating_state state); diff --git
>> > > > a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> > > > index 21adb1b6..3885636 100644
>> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> > > > @@ -22,9 +22,6 @@
>> > > >   */
>> > > >
>> > > >  #include "amdgpu.h"
>> > > > -#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
>> > > > -#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ -
>> > #define
>> > > > MAX_KIQ_REG_TRY 20
>> > > >
>> > > >  uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)  { diff
>> > > > --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> > > > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> > > > index 76d979e..c9b3db4 100644
>> > > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> > > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> > > > @@ -4348,8 +4348,21 @@ static void
>> > > > gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
>> > > > uint32_t ref,
>> > > > uint32_t mask)  {
>> > > >   int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
>> > > > + struct amdgpu_device *adev = ring->adev;
>> > > > + bool fw_version_ok = false;
>> > > >
>> > > > - if (amdgpu_sriov_vf(ring->adev))
>> > > > + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
>> > > > + if ((adev->gfx.me_fw_version >= 0x009c) &&
>> > > > + (adev-
>> > > > >gfx.me_feature_version >= 42))
>> > > > + if ((adev->gfx.pfp_fw_version >=
>> > > > + 0x00b1) &&
>> > > > (adev->gfx.pfp_feature_version >= 42))
>> > > > + fw_version_ok = true;
>> > > > + } else {
>> > > > + if ((adev->gfx.mec_fw_version >=  0x0193) &&
>> > > > + (adev-
>> > > > >gfx.mec_feature_version >= 42))
>> > > > + fw_version_ok = true;
>> > > > + }
>> > >
>> > > Maybe we can add a flag and set the flag when request_firmware.
>> >
>> > I 

Re: [PATCH] drm/amdgpu: fix amdgpu_amdkfd_remove_eviction_fence

2018-08-16 Thread Christian König

Am 15.08.2018 um 20:49 schrieb Felix Kuehling:

On 2018-08-15 02:27 PM, Christian König wrote:

Am 15.08.2018 um 20:17 schrieb Felix Kuehling:

On 2018-08-15 03:02 AM, Christian König wrote:

Hi Felix,

yeah, you pretty much nailed it.

The problem is that the array itself is RCU protected. This means that
you can only copy the whole structure when you want to update it.

The exception is reservation_object_add_shared() which only works
because we replace an either signaled fence or a fence within the same
context but a later sequence number.

This also explains why this is only a band aid and the whole approach
of removing fences doesn't work in general. At any time somebody could
have taken an RCU reference to the old array, created a copy of it and
is now still using this one.

The only 100% correct solution would be to mark the existing fence as
signaled and replace it everywhere else.

Depends what you're trying to achieve. I think the problem you see is,
that some reader may still be using the old reservation_object_list copy
with the fence still in it. But, the remaining lifetime of the
reservation_object_list copy is limited. If we wanted to be sure no more
copies with the old fence exist, all we'd need to do is call
synchronize_rcu. Maybe we need to do that before releasing the fence
references, or release the fence reference in an RCU callback to be
safe.

The assumption that the fence would die with the array is what is
incorrect here.

I'm making no such assumption. The point is not to destroy the fence.
Only to remove the fence reference from the reservation object. That is,
we want any BO with this reservation object to stop checking or waiting
for our eviction fence.


Maybe I'm overthinking this, but let me explain the point once more.

See reservation_object_wait_timeout_rcu() for an example of the problem 
I see here:

    seq = read_seqcount_begin(>seq);
    rcu_read_lock();



    if (!dma_fence_get_rcu(lfence))
    goto unlock_retry;

...

    rcu_read_unlock();

...

    if (read_seqcount_retry(>seq, seq)) {
    dma_fence_put(fence);
    goto retry;
    }

...

    ret = dma_fence_wait_timeout(fence, intr, ret);


In other words the RCU mechanism guarantees that we also wait for newly 
added fences, but it does not guarantee that we don't wait for a fence 
which is already removed.



The lifetime of RCUed array object is limit, but there is absolutely
no guarantee that somebody didn't made a copy of the fences.

E.g. somebody could have called reservation_object_get_fences_rcu(),
reservation_object_copy_fences() or a concurrent
reservation_object_wait_timeout_rcu() is underway.

That's fine. Then there will be additional fence references. When we
remove a fence from a reservation object, we don't know and don't care
who else is holding a reference to the fence anyway. This is no different.


So the KFD implementation doesn't care that the fence is removed from a 
BO but somebody could still start to wait on it because of the BO?


I mean it could be that in the worst case we race and stop a KFD process 
for no good reason.


Regards,
Christian.



Regards,
   Felix


That's also the reason why fences live for much longer than their
signaling, e.g. somebody can have a reference to the fence object even
hours after it is signaled.

Regards,
Christian.


Regards,
    Felix


Going to fix the copy error I made with the sequence number and
send it out again.

Regards,
Christian.

Am 14.08.2018 um 22:17 schrieb Felix Kuehling:

[+Harish]

I think this looks good for the most part. See one comment inline
below.

But bear with me while I'm trying to understand what was wrong with
the
old code. Please correct me if I get this wrong or point out anything
I'm missing.

The reservation_object_list looks to be protected by a combination of
three mechanism:

     * Holding the reservation object
     * RCU
     * seqcount

Updating the fence list requires holding the reservation object. But
there are some readers that can be called without holding that lock
(reservation_object_copy_fences, reservation_object_get_fences_rcu,
reservation_object_wait_timeout_rcu,
reservation_object_test_signaled_rcu). They rely on RCU to work on a
copy and seqcount to make sure they had the most up-to-date
information.
So any function updating the fence lists needs to do RCU and seqcount
correctly to prevent breaking those readers.

As I understand it, RCU with seqcount retry just means that readers
will
spin retrying while there are writers, and writers are never
blocked by
readers. Writers are blocked by each other, because they need to hold
the reservation.

The code you added looks a lot like
reservation_object_add_shared_replace, which removes fences that have
signalled, and atomically replaces obj->fences with a new
reservation_fence_list. That atomicity is important 

RE: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use kiq to do invalidate tlb

2018-08-16 Thread Zhu, Rex


> -Original Message-
> From: Zhu, Rex
> Sent: Thursday, August 16, 2018 1:41 PM
> To: 'Alex Deucher' 
> Cc: Deng, Emily ; amd-gfx list  g...@lists.freedesktop.org>
> Subject: RE: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use kiq to do
> invalidate tlb
> 
> 
> 
> > -Original Message-
> > From: Alex Deucher 
> > Sent: Wednesday, August 15, 2018 10:14 PM
> > To: Zhu, Rex 
> > Cc: Deng, Emily ; amd-gfx list  > g...@lists.freedesktop.org>
> > Subject: Re: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use kiq
> > to do invalidate tlb
> >
> > On Wed, Aug 15, 2018 at 9:56 AM Zhu, Rex  wrote:
> > >
> > >
> > >
> > > > -Original Message-
> > > > From: amd-gfx  On Behalf Of
> > > > Emily Deng
> > > > Sent: Wednesday, August 15, 2018 5:48 PM
> > > > To: amd-gfx@lists.freedesktop.org
> > > > Cc: Deng, Emily 
> > > > Subject: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use kiq
> > > > to do invalidate tlb
> > > >
> > > > To avoid the tlb flush not interrupted by world switch, use kiq
> > > > and one command to do tlb invalidate.
> > > >
> > > > v2:
> > > > Add firmware version checking.
> > > >
> > > > v3:
> > > > Refine the code, and move the firmware checking into
> > > > gfx_v9_0_ring_emit_reg_write_reg_wait.
> > > >
> > > > SWDEV-161497
> > >
> > > The "SWDEV-161497" is meanless.
> > > you can describe the issue or just remove the bug number.
> > >
> > > >
> > > > Signed-off-by: Emily Deng 
> > > > ---
> > > >  drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  4 +++
> > > >  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  3 --
> > > >  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 15 +++-
> > > >  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 61
> > > > 
> > > >  4 files changed, 79 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > > index 07924d4..67b584b 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > > @@ -210,6 +210,10 @@ enum amdgpu_kiq_irq {
> > > >   AMDGPU_CP_KIQ_IRQ_LAST
> > > >  };
> > > >
> > > > +#define MAX_KIQ_REG_WAIT   5000 /* in usecs, 5ms */
> > > > +#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
> > > > +#define MAX_KIQ_REG_TRY 20
> > > > +
> > > >  int amdgpu_device_ip_set_clockgating_state(void *dev,
> > > >  enum amd_ip_block_type
> > > > block_type,
> > > >  enum
> > > > amd_clockgating_state state); diff --git
> > > > a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > > > index 21adb1b6..3885636 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > > > @@ -22,9 +22,6 @@
> > > >   */
> > > >
> > > >  #include "amdgpu.h"
> > > > -#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
> > > > -#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ -
> > #define
> > > > MAX_KIQ_REG_TRY 20
> > > >
> > > >  uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)  { diff
> > > > --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > > > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > > > index 76d979e..c9b3db4 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > > > @@ -4348,8 +4348,21 @@ static void
> > > > gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> > > > uint32_t ref,
> > > > uint32_t mask)  {
> > > >   int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> > > > + struct amdgpu_device *adev = ring->adev;
> > > > + bool fw_version_ok = false;
> > > >
> > > > - if (amdgpu_sriov_vf(ring->adev))
> > > > + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
> > > > + if ((adev->gfx.me_fw_version >= 0x009c) &&
> > > > + (adev-
> > > > >gfx.me_feature_version >= 42))
> > > > + if ((adev->gfx.pfp_fw_version >=
> > > > + 0x00b1) &&
> > > > (adev->gfx.pfp_feature_version >= 42))
> > > > + fw_version_ok = true;
> > > > + } else {
> > > > + if ((adev->gfx.mec_fw_version >=  0x0193) &&
> > > > + (adev-
> > > > >gfx.mec_feature_version >= 42))
> > > > + fw_version_ok = true;
> > > > + }
> > >
> > > Maybe we can add a flag and set the flag when request_firmware.
> >
> > I was going to suggest the same thing.
> >
> >
> > >
> > > > +
> > > > + fw_version_ok = (adev->asic_type == CHIP_VEGA10) ?
> > > > + fw_version_ok
> >
> > Also is this specific to vega10 or do all gfx9 parts have this fix?
> > Please verify.
> >
> > Alex
> 
> The patch can work on Raven.
> Raven  firmware  version as :
> MC feature version: 0, firmware version: 0x ME feature version: 42,
> firmware version: 0x009c PFP feature version: 42,