[PATCH v2] drm/amdgpu: add badpages sysfs interface

2019-05-07 Thread Pan, Xinhui
Add a badpages node.
It outputs the bad page list in the format
page : size : flags

page is a PFN.
flags can be R, P, or F.

example
0x0000 : 0x1000 : R
0x0001 : 0x1000 : R
0x0002 : 0x1000 : R
0x0003 : 0x1000 : R
0x0004 : 0x1000 : R
0x0005 : 0x1000 : R
0x0006 : 0x1000 : R
0x0007 : 0x1000 : P
0x0008 : 0x1000 : P
0x0009 : 0x1000 : P

R: reserved.
P: pending reservation.
F: failed to reserve for some reason.
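
For anyone consuming this node from userspace, a minimal parser sketch
for the format above (the sysfs path is an assumption based on the
"ras" group created later in this patch; adjust the card index as
needed):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/class/drm/card0/device/ras/umc_badpages", "r");
		unsigned int pfn, size;
		char flag;

		if (!f)
			return 1;
		/* each line is "0x%08x : 0x%08x : %c" (page : size : flags) */
		while (fscanf(f, "0x%x : 0x%x : %c\n", &pfn, &size, &flag) == 3)
			printf("bad page PFN 0x%x, size 0x%x, state %c\n",
			       pfn, size, flag);
		fclose(f);
		return 0;
	}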

Signed-off-by: xinhui pan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 133 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |   1 +
 2 files changed, 134 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 22bd21efe6b1..2e9fb785019d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -90,6 +90,12 @@ struct ras_manager {
struct ras_err_data err_data;
 };
 
+struct ras_badpage {
+   unsigned int bp;
+   unsigned int size;
+   unsigned int flags;
+};
+
 const char *ras_error_string[] = {
"none",
"parity",
@@ -691,6 +697,62 @@ int amdgpu_ras_query_error_count(struct amdgpu_device 
*adev,
 
 /* sysfs begin */
 
+static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+   struct ras_badpage **bps, unsigned int *count);
+
+static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
+{
+   switch (flags) {
+   case 0:
+   return "R";
+   case 1:
+   return "P";
+   case 2:
+   default:
+   return "F";
+   }
+}
+
+/*
+ * format: page : size : flags
+ * page: the page frame number of the bad page
+ * size: the size of the bad page range, in bytes
+ * R: reserved
+ * P: pending reservation
+ * F: unable to reserve
+ */
+
+static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
+   struct kobject *kobj, struct bin_attribute *attr,
+   char *buf, loff_t ppos, size_t count)
+{
+   struct amdgpu_ras *con =
+   container_of(attr, struct amdgpu_ras, badpages_attr);
+   struct amdgpu_device *adev = con->adev;
+   const unsigned int element_size =
+   sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
+   unsigned int start = (ppos + element_size - 1) / element_size;
+   unsigned int end = (ppos + count - 1) / element_size;
+   ssize_t s = 0;
+   struct ras_badpage *bps = NULL;
+   unsigned int bps_count = 0;
+
+   memset(buf, 0, count);
+
+   if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
+   return 0;
+
+   for (; start < end && start < bps_count; start++)
+   s += scnprintf(&buf[s], element_size + 1,
+   "0x%08x : 0x%08x : %1s\n",
+   bps[start].bp,
+   bps[start].size,
+   amdgpu_ras_badpage_flags_str(bps[start].flags));
+
+   kfree(bps);
+
+   return s;
+}
+
 static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
struct device_attribute *attr, char *buf)
 {
@@ -731,9 +793,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct 
amdgpu_device *adev)
&con->features_attr.attr,
NULL
};
+   struct bin_attribute *bin_attrs[] = {
+   &con->badpages_attr,
+   NULL
+   };
struct attribute_group group = {
.name = "ras",
.attrs = attrs,
+   .bin_attrs = bin_attrs,
};
 
con->features_attr = (struct device_attribute) {
@@ -743,7 +810,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct 
amdgpu_device *adev)
},
.show = amdgpu_ras_sysfs_features_read,
};
+
+   con->badpages_attr = (struct bin_attribute) {
+   .attr = {
+   .name = "umc_badpages",
+   .mode = S_IRUGO,
+   },
+   .size = 0,
+   .private = NULL,
+   .read = amdgpu_ras_sysfs_badpages_read,
+   };
+
sysfs_attr_init(attrs[0]);
+   sysfs_bin_attr_init(bin_attrs[0]);
 
return sysfs_create_group(&adev->dev->kobj, &group);
 }
@@ -755,9 +834,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct 
amdgpu_device *adev)
&con->features_attr.attr,
NULL
};
+   struct bin_attribute *bin_attrs[] = {
+   &con->badpages_attr,
+   NULL
+   };
struct attribute_group group = {
.name = "ras",
.attrs = attrs,
+   .bin_attrs = bin_attrs,
};
 
sysfs_remove_group(&adev->dev->kobj, &group);
@@ -1089,6 +1173,55 @@ static int amdgpu_ras_interrupt_remove_all(struct 
amdgpu_device *adev)
 /* ih end */
 
 /* recovery begin */
+
+/* return 0 on success.
+ * caller need free bps.
+ */
+static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+

[PATCH 9/9] drm/amdgpu: RLC to program regs for Vega10 SR-IOV

2019-05-07 Thread Trigger Huang
Under Vega10 SR-IOV, with the RLC's new feature, the VF should call the
RLC to program some registers when supported

Signed-off-by: Trigger Huang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  30 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 100 +++---
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c  |  20 ++---
 drivers/gpu/drm/amd/amdgpu/soc15.c|  12 ++-
 4 files changed, 85 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 62067d0..9cd0fd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -259,8 +259,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev 
*kgd, uint32_t vmid,
 
lock_srbm(kgd, 0, 0, 0, vmid);
 
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
/* APE1 no longer exists on GFX9 */
 
unlock_srbm(kgd);
@@ -413,7 +413,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
((mec << 5) | (pipe << 3) | queue_id | 0x80));
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
+   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
}
 
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
@@ -422,13 +422,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
 
for (reg = hqd_base;
 reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
-   WREG32(reg, mqd_hqd[reg - hqd_base]);
+   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
 
 
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
 
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -457,25 +457,25 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
 
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
   lower_32_bits(guessed_wptr));
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
   upper_32_bits(guessed_wptr));
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
   lower_32_bits((uintptr_t)wptr));
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, 
mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
   upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
   get_queue_mask(adev, pipe_id, queue_id));
}
 
/* Start the EOP fetcher */
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
   REG_SET_FIELD(m->cp_hqd_eop_rptr,
 CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
 
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
 
release_queue(kgd);
 
@@ -677,7 +677,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
acquire_queue(kgd, pipe_id, queue_id);
 
if (m->cp_hqd_vmid == 0)
-   WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+   WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
 
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
@@ -691,7 +691,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
break;
}
 
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
 
end_jiffies = (utimeout * HZ / 1000) + 

[PATCH 8/9] drm/amdgpu: add basic func for RLC program reg

2019-05-07 Thread Trigger Huang
New feature for RLC: some registers can be programmed through the
RLC interface under an SR-IOV VF.

WREG32_SOC15_RLC_SHADOW:
1) for GRBM_GFX_CNTL, the new register value should first be
programmed to SCRATCH_REG2
2) for GRBM_GFX_INDEX, the new register value should first be
programmed to SCRATCH_REG3

WREG32_RLC:
for registers that support programming through the RLC interface,
the following sequence should be used (a plain-function sketch of
this sequence follows):
1) write the value to SCRATCH_REG0
2) write reg | 0x80000000 to SCRATCH_REG1
3) write 0x1 to RLC_SPARE_INT to notify the RLC
4) poll SCRATCH_REG1 to check whether it finished
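
For readability, the same sequence written as a plain function rather
than a macro (illustrative sketch only; the patch itself implements it
as the WREG32_RLC macro in soc15_common.h, and the register offsets are
resolved the same way the macro resolves them):

	static void rlcg_wreg(struct amdgpu_device *adev, u32 reg, u32 value)
	{
		u32 r0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
		u32 r1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
		u32 spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
		u32 i;

		WREG32(r0, value);		/* 1) value into SCRATCH_REG0 */
		WREG32(r1, reg | 0x80000000);	/* 2) reg plus busy bit 31 */
		WREG32(spare_int, 0x1);		/* 3) notify the RLC */
		for (i = 0; i < 5; i++) {	/* 4) poll until bit 31 clears */
			if (!(RREG32(r1) & 0x80000000))
				return;
			udelay(10);
		}
		pr_err("timeout: rlcg program reg:0x%05x failed!\n", reg);
	}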

Change-Id: I5017d2cc10d475a2f8f7204b8043b3e5f870348b
Signed-off-by: Trigger Huang 
---
 drivers/gpu/drm/amd/amdgpu/soc15_common.h | 57 ++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 49c2625..c634606 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -89,6 +89,61 @@
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
} while (0)
 
-#endif
 
+#define WREG32_RLC(reg, value) \
+   do {\
+   if (amdgpu_virt_support_rlc_prg_reg(adev)) {\
+   uint32_t i = 0; \
+   uint32_t retries = 5;   \
+   uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0;   \
+   uint32_t r1 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1;   \
+   uint32_t spare_int = adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT;  \
+   WREG32(r0, value);  \
+   WREG32(r1, (reg | 0x80000000)); \
+   WREG32(spare_int, 0x1); \
+   for (i = 0; i < retries; i++) { \
+   u32 tmp = RREG32(r1);   \
+   if (!(tmp & 0x80000000))\
+   break;  \
+   udelay(10); \
+   }   \
+   if (i >= retries)   \
+   pr_err("timeout: rlcg program reg:0x%05x failed!\n", reg); \
+   } else {\
+   WREG32(reg, value); \
+   }   \
+   } while (0)
+
+#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
+   do {\
+   uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
+   if (amdgpu_virt_support_rlc_prg_reg(adev)) {\
+   uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2;   \
+   uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3;   \
+   uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;   \
+   uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;   \
+   if (target_reg == grbm_cntl) \
+   WREG32(r2, value);  \
+   else if (target_reg == grbm_idx) \
+   WREG32(r3, value);  \
+   WREG32(target_reg, value);  \
+   } else {\
+   WREG32(target_reg, value); \
+   }   \
+   } while (0)
+
+#define WREG32_SOC15_RLC(ip, inst, reg, value) \
+   do {\
+   uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\
+   WREG32_RLC(target_reg, value); \
+   } while (0)
+
+#define WREG32_FIELD15_RLC(ip, idx, reg, field, val)   \
+WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \
+(RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \
+& ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+
+#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \
+WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value)
 
+#endif
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 6/9] drm/amdgpu: Support PSP VMR ring for Vega10 VF

2019-05-07 Thread Trigger Huang
Add VMR ring support for the Vega10 SR-IOV VF if supported by the PSP

Change-Id: I1990e4c9babdac95d9797e7870569c1c6f630585
Signed-off-by: Trigger Huang 
---
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 131 +-
 1 file changed, 99 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 143f0fa..3f58277 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -50,6 +50,10 @@ MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
 
 static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554};
 
+static bool psp_v3_1_support_vmr_ring(struct psp_context *psp);
+static int psp_v3_1_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type);
+
 static int psp_v3_1_init_microcode(struct psp_context *psp)
 {
struct amdgpu_device *adev = psp->adev;
@@ -296,27 +300,57 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
 
psp_v3_1_reroute_ih(psp);
 
-   /* Write low address of the ring to C2PMSG_69 */
-   psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
-   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
-   /* Write high address of the ring to C2PMSG_70 */
-   psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
-   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
-   /* Write size of ring to C2PMSG_71 */
-   psp_ring_reg = ring->ring_size;
-   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
-   /* Write the ring initialization command to C2PMSG_64 */
-   psp_ring_reg = ring_type;
-   psp_ring_reg = psp_ring_reg << 16;
-   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
-
-   /* there might be handshake issue with hardware which needs delay */
-   mdelay(20);
-
-   /* Wait for response flag (bit 31) in C2PMSG_64 */
-   ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
-  0x80000000, 0x80000000, false);
+   if (psp_v3_1_support_vmr_ring(psp)) {
+   ret = psp_v3_1_ring_stop(psp, ring_type);
+   if (ret) {
+   DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n");
+   return ret;
+   }
+
+   /* Write low address of the ring to C2PMSG_102 */
+   psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
+   /* Write high address of the ring to C2PMSG_103 */
+   psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
+   /* No size initialization for sriov  */
+   /* Write the ring initialization command to C2PMSG_101 */
+   psp_ring_reg = ring_type;
+   psp_ring_reg = psp_ring_reg << 16;
+   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
+
+   /* there might be hardware handshake issue which needs delay */
+   mdelay(20);
+
+   /* Wait for response flag (bit 31) in C2PMSG_101 */
+   ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
+   mmMP0_SMN_C2PMSG_101), 0x80000000,
+   0x80000000, false);
+   } else {
+
+   /* Write low address of the ring to C2PMSG_69 */
+   psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+   /* Write high address of the ring to C2PMSG_70 */
+   psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+   /* Write size of ring to C2PMSG_71 */
+   psp_ring_reg = ring->ring_size;
+   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+   /* Write the ring initialization command to C2PMSG_64 */
+   psp_ring_reg = ring_type;
+   psp_ring_reg = psp_ring_reg << 16;
+   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+   /* there might be hardware handshake issue which needs delay */
+   mdelay(20);
+
+   /* Wait for response flag (bit 31) in C2PMSG_64 */
+   ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
+   mmMP0_SMN_C2PMSG_64), 0x80000000,
+   0x80000000, false);
 
+   }
return ret;
 }
 
@@ -327,16 +361,31 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,
unsigned int psp_ring_reg = 0;
struct amdgpu_device *adev = psp->adev;
 
-   /* Write the ring destroy command to C2PMSG_64 */
-   psp_ring_reg = 3 << 16;
-   WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, 

[PATCH 3/9] drm/amdgpu: Add new PSP cmd GFX_CMD_ID_PROG_REG

2019-05-07 Thread Trigger Huang
Add new PSP command GFX_CMD_ID_PROG_REG definition

Change-Id: I685baa2a219cac60417c2aa609cd3d6b9ff2b0cf
Signed-off-by: Trigger Huang 
---
 drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h 
b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 2f79765..7f8edc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -94,6 +94,7 @@ enum psp_gfx_cmd_id
 GFX_CMD_ID_SAVE_RESTORE = 0x0008,   /* save/restore HW IP FW */
 GFX_CMD_ID_SETUP_VMR= 0x0009,   /* setup VMR region */
 GFX_CMD_ID_DESTROY_VMR  = 0x000A,   /* destroy VMR region */
+GFX_CMD_ID_PROG_REG = 0x000B,   /* program regs */
 };
 
 
@@ -217,6 +218,12 @@ struct psp_gfx_cmd_save_restore_ip_fw
 enum psp_gfx_fw_typefw_type;  /* FW type */
 };
 
+/* Command to setup register program */
+struct psp_gfx_cmd_reg_prog {
+   uint32_treg_value;
+   uint32_treg_id;
+};
+
 /* All GFX ring buffer commands. */
 union psp_gfx_commands
 {
@@ -226,6 +233,7 @@ union psp_gfx_commands
 struct psp_gfx_cmd_setup_tmrcmd_setup_tmr;
 struct psp_gfx_cmd_load_ip_fw   cmd_load_ip_fw;
 struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw;
+struct psp_gfx_cmd_reg_prog   cmd_setup_reg_prog;
 };
 
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 4/9] drm/amdgpu: implement PSP cmd GFX_CMD_ID_PROG_REG

2019-05-07 Thread Trigger Huang
Add an implementation to program registers through the PSP. Currently
the following IH registers are supported:
IH_RB_CNTL
IH_RB_CNTL_RING1
IH_RB_CNTL_RING2
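
As an illustrative sketch, a call site that wants IH_RB_CNTL programmed
through the PSP would use the new helper roughly like this (this mirrors
what patch 5/9 in this series does; the fallback is the usual MMIO path):

	if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl))
			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
	} else {
		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
	}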

Change-Id: I8e777f1080043066843d3962d3635e7075ecf21b
Signed-off-by: Trigger Huang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 28 
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 11 ++-
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 32388b5..b3bc0f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -286,6 +286,34 @@ static int psp_asd_load(struct psp_context *psp)
return ret;
 }
 
+static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+   uint32_t id, uint32_t value)
+{
+   cmd->cmd_id = GFX_CMD_ID_PROG_REG;
+   cmd->cmd.cmd_setup_reg_prog.reg_value = value;
+   cmd->cmd.cmd_setup_reg_prog.reg_id = id;
+}
+
+int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
+   uint32_t value)
+{
+   struct psp_gfx_cmd_resp *cmd = NULL;
+   int ret = 0;
+
+   if (reg >= PSP_REG_LAST)
+   return -EINVAL;
+
+   cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+   if (!cmd)
+   return -ENOMEM;
+
+   psp_prep_reg_prog_cmd_buf(cmd, reg, value);
+   ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+   kfree(cmd);
+   return ret;
+}
+
 static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
  uint64_t xgmi_ta_mc, uint64_t 
xgmi_mc_shared,
  uint32_t xgmi_ta_size, uint32_t 
shared_size)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index cde113f..23d90b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -62,6 +62,14 @@ struct psp_ring
uint32_tring_size;
 };
 
+/* More registers may be supported in the future */
+enum psp_reg_prog_id {
+   PSP_REG_IH_RB_CNTL= 0,  /* register IH_RB_CNTL */
+   PSP_REG_IH_RB_CNTL_RING1  = 1,  /* register IH_RB_CNTL_RING1 */
+   PSP_REG_IH_RB_CNTL_RING2  = 2,  /* register IH_RB_CNTL_RING2 */
+   PSP_REG_LAST
+};
+
 struct psp_funcs
 {
int (*init_microcode)(struct psp_context *psp);
@@ -250,5 +258,6 @@ int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable);
 
 extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
-
+int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
+   uint32_t value);
 #endif
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/9] drm/amdgpu: init vega10 SR-IOV reg access mode

2019-05-07 Thread Trigger Huang
Set a different register access mode according to the features
provided by the firmware

Change-Id: Ia03e25a5a3b188f66363a0af487edfa21aafefc5
Signed-off-by: Trigger Huang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 43 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   | 12 +
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c  | 19 +
 4 files changed, 77 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3ff8899..615e775 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1532,6 +1532,9 @@ static int amdgpu_device_ip_early_init(struct 
amdgpu_device *adev)
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
return -EAGAIN;
+
+   /* query the reg access mode at the very beginning */
+   amdgpu_virt_init_reg_access_mode(adev);
}
 
adev->pm.pp_feature = amdgpu_pp_feature_mask;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 462a04e..b50b6c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -375,4 +375,47 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device 
*adev)
}
 }
 
+void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev)
+{
+   struct amdgpu_virt *virt = &adev->virt;
 
+   if (virt->ops && virt->ops->init_reg_access_mode)
+   virt->ops->init_reg_access_mode(adev);
+
+}
+
+bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev)
+{
+   bool ret = false;
+   struct amdgpu_virt *virt = &adev->virt;
+
+   if (amdgpu_sriov_vf(adev)
+   && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH))
+   ret = true;
+
+   return ret;
+}
+
+bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev)
+{
+   bool ret = false;
+   struct amdgpu_virt *virt = &adev->virt;
+
+   if (amdgpu_sriov_vf(adev)
+   && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC))
+   ret = true;
+
+   return ret;
+}
+
+bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev)
+{
+   bool ret = false;
+   struct amdgpu_virt *virt = &adev->virt;
+
+   if (amdgpu_sriov_vf(adev)
+   && (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SETTING))
+   ret = true;
+
+   return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 722deef..eb4cb1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -48,6 +48,12 @@ struct amdgpu_vf_error_buffer {
uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
 };
 
+/* Depending on the firmware features, some new reg access modes are supported */
+#define AMDGPU_VIRT_REG_ACCESS_LEGACY  (1 << 0) /* direct MMIO */
+#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH  (1 << 1) /* by PSP */
+#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */
+#define AMDGPU_VIRT_REG_SKIP_SETTING   (1 << 3) /* skip setting reg */
+
 /**
  * struct amdgpu_virt_ops - amdgpu device virt operations
  */
@@ -57,6 +63,7 @@ struct amdgpu_virt_ops {
int (*reset_gpu)(struct amdgpu_device *adev);
int (*wait_reset)(struct amdgpu_device *adev);
void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 
data2, u32 data3);
+   void (*init_reg_access_mode)(struct amdgpu_device *adev);
 };
 
 /*
@@ -252,6 +259,7 @@ struct amdgpu_virt {
struct amdgpu_vf_error_buffer   vf_errors;
struct amdgpu_virt_fw_reserve   fw_reserve;
uint32_t gim_feature;
+   uint32_t reg_access_mode;
 };
 
 #define amdgpu_sriov_enabled(adev) \
@@ -295,5 +303,9 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned 
long obj_size,
unsigned int key,
unsigned int chksum);
 void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
+void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev);
+bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev);
+bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev);
+bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 73851eb..f548e22 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -26,6 +26,7 @@
 #include "nbio/nbio_6_1_sh_mask.h"
 #include "gc/gc_9_0_offset.h"
 #include "gc/gc_9_0_sh_mask.h"
+#include "mp/mp_9_0_offset.h"
 #include "soc15.h"
 #include "vega10_ih.h"
 #include "soc15_common.h"
@@ -369,10 +370,28 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device 

[PATCH 2/9] drm/amdgpu: initialize PSP before IH under SR-IOV

2019-05-07 Thread Trigger Huang
To support the new PSP feature where the PSP may provide an interface
to program the IH CNTL registers, initialize the PSP before the IH
under Vega10 SR-IOV VF

Signed-off-by: Trigger Huang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 +
 drivers/gpu/drm/amd/amdgpu/soc15.c | 24 ++--
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 615e775..dacd128 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1580,6 +1580,7 @@ static int amdgpu_device_ip_hw_init_phase1(struct 
amdgpu_device *adev)
if (adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+   (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
r = adev->ip_blocks[i].version->funcs->hw_init(adev);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 77493a0f..9150e93 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -597,12 +597,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
case CHIP_VEGA20:
	amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
	amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
-	amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
-	if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
-		if (adev->asic_type == CHIP_VEGA20)
-			amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
-		else
-			amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+
+	/* For Vega10 SR-IOV, PSP need to be initialized before IH */
+	if (amdgpu_sriov_vf(adev)) {
+		if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+			if (adev->asic_type == CHIP_VEGA20)
+				amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+			else
+				amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+		}
+		amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+	} else {
+		amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+		if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+			if (adev->asic_type == CHIP_VEGA20)
+				amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+			else
+				amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+		}
	}
	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
	amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 7/9] drm/amdgpu: Skip setting some regs under Vega10 VF

2019-05-07 Thread Trigger Huang
For a Vega10 SR-IOV VF, skip setting some regs because:
1) the host will program them
2) it avoids VF register programming violations

Change-Id: Id43e7fca7775035be47696c67a74ad418403036b
Signed-off-by: Trigger Huang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 14 --
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  3 +++
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 25 -
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  | 14 --
 drivers/gpu/drm/amd/amdgpu/soc15.c  | 16 +++-
 5 files changed, 50 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ef4272d..6b203c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -307,12 +307,14 @@ static void gfx_v9_0_init_golden_registers(struct 
amdgpu_device *adev)
 {
switch (adev->asic_type) {
case CHIP_VEGA10:
-   soc15_program_register_sequence(adev,
-golden_settings_gc_9_0,
-ARRAY_SIZE(golden_settings_gc_9_0));
-   soc15_program_register_sequence(adev,
-golden_settings_gc_9_0_vg10,
-ARRAY_SIZE(golden_settings_gc_9_0_vg10));
+   if (!amdgpu_virt_support_skip_setting(adev)) {
+   soc15_program_register_sequence(adev,
+golden_settings_gc_9_0,
+ARRAY_SIZE(golden_settings_gc_9_0));
+   soc15_program_register_sequence(adev,
+golden_settings_gc_9_0_vg10,
+ARRAY_SIZE(golden_settings_gc_9_0_vg10));
+   }
break;
case CHIP_VEGA12:
soc15_program_register_sequence(adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 727e26a..b41574e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1087,6 +1087,9 @@ static void gmc_v9_0_init_golden_registers(struct 
amdgpu_device *adev)
 
switch (adev->asic_type) {
case CHIP_VEGA10:
+   if (amdgpu_virt_support_skip_setting(adev))
+   break;
+   /* fall through */
case CHIP_VEGA20:
soc15_program_register_sequence(adev,
golden_settings_mmhub_1_0_0,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 1741056..8054131 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -111,6 +111,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct 
amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
 max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
 
+   if (amdgpu_virt_support_skip_setting(adev))
+   return;
+
/* Set default page address. */
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
adev->vm_manager.vram_base_offset;
@@ -156,6 +159,9 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device 
*adev)
 {
uint32_t tmp;
 
+   if (amdgpu_virt_support_skip_setting(adev))
+   return;
+
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
@@ -201,6 +207,9 @@ static void mmhub_v1_0_enable_system_domain(struct 
amdgpu_device *adev)
 
 static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
 {
+   if (amdgpu_virt_support_skip_setting(adev))
+   return;
+
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
 0X);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
@@ -337,11 +346,13 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
0);
WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
 
-   /* Setup L2 cache */
-   tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
-   tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
-   WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
-   WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
+   if (!amdgpu_virt_support_skip_setting(adev)) {
+   /* Setup L2 cache */
+   tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
+   tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+   WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
+   WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
+   }
 }
 
 /**
@@ -353,6 

[PATCH 0/9] Enable new L1 security for Vega10 SR-IOV

2019-05-07 Thread Trigger Huang
To support the new Vega10 SR-IOV L1 security, the KMD needs some modifications
1: Due to the new features supported in FW (PSP, RLC, etc.),
   we have more register access modes during initialization:
	1) request the PSP to program
	2) request the RLC to program
	3) request the SR-IOV host driver to program and skip
	programming them in amdgpu
	4) legacy MMIO access
   We will try to read the firmware version to see which mode
   is supported

2: If PSP FW support to program some registers, such as IH,
   we need:
1), initialize PSP before IH
2), send the specific command to PSP

3: Support the VMR ring. Compared with the physical platform's
   TMR ring, the programming sequence is nearly the same, but we
   use another register set, mmMP0_SMN_C2PMSG_101/102/103,
   to communicate with the PSP

4: Skip programming some registers in the guest KMD
   As some registers are covered by the new L1 security, amdgpu
   on the VF will fail to program them; that is fine, as these
   registers will be programmed on the SR-IOV host driver
   side.

5: Call the RLC to program some registers instead of using MMIO
   With the new L1 policy, some registers can't be programmed in
   the SR-IOV VF amdgpu via MMIO. Fortunately, the new RLC firmware
   supports programming them with a specific sequence, described
   in the patch commit messages (a sketch of the resulting
   dispatch follows this list)
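
Tying items 1-5 together, a register write on a VF can be dispatched on
the negotiated mode roughly as follows (illustrative glue only, not code
from this series; the real call sites use the helpers the individual
patches add):

	if (amdgpu_virt_support_rlc_prg_reg(adev))
		WREG32_RLC(reg, value);		/* patch 8/9: RLC-assisted */
	else if (amdgpu_virt_support_skip_setting(adev))
		; /* patch 7/9: skip, the SR-IOV host programs it */
	else
		WREG32(reg, value);		/* legacy direct MMIO */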

Trigger Huang (9):
  drm/amdgpu: init vega10 SR-IOV reg access mode
  drm/amdgpu: initialize PSP before IH under SR-IOV
  drm/amdgpu: Add new PSP cmd GFX_CMD_ID_PROG_REG
  drm/amdgpu: implement PSP cmd GFX_CMD_ID_PROG_REG
  drm/amdgpu: call psp to program ih cntl in SR-IOV
  drm/amdgpu: Support PSP VMR ring for Vega10 VF
  drm/amdgpu: Skip setting some regs under Vega10 VF
  drm/amdgpu: add basic func for RLC program reg
  drm/amdgpu: RLC to program regs for Vega10 SR-IOV

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  30 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   |  28 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h   |  11 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  43 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h  |  12 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 114 ++-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c  |  20 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |   3 +
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c   |  25 -
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c |  19 
 drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h   |   8 ++
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 131 --
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c|  14 ++-
 drivers/gpu/drm/amd/amdgpu/soc15.c|  52 ++---
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  57 +-
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c|  91 +--
 17 files changed, 514 insertions(+), 148 deletions(-)

-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 5/9] drm/amdgpu: call psp to program ih cntl in SR-IOV

2019-05-07 Thread Trigger Huang
Call the PSP to program the IH CNTL registers in SR-IOV if supported

Change-Id: I466dd66926221e764cbcddca48b1f0fe5cd798b4
Signed-off-by: Trigger Huang 
---
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 91 ++
 1 file changed, 82 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c 
b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 1b2f69a..fbb1ed8 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -48,14 +48,29 @@ static void vega10_ih_enable_interrupts(struct 
amdgpu_device *adev)
 
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
-   WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+   if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+   if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
+   DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+   return;
+   }
+   } else {
+   WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+   }
adev->irq.ih.enabled = true;
 
if (adev->irq.ih1.ring_size) {
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
   RB_ENABLE, 1);
-   WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+   if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+   if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
+   ih_rb_cntl)) {
+   DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
+   return;
+   }
+   } else {
+   WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+   }
adev->irq.ih1.enabled = true;
}
 
@@ -63,7 +78,15 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device 
*adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
   RB_ENABLE, 1);
-   WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+   if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+   if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
+   ih_rb_cntl)) {
+   DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
+   return;
+   }
+   } else {
+   WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+   }
adev->irq.ih2.enabled = true;
}
 }
@@ -81,7 +104,15 @@ static void vega10_ih_disable_interrupts(struct 
amdgpu_device *adev)
 
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0);
-   WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+   if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+   if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
+   DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+   return;
+   }
+   } else {
+   WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+   }
+
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0);
@@ -92,7 +123,15 @@ static void vega10_ih_disable_interrupts(struct 
amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
   RB_ENABLE, 0);
-   WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+   if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+   if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
+   ih_rb_cntl)) {
+   DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
+   return;
+   }
+   } else {
+   WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+   }
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
@@ -104,7 +143,16 @@ static void vega10_ih_disable_interrupts(struct 
amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
 

[PATCH 1/1] drm/amdgpu: Improve error handling for HMM

2019-05-07 Thread Kuehling, Felix
Use unsigned long for number of pages.

Check that pfns are valid after hmm_vma_fault. If they are not,
return an error instead of continuing with invalid page pointers and
PTEs.

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c14198737dcd..38ce11e338e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -734,10 +734,11 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
struct page **pages)
struct mm_struct *mm = gtt->usertask->mm;
unsigned long start = gtt->userptr;
unsigned long end = start + ttm->num_pages * PAGE_SIZE;
-   struct hmm_range *ranges;
struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
+   struct hmm_range *ranges;
+   unsigned long nr_pages, i;
uint64_t *pfns, f;
-   int r = 0, i, nr_pages;
+   int r = 0;
 
if (!mm) /* Happens during process shutdown */
return -ESRCH;
@@ -813,8 +814,14 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
struct page **pages)
 
up_read(>mmap_sem);
 
-   for (i = 0; i < ttm->num_pages; i++)
+   for (i = 0; i < ttm->num_pages; i++) {
pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]);
+   if (!pages[i]) {
+   pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
+  i, pfns[i]);
+   goto out_invalid_pfn;
+   }
+   }
gtt->ranges = ranges;
 
return 0;
@@ -827,6 +834,13 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
struct page **pages)
up_read(>mmap_sem);
 
return r;
+
+out_invalid_pfn:
+   for (i = 0; i < gtt->nr_ranges; i++)
+   hmm_vma_range_done(&ranges[i]);
+   kvfree(pfns);
+   kvfree(ranges);
+   return -ENOMEM;
 }
 
 /**
@@ -871,7 +885,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
  */
 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
-   unsigned i;
+   unsigned long i;
 
for (i = 0; i < ttm->num_pages; ++i)
ttm->pages[i] = pages ? pages[i] : NULL;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v3 5/5] drm: don't block fb changes for async plane updates

2019-05-07 Thread Sean Paul
On Wed, Mar 13, 2019 at 09:20:26PM -0300, Helen Koike wrote:
> In the case of a normal sync update, the preparation of framebuffers (be
> it calling drm_atomic_helper_prepare_planes() or doing setups with
> drm_framebuffer_get()) are performed in the new_state and the respective
> cleanups are performed in the old_state.
> 
> In the case of async updates, the preparation is also done in the
> new_state but the cleanups are done in the new_state (because updates
> are performed in place, i.e. in the current state).
> 
> The current code blocks async updates when the fb is changed, turning
> async updates into sync updates, slowing down cursor updates and
> introducing regressions in igt tests with errors of type:
> 
> "CRITICAL: completed 97 cursor updated in a period of 30 flips, we
> expect to complete approximately 15360 updates, with the threshold set
> at 7680"
> 
> Fb changes in async updates were prevented to avoid the following scenario:
> 
> - Async update, oldfb = NULL, newfb = fb1, prepare fb1, cleanup fb1
> - Async update, oldfb = fb1, newfb = fb2, prepare fb2, cleanup fb2
> - Non-async commit, oldfb = fb2, newfb = fb1, prepare fb1, cleanup fb2 (wrong)
> Where we have a single prepare call for fb2 but two cleanup calls for fb2.
> 
> To solve the above problems, instead of blocking async fb changes, we
> place the old framebuffer in the new_state object, so when the code
> performs cleanups in the new_state it will cleanup the old_fb and we
> will have the following scenario instead:
> 
> - Async update, oldfb = NULL, newfb = fb1, prepare fb1, no cleanup
> - Async update, oldfb = fb1, newfb = fb2, prepare fb2, cleanup fb1
> - Non-async commit, oldfb = fb2, newfb = fb1, prepare fb1, cleanup fb2
> 
> Where calls to prepare/cleanup are balanced.
> 
> Cc:  # v4.14+

I'm not convinced this should be cc: stable, seems more in the improvement
category than a bug fix.

> Fixes: 25dc194b34dd ("drm: Block fb changes for async plane updates")
> Suggested-by: Boris Brezillon 
> Signed-off-by: Helen Koike 
> Reviewed-by: Boris Brezillon 
> Reviewed-by: Nicholas Kazlauskas 
> 
> ---
> Hello,
> 
> I added a TODO in drm_atomic_helper_async_commit() regarding doing a
> full state swap(), Boris and Nicholas, let me know if this is ok and if
> I can keep your Reviewed-by tags)
> 
> As mentioned in the cover letter, I tested in almost all platforms with
> igt plane_cursor_legacy and kms_cursor_legacy and I didn't see any
> regressions. But I couldn't test on MSM and AMD because I don't have
> the hardware I would appreciate if anyone could help me testing those.
> 
> Thanks!
> Helen
> 
> Changes in v3:
> - Add Reviewed-by tags
> - Add TODO in drm_atomic_helper_async_commit()
> 
> Changes in v2:
> - Change the order of the patch in the series, add this as the last one.
> - Add documentation
> - s/ballanced/balanced
> 
>  drivers/gpu/drm/drm_atomic_helper.c  | 22 --
>  include/drm/drm_modeset_helper_vtables.h |  5 +
>  2 files changed, 17 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_atomic_helper.c 
> b/drivers/gpu/drm/drm_atomic_helper.c
> index 2453678d1186..de5812c362b5 100644
> --- a/drivers/gpu/drm/drm_atomic_helper.c
> +++ b/drivers/gpu/drm/drm_atomic_helper.c
> @@ -1608,15 +1608,6 @@ int drm_atomic_helper_async_check(struct drm_device 
> *dev,
>   old_plane_state->crtc != new_plane_state->crtc)
>   return -EINVAL;
>  
> - /*
> -  * FIXME: Since prepare_fb and cleanup_fb are always called on
> -  * the new_plane_state for async updates we need to block framebuffer
> -  * changes. This prevents use of a fb that's been cleaned up and
> -  * double cleanups from occuring.
> -  */
> - if (old_plane_state->fb != new_plane_state->fb)
> - return -EINVAL;
> -
>   funcs = plane->helper_private;
>   if (!funcs->atomic_async_update)
>   return -EINVAL;
> @@ -1647,6 +1638,8 @@ EXPORT_SYMBOL(drm_atomic_helper_async_check);
>   * drm_atomic_async_check() succeeds. Async commits are not supposed to swap
>   * the states like normal sync commits, but just do in-place changes on the
>   * current state.
> + *
> + * TODO: Implement full swap instead of doing in-place changes.
>   */
>  void drm_atomic_helper_async_commit(struct drm_device *dev,
>   struct drm_atomic_state *state)
> @@ -1657,6 +1650,9 @@ void drm_atomic_helper_async_commit(struct drm_device 
> *dev,
>   int i;
>  
>   for_each_new_plane_in_state(state, plane, plane_state, i) {
> + struct drm_framebuffer *new_fb = plane_state->fb;
> + struct drm_framebuffer *old_fb = plane->state->fb;
> +
>   funcs = plane->helper_private;
>   funcs->atomic_async_update(plane, plane_state);
>  
> @@ -1665,11 +1661,17 @@ void drm_atomic_helper_async_commit(struct drm_device 
> *dev,
>* plane->state in-place, make sure at least common
>

Re: [PATCH 1/1] drm/amdgpu: Reserve shared fence for eviction fence

2019-05-07 Thread Kasiviswanathan, Harish
Reviewed-by: Harish Kasiviswanathan 


On 2019-05-06 4:23 p.m., Kuehling, Felix wrote:
>
> Need to reserve space for the shared eviction fence when initializing
> a KFD VM.
>
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 20cf8e1e7445..e1cae4a37113 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -875,6 +875,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void 
> **process_info,
>   AMDGPU_FENCE_OWNER_KFD, false);
> if (ret)
> goto wait_pd_fail;
> +   ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 
> 1);
> +   if (ret)
> +   goto reserve_shared_fail;
> amdgpu_bo_fence(vm->root.base.bo,
> >process_info->eviction_fence->base, true);
> amdgpu_bo_unreserve(vm->root.base.bo);
> @@ -888,6 +891,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void 
> **process_info,
>
> return 0;
>
> +reserve_shared_fail:
>  wait_pd_fail:
>  validate_pd_fail:
> amdgpu_bo_unreserve(vm->root.base.bo);
> --
> 2.17.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH] drm/amdgpu: Report firmware versions with sysfs

2019-05-07 Thread Russell, Kent
The debugfs won't have anything in it that this interface won't provide. It 
does FW+VBIOS, and there will be separate files for each of those components.

From a housekeeping standpoint, should we make a subfolder called fw_version to 
dump the files into, or are they fine in the base sysfs tree?

 Kent

-Original Message-
From: amd-gfx  On Behalf Of Christian 
König
Sent: Tuesday, May 7, 2019 1:35 PM
To: Messinger, Ori ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: Report firmware versions with sysfs


Am 07.05.19 um 19:30 schrieb Messinger, Ori:
> Firmware versions can be found as separate sysfs files at:
> /sys/class/drm/cardX/device/ (where X is the card number) The firmware 
> versions are displayed in hexadecimal.
>
> Change-Id: I10cae4c0ca6f1b6a9ced07da143426e1d011e203
> Signed-off-by: Ori Messinger 

Well that looks like a really nice one, patch is Reviewed-by: Christian König 


Could we remove the debugfs interface now or should we keep it?

Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c  | 71 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h  |  2 +
>   3 files changed, 78 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 3f1c6b2d3d87..6bfee8d1f1c3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2701,6 +2701,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   if (r)
>   DRM_ERROR("registering pm debugfs failed (%d).\n", r);
>
> + r = amdgpu_ucode_sysfs_init(adev);
> + if (r)
> + DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
> +
>   r = amdgpu_debugfs_gem_init(adev);
>   if (r)
>   DRM_ERROR("registering gem debugfs failed (%d).\n", r); 
> @@ -2813,6 +2817,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
>   amdgpu_device_doorbell_fini(adev);
>   amdgpu_debugfs_regs_cleanup(adev);
>   device_remove_file(adev->dev, _attr_pcie_replay_count);
> + amdgpu_ucode_sysfs_fini(adev);
>   }
>
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
> index 7b33867036e7..3aa750e6bbf6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
> @@ -313,6 +313,77 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, 
> int load_type)
>   return AMDGPU_FW_LOAD_DIRECT;
>   }
>
> +#define FW_VERSION_ATTR(name, mode, field)   \
> +static ssize_t show_##name(struct device *dev,   
> \
> +   struct device_attribute *attr,\
> +   char *buf)\
> +{\
> + struct drm_device *ddev = dev_get_drvdata(dev); \
> + struct amdgpu_device *adev = ddev->dev_private; \
> + \
> + return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->field);   \
> +}\
> +static DEVICE_ATTR(name, mode, show_##name, NULL)
> +
> +FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version); 
> +FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version); 
> +FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version); 
> +FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version); 
> +FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version); 
> +FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version); 
> +FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version); 
> +FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version); 
> +FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version); 
> +FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version); 
> +FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version); 
> +FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version); 
> +FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version); 
> +FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version); 
> +FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_fw_version); 
> +FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_fw_version); 
> +FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version); 
> +FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version); 
> +FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version); 
> +FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version); 
> +FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
> +
> +static struct device_attribute *dev_fw_attr[] = {
> + _attr_vce_fw_version, _attr_uvd_fw_version,
> + _attr_mc_fw_version, _attr_me_fw_version,
> + _attr_pfp_fw_version, _attr_ce_fw_version,
> + 

Re: [PATCH] drm/amdgpu: Report firmware versions with sysfs

2019-05-07 Thread Christian König

Am 07.05.19 um 19:30 schrieb Messinger, Ori:

Firmware versions can be found as separate sysfs files at:
/sys/class/drm/cardX/device/ (where X is the card number)
The firmware versions are displayed in hexadecimal.

Change-Id: I10cae4c0ca6f1b6a9ced07da143426e1d011e203
Signed-off-by: Ori Messinger 


Well that looks like a really nice one, patch is Reviewed-by: Christian 
König 


Could we remove the debugfs interface now or should we keep it?

Christian.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c  | 71 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h  |  2 +
  3 files changed, 78 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3f1c6b2d3d87..6bfee8d1f1c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2701,6 +2701,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
DRM_ERROR("registering pm debugfs failed (%d).\n", r);
  
+	r = amdgpu_ucode_sysfs_init(adev);

+   if (r)
+   DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
+
r = amdgpu_debugfs_gem_init(adev);
if (r)
DRM_ERROR("registering gem debugfs failed (%d).\n", r);
@@ -2813,6 +2817,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
amdgpu_device_doorbell_fini(adev);
amdgpu_debugfs_regs_cleanup(adev);
device_remove_file(adev->dev, _attr_pcie_replay_count);
+   amdgpu_ucode_sysfs_fini(adev);
  }
  
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c

index 7b33867036e7..3aa750e6bbf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -313,6 +313,77 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int 
load_type)
return AMDGPU_FW_LOAD_DIRECT;
  }
  
+#define FW_VERSION_ATTR(name, mode, field)\

+static ssize_t show_##name(struct device *dev, \
+ struct device_attribute *attr,\
+ char *buf)\
+{  \
+   struct drm_device *ddev = dev_get_drvdata(dev); \
+   struct amdgpu_device *adev = ddev->dev_private;  \
+   \
+   return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->field);  \
+}  \
+static DEVICE_ATTR(name, mode, show_##name, NULL)
+
+FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version);
+FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version);
+FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version);
+FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version);
+FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version);
+FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version);
+FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version);
+FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version);
+FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version);
+FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version);
+FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version);
+FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version);
+FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version);
+FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version);
+FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_fw_version);
+FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_fw_version);
+FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version);
+FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
+FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
+FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version);
+FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
+
+static struct device_attribute *dev_fw_attr[] = {
+   _attr_vce_fw_version, _attr_uvd_fw_version,
+   _attr_mc_fw_version, _attr_me_fw_version,
+   _attr_pfp_fw_version, _attr_ce_fw_version,
+   _attr_rlc_fw_version, _attr_rlc_srlc_fw_version,
+   _attr_rlc_srlg_fw_version, _attr_rlc_srls_fw_version,
+   _attr_mec_fw_version, _attr_mec2_fw_version,
+   _attr_sos_fw_version, _attr_asd_fw_version,
+   _attr_ta_ras_fw_version, _attr_ta_xgmi_fw_version,
+   _attr_smc_fw_version, _attr_sdma_fw_version,
+   _attr_sdma2_fw_version, _attr_vcn_fw_version,
+   _attr_dmcu_fw_version
+};
+
+void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev)
+{
+   int i;
+
+   for (i = 0; i < ARRAY_SIZE(dev_fw_attr); i++)
+   device_remove_file(adev->dev, dev_fw_attr[i]);
+}
+
+int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)
+{
+   int i, ret;
+
+   for (i = 0; i < 

[PATCH] drm/amdgpu: Report firmware versions with sysfs

2019-05-07 Thread Messinger, Ori
Firmware versions can be found as separate sysfs files at:
/sys/class/drm/cardX/device/ (where X is the card number)
The firmware versions are displayed in hexadecimal.
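
For example, a userspace tool could read one of the new files like this
(sketch only; the card index and the attribute name are illustrative):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/class/drm/card0/device/vce_fw_version", "r");
		unsigned int ver;

		if (f && fscanf(f, "0x%x", &ver) == 1)
			printf("VCE firmware version: 0x%08x\n", ver);
		if (f)
			fclose(f);
		return 0;
	}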

Change-Id: I10cae4c0ca6f1b6a9ced07da143426e1d011e203
Signed-off-by: Ori Messinger 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c  | 71 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h  |  2 +
 3 files changed, 78 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3f1c6b2d3d87..6bfee8d1f1c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2701,6 +2701,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
DRM_ERROR("registering pm debugfs failed (%d).\n", r);
 
+   r = amdgpu_ucode_sysfs_init(adev);
+   if (r)
+   DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
+
r = amdgpu_debugfs_gem_init(adev);
if (r)
DRM_ERROR("registering gem debugfs failed (%d).\n", r);
@@ -2813,6 +2817,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
amdgpu_device_doorbell_fini(adev);
amdgpu_debugfs_regs_cleanup(adev);
 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
+   amdgpu_ucode_sysfs_fini(adev);
 }
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 7b33867036e7..3aa750e6bbf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -313,6 +313,77 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int 
load_type)
return AMDGPU_FW_LOAD_DIRECT;
 }
 
+#define FW_VERSION_ATTR(name, mode, field) \
+static ssize_t show_##name(struct device *dev, \
+ struct device_attribute *attr,\
+ char *buf)\
+{  \
+   struct drm_device *ddev = dev_get_drvdata(dev); \
+   struct amdgpu_device *adev = ddev->dev_private; \
+   \
+   return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->field);   \
+}  \
+static DEVICE_ATTR(name, mode, show_##name, NULL)
+
+FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version);
+FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version);
+FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version);
+FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version);
+FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version);
+FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version);
+FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version);
+FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version);
+FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version);
+FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version);
+FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version);
+FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version);
+FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version);
+FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version);
+FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_fw_version);
+FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_fw_version);
+FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version);
+FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
+FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
+FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version);
+FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
+
+static struct device_attribute *dev_fw_attr[] = {
+   &dev_attr_vce_fw_version, &dev_attr_uvd_fw_version,
+   &dev_attr_mc_fw_version, &dev_attr_me_fw_version,
+   &dev_attr_pfp_fw_version, &dev_attr_ce_fw_version,
+   &dev_attr_rlc_fw_version, &dev_attr_rlc_srlc_fw_version,
+   &dev_attr_rlc_srlg_fw_version, &dev_attr_rlc_srls_fw_version,
+   &dev_attr_mec_fw_version, &dev_attr_mec2_fw_version,
+   &dev_attr_sos_fw_version, &dev_attr_asd_fw_version,
+   &dev_attr_ta_ras_fw_version, &dev_attr_ta_xgmi_fw_version,
+   &dev_attr_smc_fw_version, &dev_attr_sdma_fw_version,
+   &dev_attr_sdma2_fw_version, &dev_attr_vcn_fw_version,
+   &dev_attr_dmcu_fw_version
+};
+
+void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev)
+{
+   int i;
+
+   for (i = 0; i < ARRAY_SIZE(dev_fw_attr); i++)
+   device_remove_file(adev->dev, dev_fw_attr[i]);
+}
+
+int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)
+{
+   int i, ret;
+
+   for (i = 0; i < ARRAY_SIZE(dev_fw_attr); i++) {
+   ret = device_create_file(adev->dev, dev_fw_attr[i]);
+   if (ret) {
+   DRM_ERROR("Failed to create %s\n",
+  

Re: [PATCH v15 11/17] drm/amdgpu, arm64: untag user pointers

2019-05-07 Thread Kuehling, Felix
On 2019-05-06 12:30 p.m., Andrey Konovalov wrote:
> [CAUTION: External Email]
>
> This patch is part of a series that extends the arm64 kernel ABI to allow
> passing tagged user pointers (with the top byte set to something other
> than 0x00) as syscall arguments.
>
> In amdgpu_gem_userptr_ioctl() and amdgpu_amdkfd_gpuvm.c/init_user_pages()
> an MMU notifier is set up with a (tagged) userspace pointer. The untagged
> address should be used so that MMU notifiers for the untagged address get
> correctly matched up with the right BO. This patch untags user pointers in
> amdgpu_gem_userptr_ioctl() for the GEM case and in amdgpu_amdkfd_gpuvm_
> alloc_memory_of_gpu() for the KFD case. This also makes sure that an
> untagged pointer is passed to amdgpu_ttm_tt_get_user_pages(), which uses
> it for vma lookups.
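
For context: on arm64, untagged_addr() boils down to a sign extension from
bit 55, which clears the tag in the top byte. A sketch of the idea only; the
authoritative definition is the one added by the series itself:

	/* Illustrative only: strip a top-byte tag by sign-extending
	 * from bit 55, as the series' untagged_addr() helper does. */
	static inline u64 untag_sketch(u64 addr)
	{
		return (u64)sign_extend64(addr, 55);
	}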
>
> Suggested-by: Kuehling, Felix 
> Signed-off-by: Andrey Konovalov 

Acked-by: Felix Kuehling 


> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c  | 2 ++
>   2 files changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 1921dec3df7a..20cac44ed449 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1121,7 +1121,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>  alloc_flags = 0;
>  if (!offset || !*offset)
>  return -EINVAL;
> -   user_addr = *offset;
> +   user_addr = untagged_addr(*offset);
>  } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
>  domain = AMDGPU_GEM_DOMAIN_GTT;
>  alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index d21dd2f369da..985cb82b2aa6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -286,6 +286,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void 
> *data,
>  uint32_t handle;
>  int r;
>
> +   args->addr = untagged_addr(args->addr);
> +
>  if (offset_in_page(args->addr | args->size))
>  return -EINVAL;
>
> --
> 2.21.0.1020.gf2820cf01a-goog
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v15 12/17] drm/radeon, arm64: untag user pointers in radeon_gem_userptr_ioctl

2019-05-07 Thread Kuehling, Felix
On 2019-05-06 12:30 p.m., Andrey Konovalov wrote:
> [CAUTION: External Email]
>
> This patch is part of a series that extends the arm64 kernel ABI to allow
> passing tagged user pointers (with the top byte set to something other
> than 0x00) as syscall arguments.
>
> In radeon_gem_userptr_ioctl() an MMU notifier is set up with a (tagged)
> userspace pointer. The untagged address should be used so that MMU
> notifiers for the untagged address get correctly matched up with the right
> BO. This function also calls radeon_ttm_tt_pin_userptr(), which uses the
> provided user pointers for vma lookups, which can only be done with
> untagged pointers.
>
> This patch untags user pointers in radeon_gem_userptr_ioctl().
>
> Signed-off-by: Andrey Konovalov 
Acked-by: Felix Kuehling 


> ---
>   drivers/gpu/drm/radeon/radeon_gem.c | 2 ++
>   1 file changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c 
> b/drivers/gpu/drm/radeon/radeon_gem.c
> index 44617dec8183..90eb78fb5eb2 100644
> --- a/drivers/gpu/drm/radeon/radeon_gem.c
> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
> @@ -291,6 +291,8 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void 
> *data,
>  uint32_t handle;
>  int r;
>
> +   args->addr = untagged_addr(args->addr);
> +
>  if (offset_in_page(args->addr | args->size))
>  return -EINVAL;
>
> --
> 2.21.0.1020.gf2820cf01a-goog
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/amdgpu: add EDC counter register

2019-05-07 Thread Zhu, James
Add the EDC counter registers needed by the gfx9 GPR EDC workaround,
which clears all EDC counters.
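
For illustration, such defines are consumed through the usual SOC15 offset
helper (a sketch only, assuming an adev in scope; the real users come with
the follow-up gfx9 patch):

	/* Illustrative only: resolve and read one of the new EDC
	 * counter registers via the standard SOC15 helper. */
	u32 td_edc_count = RREG32(SOC15_REG_OFFSET(GC, 0, mmTD_EDC_CNT));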

Signed-off-by: James Zhu 
Reviewed-by: Alex Deucher 
---
 .../drm/amd/include/asic_reg/gc/gc_9_0_offset.h| 31 ++
 1 file changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h 
b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
index 529b37d..f1d048e 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
@@ -829,6 +829,8 @@
 #define mmTD_CNTL_BASE_IDX                                                     0
 #define mmTD_STATUS                                                            0x0526
 #define mmTD_STATUS_BASE_IDX                                                   0
+#define mmTD_EDC_CNT                                                           0x052e
+#define mmTD_EDC_CNT_BASE_IDX                                                  0
 #define mmTD_DSM_CNTL                                                          0x052f
 #define mmTD_DSM_CNTL_BASE_IDX                                                 0
 #define mmTD_DSM_CNTL2                                                         0x0530
@@ -845,6 +847,8 @@
 #define mmTA_STATUS_BASE_IDX                                                   0
 #define mmTA_SCRATCH                                                           0x0564
 #define mmTA_SCRATCH_BASE_IDX                                                  0
+#define mmTA_EDC_CNT                                                           0x0586
+#define mmTA_EDC_CNT_BASE_IDX                                                  0
 
 
 // addressBlock: gc_gdsdec
@@ -1051,6 +1055,13 @@
 #define mmGC_USER_RB_BACKEND_DISABLE_BASE_IDX                                  0
 
 
+// addressBlock: gc_ea_gceadec2
+// base address: 0x9c00
+#define mmGCEA_EDC_CNT                                                         0x0706
+#define mmGCEA_EDC_CNT_BASE_IDX                                                0
+#define mmGCEA_EDC_CNT2                                                        0x0707
+#define mmGCEA_EDC_CNT2_BASE_IDX                                               0
+
 // addressBlock: gc_rmi_rmidec
 // base address: 0x9e00
 #define mmRMI_GENERAL_CNTL                                                     0x0780
@@ -1709,6 +1720,8 @@
 #define mmTC_CFG_L1_VOLATILE_BASE_IDX                                          0
 #define mmTC_CFG_L2_VOLATILE                                                   0x0b23
 #define mmTC_CFG_L2_VOLATILE_BASE_IDX                                          0
+#define mmTCI_EDC_CNT                                                          0x0b60
+#define mmTCI_EDC_CNT_BASE_IDX                                                 0
 #define mmTCI_STATUS                                                           0x0b61
 #define mmTCI_STATUS_BASE_IDX                                                  0
 #define mmTCI_CNTL_1                                                           0x0b62
@@ -2594,6 +2607,24 @@
 #define mmCP_RB_DOORBELL_CONTROL_SCH_7_BASE_IDX                                0
 #define mmCP_RB_DOORBELL_CLEAR                                                 0x1188
 #define mmCP_RB_DOORBELL_CLEAR_BASE_IDX                                        0
+#define mmCPF_EDC_TAG_CNT                                                      0x1189
+#define mmCPF_EDC_TAG_CNT_BASE_IDX                                             0
+#define mmCPF_EDC_ROQ_CNT                                                      0x118a
+#define mmCPF_EDC_ROQ_CNT_BASE_IDX                                             0
+#define mmCPG_EDC_TAG_CNT                                                      0x118b
+#define mmCPG_EDC_TAG_CNT_BASE_IDX                                             0
+#define mmCPG_EDC_DMA_CNT                                                      0x118d
+#define mmCPG_EDC_DMA_CNT_BASE_IDX                                             0
+#define mmCPC_EDC_SCRATCH_CNT

[PATCH 2/2] drm/amdgpu: add gfx9 gpr EDC workaround when RAS is enabled

2019-05-07 Thread Zhu, James
When RAS is enabled, initialize the VGPRs/LDS/SGPRs and
reset the EDC error counters. This is done in late_init,
before the RAS TA GFX enable.
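
The counters are cleared by reading them back, so the reset amounts to one
read per entry of the register table added below (a condensed sketch,
assuming the soc15_reg_entry field layout of the SOC15_REG_ENTRY convention
and an adev in scope):

	/* Illustrative only: one read per table entry clears the counter. */
	int i;

	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
		const struct soc15_reg_entry *e = &sec_ded_counter_registers[i];

		RREG32(adev->reg_offset[e->hwip][e->inst][e->seg] + e->reg_offset);
	}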

Signed-off-by: James Zhu 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 245 ++
 drivers/gpu/drm/amd/amdgpu/soc15.h|  10 ++
 2 files changed, 255 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ba67d10..14e671d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -34,6 +34,7 @@
 #include "vega10_enum.h"
 #include "hdp/hdp_4_0_offset.h"
 
+#include "soc15.h"
 #include "soc15_common.h"
 #include "clearstate_gfx9.h"
 #include "v9_structs.h"
@@ -3529,6 +3530,245 @@ static void gfx_v9_0_ring_emit_gds_switch(struct 
amdgpu_ring *ring,
   (1 << (oa_size + oa_base)) - (1 << oa_base));
 }
 
+static const u32 vgpr_init_compute_shader[] =
+{
+   0xb07c, 0xbe8000ff,
+   0x00f8, 0xbf110800,
+   0x7e000280, 0x7e020280,
+   0x7e040280, 0x7e060280,
+   0x7e080280, 0x7e0a0280,
+   0x7e0c0280, 0x7e0e0280,
+   0x80808800, 0xbe803200,
+   0xbf84fff5, 0xbf9c,
+   0xd28c0001, 0x0001007f,
+   0xd28d0001, 0x0002027e,
+   0x10020288, 0xb8810904,
+   0xb7814000, 0xd1196a01,
+   0x0301, 0xbe800087,
+   0xbefc00c1, 0xd89c4000,
+   0x00020201, 0xd89cc080,
+   0x00040401, 0x320202ff,
+   0x0800, 0x80808100,
+   0xbf84fff8, 0x7e020280,
+   0xbf81, 0x,
+};
+
+static const u32 sgpr_init_compute_shader[] =
+{
+   0xb07c, 0xbe8000ff,
+   0x005f, 0xbee50080,
+   0xbe812c65, 0xbe822c65,
+   0xbe832c65, 0xbe842c65,
+   0xbe852c65, 0xb77c0005,
+   0x80808500, 0xbf84fff8,
+   0xbe800080, 0xbf81,
+};
+
+static const struct soc15_reg_entry vgpr_init_regs[] = {
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x100 }, /* CU_GROUP_COUNT=1 */
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x17f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x40 },  /* 64KB LDS */
+};
+
+static const struct soc15_reg_entry sgpr_init_regs[] = {
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x100 }, /* CU_GROUP_COUNT=1 */
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
+};
+
+static const struct soc15_reg_entry sec_ded_counter_registers[] = {
+   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) },
+   { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) },
+   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) },
+   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) 

Re: [PATCH 4/4] drm/amd/powerplay: update Vega10 power state on OD

2019-05-07 Thread Alex Deucher
On Tue, May 7, 2019 at 2:09 AM Evan Quan  wrote:
>
> Update Vega10 top performance level power state accordingly
> on OD.
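
In short, each clock of the top performance level is clamped to the last
(highest) entry of the corresponding DPM table; condensed, the pattern
applied below is:

	/* Condensed sketch of the clamping pattern used in this patch. */
	vega10_ps->performance_levels[max_level].gfx_clock =
		gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value;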
>
> Change-Id: Iaadeefb2904222bf5f4d54b39d7179ce53f92ac0
> Signed-off-by: Evan Quan 

Series is:
Acked-by: Alex Deucher 

> ---
>  .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c| 59 +++
>  1 file changed, 59 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c 
> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> index f4b81f50b185..4878938ecf33 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> @@ -5009,6 +5009,63 @@ static bool vega10_check_clk_voltage_valid(struct 
> pp_hwmgr *hwmgr,
> return true;
>  }
>
> +static void vega10_odn_update_power_state(struct pp_hwmgr *hwmgr)
> +{
> +   struct vega10_hwmgr *data = hwmgr->backend;
> +   struct pp_power_state *ps = hwmgr->request_ps;
> +   struct vega10_power_state *vega10_ps;
> +   struct vega10_single_dpm_table *gfx_dpm_table =
> +   &data->dpm_table.gfx_table;
> +   struct vega10_single_dpm_table *soc_dpm_table =
> +   &data->dpm_table.soc_table;
> +   struct vega10_single_dpm_table *mem_dpm_table =
> +   &data->dpm_table.mem_table;
> +   int max_level;
> +
> +   if (!ps)
> +   return;
> +
> +   vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
> +   max_level = vega10_ps->performance_level_count - 1;
> +
> +   if (vega10_ps->performance_levels[max_level].gfx_clock !=
> +   gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value)
> +   vega10_ps->performance_levels[max_level].gfx_clock =
> +   gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 
> 1].value;
> +
> +   if (vega10_ps->performance_levels[max_level].soc_clock !=
> +   soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value)
> +   vega10_ps->performance_levels[max_level].soc_clock =
> +   soc_dpm_table->dpm_levels[soc_dpm_table->count - 
> 1].value;
> +
> +   if (vega10_ps->performance_levels[max_level].mem_clock !=
> +   mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value)
> +   vega10_ps->performance_levels[max_level].mem_clock =
> +   mem_dpm_table->dpm_levels[mem_dpm_table->count - 
> 1].value;
> +
> +   if (!hwmgr->ps)
> +   return;
> +
> +   ps = (struct pp_power_state *)((unsigned long)(hwmgr->ps) + 
> hwmgr->ps_size * (hwmgr->num_ps - 1));
> +   vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
> +   max_level = vega10_ps->performance_level_count - 1;
> +
> +   if (vega10_ps->performance_levels[max_level].gfx_clock !=
> +   gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value)
> +   vega10_ps->performance_levels[max_level].gfx_clock =
> +   gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 
> 1].value;
> +
> +   if (vega10_ps->performance_levels[max_level].soc_clock !=
> +   soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value)
> +   vega10_ps->performance_levels[max_level].soc_clock =
> +   soc_dpm_table->dpm_levels[soc_dpm_table->count - 
> 1].value;
> +
> +   if (vega10_ps->performance_levels[max_level].mem_clock !=
> +   mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value)
> +   vega10_ps->performance_levels[max_level].mem_clock =
> +   mem_dpm_table->dpm_levels[mem_dpm_table->count - 
> 1].value;
> +}
> +
>  static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,
> enum PP_OD_DPM_TABLE_COMMAND 
> type)
>  {
> @@ -5079,6 +5136,7 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr 
> *hwmgr,
> podn_vdd_dep->entries[podn_vdd_dep->count - 
> 1].vddInd;
> }
> }
> +   vega10_odn_update_power_state(hwmgr);
>  }
>
>  static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
> @@ -5113,6 +5171,7 @@ static int vega10_odn_edit_dpm_table(struct pp_hwmgr 
> *hwmgr,
> } else if (PP_OD_RESTORE_DEFAULT_TABLE == type) {
> memcpy(&(data->dpm_table), &(data->golden_dpm_table), 
> sizeof(struct vega10_dpm_table));
> vega10_odn_initial_default_setting(hwmgr);
> +   vega10_odn_update_power_state(hwmgr);
> return 0;
> } else if (PP_OD_COMMIT_DPM_TABLE == type) {
> vega10_check_dpm_table_updated(hwmgr);
> --
> 2.21.0
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/2] drm/ttm: fix busy memory to fail other user v6

2019-05-07 Thread Thomas Hellstrom

On 5/7/19 1:24 PM, Christian König wrote:

On 07.05.19 at 13:22, zhoucm1 wrote:



On 2019-05-07 19:13, Koenig, Christian wrote:

On 07.05.19 at 13:08, zhoucm1 wrote:


On 2019-05-07 18:53, Koenig, Christian wrote:

On 07.05.19 at 11:36, Chunming Zhou wrote:

heavy gpu job could occupy memory for a long time, which leads other
users to fail to get memory.

basically pick up Christian idea:

1. Reserve the BO in DC using a ww_mutex ticket (trivial).
2. If we then run into this EBUSY condition in TTM check if the BO
we need memory for (or rather the ww_mutex of its reservation
object) has a ticket assigned.
3. If we have a ticket we grab a reference to the first BO on the
LRU, drop the LRU lock and try to grab the reservation lock with the
ticket.
4. If getting the reservation lock with the ticket succeeded we
check if the BO is still the first one on the LRU in question (the
BO could have moved).
5. If the BO is still the first one on the LRU in question we try to
evict it as we would evict any other BO.
6. If any of the "If's" above fail we just back off and return 
-EBUSY.


v2: fix some minor check
v3: address Christian v2 comments.
v4: fix some missing
v5: handle first_bo unlock and bo_get/put
v6: abstract a unified iterate function, and handle all possible
use cases, not only pinned bos.

Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
Signed-off-by: Chunming Zhou 
---
    drivers/gpu/drm/ttm/ttm_bo.c | 113
++-
    1 file changed, 97 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
b/drivers/gpu/drm/ttm/ttm_bo.c
index 8502b3ed2d88..bbf1d14d00a7 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
 * b. Otherwise, trylock it.
 */
    static bool ttm_bo_evict_swapout_allowable(struct
ttm_buffer_object *bo,
-    struct ttm_operation_ctx *ctx, bool *locked)
+    struct ttm_operation_ctx *ctx, bool *locked, bool 
*busy)

    {
    bool ret = false;
       *locked = false;
+    if (busy)
+    *busy = false;
    if (bo->resv == ctx->resv) {
    reservation_object_assert_held(bo->resv);
    if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
@@ -779,35 +781,45 @@ static bool
ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
    } else {
    *locked = reservation_object_trylock(bo->resv);
    ret = *locked;
+    if (!ret && busy)
+    *busy = true;
    }
       return ret;
    }
    -static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
-   uint32_t mem_type,
-   const struct ttm_place *place,
-   struct ttm_operation_ctx *ctx)
+static struct ttm_buffer_object*
+ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
+ struct ttm_mem_type_manager *man,
+ const struct ttm_place *place,
+ struct ttm_operation_ctx *ctx,
+ struct ttm_buffer_object **first_bo,
+ bool *locked)
    {
-    struct ttm_bo_global *glob = bdev->glob;
-    struct ttm_mem_type_manager *man = &bdev->man[mem_type];
    struct ttm_buffer_object *bo = NULL;
-    bool locked = false;
-    unsigned i;
-    int ret;
+    int i;
    -    spin_lock(&glob->lru_lock);
+    if (first_bo)
+    *first_bo = NULL;
    for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
    list_for_each_entry(bo, &man->lru[i], lru) {
-    if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
+    bool busy = false;
+    if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
+    &busy)) {

A newline between declaration and code please.


+    if (first_bo && !(*first_bo) && busy) {
+    ttm_bo_get(bo);
+    *first_bo = bo;
+    }
    continue;
+    }
       if (place && !bdev->driver->eviction_valuable(bo,
  place)) {
-    if (locked)
+    if (*locked)
reservation_object_unlock(bo->resv);
    continue;
    }
+
    break;
    }
    @@ -818,9 +830,66 @@ static int ttm_mem_evict_first(struct
ttm_bo_device *bdev,
    bo = NULL;
    }
    +    return bo;
+}
+
+static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
+   uint32_t mem_type,
+   const struct ttm_place *place,
+   struct ttm_operation_ctx *ctx)
+{
+    struct ttm_bo_global *glob = bdev->glob;
+    struct ttm_mem_type_manager *man = &bdev->man[mem_type];
+    struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
+    bool locked = false;
+    int ret;
+
+    spin_lock(&glob->lru_lock);
+    bo = ttm_mem_find_evitable_bo(bdev, man, place, ctx, &first_bo,
+  &locked);
    if (!bo) {
+    struct ttm_operation_ctx busy_ctx;
+
    spin_unlock(&glob->lru_lock);
-    return 

Re: [PATCH 1/2] drm/ttm: fix busy memory to fail other user v6

2019-05-07 Thread Koenig, Christian
On 07.05.19 at 13:37, Thomas Hellstrom wrote:
> [CAUTION: External Email]
>
> On 5/7/19 1:24 PM, Christian König wrote:
>> On 07.05.19 at 13:22, zhoucm1 wrote:
>>>
>>>
>>> On 2019-05-07 19:13, Koenig, Christian wrote:
 On 07.05.19 at 13:08, zhoucm1 wrote:
>
> On 2019-05-07 18:53, Koenig, Christian wrote:
>> On 07.05.19 at 11:36, Chunming Zhou wrote:
>>> heavy gpu job could occupy memory for a long time, which leads other
>>> users to fail to get memory.
>>>
>>> basically pick up Christian idea:
>>>
>>> 1. Reserve the BO in DC using a ww_mutex ticket (trivial).
>>> 2. If we then run into this EBUSY condition in TTM check if the BO
>>> we need memory for (or rather the ww_mutex of its reservation
>>> object) has a ticket assigned.
>>> 3. If we have a ticket we grab a reference to the first BO on the
>>> LRU, drop the LRU lock and try to grab the reservation lock with 
>>> the
>>> ticket.
>>> 4. If getting the reservation lock with the ticket succeeded we
>>> check if the BO is still the first one on the LRU in question (the
>>> BO could have moved).
>>> 5. If the BO is still the first one on the LRU in question we 
>>> try to
>>> evict it as we would evict any other BO.
>>> 6. If any of the "If's" above fail we just back off and return
>>> -EBUSY.
>>>
>>> v2: fix some minor check
>>> v3: address Christian v2 comments.
>>> v4: fix some missing
>>> v5: handle first_bo unlock and bo_get/put
>>> v6: abstract a unified iterate function, and handle all possible
>>> use cases, not only pinned bos.
>>>
>>> Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
>>> Signed-off-by: Chunming Zhou 
>>> ---
>>>     drivers/gpu/drm/ttm/ttm_bo.c | 113
>>> ++-
>>>     1 file changed, 97 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
>>> b/drivers/gpu/drm/ttm/ttm_bo.c
>>> index 8502b3ed2d88..bbf1d14d00a7 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>> @@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>>>  * b. Otherwise, trylock it.
>>>  */
>>>     static bool ttm_bo_evict_swapout_allowable(struct
>>> ttm_buffer_object *bo,
>>> -    struct ttm_operation_ctx *ctx, bool *locked)
>>> +    struct ttm_operation_ctx *ctx, bool *locked, bool
>>> *busy)
>>>     {
>>>     bool ret = false;
>>>    *locked = false;
>>> +    if (busy)
>>> +    *busy = false;
>>>     if (bo->resv == ctx->resv) {
>>> reservation_object_assert_held(bo->resv);
>>>     if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
>>> @@ -779,35 +781,45 @@ static bool
>>> ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>>>     } else {
>>>     *locked = reservation_object_trylock(bo->resv);
>>>     ret = *locked;
>>> +    if (!ret && busy)
>>> +    *busy = true;
>>>     }
>>>    return ret;
>>>     }
>>>     -static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>> -   uint32_t mem_type,
>>> -   const struct ttm_place *place,
>>> -   struct ttm_operation_ctx *ctx)
>>> +static struct ttm_buffer_object*
>>> +ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
>>> + struct ttm_mem_type_manager *man,
>>> + const struct ttm_place *place,
>>> + struct ttm_operation_ctx *ctx,
>>> + struct ttm_buffer_object **first_bo,
>>> + bool *locked)
>>>     {
>>> -    struct ttm_bo_global *glob = bdev->glob;
>>> -    struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>>>     struct ttm_buffer_object *bo = NULL;
>>> -    bool locked = false;
>>> -    unsigned i;
>>> -    int ret;
>>> +    int i;
>>>     -    spin_lock(&glob->lru_lock);
>>> +    if (first_bo)
>>> +    *first_bo = NULL;
>>>     for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>     list_for_each_entry(bo, &man->lru[i], lru) {
>>> -    if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
>>> +    bool busy = false;
>>> +    if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
>>> +    &busy)) {
>> A newline between declaration and code please.
>>
>>> +    if (first_bo && !(*first_bo) && busy) {
>>> +    ttm_bo_get(bo);
>>> +    *first_bo = bo;
>>> +    }
>>>     continue;
>>> +    }
>>>    if (place && 
>>> !bdev->driver->eviction_valuable(bo,
>>>   place)) {
>>> -    if 

Re: [PATCH 1/2] drm/ttm: fix busy memory to fail other user v6

2019-05-07 Thread Christian König

On 07.05.19 at 13:22, zhoucm1 wrote:



On 2019-05-07 19:13, Koenig, Christian wrote:

On 07.05.19 at 13:08, zhoucm1 wrote:


On 2019-05-07 18:53, Koenig, Christian wrote:

On 07.05.19 at 11:36, Chunming Zhou wrote:

heavy gpu job could occupy memory for a long time, which leads other
users to fail to get memory.

basically pick up Christian idea:

1. Reserve the BO in DC using a ww_mutex ticket (trivial).
2. If we then run into this EBUSY condition in TTM check if the BO
we need memory for (or rather the ww_mutex of its reservation
object) has a ticket assigned.
3. If we have a ticket we grab a reference to the first BO on the
LRU, drop the LRU lock and try to grab the reservation lock with the
ticket.
4. If getting the reservation lock with the ticket succeeded we
check if the BO is still the first one on the LRU in question (the
BO could have moved).
5. If the BO is still the first one on the LRU in question we try to
evict it as we would evict any other BO.
6. If any of the "If's" above fail we just back off and return 
-EBUSY.


v2: fix some minor check
v3: address Christian v2 comments.
v4: fix some missing
v5: handle first_bo unlock and bo_get/put
v6: abstract a unified iterate function, and handle all possible
use cases, not only pinned bos.

Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
Signed-off-by: Chunming Zhou 
---
    drivers/gpu/drm/ttm/ttm_bo.c | 113
++-
    1 file changed, 97 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
b/drivers/gpu/drm/ttm/ttm_bo.c
index 8502b3ed2d88..bbf1d14d00a7 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
 * b. Otherwise, trylock it.
 */
    static bool ttm_bo_evict_swapout_allowable(struct
ttm_buffer_object *bo,
-    struct ttm_operation_ctx *ctx, bool *locked)
+    struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
    {
    bool ret = false;
       *locked = false;
+    if (busy)
+    *busy = false;
    if (bo->resv == ctx->resv) {
    reservation_object_assert_held(bo->resv);
    if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
@@ -779,35 +781,45 @@ static bool
ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
    } else {
    *locked = reservation_object_trylock(bo->resv);
    ret = *locked;
+    if (!ret && busy)
+    *busy = true;
    }
       return ret;
    }
    -static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
-   uint32_t mem_type,
-   const struct ttm_place *place,
-   struct ttm_operation_ctx *ctx)
+static struct ttm_buffer_object*
+ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
+ struct ttm_mem_type_manager *man,
+ const struct ttm_place *place,
+ struct ttm_operation_ctx *ctx,
+ struct ttm_buffer_object **first_bo,
+ bool *locked)
    {
-    struct ttm_bo_global *glob = bdev->glob;
-    struct ttm_mem_type_manager *man = &bdev->man[mem_type];
    struct ttm_buffer_object *bo = NULL;
-    bool locked = false;
-    unsigned i;
-    int ret;
+    int i;
    -    spin_lock(&glob->lru_lock);
+    if (first_bo)
+    *first_bo = NULL;
    for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
    list_for_each_entry(bo, &man->lru[i], lru) {
-    if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
+    bool busy = false;
+    if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
+    &busy)) {

A newline between declaration and code please.


+    if (first_bo && !(*first_bo) && busy) {
+    ttm_bo_get(bo);
+    *first_bo = bo;
+    }
    continue;
+    }
       if (place && !bdev->driver->eviction_valuable(bo,
  place)) {
-    if (locked)
+    if (*locked)
reservation_object_unlock(bo->resv);
    continue;
    }
+
    break;
    }
    @@ -818,9 +830,66 @@ static int ttm_mem_evict_first(struct
ttm_bo_device *bdev,
    bo = NULL;
    }
    +    return bo;
+}
+
+static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
+   uint32_t mem_type,
+   const struct ttm_place *place,
+   struct ttm_operation_ctx *ctx)
+{
+    struct ttm_bo_global *glob = bdev->glob;
+    struct ttm_mem_type_manager *man = &bdev->man[mem_type];
+    struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
+    bool locked = false;
+    int ret;
+
+    spin_lock(&glob->lru_lock);
+    bo = ttm_mem_find_evitable_bo(bdev, man, place, ctx, &first_bo,
+  &locked);
    if (!bo) {
+    struct ttm_operation_ctx busy_ctx;
+
    spin_unlock(&glob->lru_lock);
-    return -EBUSY;
+    /* check if other user occupy 

Re: [PATCH 1/2] drm/ttm: fix busy memory to fail other user v6

2019-05-07 Thread zhoucm1



On 2019-05-07 19:13, Koenig, Christian wrote:

On 07.05.19 at 13:08, zhoucm1 wrote:


On 2019-05-07 18:53, Koenig, Christian wrote:

On 07.05.19 at 11:36, Chunming Zhou wrote:

heavy gpu job could occupy memory for a long time, which leads other
users to fail to get memory.

basically pick up Christian idea:

1. Reserve the BO in DC using a ww_mutex ticket (trivial).
2. If we then run into this EBUSY condition in TTM check if the BO
we need memory for (or rather the ww_mutex of its reservation
object) has a ticket assigned.
3. If we have a ticket we grab a reference to the first BO on the
LRU, drop the LRU lock and try to grab the reservation lock with the
ticket.
4. If getting the reservation lock with the ticket succeeded we
check if the BO is still the first one on the LRU in question (the
BO could have moved).
5. If the BO is still the first one on the LRU in question we try to
evict it as we would evict any other BO.
6. If any of the "If's" above fail we just back off and return -EBUSY.

v2: fix some minor check
v3: address Christian v2 comments.
v4: fix some missing
v5: handle first_bo unlock and bo_get/put
v6: abstract a unified iterate function, and handle all possible
use cases, not only pinned bos.

Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
Signed-off-by: Chunming Zhou 
---
    drivers/gpu/drm/ttm/ttm_bo.c | 113
++-
    1 file changed, 97 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
b/drivers/gpu/drm/ttm/ttm_bo.c
index 8502b3ed2d88..bbf1d14d00a7 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
     * b. Otherwise, trylock it.
     */
    static bool ttm_bo_evict_swapout_allowable(struct
ttm_buffer_object *bo,
-    struct ttm_operation_ctx *ctx, bool *locked)
+    struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
    {
    bool ret = false;
       *locked = false;
+    if (busy)
+    *busy = false;
    if (bo->resv == ctx->resv) {
    reservation_object_assert_held(bo->resv);
    if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
@@ -779,35 +781,45 @@ static bool
ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
    } else {
    *locked = reservation_object_trylock(bo->resv);
    ret = *locked;
+    if (!ret && busy)
+    *busy = true;
    }
       return ret;
    }
    -static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
-   uint32_t mem_type,
-   const struct ttm_place *place,
-   struct ttm_operation_ctx *ctx)
+static struct ttm_buffer_object*
+ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
+ struct ttm_mem_type_manager *man,
+ const struct ttm_place *place,
+ struct ttm_operation_ctx *ctx,
+ struct ttm_buffer_object **first_bo,
+ bool *locked)
    {
-    struct ttm_bo_global *glob = bdev->glob;
-    struct ttm_mem_type_manager *man = &bdev->man[mem_type];
    struct ttm_buffer_object *bo = NULL;
-    bool locked = false;
-    unsigned i;
-    int ret;
+    int i;
    -    spin_lock(&glob->lru_lock);
+    if (first_bo)
+    *first_bo = NULL;
    for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
    list_for_each_entry(bo, &man->lru[i], lru) {
-    if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
+    bool busy = false;
+    if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
+    &busy)) {

A newline between declaration and code please.


+    if (first_bo && !(*first_bo) && busy) {
+    ttm_bo_get(bo);
+    *first_bo = bo;
+    }
    continue;
+    }
       if (place && !bdev->driver->eviction_valuable(bo,
  place)) {
-    if (locked)
+    if (*locked)
    reservation_object_unlock(bo->resv);
    continue;
    }
+
    break;
    }
    @@ -818,9 +830,66 @@ static int ttm_mem_evict_first(struct
ttm_bo_device *bdev,
    bo = NULL;
    }
    +    return bo;
+}
+
+static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
+   uint32_t mem_type,
+   const struct ttm_place *place,
+   struct ttm_operation_ctx *ctx)
+{
+    struct ttm_bo_global *glob = bdev->glob;
+    struct ttm_mem_type_manager *man = &bdev->man[mem_type];
+    struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
+    bool locked = false;
+    int ret;
+
+    spin_lock(&glob->lru_lock);
+    bo = ttm_mem_find_evitable_bo(bdev, man, place, ctx, &first_bo,
+  &locked);
    if (!bo) {
+    struct ttm_operation_ctx busy_ctx;
+
    spin_unlock(&glob->lru_lock);
-    return -EBUSY;
+    /* check if other user occupy memory too long 

Re: [PATCH 1/2] drm/ttm: fix busy memory to fail other user v6

2019-05-07 Thread Koenig, Christian
On 07.05.19 at 13:08, zhoucm1 wrote:
>
>
> On 2019-05-07 18:53, Koenig, Christian wrote:
>> On 07.05.19 at 11:36, Chunming Zhou wrote:
>>> heavy gpu job could occupy memory for a long time, which leads other
>>> users to fail to get memory.
>>>
>>> basically pick up Christian idea:
>>>
>>> 1. Reserve the BO in DC using a ww_mutex ticket (trivial).
>>> 2. If we then run into this EBUSY condition in TTM check if the BO 
>>> we need memory for (or rather the ww_mutex of its reservation 
>>> object) has a ticket assigned.
>>> 3. If we have a ticket we grab a reference to the first BO on the 
>>> LRU, drop the LRU lock and try to grab the reservation lock with the 
>>> ticket.
>>> 4. If getting the reservation lock with the ticket succeeded we 
>>> check if the BO is still the first one on the LRU in question (the 
>>> BO could have moved).
>>> 5. If the BO is still the first one on the LRU in question we try to 
>>> evict it as we would evict any other BO.
>>> 6. If any of the "If's" above fail we just back off and return -EBUSY.
>>>
>>> v2: fix some minor check
>>> v3: address Christian v2 comments.
>>> v4: fix some missing
>>> v5: handle first_bo unlock and bo_get/put
>>> v6: abstract a unified iterate function, and handle all possible
>>> use cases, not only pinned bos.
>>>
>>> Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
>>> Signed-off-by: Chunming Zhou 
>>> ---
>>>    drivers/gpu/drm/ttm/ttm_bo.c | 113 
>>> ++-
>>>    1 file changed, 97 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c 
>>> b/drivers/gpu/drm/ttm/ttm_bo.c
>>> index 8502b3ed2d88..bbf1d14d00a7 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>> @@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>>>     * b. Otherwise, trylock it.
>>>     */
>>>    static bool ttm_bo_evict_swapout_allowable(struct 
>>> ttm_buffer_object *bo,
>>> -    struct ttm_operation_ctx *ctx, bool *locked)
>>> +    struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
>>>    {
>>>    bool ret = false;
>>>       *locked = false;
>>> +    if (busy)
>>> +    *busy = false;
>>>    if (bo->resv == ctx->resv) {
>>>    reservation_object_assert_held(bo->resv);
>>>    if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
>>> @@ -779,35 +781,45 @@ static bool 
>>> ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>>>    } else {
>>>    *locked = reservation_object_trylock(bo->resv);
>>>    ret = *locked;
>>> +    if (!ret && busy)
>>> +    *busy = true;
>>>    }
>>>       return ret;
>>>    }
>>>    -static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>> -   uint32_t mem_type,
>>> -   const struct ttm_place *place,
>>> -   struct ttm_operation_ctx *ctx)
>>> +static struct ttm_buffer_object*
>>> +ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
>>> + struct ttm_mem_type_manager *man,
>>> + const struct ttm_place *place,
>>> + struct ttm_operation_ctx *ctx,
>>> + struct ttm_buffer_object **first_bo,
>>> + bool *locked)
>>>    {
>>> -    struct ttm_bo_global *glob = bdev->glob;
>>> -    struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>>>    struct ttm_buffer_object *bo = NULL;
>>> -    bool locked = false;
>>> -    unsigned i;
>>> -    int ret;
>>> +    int i;
>>>    -    spin_lock(&glob->lru_lock);
>>> +    if (first_bo)
>>> +    *first_bo = NULL;
>>>    for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>    list_for_each_entry(bo, &man->lru[i], lru) {
>>> -    if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
>>> +    bool busy = false;
>>> +    if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
>>> +    &busy)) {
>> A newline between declaration and code please.
>>
>>> +    if (first_bo && !(*first_bo) && busy) {
>>> +    ttm_bo_get(bo);
>>> +    *first_bo = bo;
>>> +    }
>>>    continue;
>>> +    }
>>>       if (place && !bdev->driver->eviction_valuable(bo,
>>>  place)) {
>>> -    if (locked)
>>> +    if (*locked)
>>>    reservation_object_unlock(bo->resv);
>>>    continue;
>>>    }
>>> +
>>>    break;
>>>    }
>>>    @@ -818,9 +830,66 @@ static int ttm_mem_evict_first(struct 
>>> ttm_bo_device *bdev,
>>>    bo = NULL;
>>>    }
>>>    +    return bo;
>>> +}
>>> +
>>> +static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>> +   uint32_t mem_type,
>>> +   const struct ttm_place *place,
>>> +   struct ttm_operation_ctx *ctx)
>>> +{
>>> +    struct ttm_bo_global *glob = bdev->glob;
>>> +    struct ttm_mem_type_manager *man = 

Re: [PATCH 1/2] drm/ttm: fix busy memory to fail other user v6

2019-05-07 Thread zhoucm1



On 2019-05-07 18:53, Koenig, Christian wrote:

On 07.05.19 at 11:36, Chunming Zhou wrote:

heavy gpu job could occupy memory for a long time, which leads other users
to fail to get memory.

basically pick up Christian idea:

1. Reserve the BO in DC using a ww_mutex ticket (trivial).
2. If we then run into this EBUSY condition in TTM check if the BO we need 
memory for (or rather the ww_mutex of its reservation object) has a ticket 
assigned.
3. If we have a ticket we grab a reference to the first BO on the LRU, drop the 
LRU lock and try to grab the reservation lock with the ticket.
4. If getting the reservation lock with the ticket succeeded we check if the BO 
is still the first one on the LRU in question (the BO could have moved).
5. If the BO is still the first one on the LRU in question we try to evict it 
as we would evict any other BO.
6. If any of the "If's" above fail we just back off and return -EBUSY.

v2: fix some minor check
v3: address Christian v2 comments.
v4: fix some missing
v5: handle first_bo unlock and bo_get/put
v6: abstract a unified iterate function, and handle all possible use cases,
not only pinned bos.

Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
Signed-off-by: Chunming Zhou 
---
   drivers/gpu/drm/ttm/ttm_bo.c | 113 ++-
   1 file changed, 97 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 8502b3ed2d88..bbf1d14d00a7 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
* b. Otherwise, trylock it.
*/
   static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
-   struct ttm_operation_ctx *ctx, bool *locked)
+   struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
   {
bool ret = false;
   
   	*locked = false;

+   if (busy)
+   *busy = false;
if (bo->resv == ctx->resv) {
reservation_object_assert_held(bo->resv);
if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
@@ -779,35 +781,45 @@ static bool ttm_bo_evict_swapout_allowable(struct 
ttm_buffer_object *bo,
} else {
*locked = reservation_object_trylock(bo->resv);
ret = *locked;
+   if (!ret && busy)
+   *busy = true;
}
   
   	return ret;

   }
   
-static int ttm_mem_evict_first(struct ttm_bo_device *bdev,

-  uint32_t mem_type,
-  const struct ttm_place *place,
-  struct ttm_operation_ctx *ctx)
+static struct ttm_buffer_object*
+ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
+struct ttm_mem_type_manager *man,
+const struct ttm_place *place,
+struct ttm_operation_ctx *ctx,
+struct ttm_buffer_object **first_bo,
+bool *locked)
   {
-   struct ttm_bo_global *glob = bdev->glob;
-   struct ttm_mem_type_manager *man = &bdev->man[mem_type];
struct ttm_buffer_object *bo = NULL;
-   bool locked = false;
-   unsigned i;
-   int ret;
+   int i;
   
-	spin_lock(&glob->lru_lock);

+   if (first_bo)
+   *first_bo = NULL;
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
list_for_each_entry(bo, &man->lru[i], lru) {
-   if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
+   bool busy = false;
+   if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
+   &busy)) {

A newline between declaration and code please.


+   if (first_bo && !(*first_bo) && busy) {
+   ttm_bo_get(bo);
+   *first_bo = bo;
+   }
continue;
+   }
   
   			if (place && !bdev->driver->eviction_valuable(bo,

  place)) {
-   if (locked)
+   if (*locked)
reservation_object_unlock(bo->resv);
continue;
}
+
break;
}
   
@@ -818,9 +830,66 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,

bo = NULL;
}
   
+	return bo;

+}
+
+static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
+  uint32_t mem_type,
+  const struct ttm_place *place,
+  struct ttm_operation_ctx *ctx)
+{
+   struct ttm_bo_global *glob = bdev->glob;
+   struct ttm_mem_type_manager *man = &bdev->man[mem_type];
+   struct 

Re: [PATCH 1/2] drm/ttm: fix busy memory to fail other user v6

2019-05-07 Thread Koenig, Christian
On 07.05.19 at 11:36, Chunming Zhou wrote:
> heavy gpu job could occupy memory for a long time, which leads other users
> to fail to get memory.
>
> basically pick up Christian idea:
>
> 1. Reserve the BO in DC using a ww_mutex ticket (trivial).
> 2. If we then run into this EBUSY condition in TTM check if the BO we need 
> memory for (or rather the ww_mutex of its reservation object) has a ticket 
> assigned.
> 3. If we have a ticket we grab a reference to the first BO on the LRU, drop 
> the LRU lock and try to grab the reservation lock with the ticket.
> 4. If getting the reservation lock with the ticket succeeded we check if the 
> BO is still the first one on the LRU in question (the BO could have moved).
> 5. If the BO is still the first one on the LRU in question we try to evict it 
> as we would evict any other BO.
> 6. If any of the "If's" above fail we just back off and return -EBUSY.
>
> v2: fix some minor check
> v3: address Christian v2 comments.
> v4: fix some missing
> v5: handle first_bo unlock and bo_get/put
> v6: abstract a unified iterate function, and handle all possible use cases,
> not only pinned bos.
>
> Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
> Signed-off-by: Chunming Zhou 
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 113 ++-
>   1 file changed, 97 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 8502b3ed2d88..bbf1d14d00a7 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>* b. Otherwise, trylock it.
>*/
>   static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> - struct ttm_operation_ctx *ctx, bool *locked)
> + struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
>   {
>   bool ret = false;
>   
>   *locked = false;
> + if (busy)
> + *busy = false;
>   if (bo->resv == ctx->resv) {
>   reservation_object_assert_held(bo->resv);
>   if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
> @@ -779,35 +781,45 @@ static bool ttm_bo_evict_swapout_allowable(struct 
> ttm_buffer_object *bo,
>   } else {
>   *locked = reservation_object_trylock(bo->resv);
>   ret = *locked;
> + if (!ret && busy)
> + *busy = true;
>   }
>   
>   return ret;
>   }
>   
> -static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> -uint32_t mem_type,
> -const struct ttm_place *place,
> -struct ttm_operation_ctx *ctx)
> +static struct ttm_buffer_object*
> +ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
> +  struct ttm_mem_type_manager *man,
> +  const struct ttm_place *place,
> +  struct ttm_operation_ctx *ctx,
> +  struct ttm_buffer_object **first_bo,
> +  bool *locked)
>   {
> - struct ttm_bo_global *glob = bdev->glob;
> - struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>   struct ttm_buffer_object *bo = NULL;
> - bool locked = false;
> - unsigned i;
> - int ret;
> + int i;
>   
> - spin_lock(&glob->lru_lock);
> + if (first_bo)
> + *first_bo = NULL;
>   for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>   list_for_each_entry(bo, &man->lru[i], lru) {
> - if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
> + bool busy = false;
> + if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
> + &busy)) {

A newline between declaration and code please.

> + if (first_bo && !(*first_bo) && busy) {
> + ttm_bo_get(bo);
> + *first_bo = bo;
> + }
>   continue;
> + }
>   
>   if (place && !bdev->driver->eviction_valuable(bo,
> place)) {
> - if (locked)
> + if (*locked)
>   reservation_object_unlock(bo->resv);
>   continue;
>   }
> +
>   break;
>   }
>   
> @@ -818,9 +830,66 @@ static int ttm_mem_evict_first(struct ttm_bo_device 
> *bdev,
>   bo = NULL;
>   }
>   
> + return bo;
> +}
> +
> +static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> +uint32_t mem_type,
> +const struct ttm_place *place,
> +struct ttm_operation_ctx *ctx)
> +{
> + struct ttm_bo_global *glob = 

Re: [PATCH 2/2] drm/amd/display: use ttm_eu_reserve_buffers instead of amdgpu_bo_reserve

2019-05-07 Thread Koenig, Christian
On 07.05.19 at 11:36, Chunming Zhou wrote:
> add a ticket for the display bo, so that it can preempt busy bos.
>
> Change-Id: I9f031cdcc8267de00e819ae303baa0a52df8ebb9
> Signed-off-by: Chunming Zhou 
> ---
>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 22 ++-
>   1 file changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index ac22f7351a42..8633d52e3fbe 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -4176,6 +4176,9 @@ static int dm_plane_helper_prepare_fb(struct drm_plane 
> *plane,
>   struct amdgpu_device *adev;
>   struct amdgpu_bo *rbo;
>   struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
> + struct list_head list, duplicates;
> + struct ttm_validate_buffer tv;
> + struct ww_acquire_ctx ticket;
>   uint64_t tiling_flags;
>   uint32_t domain;
>   int r;
> @@ -4192,9 +4195,18 @@ static int dm_plane_helper_prepare_fb(struct drm_plane 
> *plane,
>   obj = new_state->fb->obj[0];
>   rbo = gem_to_amdgpu_bo(obj);
>   adev = amdgpu_ttm_adev(rbo->tbo.bdev);
> - r = amdgpu_bo_reserve(rbo, false);
> - if (unlikely(r != 0))
> + INIT_LIST_HEAD(&list);
> + INIT_LIST_HEAD(&duplicates);
> +
> + tv.bo = &rbo->tbo;
> + tv.num_shared = 1;
> + list_add(&tv.head, &list);
> +
> + r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);

duplicates are superfluous and can be NULL in this case.

Apart from that the patch is Reviewed-by: Christian König 


Regards,
Christian.
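
I.e., a minimal sketch of the suggested simplification, dropping the
duplicates list entirely:

	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);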

> + if (r) {
> + dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
>   return r;
> + }
>   
>   if (plane->type != DRM_PLANE_TYPE_CURSOR)
>   domain = amdgpu_display_supported_domains(adev);
> @@ -4205,21 +4217,21 @@ static int dm_plane_helper_prepare_fb(struct 
> drm_plane *plane,
>   if (unlikely(r != 0)) {
>   if (r != -ERESTARTSYS)
>   DRM_ERROR("Failed to pin framebuffer with error %d\n", 
> r);
> - amdgpu_bo_unreserve(rbo);
> + ttm_eu_backoff_reservation(&ticket, &list);
>   return r;
>   }
>   
> r = amdgpu_ttm_alloc_gart(&rbo->tbo);
>   if (unlikely(r != 0)) {
>   amdgpu_bo_unpin(rbo);
> - amdgpu_bo_unreserve(rbo);
> + ttm_eu_backoff_reservation(&ticket, &list);
>   DRM_ERROR("%p bind failed\n", rbo);
>   return r;
>   }
>   
> amdgpu_bo_get_tiling_flags(rbo, &tiling_flags);
>   
> - amdgpu_bo_unreserve(rbo);
> + ttm_eu_backoff_reservation(&ticket, &list);
>   
>   afb->address = amdgpu_bo_gpu_offset(rbo);
>   

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/ttm: fix busy memory to fail other user v6

2019-05-07 Thread Chunming Zhou
heavy gpu job could occupy memory for a long time, which leads other users
to fail to get memory.

basically pick up Christian idea:

1. Reserve the BO in DC using a ww_mutex ticket (trivial).
2. If we then run into this EBUSY condition in TTM check if the BO we need 
memory for (or rather the ww_mutex of its reservation object) has a ticket 
assigned.
3. If we have a ticket we grab a reference to the first BO on the LRU, drop the 
LRU lock and try to grab the reservation lock with the ticket.
4. If getting the reservation lock with the ticket succeeded we check if the BO 
is still the first one on the LRU in question (the BO could have moved).
5. If the BO is still the first one on the LRU in question we try to evict it 
as we would evict any other BO.
6. If any of the "If's" above fail we just back off and return -EBUSY.
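
Condensed, the retry path described above looks roughly like this
(illustrative pseudocode only, not the patch itself; helper names are
hypothetical):

	/* Illustrative pseudocode of steps 2-6 above. */
	bo = find_evictable_bo(man, &first_busy_bo);
	if (!bo && first_busy_bo && ctx->ticket) {
		ttm_bo_get(first_busy_bo);			/* step 3 */
		spin_unlock(&glob->lru_lock);
		if (!ww_mutex_lock(&first_busy_bo->resv->lock, ticket) &&
		    still_first_on_lru(first_busy_bo))		/* steps 4-5 */
			bo = first_busy_bo;			/* evict it */
		else
			return -EBUSY;				/* step 6 */
	}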

v2: fix some minor check
v3: address Christian v2 comments.
v4: fix some missing
v5: handle first_bo unlock and bo_get/put
v6: abstract a unified iterate function, and handle all possible use cases,
not only pinned bos.

Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 113 ++-
 1 file changed, 97 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 8502b3ed2d88..bbf1d14d00a7 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
  * b. Otherwise, trylock it.
  */
 static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
-   struct ttm_operation_ctx *ctx, bool *locked)
+   struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
 {
bool ret = false;
 
*locked = false;
+   if (busy)
+   *busy = false;
if (bo->resv == ctx->resv) {
reservation_object_assert_held(bo->resv);
if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
@@ -779,35 +781,45 @@ static bool ttm_bo_evict_swapout_allowable(struct 
ttm_buffer_object *bo,
} else {
*locked = reservation_object_trylock(bo->resv);
ret = *locked;
+   if (!ret && busy)
+   *busy = true;
}
 
return ret;
 }
 
-static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
-  uint32_t mem_type,
-  const struct ttm_place *place,
-  struct ttm_operation_ctx *ctx)
+static struct ttm_buffer_object*
+ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
+struct ttm_mem_type_manager *man,
+const struct ttm_place *place,
+struct ttm_operation_ctx *ctx,
+struct ttm_buffer_object **first_bo,
+bool *locked)
 {
-   struct ttm_bo_global *glob = bdev->glob;
-   struct ttm_mem_type_manager *man = &bdev->man[mem_type];
struct ttm_buffer_object *bo = NULL;
-   bool locked = false;
-   unsigned i;
-   int ret;
+   int i;
 
-   spin_lock(&glob->lru_lock);
+   if (first_bo)
+   *first_bo = NULL;
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
list_for_each_entry(bo, &man->lru[i], lru) {
-   if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
+   bool busy = false;
+   if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
+   &busy)) {
+   if (first_bo && !(*first_bo) && busy) {
+   ttm_bo_get(bo);
+   *first_bo = bo;
+   }
continue;
+   }
 
if (place && !bdev->driver->eviction_valuable(bo,
  place)) {
-   if (locked)
+   if (*locked)
reservation_object_unlock(bo->resv);
continue;
}
+
break;
}
 
@@ -818,9 +830,66 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
bo = NULL;
}
 
+   return bo;
+}
+
+static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
+  uint32_t mem_type,
+  const struct ttm_place *place,
+  struct ttm_operation_ctx *ctx)
+{
+   struct ttm_bo_global *glob = bdev->glob;
+   struct ttm_mem_type_manager *man = &bdev->man[mem_type];
+   struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
+   bool locked = false;
+   int ret;
+
+   spin_lock(&glob->lru_lock);
+   bo = 

[PATCH 2/2] drm/amd/display: use ttm_eu_reserve_buffers instead of amdgpu_bo_reserve

2019-05-07 Thread Chunming Zhou
add a ticket for the display bo, so that it can preempt busy bos.

Change-Id: I9f031cdcc8267de00e819ae303baa0a52df8ebb9
Signed-off-by: Chunming Zhou 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 22 ++-
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index ac22f7351a42..8633d52e3fbe 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4176,6 +4176,9 @@ static int dm_plane_helper_prepare_fb(struct drm_plane 
*plane,
struct amdgpu_device *adev;
struct amdgpu_bo *rbo;
struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
+   struct list_head list, duplicates;
+   struct ttm_validate_buffer tv;
+   struct ww_acquire_ctx ticket;
uint64_t tiling_flags;
uint32_t domain;
int r;
@@ -4192,9 +4195,18 @@ static int dm_plane_helper_prepare_fb(struct drm_plane 
*plane,
obj = new_state->fb->obj[0];
rbo = gem_to_amdgpu_bo(obj);
adev = amdgpu_ttm_adev(rbo->tbo.bdev);
-   r = amdgpu_bo_reserve(rbo, false);
-   if (unlikely(r != 0))
+	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
+
+	tv.bo = &rbo->tbo;
+	tv.num_shared = 1;
+	list_add(&tv.head, &list);
+
+	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
+   if (r) {
+   dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
return r;
+   }
 
if (plane->type != DRM_PLANE_TYPE_CURSOR)
domain = amdgpu_display_supported_domains(adev);
@@ -4205,21 +4217,21 @@ static int dm_plane_helper_prepare_fb(struct drm_plane 
*plane,
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
DRM_ERROR("Failed to pin framebuffer with error %d\n", 
r);
-   amdgpu_bo_unreserve(rbo);
+		ttm_eu_backoff_reservation(&ticket, &list);
return r;
}
 
	r = amdgpu_ttm_alloc_gart(&rbo->tbo);
	if (unlikely(r != 0)) {
		amdgpu_bo_unpin(rbo);
-		amdgpu_bo_unreserve(rbo);
+		ttm_eu_backoff_reservation(&ticket, &list);
		DRM_ERROR("%p bind failed\n", rbo);
		return r;
	}
 
	amdgpu_bo_get_tiling_flags(rbo, &tiling_flags);
 
-	amdgpu_bo_unreserve(rbo);
+	ttm_eu_backoff_reservation(&ticket, &list);
 
afb->address = amdgpu_bo_gpu_offset(rbo);
 
-- 
2.17.1
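
In short, the change swaps amdgpu's single-BO reserve for the ticketed
execbuf-util pattern; condensed from the hunks above (error handling
elided):

/* Before: plain reserve, no ww ticket, so a busy BO just fails/blocks. */
r = amdgpu_bo_reserve(rbo, false);
...
amdgpu_bo_unreserve(rbo);

/* After: the ww_acquire_ctx ticket travels with the reservation, which
 * lets TTM eviction identify and preempt busy BOs on our behalf. */
r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
...
ttm_eu_backoff_reservation(&ticket, &list);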

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH 0/2] Skip IH re-route on Vega SR-IOV

2019-05-07 Thread Huang, Trigger

OK, thanks for the detailed background; I didn't know about the limitation 
in the hardware before.

Thanks & Best Wishes,
Trigger Huang

-Original Message-
From: Christian König  
Sent: Tuesday, May 07, 2019 5:04 PM
To: Huang, Trigger ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 0/2] Skip IH re-route on Vega SR-IOV

[CAUTION: External Email]

Hi Trigger,

> And I can see this interrupt still comes from IH0's amdgpu_irq_handler, which 
> proves this feature is not working under SR-IOV.
In this case this change is a clear NAK.

> I suggest removing this feature from SR-IOV, as my concern is that it may 
> cause some weird bugs in the virtualization heavy stress test.
And I really think we should keep it to make sure that we have the same 
handling for bare metal as for SRIOV.

> In the future, maybe we can request PSP team to add this support for SR-IOV.
We will never be able to use this under SR-IOV because of a limitation in the 
hardware.

What we could maybe do is check the response code from the PSP firmware if it 
correctly ignored the commands under SR-IOV, but I think the response code is 
the same for ignoring as for executing the commands.

Regards,
Christian.

On 07.05.19 at 10:54, Huang, Trigger wrote:
> Hi Christian,
>
> On Vega10 SR-IOV VF, I injected a 'real' VMC page fault from user space, 
> using the modified amdgpu_test.
> [   19.127874] amdgpu 0000:00:08.0: [gfxhub] no-retry page fault (src_id:0 
> ring:174 vmid:1 pasid:32768, for process amdgpu_test pid 1071 thread 
> amdgpu_test pid 1071)
> [   19.130037] amdgpu 0000:00:08.0:   in page starting at address 
> 0x0000000800000000 from 27
>
> And I can see this interrupt still comes from IH0's amdgpu_irq_handler, which 
> proves this feature is not working under SR-IOV.
>
> I suggest removing this feature from SR-IOV, as my concern is that it may 
> cause some weird bugs in the virtualization heavy stress test.
> In the future, maybe we can request PSP team to add this support for SR-IOV.
>
> Thanks & Best Wishes,
> Trigger Huang
>
> -Original Message-
> From: Christian König 
> Sent: Tuesday, May 07, 2019 3:37 PM
> To: Huang, Trigger ; 
> amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 0/2] Skip IH re-route on Vega SR-IOV
>
> [CAUTION: External Email]
>
> We intentionally didn't do this to make sure that the commands are ignored by 
> the PSP firmware.
>
> I have no strong opinion on if we should do this or not, but the PSP firmware 
> guys might have.
>
> Christian.
>
> On 07.05.19 at 06:08, Trigger Huang wrote:
>> IH re-route is not supported on Vega SR-IOV and needs to be skipped
>>
>> Trigger Huang (2):
>> drm/amdgpu: Skip IH reroute in Vega10 SR-IOV VF
>> drm/amdgpu: Skip IH reroute in Vega20 SR-IOV VF
>>
>>drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 4 
>>drivers/gpu/drm/amd/amdgpu/psp_v3_1.c  | 4 
>>2 files changed, 8 insertions(+)
>>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 0/2] Skip IH re-route on Vega SR-IOV

2019-05-07 Thread Christian König

Hi Trigger,


And I can see this interrupt still comes from IH0's amdgpu_irq_handler, which 
proves this feature is not working under SR-IOV.

In this case this change is a clear NAK.


I suggest removing this feature from SR-IOV, as my concern is that it may 
cause some weird bugs in the virtualization heavy stress test.
And I really think we should keep it to make sure that we have the same 
handling for bare metal as for SRIOV.



In the future, maybe we can request PSP team to add this support for SR-IOV.
We will never be able to use this under SR-IOV because of a limitation in 
the hardware.


What we could maybe do is check the response code from the PSP firmware 
if it correctly ignored the commands under SR-IOV, but I think the 
response code is the same for ignoring as for executing the commands.


Regards,
Christian.

On 07.05.19 at 10:54, Huang, Trigger wrote:

Hi Christian,

On Vega10 SR-IOV VF, I injected a 'real' VMC page fault from user space, using 
the modified amdgpu_test.
[   19.127874] amdgpu 0000:00:08.0: [gfxhub] no-retry page fault (src_id:0 
ring:174 vmid:1 pasid:32768, for process amdgpu_test pid 1071 thread 
amdgpu_test pid 1071)
[   19.130037] amdgpu 0000:00:08.0:   in page starting at address 
0x0000000800000000 from 27

And I can see this interrupt still comes from IH0's amdgpu_irq_handler, which 
proves this feature is not working under SR-IOV.

I suggest removing this feature from SR-IOV, as my concern is that it may 
cause some weird bugs in the virtualization heavy stress test.
In the future, maybe we can request PSP team to add this support for SR-IOV.

Thanks & Best Wishes,
Trigger Huang

-Original Message-
From: Christian König 
Sent: Tuesday, May 07, 2019 3:37 PM
To: Huang, Trigger ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 0/2] Skip IH re-route on Vega SR-IOV

[CAUTION: External Email]

We intentionally didn't do this to make sure that the commands are ignored by 
the PSP firmware.

I have no strong opinion on if we should do this or not, but the PSP firmware 
guys might have.

Christian.

On 07.05.19 at 06:08, Trigger Huang wrote:

IH re-route is not supported on Vega SR-IOV and needs to be skipped

Trigger Huang (2):
drm/amdgpu: Skip IH reroute in Vega10 SR-IOV VF
drm/amdgpu: Skip IH reroute in Vega20 SR-IOV VF

   drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 4 
   drivers/gpu/drm/amd/amdgpu/psp_v3_1.c  | 4 
   2 files changed, 8 insertions(+)


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH 0/2] Skip IH re-route on Vega SR-IOV

2019-05-07 Thread Huang, Trigger
Hi Christian,

On Vega10 SR-IOV VF, I injected a 'real' VMC page fault from user space, using 
the modified amdgpu_test.
[   19.127874] amdgpu 0000:00:08.0: [gfxhub] no-retry page fault (src_id:0 
ring:174 vmid:1 pasid:32768, for process amdgpu_test pid 1071 thread 
amdgpu_test pid 1071)
[   19.130037] amdgpu 0000:00:08.0:   in page starting at address 
0x0000000800000000 from 27

And I can see this interrupt still comes from IH0's amdgpu_irq_handler, which 
proves this feature is not working under SR-IOV.

I suggest removing this feature from SR-IOV, as my concern is that it may 
cause some weird bugs in the virtualization heavy stress test.
In the future, maybe we can request PSP team to add this support for SR-IOV.

Thanks & Best Wishes,
Trigger Huang

-Original Message-
From: Christian König  
Sent: Tuesday, May 07, 2019 3:37 PM
To: Huang, Trigger ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 0/2] Skip IH re-route on Vega SR-IOV

[CAUTION: External Email]

We intentionally didn't do this to make sure that the commands are ignored by 
the PSP firmware.

I have no strong opinion on if we should do this or not, but the PSP firmware 
guys might have.

Christian.

On 07.05.19 at 06:08, Trigger Huang wrote:
> IH re-route is not supported on Vega SR-IOV and needs to be skipped
>
> Trigger Huang (2):
>drm/amdgpu: Skip IH reroute in Vega10 SR-IOV VF
>drm/amdgpu: Skip IH reroute in Vega20 SR-IOV VF
>
>   drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 4 
>   drivers/gpu/drm/amd/amdgpu/psp_v3_1.c  | 4 
>   2 files changed, 8 insertions(+)
>

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 6/9] drm/amdgpu: use allowed_domains for exported DMA-bufs

2019-05-07 Thread Christian König
Avoid ping/ponging the buffers when we stop pinning DMA-buf
exports, by using the allowed domains for exported buffers.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d0e221c8d940..d6223e41e358 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -26,6 +26,7 @@
  */
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -412,7 +413,9 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
/* Don't move this buffer if we have depleted our allowance
 * to move it. Don't move anything if the threshold is zero.
 */
-   if (p->bytes_moved < p->bytes_moved_threshold) {
+   if (p->bytes_moved < p->bytes_moved_threshold &&
+   (!bo->gem_base.dma_buf ||
+	    list_empty(&bo->gem_base.dma_buf->attachments))) {
		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
(bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
/* And don't move a CPU_ACCESS_REQUIRED BO to limited
-- 
2.17.1
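
The new predicate is easy to misread in diff form; restated as a hedged
helper (field names come from the patch, the helper itself is
illustrative):

/* CS may still move this BO within its allowed domains only while no
 * DMA-buf importer is attached; once attached, leave it in place. */
static bool example_bo_is_movable(struct amdgpu_bo *bo)
{
	return !bo->gem_base.dma_buf ||
	       list_empty(&bo->gem_base.dma_buf->attachments);
}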

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 4/9] drm/ttm: remove the backing store if no placement is given

2019-05-07 Thread Christian König
Pipeline removal of the BO's backing store when no placement is given
during validation.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 2845fceb2fbd..8502b3ed2d88 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1160,6 +1160,18 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
uint32_t new_flags;
 
reservation_object_assert_held(bo->resv);
+
+   /*
+* Remove the backing store if no placement is given.
+*/
+   if (!placement->num_placement && !placement->num_busy_placement) {
+   ret = ttm_bo_pipeline_gutting(bo);
+   if (ret)
+   return ret;
+
+   return ttm_tt_create(bo, false);
+   }
+
/*
 * Check whether we need to move buffer.
 */
-- 
2.17.1
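
For callers this means an empty placement is now a request to drop the
backing store rather than an error; a hedged usage sketch (the BO and
surrounding context are assumed):

/* Gut the BO (pipelined, no stall) and re-create an empty TT. */
struct ttm_operation_ctx ctx = { false, false };
struct ttm_placement placement = {};	/* no placements at all */
int r = ttm_bo_validate(bo, &placement, &ctx);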

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/9] dma-buf: add dynamic DMA-buf handling v7

2019-05-07 Thread Christian König
On the exporter side we add optional explicit pinning callbacks. If those
callbacks are implemented the framework no longer caches sg tables and the
map/unmap callbacks are always called with the lock of the reservation object
held.

On the importer side we add an optional invalidate callback. This callback is
used by the exporter to inform the importers that their mappings should be
destroyed as soon as possible.

This allows the exporter to provide the mappings without the need to pin
the backing store.

v2: don't try to invalidate mappings when the callback is NULL,
lock the reservation obj while using the attachments,
add helper to set the callback
v3: move flag for invalidation support into the DMA-buf,
use new attach_info structure to set the callback
v4: use importer_priv field instead of mangling exporter priv.
v5: drop invalidation_supported flag
v6: squash together with pin/unpin changes
v7: pin/unpin takes an attachment now

Signed-off-by: Christian König 
---
 drivers/dma-buf/dma-buf.c | 190 --
 include/linux/dma-buf.h   |  91 --
 2 files changed, 261 insertions(+), 20 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 775e13f54083..464a4c38df6c 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -530,10 +530,12 @@ void dma_buf_put(struct dma_buf *dmabuf)
 EXPORT_SYMBOL_GPL(dma_buf_put);
 
 /**
- * dma_buf_attach - Add the device to dma_buf's attachments list; optionally,
+ * dma_buf_dynamic_attach - Add the device to dma_buf's attachments list; 
optionally,
  * calls attach() of dma_buf_ops to allow device-specific attach functionality
- * @dmabuf:[in]buffer to attach device to.
- * @dev:   [in]device to be attached.
+ * @dmabuf:[in]buffer to attach device to.
+ * @dev:   [in]device to be attached.
+ * @importer_ops   [in]importer operations for the attachment
+ * @importer_priv  [in]importer private pointer for the attachment
  *
  * Returns struct dma_buf_attachment pointer for this attachment. Attachments
  * must be cleaned up by calling dma_buf_detach().
@@ -547,8 +549,10 @@ EXPORT_SYMBOL_GPL(dma_buf_put);
  * accessible to @dev, and cannot be moved to a more suitable place. This is
  * indicated with the error code -EBUSY.
  */
-struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
- struct device *dev)
+struct dma_buf_attachment *
+dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
+  const struct dma_buf_attach_ops *importer_ops,
+  void *importer_priv)
 {
struct dma_buf_attachment *attach;
int ret;
@@ -562,6 +566,8 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf 
*dmabuf,
 
attach->dev = dev;
attach->dmabuf = dmabuf;
+   attach->importer_ops = importer_ops;
+   attach->importer_priv = importer_priv;
 
	mutex_lock(&dmabuf->lock);
 
@@ -570,7 +576,9 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf 
*dmabuf,
if (ret)
goto err_attach;
}
+   reservation_object_lock(dmabuf->resv, NULL);
	list_add(&attach->node, &dmabuf->attachments);
+	reservation_object_unlock(dmabuf->resv);
 
	mutex_unlock(&dmabuf->lock);
 
@@ -594,6 +602,21 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf 
*dmabuf,
	mutex_unlock(&dmabuf->lock);
return ERR_PTR(ret);
 }
+EXPORT_SYMBOL_GPL(dma_buf_dynamic_attach);
+
+/**
+ * dma_buf_attach - Wrapper for dma_buf_dynamic_attach
+ * @dmabuf:[in]buffer to attach device to.
+ * @dev:   [in]device to be attached.
+ *
+ * Wrapper to call dma_buf_dynamic_attach() for drivers which still use a 
static
+ * mapping.
+ */
+struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
+ struct device *dev)
+{
+   return dma_buf_dynamic_attach(dmabuf, dev, NULL, NULL);
+}
 EXPORT_SYMBOL_GPL(dma_buf_attach);
 
 /**
@@ -614,7 +637,9 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct 
dma_buf_attachment *attach)
   DMA_BIDIRECTIONAL);
 
	mutex_lock(&dmabuf->lock);
+	reservation_object_lock(dmabuf->resv, NULL);
	list_del(&attach->node);
+   reservation_object_unlock(dmabuf->resv);
if (dmabuf->ops->detach)
dmabuf->ops->detach(dmabuf, attach);
 
@@ -623,6 +648,100 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct 
dma_buf_attachment *attach)
 }
 EXPORT_SYMBOL_GPL(dma_buf_detach);
 
+/**
+ * dma_buf_pin - Lock down the DMA-buf
+ *
+ * @attach:[in]attachment which should be pinned
+ *
+ * Returns:
+ * 0 on success, negative error code on failure.
+ */
+int dma_buf_pin(struct dma_buf_attachment *attach)
+{
+   struct dma_buf *dmabuf = attach->dmabuf;
+   int ret = 0;
+
+   reservation_object_assert_held(dmabuf->resv);
+
+ 
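
For readers skimming the interface change, a minimal importer-side sketch
of the dynamic attach flow described above (the driver names and the
callback body are assumptions):

/* Sketch: a dynamic importer. The exporter invokes invalidate() with
 * dmabuf->resv held; the importer drops its cached mappings and
 * re-creates them lazily on the next map. */
static void example_invalidate(struct dma_buf_attachment *attach)
{
	/* tear down cached sg tables here */
}

static const struct dma_buf_attach_ops example_attach_ops = {
	.invalidate = example_invalidate,
};

attach = dma_buf_dynamic_attach(dmabuf, dev, &example_attach_ops,
				importer_priv);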

[PATCH 3/9] drm: remove prime sg_table caching

2019-05-07 Thread Christian König
That is now done by the DMA-buf helpers instead.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/drm_prime.c | 76 -
 1 file changed, 16 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 231e3f6d5f41..90f5230cc0d5 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -86,11 +86,6 @@ struct drm_prime_member {
struct rb_node handle_rb;
 };
 
-struct drm_prime_attachment {
-   struct sg_table *sgt;
-   enum dma_data_direction dir;
-};
-
 static int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv,
struct dma_buf *dma_buf, uint32_t handle)
 {
@@ -188,25 +183,16 @@ static int drm_prime_lookup_buf_handle(struct 
drm_prime_file_private *prime_fpri
  * @dma_buf: buffer to attach device to
  * @attach: buffer attachment data
  *
- * Allocates &drm_prime_attachment and calls &drm_driver.gem_prime_pin for
- * device specific attachment. This can be used as the &dma_buf_ops.attach
- * callback.
+ * Calls &drm_driver.gem_prime_pin for device specific handling. This can be
+ * used as the &dma_buf_ops.attach callback.
  *
  * Returns 0 on success, negative error code on failure.
  */
 int drm_gem_map_attach(struct dma_buf *dma_buf,
   struct dma_buf_attachment *attach)
 {
-   struct drm_prime_attachment *prime_attach;
struct drm_gem_object *obj = dma_buf->priv;
 
-   prime_attach = kzalloc(sizeof(*prime_attach), GFP_KERNEL);
-   if (!prime_attach)
-   return -ENOMEM;
-
-   prime_attach->dir = DMA_NONE;
-   attach->priv = prime_attach;
-
return drm_gem_pin(obj);
 }
 EXPORT_SYMBOL(drm_gem_map_attach);
@@ -222,26 +208,8 @@ EXPORT_SYMBOL(drm_gem_map_attach);
 void drm_gem_map_detach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach)
 {
-   struct drm_prime_attachment *prime_attach = attach->priv;
struct drm_gem_object *obj = dma_buf->priv;
 
-   if (prime_attach) {
-   struct sg_table *sgt = prime_attach->sgt;
-
-   if (sgt) {
-   if (prime_attach->dir != DMA_NONE)
-   dma_unmap_sg_attrs(attach->dev, sgt->sgl,
-  sgt->nents,
-  prime_attach->dir,
-  DMA_ATTR_SKIP_CPU_SYNC);
-   sg_free_table(sgt);
-   }
-
-   kfree(sgt);
-   kfree(prime_attach);
-   attach->priv = NULL;
-   }
-
drm_gem_unpin(obj);
 }
 EXPORT_SYMBOL(drm_gem_map_detach);
@@ -286,39 +254,22 @@ void drm_prime_remove_buf_handle_locked(struct 
drm_prime_file_private *prime_fpr
 struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach,
 enum dma_data_direction dir)
 {
-   struct drm_prime_attachment *prime_attach = attach->priv;
struct drm_gem_object *obj = attach->dmabuf->priv;
struct sg_table *sgt;
 
-   if (WARN_ON(dir == DMA_NONE || !prime_attach))
+   if (WARN_ON(dir == DMA_NONE))
return ERR_PTR(-EINVAL);
 
-   /* return the cached mapping when possible */
-   if (prime_attach->dir == dir)
-   return prime_attach->sgt;
-
-   /*
-* two mappings with different directions for the same attachment are
-* not allowed
-*/
-   if (WARN_ON(prime_attach->dir != DMA_NONE))
-   return ERR_PTR(-EBUSY);
-
if (obj->funcs)
sgt = obj->funcs->get_sg_table(obj);
else
sgt = obj->dev->driver->gem_prime_get_sg_table(obj);
 
-   if (!IS_ERR(sgt)) {
-   if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
- DMA_ATTR_SKIP_CPU_SYNC)) {
-   sg_free_table(sgt);
-   kfree(sgt);
-   sgt = ERR_PTR(-ENOMEM);
-   } else {
-   prime_attach->sgt = sgt;
-   prime_attach->dir = dir;
-   }
+   if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+ DMA_ATTR_SKIP_CPU_SYNC)) {
+   sg_free_table(sgt);
+   kfree(sgt);
+   sgt = ERR_PTR(-ENOMEM);
}
 
return sgt;
@@ -331,14 +282,19 @@ EXPORT_SYMBOL(drm_gem_map_dma_buf);
  * @sgt: scatterlist info of the buffer to unmap
  * @dir: direction of DMA transfer
  *
- * Not implemented. The unmap is done at drm_gem_map_detach().  This can be
- * used as the &dma_buf_ops.unmap_dma_buf callback.
+ * This can be used as the &dma_buf_ops.unmap_dma_buf callback.
  */
 void drm_gem_unmap_dma_buf(struct dma_buf_attachment *attach,
   struct sg_table *sgt,
   enum 

[PATCH 8/9] drm/amdgpu: add independent DMA-buf import v5

2019-05-07 Thread Christian König
Instead of relying on the DRM functions just implement our own import
functions. This prepares support for taking care of unpinned DMA-buf.

v2: enable for all exporters, not just amdgpu, fix invalidation
handling, lock reservation object while setting callback
v3: change to new dma_buf attach interface
v4: split out from unpinned DMA-buf work
v5: rebased and cleanup on new DMA-buf interface

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 38 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h |  4 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 34 +++---
 4 files changed, 52 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 68a071060793..0f4fe21d1b2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -420,31 +420,28 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device 
*dev,
 }
 
 /**
- * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
- * implementation
+ * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
+ *
  * @dev: DRM device
- * @attach: DMA-buf attachment
- * @sg: Scatter/gather table
+ * @dma_buf: DMA-buf
  *
- * Imports shared DMA buffer memory exported by another device.
+ * Creates an empty SG BO for DMA-buf import.
  *
  * Returns:
  * A new GEM BO of the given DRM device, representing the memory
  * described by the given DMA-buf attachment and scatter/gather table.
  */
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
-struct dma_buf_attachment *attach,
-struct sg_table *sg)
+static struct drm_gem_object *
+amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
 {
-   struct reservation_object *resv = attach->dmabuf->resv;
+   struct reservation_object *resv = dma_buf->resv;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_bo *bo;
struct amdgpu_bo_param bp;
int ret;
 
	memset(&bp, 0, sizeof(bp));
-   bp.size = attach->dmabuf->size;
+   bp.size = dma_buf->size;
bp.byte_align = PAGE_SIZE;
bp.domain = AMDGPU_GEM_DOMAIN_CPU;
bp.flags = 0;
@@ -455,11 +452,9 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
if (ret)
goto error;
 
-   bo->tbo.sg = sg;
-   bo->tbo.ttm->sg = sg;
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
-	if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
+	if (dma_buf->ops != &amdgpu_dmabuf_ops)
bo->prime_shared_count = 1;
 
	ww_mutex_unlock(&resv->lock);
@@ -484,6 +479,7 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf)
 {
+   struct dma_buf_attachment *attach;
struct drm_gem_object *obj;
 
	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
@@ -498,5 +494,17 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct 
drm_device *dev,
}
}
 
-   return drm_gem_prime_import(dev, dma_buf);
+   obj = amdgpu_dma_buf_create_obj(dev, dma_buf);
+   if (IS_ERR(obj))
+   return obj;
+
+   attach = dma_buf_attach(dma_buf, dev->dev);
+   if (IS_ERR(attach)) {
+   drm_gem_object_put(obj);
+   return ERR_CAST(attach);
+   }
+
+   get_dma_buf(dma_buf);
+   obj->import_attach = attach;
+   return obj;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
index f1522292814c..2765413770b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -25,10 +25,6 @@
 
 #include 
 
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
-struct dma_buf_attachment *attach,
-struct sg_table *sg);
 struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 70c5cd2d5fb4..47ff2981d877 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1321,7 +1321,6 @@ static struct drm_driver kms_driver = {
.gem_prime_export = amdgpu_gem_prime_export,
.gem_prime_import = amdgpu_gem_prime_import,
.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
-   .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
.gem_prime_vmap = amdgpu_gem_prime_vmap,

[PATCH 1/9] dma-buf: start caching of sg_table objects

2019-05-07 Thread Christian König
To allow a smooth transition from pinning buffer objects to dynamic
invalidation we first start to cache the sg_table for an attachment.

Signed-off-by: Christian König 
---
 drivers/dma-buf/dma-buf.c | 24 
 include/linux/dma-buf.h   | 14 ++
 2 files changed, 38 insertions(+)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 7c858020d14b..775e13f54083 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -573,6 +573,20 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf 
*dmabuf,
	list_add(&attach->node, &dmabuf->attachments);
 
	mutex_unlock(&dmabuf->lock);
+
+   if (!dma_buf_is_dynamic(dmabuf)) {
+   struct sg_table *sgt;
+
+   sgt = dmabuf->ops->map_dma_buf(attach, DMA_BIDIRECTIONAL);
+   if (!sgt)
+   sgt = ERR_PTR(-ENOMEM);
+   if (IS_ERR(sgt)) {
+   dma_buf_detach(dmabuf, attach);
+   return ERR_CAST(sgt);
+   }
+   attach->sgt = sgt;
+   }
+
return attach;
 
 err_attach:
@@ -595,6 +609,10 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct 
dma_buf_attachment *attach)
if (WARN_ON(!dmabuf || !attach))
return;
 
+   if (attach->sgt)
+   dmabuf->ops->unmap_dma_buf(attach, attach->sgt,
+  DMA_BIDIRECTIONAL);
+
	mutex_lock(&dmabuf->lock);
	list_del(&attach->node);
if (dmabuf->ops->detach)
@@ -630,6 +648,9 @@ struct sg_table *dma_buf_map_attachment(struct 
dma_buf_attachment *attach,
if (WARN_ON(!attach || !attach->dmabuf))
return ERR_PTR(-EINVAL);
 
+   if (attach->sgt)
+   return attach->sgt;
+
sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
if (!sg_table)
sg_table = ERR_PTR(-ENOMEM);
@@ -657,6 +678,9 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment 
*attach,
if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
return;
 
+   if (attach->sgt == sg_table)
+   return;
+
attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
direction);
 }
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 58725f890b5b..52031fdc75bb 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -322,6 +322,7 @@ struct dma_buf_attachment {
struct dma_buf *dmabuf;
struct device *dev;
struct list_head node;
+   struct sg_table *sgt;
void *priv;
 };
 
@@ -373,6 +374,19 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
get_file(dmabuf->file);
 }
 
+/**
+ * dma_buf_is_dynamic - check if a DMA-buf uses dynamic mappings.
+ * @dmabuf: the DMA-buf to check
+ *
+ * Returns true if a DMA-buf exporter wants to create dynamic sg table mappings
+ * for each attachment. False if only a single static sg table should be used.
+ */
+static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf)
+{
+   /* Always use a static mapping for now */
+   return false;
+}
+
 struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
struct device *dev);
 void dma_buf_detach(struct dma_buf *dmabuf,
-- 
2.17.1
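
A hedged sketch of the resulting lifecycle for a static (non-dynamic)
exporter under this patch, using only the calls shown above:

/* With caching, the real map happens once, at attach time: */
attach = dma_buf_attach(dmabuf, dev);	/* ops->map_dma_buf() runs here */
sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); /* returns attach->sgt */
dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL); /* no-op for the cache */
dma_buf_detach(dmabuf, attach);		/* ops->unmap_dma_buf() runs here */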

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 9/9] drm/amdgpu: add DMA-buf invalidation callback v2

2019-05-07 Thread Christian König
Allow for invalidation of imported DMA-bufs.

v2: add dma_buf_pin/dma_buf_unpin support

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 29 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  6 +
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 0f4fe21d1b2b..63dedc93c3d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -465,6 +465,32 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct 
dma_buf *dma_buf)
return ERR_PTR(ret);
 }
 
+/**
+ * amdgpu_gem_prime_invalidate_mappings - &dma_buf_attach_ops.invalidate implementation
+ *
+ * @attach: the DMA-buf attachment
+ *
+ * Invalidate the DMA-buf attachment, making sure that we re-create the
+ * mapping before the next use.
+ */
+static void
+amdgpu_gem_prime_invalidate_mappings(struct dma_buf_attachment *attach)
+{
+   struct ttm_operation_ctx ctx = { false, false };
+   struct drm_gem_object *obj = attach->importer_priv;
+   struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+   struct ttm_placement placement = {};
+   int r;
+
+	r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
+   if (r)
+   DRM_ERROR("Failed to invalidate DMA-buf import (%d))\n", r);
+}
+
+static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops = {
+   .invalidate = amdgpu_gem_prime_invalidate_mappings
+};
+
 /**
 * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
  * @dev: DRM device
@@ -498,7 +524,8 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct 
drm_device *dev,
if (IS_ERR(obj))
return obj;
 
-   attach = dma_buf_attach(dma_buf, dev->dev);
+   attach = dma_buf_dynamic_attach(dma_buf, dev->dev,
+					&amdgpu_dma_buf_attach_ops, obj);
if (IS_ERR(attach)) {
drm_gem_object_put(obj);
return ERR_CAST(attach);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d26e2f0b88d2..cf01da083c77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -850,6 +850,9 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 
domain,
return 0;
}
 
+   if (bo->gem_base.import_attach)
+   dma_buf_pin(bo->gem_base.import_attach);
+
bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
/* force to pin into visible video ram */
if (!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS))
@@ -933,6 +936,9 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
 
amdgpu_bo_subtract_pin_size(bo);
 
+   if (bo->gem_base.import_attach)
+   dma_buf_unpin(bo->gem_base.import_attach);
+
for (i = 0; i < bo->placement.num_placement; i++) {
bo->placements[i].lpfn = 0;
bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
-- 
2.17.1
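
For completeness, a hedged sketch of the exporter side that would drive
this callback; the loop shape is an assumption, only the importer_ops and
invalidate fields come from this series:

/* Exporter about to move a buffer: notify every dynamic importer,
 * with the reservation object held as the callback contract demands. */
reservation_object_lock(dmabuf->resv, NULL);
list_for_each_entry(attach, &dmabuf->attachments, node)
	if (attach->importer_ops && attach->importer_ops->invalidate)
		attach->importer_ops->invalidate(attach);
reservation_object_unlock(dmabuf->resv);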

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 7/9] drm/amdgpu: add independent DMA-buf export v3

2019-05-07 Thread Christian König
The caching of SGTs is actually quite harmful and should probably be removed
altogether once all drivers are audited.

Start by providing a separate DMA-buf export implementation in amdgpu. This is
also a prerequisite of unpinned DMA-buf handling.

v2: fix unintended recursion, remove debugging leftovers
v3: split out from unpinned DMA-buf work
v4: rebase on top of new no_sgt_cache flag

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 204 
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h |   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |   5 +
 4 files changed, 133 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 4711cf1b5bd2..68a071060793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -40,22 +40,6 @@
 #include 
 #include 
 
-/**
- * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
- * implementation
- * @obj: GEM buffer object (BO)
- *
- * Returns:
- * A scatter/gather table for the pinned pages of the BO's memory.
- */
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
-{
-   struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-   int npages = bo->tbo.num_pages;
-
-   return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);
-}
-
 /**
 * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
  * @obj: GEM BO
@@ -181,92 +165,158 @@ __reservation_object_make_exclusive(struct 
reservation_object *obj)
 }
 
 /**
- * amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation
- * @dma_buf: Shared DMA buffer
+ * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
+ *
+ * @dmabuf: DMA-buf where we attach to
+ * @attach: attachment to add
+ *
+ * Add the attachment as user to the exported DMA-buf.
+ */
+static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
+struct dma_buf_attachment *attach)
+{
+   struct drm_gem_object *obj = dmabuf->priv;
+   struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+   int r;
+
+   if (attach->dev->driver == adev->dev->driver)
+   return 0;
+
+   /*
+* We only create shared fences for internal use, but importers
+* of the dmabuf rely on exclusive fences for implicitly
+* tracking write hazards. As any of the current fences may
+* correspond to a write, we need to convert all existing
+* fences on the reservation object into a single exclusive
+* fence.
+*/
+   r = __reservation_object_make_exclusive(bo->tbo.resv);
+   if (r)
+   return r;
+
+   bo->prime_shared_count++;
+   return 0;
+}
+
+/**
+ * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
+ *
+ * @dmabuf: DMA-buf where we remove the attachment from
+ * @attach: the attachment to remove
+ *
+ * Called when an attachment is removed from the DMA-buf.
+ */
+static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
+{
+   struct drm_gem_object *obj = dmabuf->priv;
+   struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+   if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
+   bo->prime_shared_count--;
+}
+
+/**
+ * amdgpu_dma_buf_pin - &dma_buf_ops.pin implementation
+ *
+ * @attach: attachment to pin down
+ *
+ * Pin the BO which is backing the DMA-buf so that it can't move any more.
+ */
+static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
+{
+   struct drm_gem_object *obj = attach->dmabuf->priv;
+   struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+   /* pin buffer into GTT */
+   return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+}
+
+/**
+ * amdgpu_dma_buf_unpin - &dma_buf_ops.unpin implementation
+ *
+ * @attach: attachment to unpin
+ *
+ * Unpin a previously pinned BO to make it movable again.
+ */
+static void amdgpu_dma_buf_unpin(struct dma_buf_attachment *attach)
+{
+   struct drm_gem_object *obj = attach->dmabuf->priv;
+   struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+   amdgpu_bo_unpin(bo);
+}
+
+/**
+ * amdgpu_dma_buf_map_dma_buf - &dma_buf_ops.map_dma_buf implementation
  * @attach: DMA-buf attachment
+ * @dir: DMA direction
  *
  * Makes sure that the shared DMA buffer can be accessed by the target device.
  * For now, simply pins it to the GTT domain, where it should be accessible by
  * all DMA devices.
  *
  * Returns:
- * 0 on success or a negative error code on failure.
+ * sg_table filled with the DMA addresses to use or ERR_PTR with negative error
+ * code.
  */
-static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,
-struct dma_buf_attachment 

[PATCH 5/9] drm/ttm: use the parent resv for ghost objects

2019-05-07 Thread Christian König
This way we can even pipeline imported BO evictions.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 18 +-
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 895d77d799e4..97f35c4bda35 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -486,7 +486,6 @@ static int ttm_buffer_object_transfer(struct 
ttm_buffer_object *bo,
  struct ttm_buffer_object **new_obj)
 {
struct ttm_transfer_obj *fbo;
-   int ret;
 
fbo = kmalloc(sizeof(*fbo), GFP_KERNEL);
if (!fbo)
@@ -517,10 +516,7 @@ static int ttm_buffer_object_transfer(struct 
ttm_buffer_object *bo,
kref_init(>base.kref);
	fbo->base.destroy = &ttm_transfered_destroy;
fbo->base.acc_size = 0;
-	fbo->base.resv = &fbo->base.ttm_resv;
-	reservation_object_init(fbo->base.resv);
-	ret = reservation_object_trylock(fbo->base.resv);
-	WARN_ON(!ret);
+	reservation_object_init(&fbo->base.ttm_resv);
 
	*new_obj = &fbo->base;
return 0;
@@ -716,8 +712,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
if (ret)
return ret;
 
-   reservation_object_add_excl_fence(ghost_obj->resv, fence);
-
/**
 * If we're not moving to fixed memory, the TTM object
 * needs to stay alive. Otherwhise hang it on the ghost
@@ -729,7 +723,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
else
bo->ttm = NULL;
 
-   ttm_bo_unreserve(ghost_obj);
ttm_bo_put(ghost_obj);
}
 
@@ -772,8 +765,6 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
if (ret)
return ret;
 
-   reservation_object_add_excl_fence(ghost_obj->resv, fence);
-
/**
 * If we're not moving to fixed memory, the TTM object
 * needs to stay alive. Otherwhise hang it on the ghost
@@ -785,7 +776,6 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
else
bo->ttm = NULL;
 
-   ttm_bo_unreserve(ghost_obj);
ttm_bo_put(ghost_obj);
 
} else if (from->flags & TTM_MEMTYPE_FLAG_FIXED) {
@@ -841,16 +831,10 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
if (ret)
return ret;
 
-   ret = reservation_object_copy_fences(ghost->resv, bo->resv);
-   /* Last resort, wait for the BO to be idle when we are OOM */
-   if (ret)
-   ttm_bo_wait(bo, false, false);
-
	memset(&bo->mem, 0, sizeof(bo->mem));
bo->mem.mem_type = TTM_PL_SYSTEM;
bo->ttm = NULL;
 
-   ttm_bo_unreserve(ghost);
ttm_bo_put(ghost);
 
return 0;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: treat negative lockup timeout as 'infinite timeout' V2

2019-05-07 Thread Christian König

On 07.05.19 at 03:47, Evan Quan wrote:

Negative lockup timeout is valid and will be treated as
'infinite timeout'.

- V2: use msecs_to_jiffies for negative values

Change-Id: I0d8387956a9c744073c0281ef2e1a547d4f16dec
Signed-off-by: Evan Quan 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 20 
  1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index c5fba79c3660..bcd59ba07bb0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -237,13 +237,14 @@ module_param_named(msi, amdgpu_msi, int, 0444);
   * Set GPU scheduler timeout value in ms.
   *
   * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there 
can be one or
- * multiple values specified. 0 and negative values are invalidated. They will 
be adjusted
- * to default timeout.
+ * multiple values specified.
   *  - With one value specified, the setting will apply to all non-compute 
jobs.
   *  - With multiple values specified, the first one will be for GFX. The 
second one is for Compute.
   *And the third and fourth ones are for SDMA and Video.
   * By default(with no lockup_timeout settings), the timeout for all 
non-compute(GFX, SDMA and Video)
   * jobs is 10000. And there is no timeout enforced on compute jobs.
+ * Value 0 is invalid and will be adjusted to the default timeout settings.
+ * Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET).
   */
  MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for 
non-compute jobs and no timeout for compute jobs), "
"format is [Non-Compute] or [GFX,Compute,SDMA,Video]");
@@ -1339,24 +1340,27 @@ int amdgpu_device_get_job_timeout_settings(struct 
amdgpu_device *adev)
if (ret)
return ret;
  
-			/* Invalidate 0 and negative values */

-   if (timeout <= 0) {
+   /*
+* Value 0 will be adjusted to default timeout settings.
+* Negative values mean 'infinite timeout' 
(MAX_JIFFY_OFFSET).
+*/
+   if (!timeout) {
index++;
continue;
}
  
  			switch (index++) {

case 0:
-   adev->gfx_timeout = timeout;
+   adev->gfx_timeout = msecs_to_jiffies(timeout);
break;
case 1:
-   adev->compute_timeout = timeout;
+   adev->compute_timeout = 
msecs_to_jiffies(timeout);
break;
case 2:
-   adev->sdma_timeout = timeout;
+   adev->sdma_timeout = msecs_to_jiffies(timeout);
break;
case 3:
-   adev->video_timeout = timeout;
+   adev->video_timeout = msecs_to_jiffies(timeout);


Maybe move the msecs_to_jiffies() call before the switch to add it only 
once.


Apart from that looks good to me,
Christian.


break;
default:
break;
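
A hedged sketch of the restructuring suggested above (illustrative, not
the committed code):

/* convert once; msecs_to_jiffies() maps negative values to
 * MAX_JIFFY_OFFSET, i.e. 'infinite timeout' */
timeout = msecs_to_jiffies(timeout);
switch (index++) {
case 0:
	adev->gfx_timeout = timeout;
	break;
case 1:
	adev->compute_timeout = timeout;
	break;
case 2:
	adev->sdma_timeout = timeout;
	break;
case 3:
	adev->video_timeout = timeout;
	break;
default:
	break;
}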


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 0/2] Skip IH re-route on Vega SR-IOV

2019-05-07 Thread Christian König
We intentionally didn't do this to make sure that the commands are 
ignored by the PSP firmware.


I have no strong opinion on if we should do this or not, but the PSP 
firmware guys might have.


Christian.

On 07.05.19 at 06:08, Trigger Huang wrote:

IH re-route is not supported on Vega SR-IOV and needs to be skipped

Trigger Huang (2):
   drm/amdgpu: Skip IH reroute in Vega10 SR-IOV VF
   drm/amdgpu: Skip IH reroute in Vega20 SR-IOV VF

  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 4 
  drivers/gpu/drm/amd/amdgpu/psp_v3_1.c  | 4 
  2 files changed, 8 insertions(+)



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v15 13/17] IB, arm64: untag user pointers in ib_uverbs_(re)reg_mr()

2019-05-07 Thread Leon Romanovsky
On Mon, May 06, 2019 at 04:50:20PM -0300, Jason Gunthorpe wrote:
> On Mon, May 06, 2019 at 06:30:59PM +0200, Andrey Konovalov wrote:
> > This patch is a part of a series that extends arm64 kernel ABI to allow to
> > pass tagged user pointers (with the top byte set to something else other
> > than 0x00) as syscall arguments.
> >
> > ib_uverbs_(re)reg_mr() use provided user pointers for vma lookups (through
> > e.g. mlx4_get_umem_mr()), which can only be done with untagged pointers.
> >
> > Untag user pointers in these functions.
> >
> > Signed-off-by: Andrey Konovalov 
> > ---
> >  drivers/infiniband/core/uverbs_cmd.c | 4 
> >  1 file changed, 4 insertions(+)
>
> I think this is OK.. We should really get it tested though.. Leon?

It can be done after v5.2-rc1.

Thanks

>
> Jason
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: Bug Report: [PowerPlay] MCLK can't be set above 1107MHz on Vega 64

2019-05-07 Thread Quan, Evan
Hi Yanik,

I just sent out several patches (with you in the CC list), and I believe the 1st 
patch may fix your issue (raising SOCCLK with MCLK).

Regards,
Evan
From: Yanik Yiannakis 
Sent: May 6, 2019 18:56
To: Quan, Evan ; amd-gfx@lists.freedesktop.org; Deucher, 
Alexander 
Subject: Re: Bug Report: [PowerPlay] MCLK can't be set above 1107MHz on Vega 64

[CAUTION: External Email]

Hello Evan,

Yes, I always used that command to commit my changes. I also have 
amdgpu.ppfeaturemask=0x as a boot parameter, and I set 
power_dpm_force_performance_level to manual. Sorry for omitting that; I assumed 
it was evident.

I have heard that the MCLK can only be as high as the SOCCLK. That would make 
sense because the SOCCLK of my Vega 64 is 1107MHz in its highest state. I 
noticed that on Windows the SOCCLK is raised automatically if the user sets the 
MCLK high enough through Wattman.

To replicate this on Linux I manually edited the pp_table to change the MCLK to 
1175MHz and the SOCCLK to 1180MHz. The new SOCCLK was displayed in 
pp_dpm_socclk and in Unigine Superposition the FPS increased as expected 
(compared to an MCLK of 1107MHz). As a final test I edited the pp_table to set 
the MCLK to 1220MHz (this was unstable on Windows) and the SOCCLK to 1250MHz. 
This resulted in a crash (just like on Windows) which indicates that the MCLK 
really was set to 1220MHz.

My understanding of the situation is that powerplay doesn't automatically raise 
the SOCCLK like Wattman.
It would be cool if the user had the ability to overclock the SOCCLK through 
powerplay.

Greetings,
Yanik


On 06.05.19 10:13, Quan, Evan wrote:
+Alex,

Hi Yanik,

Did you ever run the following command to let your OD settings take effect 
(before running games)? Otherwise, they did not take effect actually.
echo "c" > 
/sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:00.0/0000:03:00.0/pp_od_clk_voltage

Regards,
Evan
From: Yanik Yiannakis 
Sent: Monday, April 29, 2019 7:44 AM
To: rex@amd.com; Quan, Evan 
; 
amd-gfx@lists.freedesktop.org
Subject: Bug Report: [PowerPlay] MCLK can't be set above 1107MHz on Vega 64


Hello,

I experience a bug that prevents me from setting the MCLK of my Vega 64 LC 
above 1107MHz.

I am using Unigine Superposition 1.1 in "Game"-mode to check the performance by 
watching the FPS.


Behaviour with a single monitor:

First I set the MCLK to a known stable value below 1108MHz:

$ echo "m 3 1100 950" > 
/sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:00.0/0000:03:00.0/pp_od_clk_voltage

In Unigine Superposition the FPS increase as expected.

pp_dpm_mclk also confirms the change.

$ watch cat 
/sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:00.0/0000:03:00.0/pp_dpm_mclk

0: 167Mhz
1: 500Mhz
2: 800Mhz
3: 1100Mhz *



After that I set the MCLK to a stable value above 1107MHz:

$ echo "m 3 1200 950" > 
/sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:00.0/0000:03:00.0/pp_od_clk_voltage

In Unigine Superposition the FPS drop drastically.

pp_dpm_mclk indicates that the MCLK is stuck in state 0 (167MHz):

0: 167Mhz *
1: 500Mhz
2: 800Mhz
3: 1200Mhz



Behaviour with multiple monitors that have different refresh rates:

My monitors have different refresh rates. This causes the MCLK to stay in state 
3 (945MHz stock), which is the expected behaviour as I understand it.



Now I try to set the MCLK to a value above 1107MHz:

$ echo "m 3 1200 950" > 
/sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:00.0/0000:03:00.0/pp_od_clk_voltage

The FPS in Unigine Superposition remain the same as they were with 945MHz.

pp_dpm_mclk shows however that the value was set:

0: 167Mhz
1: 500Mhz
2: 800Mhz
3: 1200Mhz *



Then I set the MCLK to a value of 1107MHz or lower:

$ echo "m 3 1100 950" > 
/sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:00.0/0000:03:00.0/pp_od_clk_voltage

The FPS in Unigine Superposition increase.

pp_dpm_mclk again confirms the set value:

0: 167Mhz
1: 500Mhz
2: 800Mhz
3: 1100Mhz *


Finally I increase MCLK to a known unstable value:

$ echo "m 3 1300 950" > 
/sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:00.0/0000:03:00.0/pp_od_clk_voltage

The FPS in Unigine Superposition remain the same. I therefore believe the value 
was not actually applied.

However pp_dpm_mclk shows that it was:

0: 167Mhz
1: 500Mhz
2: 800Mhz
3: 1300Mhz *



amdgpu_pm_info also claims that the value was set:

$ sudo watch cat /sys/kernel/debug/dri/1/amdgpu_pm_info

GFX Clocks and Power:
1300 MHz (MCLK)
27 MHz (SCLK)
1348 MHz (PSTATE_SCLK)
800 MHz (PSTATE_MCLK)
825 mV (VDDGFX)
4.0 W (average GPU)

Again, I think the displayed MCLK is false and the memory still runs at 1100MHz 
because the performance in Unigine Superposition indicates this and 1300MHz 
would cause a crash immediately.

A stable value (e.g. 1200MHz) causes 

[PATCH 1/4] drm/amd/powerplay: fix Vega10 mclk/socclk voltage link setup

2019-05-07 Thread Evan Quan
This may affect the Vega10 MCLK OD functionality.

Change-Id: Icd685187501b4ec8867fb3c5077ea2664edbd114
Signed-off-by: Evan Quan 
---
 .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c| 35 +--
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index d5d0db456021..138f9f9ea765 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -5016,7 +5016,7 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr 
*hwmgr,
struct vega10_hwmgr *data = hwmgr->backend;
struct phm_ppt_v2_information *table_info = hwmgr->pptable;
struct phm_ppt_v1_clock_voltage_dependency_table *dep_table = 
table_info->vdd_dep_on_socclk;
-	struct vega10_single_dpm_table *dpm_table = 
&data->golden_dpm_table.soc_table;
+	struct vega10_single_dpm_table *dpm_table = 
&data->golden_dpm_table.mem_table;
 
struct vega10_odn_clock_voltage_dependency_table 
*podn_vdd_dep_on_socclk =

			&data->odn_dpm_table.vdd_dep_on_socclk;
@@ -5040,7 +5040,8 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr 
*hwmgr,
break;
}
if (j == od_vddc_lookup_table->count) {
-   od_vddc_lookup_table->entries[j-1].us_vdd =
+   j = od_vddc_lookup_table->count - 1;
+   od_vddc_lookup_table->entries[j].us_vdd =
podn_vdd_dep->entries[i].vddc;
data->need_update_dpm_table |= 
DPMTABLE_OD_UPDATE_VDDC;
}
@@ -5048,23 +5049,35 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr 
*hwmgr,
}
	dpm_table = &data->dpm_table.soc_table;
for (i = 0; i < dep_table->count; i++) {
-   if (dep_table->entries[i].vddInd == 
podn_vdd_dep->entries[dep_table->count-1].vddInd &&
-   dep_table->entries[i].clk < 
podn_vdd_dep->entries[dep_table->count-1].clk) {
+   if (dep_table->entries[i].vddInd == 
podn_vdd_dep->entries[podn_vdd_dep->count-1].vddInd &&
+   dep_table->entries[i].clk < 
podn_vdd_dep->entries[podn_vdd_dep->count-1].clk) {
data->need_update_dpm_table |= 
DPMTABLE_UPDATE_SOCCLK;
-   podn_vdd_dep_on_socclk->entries[i].clk = 
podn_vdd_dep->entries[dep_table->count-1].clk;
-   dpm_table->dpm_levels[i].value = 
podn_vdd_dep_on_socclk->entries[i].clk;
+   for (; (i < dep_table->count) &&
+  (dep_table->entries[i].clk < 
podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk); i++) {
+   podn_vdd_dep_on_socclk->entries[i].clk 
= podn_vdd_dep->entries[podn_vdd_dep->count-1].clk;
+   dpm_table->dpm_levels[i].value = 
podn_vdd_dep_on_socclk->entries[i].clk;
+   }
+   break;
+   } else {
+   dpm_table->dpm_levels[i].value = 
dep_table->entries[i].clk;
+   podn_vdd_dep_on_socclk->entries[i].vddc = 
dep_table->entries[i].vddc;
+   podn_vdd_dep_on_socclk->entries[i].vddInd = 
dep_table->entries[i].vddInd;
+   podn_vdd_dep_on_socclk->entries[i].clk = 
dep_table->entries[i].clk;
}
}
if 
(podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk <
-   
podn_vdd_dep->entries[dep_table->count-1].clk) {
+   
podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk) {
data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK;
-   
podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk = 
podn_vdd_dep->entries[dep_table->count-1].clk;
-   dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 
1].value = podn_vdd_dep->entries[dep_table->count-1].clk;
+   
podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk =
+   podn_vdd_dep->entries[podn_vdd_dep->count - 
1].clk;
+   dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 
1].value =
+   podn_vdd_dep->entries[podn_vdd_dep->count - 
1].clk;
}
if 
(podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd <
-   

[PATCH 3/4] drm/amd/powerplay: avoid repeat AVFS enablement/disablement

2019-05-07 Thread Evan Quan
No need to enable or disable AVFS if it's already in the wanted
state.

Change-Id: I862c0c3d642e6a0dc7bb34e04c5a59f17b6b8deb
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 103f7e3f0783..f4b81f50b185 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -2364,6 +2364,10 @@ static int vega10_avfs_enable(struct pp_hwmgr *hwmgr, 
bool enable)
struct vega10_hwmgr *data = hwmgr->backend;
 
if (data->smu_features[GNLD_AVFS].supported) {
+   /* Already enabled or disabled */
+   if (!(enable ^ data->smu_features[GNLD_AVFS].enabled))
+   return 0;
+
if (enable) {
PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr,
true,
-- 
2.21.0
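
The XOR guard is compact; spelled out as a hedged equivalent (pure
illustration):

/* For booleans, !(enable ^ enabled) is just equality: */
if (enable == data->smu_features[GNLD_AVFS].enabled)
	return 0;	/* already in the wanted state, nothing to do */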

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/4] drm/amd/powerplay: valid Vega10 DPMTABLE_OD_UPDATE_VDDC settings

2019-05-07 Thread Evan Quan
With a user-specified voltage (DPMTABLE_OD_UPDATE_VDDC), AVFS
will be disabled. However, the buggy code keeps this from actually
working as expected.

Change-Id: Ifa83a6255bb3f6fa4bdb4de616521cb7bab6830a
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 138f9f9ea765..103f7e3f0783 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -2466,11 +2466,6 @@ static void vega10_check_dpm_table_updated(struct 
pp_hwmgr *hwmgr)
return;
}
}
-
-   if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
-   data->need_update_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC;
-   data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | 
DPMTABLE_OD_UPDATE_MCLK;
-   }
 }
 
 /**
@@ -3683,7 +3678,7 @@ static int vega10_set_power_state_tasks(struct pp_hwmgr 
*hwmgr,
 
vega10_update_avfs(hwmgr);
 
-   data->need_update_dpm_table &= DPMTABLE_OD_UPDATE_VDDC;
+   data->need_update_dpm_table = 0;
 
return 0;
 }
-- 
2.21.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 4/4] drm/amd/powerplay: update Vega10 power state on OD

2019-05-07 Thread Evan Quan
Update the Vega10 top performance level power state accordingly
on OD.

Change-Id: Iaadeefb2904222bf5f4d54b39d7179ce53f92ac0
Signed-off-by: Evan Quan 
---
 .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c| 59 +++
 1 file changed, 59 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index f4b81f50b185..4878938ecf33 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -5009,6 +5009,63 @@ static bool vega10_check_clk_voltage_valid(struct 
pp_hwmgr *hwmgr,
return true;
 }
 
+static void vega10_odn_update_power_state(struct pp_hwmgr *hwmgr)
+{
+   struct vega10_hwmgr *data = hwmgr->backend;
+   struct pp_power_state *ps = hwmgr->request_ps;
+   struct vega10_power_state *vega10_ps;
+   struct vega10_single_dpm_table *gfx_dpm_table =
+		&data->dpm_table.gfx_table;
+	struct vega10_single_dpm_table *soc_dpm_table =
+		&data->dpm_table.soc_table;
+	struct vega10_single_dpm_table *mem_dpm_table =
+		&data->dpm_table.mem_table;
+   int max_level;
+
+   if (!ps)
+   return;
+
+	vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
+   max_level = vega10_ps->performance_level_count - 1;
+
+   if (vega10_ps->performance_levels[max_level].gfx_clock !=
+   gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value)
+   vega10_ps->performance_levels[max_level].gfx_clock =
+   gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 
1].value;
+
+   if (vega10_ps->performance_levels[max_level].soc_clock !=
+   soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value)
+   vega10_ps->performance_levels[max_level].soc_clock =
+   soc_dpm_table->dpm_levels[soc_dpm_table->count - 
1].value;
+
+   if (vega10_ps->performance_levels[max_level].mem_clock !=
+   mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value)
+   vega10_ps->performance_levels[max_level].mem_clock =
+   mem_dpm_table->dpm_levels[mem_dpm_table->count - 
1].value;
+
+   if (!hwmgr->ps)
+   return;
+
+   ps = (struct pp_power_state *)((unsigned long)(hwmgr->ps) + 
hwmgr->ps_size * (hwmgr->num_ps - 1));
+	vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
+   max_level = vega10_ps->performance_level_count - 1;
+
+   if (vega10_ps->performance_levels[max_level].gfx_clock !=
+   gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value)
+   vega10_ps->performance_levels[max_level].gfx_clock =
+   gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 
1].value;
+
+   if (vega10_ps->performance_levels[max_level].soc_clock !=
+   soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value)
+   vega10_ps->performance_levels[max_level].soc_clock =
+   soc_dpm_table->dpm_levels[soc_dpm_table->count - 
1].value;
+
+   if (vega10_ps->performance_levels[max_level].mem_clock !=
+   mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value)
+   vega10_ps->performance_levels[max_level].mem_clock =
+   mem_dpm_table->dpm_levels[mem_dpm_table->count - 
1].value;
+}
+
 static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,
enum PP_OD_DPM_TABLE_COMMAND 
type)
 {
@@ -5079,6 +5136,7 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr 
*hwmgr,
podn_vdd_dep->entries[podn_vdd_dep->count - 
1].vddInd;
}
}
+   vega10_odn_update_power_state(hwmgr);
 }
 
 static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
@@ -5113,6 +5171,7 @@ static int vega10_odn_edit_dpm_table(struct pp_hwmgr 
*hwmgr,
} else if (PP_OD_RESTORE_DEFAULT_TABLE == type) {
memcpy(&(data->dpm_table), &(data->golden_dpm_table), 
sizeof(struct vega10_dpm_table));
vega10_odn_initial_default_setting(hwmgr);
+   vega10_odn_update_power_state(hwmgr);
return 0;
} else if (PP_OD_COMMIT_DPM_TABLE == type) {
vega10_check_dpm_table_updated(hwmgr);
-- 
2.21.0
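
The cast-and-offset expression above indexes a flat array of
variable-sized power states; a hedged helper that states the intent
(illustrative only, not part of the patch):

/* hwmgr->ps holds num_ps entries of ps_size bytes each; return entry i. */
static struct pp_power_state *example_ps_at(struct pp_hwmgr *hwmgr,
					    unsigned int i)
{
	return (struct pp_power_state *)
		((unsigned long)hwmgr->ps + (unsigned long)hwmgr->ps_size * i);
}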

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx