[PATCH] drm/amd/pp: Add #ifdef checks for CONFIG_ACPI

2018-03-05 Thread Rex Zhu
Fix compile error when CONFIG_ACPI is not enabled.

Change-Id: I20d3f55bbd2ad68e65363cde7452802b063643f0
Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c  | 2 ++
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
index 8a37c80..aa83114 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
@@ -902,8 +902,10 @@ void hwmgr_init_default_caps(struct pp_hwmgr *hwmgr)
phm_cap_set(hwmgr->platform_descriptor.platformCaps, 
PHM_PlatformCaps_UVDDPM);
phm_cap_set(hwmgr->platform_descriptor.platformCaps, 
PHM_PlatformCaps_VCEDPM);
 
+#if defined(CONFIG_ACPI)
if (amdgpu_acpi_is_pcie_performance_request_supported(hwmgr->adev))
phm_cap_set(hwmgr->platform_descriptor.platformCaps, 
PHM_PlatformCaps_PCIEPerformanceRequest);
+#endif
 
phm_cap_set(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_DynamicPatchPowerState);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 62ebe15..4f26014 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -3596,6 +3596,7 @@ static int 
smu7_request_link_speed_change_before_state_change(
 
if (target_link_speed > current_link_speed) {
switch (target_link_speed) {
+#ifdef CONFIG_ACPI
case PP_PCIEGen3:
if (0 == 
amdgpu_acpi_pcie_performance_request(hwmgr->adev, PCIE_PERF_REQ_GEN3, false))
break;
@@ -3605,6 +3606,7 @@ static int 
smu7_request_link_speed_change_before_state_change(
case PP_PCIEGen2:
if (0 == 
amdgpu_acpi_pcie_performance_request(hwmgr->adev, PCIE_PERF_REQ_GEN2, false))
break;
+#endif
default:
data->force_pcie_gen = 
smu7_get_current_pcie_speed(hwmgr);
break;
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/pp: Drop wrapper functions for upper/lower_32_bits

2018-03-05 Thread Alex Deucher
On Tue, Mar 6, 2018 at 12:46 AM, Rex Zhu  wrote:
> replace smu_upper_32_bits/smu_lower_32_bits with
> the standard kernel macros
>
> Change-Id: I89dec914c716d6dd74f39f3eb63d875f750d460e
> Signed-off-by: Rex Zhu 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/powerplay/inc/smumgr.h |  5 -
>  drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c   | 24 
> +++---
>  drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c   |  8 
>  drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c | 12 +--
>  .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c   | 12 +--
>  5 files changed, 28 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h 
> b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
> index 8872c5c..9bba0a0 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
> @@ -26,11 +26,6 @@
>  #include "amd_powerplay.h"
>  #include "hwmgr.h"
>
> -#define smu_lower_32_bits(n) ((uint32_t)(n))
> -#define smu_upper_32_bits(n) ((uint32_t)(((n)>>16)>>16))
> -
> -
> -
>  enum AVFS_BTC_STATUS {
> AVFS_BTC_BOOT = 0,
> AVFS_BTC_BOOT_STARTEDSMU,
> diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c 
> b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
> index df58539..957739a 100644
> --- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
> @@ -204,11 +204,11 @@ static int cz_load_mec_firmware(struct pp_hwmgr *hwmgr)
> tmp = PHM_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, MTYPE, 1);
> cgs_write_register(hwmgr->device, mmCP_CPC_IC_BASE_CNTL, tmp);
>
> -   reg_data = smu_lower_32_bits(info.mc_addr) &
> +   reg_data = lower_32_bits(info.mc_addr) &
> PHM_FIELD_MASK(CP_CPC_IC_BASE_LO, IC_BASE_LO);
> cgs_write_register(hwmgr->device, mmCP_CPC_IC_BASE_LO, reg_data);
>
> -   reg_data = smu_upper_32_bits(info.mc_addr) &
> +   reg_data = upper_32_bits(info.mc_addr) &
> PHM_FIELD_MASK(CP_CPC_IC_BASE_HI, IC_BASE_HI);
> cgs_write_register(hwmgr->device, mmCP_CPC_IC_BASE_HI, reg_data);
>
> @@ -347,8 +347,8 @@ static int cz_smu_populate_single_scratch_task(
> return -EINVAL;
> }
>
> -   task->addr.low = smu_lower_32_bits(cz_smu->scratch_buffer[i].mc_addr);
> -   task->addr.high = 
> smu_upper_32_bits(cz_smu->scratch_buffer[i].mc_addr);
> +   task->addr.low = lower_32_bits(cz_smu->scratch_buffer[i].mc_addr);
> +   task->addr.high = upper_32_bits(cz_smu->scratch_buffer[i].mc_addr);
> task->size_bytes = cz_smu->scratch_buffer[i].data_size;
>
> if (CZ_SCRATCH_ENTRY_DATA_ID_IH_REGISTERS == fw_enum) {
> @@ -384,8 +384,8 @@ static int cz_smu_populate_single_ucode_load_task(
> return -EINVAL;
> }
>
> -   task->addr.low = smu_lower_32_bits(cz_smu->driver_buffer[i].mc_addr);
> -   task->addr.high = smu_upper_32_bits(cz_smu->driver_buffer[i].mc_addr);
> +   task->addr.low = lower_32_bits(cz_smu->driver_buffer[i].mc_addr);
> +   task->addr.high = upper_32_bits(cz_smu->driver_buffer[i].mc_addr);
> task->size_bytes = cz_smu->driver_buffer[i].data_size;
>
> return 0;
> @@ -613,11 +613,11 @@ static int cz_download_pptable_settings(struct pp_hwmgr 
> *hwmgr, void **table)
>
> cz_send_msg_to_smc_with_parameter(hwmgr,
> PPSMC_MSG_SetClkTableAddrHi,
> -   
> smu_upper_32_bits(cz_smu->scratch_buffer[i].mc_addr));
> +   
> upper_32_bits(cz_smu->scratch_buffer[i].mc_addr));
>
> cz_send_msg_to_smc_with_parameter(hwmgr,
> PPSMC_MSG_SetClkTableAddrLo,
> -   
> smu_lower_32_bits(cz_smu->scratch_buffer[i].mc_addr));
> +   
> lower_32_bits(cz_smu->scratch_buffer[i].mc_addr));
>
> cz_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ExecuteJob,
> cz_smu->toc_entry_clock_table);
> @@ -640,11 +640,11 @@ static int cz_upload_pptable_settings(struct pp_hwmgr 
> *hwmgr)
>
> cz_send_msg_to_smc_with_parameter(hwmgr,
> PPSMC_MSG_SetClkTableAddrHi,
> -   
> smu_upper_32_bits(cz_smu->scratch_buffer[i].mc_addr));
> +   
> upper_32_bits(cz_smu->scratch_buffer[i].mc_addr));
>
> cz_send_msg_to_smc_with_parameter(hwmgr,
> PPSMC_MSG_SetClkTableAddrLo,
> -   
> smu_lower_32_bits(cz_smu->scratch_buffer[i].mc_addr));
> +   
> lower_32_bits(cz_smu->scratch_buffer[i].mc_addr));
>
> cz_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ExecuteJob,
> 

[PATCH] drm/amd/pp: Drop wrapper functions for upper/lower_32_bits

2018-03-05 Thread Rex Zhu
replace smu_upper_32_bits/smu_lower_32_bits with
the standard kernel macros

Change-Id: I89dec914c716d6dd74f39f3eb63d875f750d460e
Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/powerplay/inc/smumgr.h |  5 -
 drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c   | 24 +++---
 drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c   |  8 
 drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c | 12 +--
 .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c   | 12 +--
 5 files changed, 28 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h 
b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
index 8872c5c..9bba0a0 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
@@ -26,11 +26,6 @@
 #include "amd_powerplay.h"
 #include "hwmgr.h"
 
-#define smu_lower_32_bits(n) ((uint32_t)(n))
-#define smu_upper_32_bits(n) ((uint32_t)(((n)>>16)>>16))
-
-
-
 enum AVFS_BTC_STATUS {
AVFS_BTC_BOOT = 0,
AVFS_BTC_BOOT_STARTEDSMU,
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
index df58539..957739a 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
@@ -204,11 +204,11 @@ static int cz_load_mec_firmware(struct pp_hwmgr *hwmgr)
tmp = PHM_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, MTYPE, 1);
cgs_write_register(hwmgr->device, mmCP_CPC_IC_BASE_CNTL, tmp);
 
-   reg_data = smu_lower_32_bits(info.mc_addr) &
+   reg_data = lower_32_bits(info.mc_addr) &
PHM_FIELD_MASK(CP_CPC_IC_BASE_LO, IC_BASE_LO);
cgs_write_register(hwmgr->device, mmCP_CPC_IC_BASE_LO, reg_data);
 
-   reg_data = smu_upper_32_bits(info.mc_addr) &
+   reg_data = upper_32_bits(info.mc_addr) &
PHM_FIELD_MASK(CP_CPC_IC_BASE_HI, IC_BASE_HI);
cgs_write_register(hwmgr->device, mmCP_CPC_IC_BASE_HI, reg_data);
 
@@ -347,8 +347,8 @@ static int cz_smu_populate_single_scratch_task(
return -EINVAL;
}
 
-   task->addr.low = smu_lower_32_bits(cz_smu->scratch_buffer[i].mc_addr);
-   task->addr.high = smu_upper_32_bits(cz_smu->scratch_buffer[i].mc_addr);
+   task->addr.low = lower_32_bits(cz_smu->scratch_buffer[i].mc_addr);
+   task->addr.high = upper_32_bits(cz_smu->scratch_buffer[i].mc_addr);
task->size_bytes = cz_smu->scratch_buffer[i].data_size;
 
if (CZ_SCRATCH_ENTRY_DATA_ID_IH_REGISTERS == fw_enum) {
@@ -384,8 +384,8 @@ static int cz_smu_populate_single_ucode_load_task(
return -EINVAL;
}
 
-   task->addr.low = smu_lower_32_bits(cz_smu->driver_buffer[i].mc_addr);
-   task->addr.high = smu_upper_32_bits(cz_smu->driver_buffer[i].mc_addr);
+   task->addr.low = lower_32_bits(cz_smu->driver_buffer[i].mc_addr);
+   task->addr.high = upper_32_bits(cz_smu->driver_buffer[i].mc_addr);
task->size_bytes = cz_smu->driver_buffer[i].data_size;
 
return 0;
@@ -613,11 +613,11 @@ static int cz_download_pptable_settings(struct pp_hwmgr 
*hwmgr, void **table)
 
cz_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetClkTableAddrHi,
-   
smu_upper_32_bits(cz_smu->scratch_buffer[i].mc_addr));
+   
upper_32_bits(cz_smu->scratch_buffer[i].mc_addr));
 
cz_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetClkTableAddrLo,
-   
smu_lower_32_bits(cz_smu->scratch_buffer[i].mc_addr));
+   
lower_32_bits(cz_smu->scratch_buffer[i].mc_addr));
 
cz_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ExecuteJob,
cz_smu->toc_entry_clock_table);
@@ -640,11 +640,11 @@ static int cz_upload_pptable_settings(struct pp_hwmgr 
*hwmgr)
 
cz_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetClkTableAddrHi,
-   
smu_upper_32_bits(cz_smu->scratch_buffer[i].mc_addr));
+   
upper_32_bits(cz_smu->scratch_buffer[i].mc_addr));
 
cz_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetClkTableAddrLo,
-   
smu_lower_32_bits(cz_smu->scratch_buffer[i].mc_addr));
+   
lower_32_bits(cz_smu->scratch_buffer[i].mc_addr));
 
cz_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ExecuteJob,
cz_smu->toc_entry_clock_table);
@@ -675,11 +675,11 @@ static int cz_request_smu_load_fw(struct pp_hwmgr *hwmgr)
 
cz_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DriverDramAddrHi,
-   
smu_upper_32_bits(cz_smu->toc_buffer.mc_addr));
+ 

[PATCH 2/4] drm/amdgpu: refactoring mailbox to fix TDR handshake bugs

2018-03-05 Thread Monk Liu
this patch actually refactors the mailbox implementation, and
all the changes below are needed together to fix all those mailbox
handshake issues exposed by heavy TDR testing.

1)refactor all mailbox functions based on byte accessing for mb_control
reason is to avoid touching non-related bits when writing trn/rcv part of
mailbox_control, this way some incorrect INTR sent to hypervisor
side could be avoided, and it fixes couple handshake bug.

2)trans_msg function re-implemented: put an invalidation
step before transmitting the message to make sure the ACK bit is in
a cleared state, otherwise there is a chance that ACK is already asserted
before transmitting the message, leading to fake ACK polling.
(the hypervisor side has some tricks to work around the ACK bit being corrupted
by VF FLR, which has a side effect that may make the guest-side ACK bit
asserted wrongly), and clear the TRANS_MSG words after the message is transferred.

3)for mailbox_flr_work, it is also re-worked: it takes the mutex lock
first if invoked, to block gpu recovery from participating too early while
the hypervisor side is doing VF FLR. (the hypervisor sends FLR_NOTIFY to the guest
before doing VF FLR and sends FLR_COMPLETE after VF FLR is done, and
the FLR_NOTIFY will trigger an interrupt to the guest which leads to
mailbox_flr_work being invoked)

This can avoid the issue that mailbox trans msg being cleared by its VF FLR.

4)for mailbox_rcv_irq IRQ routine, it should only peek msg and schedule
mailbox_flr_work, instead of ACK to hypervisor itself, because FLR_NOTIFY
msg sent from hypervisor side doesn't need VF's ACK (this is because
VF's ACK would lead to hypervisor clear its trans_valid/msg, and this
would cause handshake bug if trans_valid/msg is cleared not due to
correct VF ACK but from a wrong VF ACK like this "FLR_NOTIFY" one)

This fixed handshake bug that sometimes GUEST always couldn't receive
"READY_TO_ACCESS_GPU" msg from hypervisor.

5)separate polling time limits accordingly:
POLL ACK cost no more than 500ms
POLL MSG cost no more than 12000ms
POLL FLR finish cost no more than 500ms

6) we still need to set adev into in_gpu_reset mode after we received
FLR_NOTIFY from the host side; this can prevent an innocent app from wrongly
succeeding in opening the amdgpu dri device.

FLR_NOTIFY is received due to an IDLE hang detected from the hypervisor side,
which indicates the GPU is already dead in this VF.

Change-Id: I17df8b4490a5b53a1cc2bd6c8f9bc3ee14c23f1a
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 200 ++
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h |   4 +-
 2 files changed, 111 insertions(+), 93 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 271452d..8d09380 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -33,56 +33,42 @@
 
 static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev)
 {
-   u32 reg;
-   int timeout = AI_MAILBOX_TIMEDOUT;
-   u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID);
-
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-mmBIF_BX_PF0_MAILBOX_CONTROL));
-   reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_ACK, 1);
-   WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-  mmBIF_BX_PF0_MAILBOX_CONTROL), reg);
-
-   /*Wait for RCV_MSG_VALID to be 0*/
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-mmBIF_BX_PF0_MAILBOX_CONTROL));
-   while (reg & mask) {
-   if (timeout <= 0) {
-   pr_err("RCV_MSG_VALID is not cleared\n");
-   break;
-   }
-   mdelay(1);
-   timeout -=1;
-
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-
mmBIF_BX_PF0_MAILBOX_CONTROL));
-   }
+   const u32 offset = SOC15_REG_OFFSET(NBIO, 0, 
mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1;
+   WREG8(offset, 2);
 }
 
 static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val)
 {
-   u32 reg;
+   const u32 offset = SOC15_REG_OFFSET(NBIO, 0, 
mmBIF_BX_PF0_MAILBOX_CONTROL) * 4;
+   WREG8(offset, val ? 1 : 0);
+}
 
-   reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
-mmBIF_BX_PF0_MAILBOX_CONTROL));
-   reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL,
-   TRN_MSG_VALID, val ? 1 : 0);
-   WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL),
- reg);
+/*
+ * this peek_msg could *only* be called in IRQ routine becuase in IRQ routine
+ * RCV_MSG_VALID filed of BIF_BX_PF0_MAILBOX_CONTROL must already be set to 1
+ * by host.
+ *
+ * if called no in IRQ routine, this peek_msg cannot guaranteed to return the
+ * correct value since it doesn't return the RCV_DW0 under the case that
+ * 

[PATCH 4/4] dma-buf/reservation: should keep the new fence in add_shared_inplace

2018-03-05 Thread Monk Liu
Change-Id: If6a979ba9fd6c923b82212f35f07a9ff31c86767
Signed-off-by: Monk Liu 
---
 drivers/dma-buf/reservation.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index 314eb10..29b7e45 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -118,7 +118,7 @@ reservation_object_add_shared_inplace(struct 
reservation_object *obj,
old_fence = rcu_dereference_protected(fobj->shared[i],
reservation_object_held(obj));
 
-   if (old_fence->context == fence->context) {
+   if (dma_fence_is_later(fence, old_fence)) {
/* memory barrier is added by write_seqcount_begin */
RCU_INIT_POINTER(fobj->shared[i], fence);
write_seqcount_end(>seq);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/4] drm/amdgpu: give warning before sleep in kiq_r/wreg

2018-03-05 Thread Monk Liu
to catch, early on, errors that may schedule in atomic context

Change-Id: I49dec7c55470011729b7fa7d3e1ecfe1f38ed89f
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 42c1401..21adb1b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -167,6 +167,9 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg)
if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
goto failed_kiq_read;
 
+   if (in_interrupt())
+   might_sleep();
+
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
@@ -212,7 +215,11 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, 
uint32_t reg, uint32_t v)
if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
goto failed_kiq_write;
 
+   if (in_interrupt())
+   might_sleep();
+
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/4] drm/amdgpu: implement mmio byte access helpers

2018-03-05 Thread Monk Liu
mailbox registers can be accessed on a byte boundary according
to the BIF team, so this patch prepares register byte access
and will be used by the following patches

Change-Id: I1e84f1c6e8e75dc42eb5be09c492fa5e7eb7502a
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 ++
 2 files changed, 32 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 292c7e7..72385bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1635,6 +1635,9 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, 
uint32_t reg,
uint32_t acc_flags);
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags);
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value);
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
+
 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
 
@@ -1658,6 +1661,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
 #define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 
AMDGPU_REGS_NO_KIQ)
 
+#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
+#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
+
 #define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0)
 #define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX)
 #define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", 
amdgpu_mm_rreg(adev, (reg), 0))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 65584f6..c8e1940 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -121,6 +121,32 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, 
uint32_t reg,
return ret;
 }
 
+/*
+ * MMIO register read with bytes helper functions
+ * @offset:bytes offset from MMIO start
+ *
+*/
+
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
+   if (offset < adev->rmmio_size)
+   return (readb(adev->rmmio + offset));
+   BUG();
+}
+
+/*
+ * MMIO register write with bytes helper functions
+ * @offset:bytes offset from MMIO start
+ * @value: the value want to be written to the register
+ *
+*/
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value) {
+   if (offset < adev->rmmio_size)
+   writeb(value, adev->rmmio + offset);
+   else
+   BUG();
+}
+
+
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags)
 {
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Disable sdma wptr polling memory for sriov

2018-03-05 Thread Deucher, Alexander
Acked-by: Alex Deucher 


From: amd-gfx  on behalf of Emily Deng 

Sent: Monday, March 5, 2018 9:14 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deng, Emily
Subject: [PATCH] drm/amdgpu: Disable sdma wptr polling memory for sriov

The sdma wptr polling memory will introduce a serious sdma hang issue for
the sriov environment on sdma v3.
And the sdma wptr polling memory is only there to fix the FLR corner case;
the issue's probability is very low.

Change-Id: I2c447533aac6b16d541f58644d141228dd75dfb3
Signed-off-by: Emily Deng 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index ec885ff..44d7d08 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -716,10 +716,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
 WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i],
upper_32_bits(wptr_gpu_addr));
 wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + 
sdma_offsets[i]);
-   if (amdgpu_sriov_vf(adev))
-   wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, 
SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
-   else
-   wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, 
SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
+   wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, 
SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
 WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], 
wptr_poll_cntl);

 /* enable DMA RB */
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr for VF"

2018-03-05 Thread Deucher, Alexander
Acked-by: Alex Deucher 


From: amd-gfx  on behalf of Emily Deng 

Sent: Thursday, March 1, 2018 10:31 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deng, Emily
Subject: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr for VF"

This reverts commit 2ffe31deb27579e2f2c9444e01f4d8abf385d145.
The sdma wptr poll memory doesn't have the same efficiency as
the doorbell, and it will make sdma hang when running tests.

Change-Id: I6e334430b309b0c21aa18a08764320c7ff51e353
Signed-off-by: Emily Deng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 -
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   | 27 ---
 2 files changed, 8 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 102dad3..5dcf98b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -186,7 +186,6 @@ struct amdgpu_ring {
 uint64_teop_gpu_addr;
 u32 doorbell_index;
 booluse_doorbell;
-   booluse_pollmem;
 unsignedwptr_offs;
 unsignedfence_offs;
 uint64_tcurrent_ctx;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 521978c..d3fb3ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -355,7 +355,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring 
*ring)
 struct amdgpu_device *adev = ring->adev;
 u32 wptr;

-   if (ring->use_doorbell || ring->use_pollmem) {
+   if (ring->use_doorbell) {
 /* XXX check if swapping is necessary on BE */
 wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2;
 } else {
@@ -380,13 +380,10 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring 
*ring)

 if (ring->use_doorbell) {
 u32 *wb = (u32 *)>wb.wb[ring->wptr_offs];
+
 /* XXX check if swapping is necessary on BE */
 WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 
2);
-   } else if (ring->use_pollmem) {
-   u32 *wb = (u32 *)>wb.wb[ring->wptr_offs];
-
-   WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
 } else {
 int me = (ring == >adev->sdma.instance[0].ring) ? 0 : 1;

@@ -719,14 +716,10 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device 
*adev)
 WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i],
upper_32_bits(wptr_gpu_addr));
 wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + 
sdma_offsets[i]);
-   if (ring->use_pollmem)
-   wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
-  
SDMA0_GFX_RB_WPTR_POLL_CNTL,
-  ENABLE, 1);
+   if (amdgpu_sriov_vf(adev))
+   wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, 
SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
 else
-   wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
-  
SDMA0_GFX_RB_WPTR_POLL_CNTL,
-  ENABLE, 0);
+   wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, 
SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
 WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], 
wptr_poll_cntl);

 /* enable DMA RB */
@@ -1208,13 +1201,9 @@ static int sdma_v3_0_sw_init(void *handle)
 for (i = 0; i < adev->sdma.num_instances; i++) {
 ring = >sdma.instance[i].ring;
 ring->ring_obj = NULL;
-   if (!amdgpu_sriov_vf(adev)) {
-   ring->use_doorbell = true;
-   ring->doorbell_index = (i == 0) ?
-   AMDGPU_DOORBELL_sDMA_ENGINE0 : 
AMDGPU_DOORBELL_sDMA_ENGINE1;
-   } else {
-   ring->use_pollmem = true;
-   }
+   ring->use_doorbell = true;
+   ring->doorbell_index = (i == 0) ?
+   AMDGPU_DOORBELL_sDMA_ENGINE0 : 
AMDGPU_DOORBELL_sDMA_ENGINE1;

 sprintf(ring->name, "sdma%d", i);
 r = amdgpu_ring_init(adev, ring, 1024,
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list

[PATCH] drm/amdgpu: Disable sdma wptr polling memory for sriov

2018-03-05 Thread Emily Deng
The sdma wptr polling memory will introduce a serious sdma hang issue for
the sriov environment on sdma v3.
And the sdma wptr polling memory is only there to fix the FLR corner case;
the issue's probability is very low.

Change-Id: I2c447533aac6b16d541f58644d141228dd75dfb3
Signed-off-by: Emily Deng 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index ec885ff..44d7d08 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -716,10 +716,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i],
   upper_32_bits(wptr_gpu_addr));
wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + 
sdma_offsets[i]);
-   if (amdgpu_sriov_vf(adev))
-   wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, 
SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
-   else
-   wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, 
SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
+   wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, 
SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], 
wptr_poll_cntl);
 
/* enable DMA RB */
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr for VF"

2018-03-05 Thread Deng, Emily
Ping
Hi all,
Please help review this, the regression introduced by the commit 
2ffe31deb27579e2f2c9444e01f4d8abf385d145 is gating the sriov sanity test.

Best Wishes,
Emily Deng


> -Original Message-
> From: Emily Deng [mailto:emily.d...@amd.com]
> Sent: Friday, March 02, 2018 11:32 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deng, Emily 
> Subject: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr
> for VF"
> 
> This reverts commit 2ffe31deb27579e2f2c9444e01f4d8abf385d145.
> The sdma wptr poll memory doesn't have the same efficiency as doorbell,
> and it will make sdma hang when running tests.
> 
> Change-Id: I6e334430b309b0c21aa18a08764320c7ff51e353
> Signed-off-by: Emily Deng 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 -
>  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   | 27 ---
>  2 files changed, 8 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 102dad3..5dcf98b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -186,7 +186,6 @@ struct amdgpu_ring {
>   uint64_teop_gpu_addr;
>   u32 doorbell_index;
>   booluse_doorbell;
> - booluse_pollmem;
>   unsignedwptr_offs;
>   unsignedfence_offs;
>   uint64_tcurrent_ctx;
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index 521978c..d3fb3ca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -355,7 +355,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct
> amdgpu_ring *ring)
>   struct amdgpu_device *adev = ring->adev;
>   u32 wptr;
> 
> - if (ring->use_doorbell || ring->use_pollmem) {
> + if (ring->use_doorbell) {
>   /* XXX check if swapping is necessary on BE */
>   wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2;
>   } else {
> @@ -380,13 +380,10 @@ static void sdma_v3_0_ring_set_wptr(struct
> amdgpu_ring *ring)
> 
>   if (ring->use_doorbell) {
>   u32 *wb = (u32 *)>wb.wb[ring->wptr_offs];
> +
>   /* XXX check if swapping is necessary on BE */
>   WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
>   WDOORBELL32(ring->doorbell_index, lower_32_bits(ring-
> >wptr) << 2);
> - } else if (ring->use_pollmem) {
> - u32 *wb = (u32 *)>wb.wb[ring->wptr_offs];
> -
> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
>   } else {
>   int me = (ring == >adev->sdma.instance[0].ring) ? 0 : 1;
> 
> @@ -719,14 +716,10 @@ static int sdma_v3_0_gfx_resume(struct
> amdgpu_device *adev)
>   WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI +
> sdma_offsets[i],
>  upper_32_bits(wptr_gpu_addr));
>   wptr_poll_cntl =
> RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]);
> - if (ring->use_pollmem)
> - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
> -
> SDMA0_GFX_RB_WPTR_POLL_CNTL,
> -ENABLE, 1);
> + if (amdgpu_sriov_vf(adev))
> + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
> +SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
>   else
> - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
> -
> SDMA0_GFX_RB_WPTR_POLL_CNTL,
> -ENABLE, 0);
> + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
> +SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
>   WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL +
> sdma_offsets[i], wptr_poll_cntl);
> 
>   /* enable DMA RB */
> @@ -1208,13 +1201,9 @@ static int sdma_v3_0_sw_init(void *handle)
>   for (i = 0; i < adev->sdma.num_instances; i++) {
>   ring = >sdma.instance[i].ring;
>   ring->ring_obj = NULL;
> - if (!amdgpu_sriov_vf(adev)) {
> - ring->use_doorbell = true;
> - ring->doorbell_index = (i == 0) ?
> - AMDGPU_DOORBELL_sDMA_ENGINE0 :
> AMDGPU_DOORBELL_sDMA_ENGINE1;
> - } else {
> - ring->use_pollmem = true;
> - }
> + ring->use_doorbell = true;
> + ring->doorbell_index = (i == 0) ?
> + AMDGPU_DOORBELL_sDMA_ENGINE0 :
> AMDGPU_DOORBELL_sDMA_ENGINE1;
> 
>   sprintf(ring->name, "sdma%d", i);
>   r = amdgpu_ring_init(adev, ring, 1024,
> --
> 2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 03/14] drm/amdgpu: Add helper to turn an existing VM into a compute VM

2018-03-05 Thread Felix Kuehling

On 2018-03-05 12:50 PM, Christian König wrote:
>>> The easiest way to work around this is to add a separate IOCTL which
>>> waits for VM updates to complete. Should be trivial to use
>>> vm->last_update for this.
>> For the cost of an extra system call. That means every GPU mapping
>> operation now requires two system calls. With Spectre workarounds, that
>> can be a lot more expensive.
>
> There is still very little overhead associated with that.
>
> And additional to this as long you actually need to wait it doesn't
> matter if you wait directly in one system call or if you split it into
> two. The time from the initial call to the end of the operation stays
> the same.

That's if you have waiting. For CPU page table updates, there is no
waiting, so the extra system call is wasted.

Another alternative would be to make the ioctl fail with -EINTR instead.
That way the Thunk would be in charge of handling the restart, and we
wouldn't have to pretend that no state was changed by the first call.

>
> You can then also pipeline multiple mappings and wait for all of them
> to finish in the end.

Our Thunk mapping function already bundles all the mappings it can do at
once into a single ioctl. The Thunk API must guarantee that memory can
be accessed when it returns. So we have no further room for improving
the pipelining without changing the Thunk API semantics.

Regards,
  Felix

>
> Regards,
> Christian. 

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/atomic: Add new reverse iterator over all plane state (V2)

2018-03-05 Thread Harry Wentland
On 2018-03-01 02:56 AM, S, Shirish wrote:
> From: Shirish S 
> 
> Add reverse iterator for_each_oldnew_plane_in_state_reverse to complement the 
> for_each_oldnew_plane_in_state way of reading plane states.
> 
> The plane states are required to be read in reverse order for amd drivers, 
> because the z order convention followed in linux is opposite to how the planes 
> are supposed to be presented to DC engine, which is common to both windows 
> and linux.
> 
> V2: fix compile time errors due to -Werror flag.
> 
> Signed-off-by: Shirish S 
> Signed-off-by: Pratik Vishwakarma 
> ---
>  include/drm/drm_atomic.h | 22 ++
>  1 file changed, 22 insertions(+)
> 
> diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index 
> cf13842..3fe8dde 100644
> --- a/include/drm/drm_atomic.h
> +++ b/include/drm/drm_atomic.h
> @@ -754,6 +754,28 @@ void drm_state_dump(struct drm_device *dev, struct 
> drm_printer *p);
> (new_plane_state) = 
> (__state)->planes[__i].new_state, 1))
>  
>  /**
> + * for_each_oldnew_plane_in_state_reverse - iterate over all planes in 
> +an atomic
> + * update in reverse order

Looks good but can you check if docs build and render well with the newline in 
here? To build the docs run
make DOCBOOKS="" htmldocs
and then check that it renders correctly in 
Documentation/output/gpu/drm_kms.html

Harry

> + * @__state:  drm_atomic_state pointer
> + * @plane:  drm_plane iteration cursor
> + * @old_plane_state:  drm_plane_state iteration cursor for the 
> +old state
> + * @new_plane_state:  drm_plane_state iteration cursor for the 
> +new state
> + * @__i: int iteration cursor, for macro-internal use
> + *
> + * This iterates over all planes in an atomic update in reverse order,
> + * tracking both old and  new state. This is useful in places where the
> + * state delta needs to be considered, for example in atomic check functions.
> + */
> +#define for_each_oldnew_plane_in_state_reverse(__state, plane, 
> old_plane_state, new_plane_state, __i) \
> + for ((__i) = ((__state)->dev->mode_config.num_total_plane - 1); \
> +  (__i) >= 0;\
> +  (__i)--)   \
> + for_each_if ((__state)->planes[__i].ptr &&  \
> +  ((plane) = (__state)->planes[__i].ptr, \
> +   (old_plane_state) = 
> (__state)->planes[__i].old_state,\
> +   (new_plane_state) = 
> (__state)->planes[__i].new_state, 1))
> +
> +/**
>   * for_each_old_plane_in_state - iterate over all planes in an atomic update
>   * @__state:  drm_atomic_state pointer
>   * @plane:  drm_plane iteration cursor
> --
> 2.7.4
> 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH xf86-video-ati] Only change Set/MoveCursor hooks from what we expect

2018-03-05 Thread Alex Deucher
On Mon, Mar 5, 2018 at 12:44 PM, Michel Dänzer  wrote:
> From: Michel Dänzer 
>
> Since xf86CursorCloseScreen runs after RADEONCloseScreen_KMS,
> PointPriv->spriteFuncs doesn't point to the same struct in the latter as
> in RADEONCursorInit_KMS. So we were restoring info->Set/MoveCursor to
> the wrong struct. Then in the next server generation,
> info->Set/MoveCursor would end up pointing to
> drmmode_sprite_set/move_cursor, resulting in an infinite loop if one of
> them was called.
>
> To avoid this, only change the Set/MoveCursor hooks if their values
> match our expectations, otherwise leave them as is. This is kind of a
> hack, but the alternative would be invasive and thus risky changes to
> the way we're wrapping CloseScreen, and it's not even clear that can
> work without changing xserver code.
>
> Fixes: 1fe8ca75974c ("Keep track of how many SW cursors are visible on
>   each screen")
> Signed-off-by: Michel Dänzer 

Acked-by: Alex Deucher 

> ---
>  src/radeon_kms.c | 16 ++--
>  1 file changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/src/radeon_kms.c b/src/radeon_kms.c
> index 85390e306..790d4be16 100644
> --- a/src/radeon_kms.c
> +++ b/src/radeon_kms.c
> @@ -2017,10 +2017,12 @@ static Bool RADEONCursorInit_KMS(ScreenPtr pScreen)
> return FALSE;
> }
>
> -   info->SetCursor = PointPriv->spriteFuncs->SetCursor;
> -   info->MoveCursor = PointPriv->spriteFuncs->MoveCursor;
> -   PointPriv->spriteFuncs->SetCursor = drmmode_sprite_set_cursor;
> -   PointPriv->spriteFuncs->MoveCursor = drmmode_sprite_move_cursor;
> +   if (PointPriv->spriteFuncs->SetCursor != drmmode_sprite_set_cursor) {
> +   info->SetCursor = PointPriv->spriteFuncs->SetCursor;
> +   info->MoveCursor = PointPriv->spriteFuncs->MoveCursor;
> +   PointPriv->spriteFuncs->SetCursor = drmmode_sprite_set_cursor;
> +   PointPriv->spriteFuncs->MoveCursor = drmmode_sprite_move_cursor;
> +   }
>  }
>
>  if (xf86ReturnOptValBool(info->Options, OPTION_SW_CURSOR, FALSE))
> @@ -2184,8 +2186,10 @@ static Bool RADEONCloseScreen_KMS(ScreenPtr pScreen)
> miPointerScreenPtr PointPriv =
> dixLookupPrivate(>devPrivates, miPointerScreenKey);
>
> -   PointPriv->spriteFuncs->SetCursor = info->SetCursor;
> -   PointPriv->spriteFuncs->MoveCursor = info->MoveCursor;
> +   if (PointPriv->spriteFuncs->SetCursor == drmmode_sprite_set_cursor) {
> +   PointPriv->spriteFuncs->SetCursor = info->SetCursor;
> +   PointPriv->spriteFuncs->MoveCursor = info->MoveCursor;
> +   }
>  }
>
>  pScreen->BlockHandler = info->BlockHandler;
> --
> 2.16.2
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 4/5] drm/ttm: add ttm_sg_tt_init

2018-03-05 Thread Ben Skeggs
On Mon, Mar 5, 2018 at 10:06 PM, Christian König
 wrote:
> Ping?
On a quick look, it looks like both are used sometimes.  I believe
this had something to do with the addition of Tegra support, but I'll
need some time to look into the details of why/how these things are
used again.

Ben.

>
>
> Am 27.02.2018 um 13:07 schrieb Christian König:
>>
>> Hi guys,
>>
>> at least on amdgpu and radeon the page array allocated by ttm_dma_tt_init
>> is completely unused in the case of DMA-buf sharing. So I'm trying to get
>> rid of that by only allocating the DMA address array.
>>
>> Now the only other user of DMA-buf together with ttm_dma_tt_init is
>> Nouveau. So my question is are you guys using the page array anywhere in
>> your kernel driver in case of a DMA-buf sharing?
>>
>> If no then I could just make this the default behavior for all drivers and
>> save quite a bit of memory for everybody.
>>
>> Thanks,
>> Christian.
>>
>> Am 27.02.2018 um 12:49 schrieb Christian König:
>>>
>>> This allows drivers to only allocate dma addresses, but not a page
>>> array.
>>>
>>> Signed-off-by: Christian König 
>>> ---
>>>   drivers/gpu/drm/ttm/ttm_tt.c | 54
>>> 
>>>   include/drm/ttm/ttm_tt.h |  2 ++
>>>   2 files changed, 47 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
>>> index 8e0b525cda00..971133106ec2 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_tt.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
>>> @@ -108,6 +108,16 @@ static int ttm_dma_tt_alloc_page_directory(struct
>>> ttm_dma_tt *ttm)
>>>   return 0;
>>>   }
>>>   +static int ttm_sg_tt_alloc_page_directory(struct ttm_dma_tt *ttm)
>>> +{
>>> +ttm->dma_address = kvmalloc_array(ttm->ttm.num_pages,
>>> +  sizeof(*ttm->dma_address),
>>> +  GFP_KERNEL | __GFP_ZERO);
>>> +if (!ttm->dma_address)
>>> +return -ENOMEM;
>>> +return 0;
>>> +}
>>> +
>>>   #ifdef CONFIG_X86
>>>   static inline int ttm_tt_set_page_caching(struct page *p,
>>> enum ttm_caching_state c_old,
>>> @@ -227,8 +237,8 @@ void ttm_tt_destroy(struct ttm_tt *ttm)
>>>   ttm->func->destroy(ttm);
>>>   }
>>>   -int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev,
>>> -unsigned long size, uint32_t page_flags)
>>> +void ttm_tt_init_fields(struct ttm_tt *ttm, struct ttm_bo_device *bdev,
>>> +unsigned long size, uint32_t page_flags)
>>>   {
>>>   ttm->bdev = bdev;
>>>   ttm->num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
>>> @@ -236,6 +246,12 @@ int ttm_tt_init(struct ttm_tt *ttm, struct
>>> ttm_bo_device *bdev,
>>>   ttm->page_flags = page_flags;
>>>   ttm->state = tt_unpopulated;
>>>   ttm->swap_storage = NULL;
>>> +}
>>> +
>>> +int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev,
>>> +unsigned long size, uint32_t page_flags)
>>> +{
>>> +ttm_tt_init_fields(ttm, bdev, size, page_flags);
>>> if (ttm_tt_alloc_page_directory(ttm)) {
>>>   ttm_tt_destroy(ttm);
>>> @@ -258,12 +274,7 @@ int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma,
>>> struct ttm_bo_device *bdev,
>>>   {
>>>   struct ttm_tt *ttm = _dma->ttm;
>>>   -ttm->bdev = bdev;
>>> -ttm->num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
>>> -ttm->caching_state = tt_cached;
>>> -ttm->page_flags = page_flags;
>>> -ttm->state = tt_unpopulated;
>>> -ttm->swap_storage = NULL;
>>> +ttm_tt_init_fields(ttm, bdev, size, page_flags);
>>> INIT_LIST_HEAD(_dma->pages_list);
>>>   if (ttm_dma_tt_alloc_page_directory(ttm_dma)) {
>>> @@ -275,11 +286,36 @@ int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma,
>>> struct ttm_bo_device *bdev,
>>>   }
>>>   EXPORT_SYMBOL(ttm_dma_tt_init);
>>>   +int ttm_sg_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_bo_device
>>> *bdev,
>>> +   unsigned long size, uint32_t page_flags)
>>> +{
>>> +struct ttm_tt *ttm = _dma->ttm;
>>> +int ret;
>>> +
>>> +ttm_tt_init_fields(ttm, bdev, size, page_flags);
>>> +
>>> +INIT_LIST_HEAD(_dma->pages_list);
>>> +if (page_flags & TTM_PAGE_FLAG_SG)
>>> +ret = ttm_sg_tt_alloc_page_directory(ttm_dma);
>>> +else
>>> +ret = ttm_dma_tt_alloc_page_directory(ttm_dma);
>>> +if (ret) {
>>> +ttm_tt_destroy(ttm);
>>> +pr_err("Failed allocating page table\n");
>>> +return -ENOMEM;
>>> +}
>>> +return 0;
>>> +}
>>> +EXPORT_SYMBOL(ttm_sg_tt_init);
>>> +
>>>   void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma)
>>>   {
>>>   struct ttm_tt *ttm = _dma->ttm;
>>>   -kvfree(ttm->pages);
>>> +if (ttm->pages)
>>> +kvfree(ttm->pages);
>>> +else
>>> +kvfree(ttm_dma->dma_address);
>>>   ttm->pages = NULL;
>>>   ttm_dma->dma_address = NULL;
>>>   }
>>> diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
>>> index 

Modesetting (amdgpudrmfb) artifacts on RAVEN APU

2018-03-05 Thread KARBOWSKI Piotr

Hi list,

I'd like to report a very odd screen artifacts while running both 
4.16-rc3, as well as latest 4.16-rc4 with git linux-firmware.


I am using Ryzen 2400G with the integrate Vega.

I am aware that RAVEN support is yet to be finished, however I've read 
that some people do run it already, so I figured I will report the 
issues, since others do not seem to hit it.


I have amdgpu and all it's symbols built into the kernel image, and the 
firmware added to initrammfs. The moment modesetting is initializing I 
can see that native screen resolution goes, however, I can see only like 
25% of the screen and this very top-left 25% of screen is duplicated to 
top-right. While the bottom half of screen is either black or have 
lines, usually gray, unless some text on screen had another color then 
it's green, blue, etc.


Screenshots:

https://i.imgur.com/qnDOKY7.jpg
https://i.imgur.com/XH42zit.jpg

The AMD symbols that I've enabled in kernel:

CONFIG_CPU_SUP_AMD=y
CONFIG_X86_MCE_AMD=y
CONFIG_AMD_NB=y
CONFIG_NET_VENDOR_AMD=y
CONFIG_DRM_AMDGPU=y
CONFIG_DRM_AMDGPU_SI=y
CONFIG_DRM_AMDGPU_CIK=y
CONFIG_DRM_AMD_ACP=y
CONFIG_DRM_AMD_DC=y
CONFIG_DRM_AMD_DC_FBC=y
CONFIG_DRM_AMD_DC_DCN1_0=y
CONFIG_HSA_AMD=y
CONFIG_AMD_IOMMU=y
CONFIG_AMD_IOMMU_V2=y

The kernel log that had either drm, amd or firmware in there:

[0.00] RAMDISK: [mem 0x7f88c000-0x7fff]
[0.00] ACPI: SSDT 0x9BD94908 005367 (v02 AMD 
AmdTable 0002 MSFT 0202)
[0.00] ACPI: SSDT 0x9BD99C70 00119C (v01 AMDAMD 
CPU  0001 AMD  0001)
[0.00] ACPI: CRAT 0x9BD9AE10 000810 (v01 AMDAMD 
CRAT 0001 AMD  0001)
[0.00] ACPI: CDIT 0x9BD9B620 29 (v01 AMDAMD 
CDIT 0001 AMD  0001)
[0.00] ACPI: SSDT 0x9BD9B650 002E6E (v01 AMDAMD 
AOD  0001 INTL 20120913)
[0.00] ACPI: IVRS 0x9BD9E580 D0 (v02 AMDAMD 
IVRS 0001 AMD  )
[0.00] ACPI: SSDT 0x9BD9E650 F8 (v01 AMDAMD 
PT   1000 INTL 20120913)
[0.00] ACPI: SSDT 0x9BD9E748 000E96 (v01 AMD 
AmdTable 0001 INTL 20120913)
[0.00] ACPI: SSDT 0x9BD9F5E0 000850 (v01 AMD 
AmdTable 0001 INTL 20120913)
[0.00] ACPI: SSDT 0x9BD9FE30 001993 (v01 AMD 
AmdTable 0001 INTL 20120913)
[0.00] Kernel command line: BOOT_IMAGE=/bzImage-4.16.0-rc4 
rootfstype=ext4 luks enc_root=/dev/sda2 lvm 
root=/dev/mapper/megumin-rootfs 
initrd=/initramfs.cpio.gz,/firmware-initramfs.cpio.gz
[0.00] ACPI Error: AE_ALREADY_EXISTS, (SSDT:  AMD PT) while 
loading table (20180105/tbxfload-228)
[0.08] smpboot: CPU0: AMD Ryzen 5 2400G with Radeon Vega 
Graphics (family: 0x17, model: 0x11, stepping: 0x0)

[0.08] Performance Events: Fam17h core perfctr, AMD PMU driver.
[0.101786] ACPI: [Firmware Bug]: BIOS _OSI(Linux) query ignored
[0.615782] AMD-Vi: IOMMU performance counters supported
[0.623179] AMD-Vi: Found IOMMU at :00:00.2 cap 0x40
[0.623314] AMD-Vi: Extended features (0x4f77ef22294ada):
[0.623684] AMD-Vi: Lazy IO/TLB flushing enabled
[0.624533] amd_uncore: AMD NB counters detected
[0.624666] amd_uncore: AMD LLC counters detected
[0.625076] perf/amd_iommu: Detected AMD IOMMU #0 (2 banks, 4 
counters/bank).

[0.636229] AMD IOMMUv2 driver by Joerg Roedel 
[0.637179] [drm] amdgpu kernel modesetting enabled.
[0.637409] [drm] initializing kernel modesetting (RAVEN 
0x1002:0x15DD 0x1458:0xD000 0xC6).

[0.637583] [drm] register mmio base: 0xFE50
[0.637709] [drm] register mmio size: 524288
[0.637852] [drm] probing gen 2 caps for device 1022:15db = 700d03/e
[0.638005] [drm] probing mlw for device 1022:15db = 700d03
[0.638213] [drm] VCN decode is enabled in VM mode
[0.638341] [drm] VCN encode is enabled in VM mode
[0.660265] [drm] BIOS signature incorrect 74 7
[0.660422] [drm] vm size is 262144 GB, 4 levels, block size is 
9-bit, fragment size is 9-bit
[0.660515] amdgpu :09:00.0: VRAM: 1024M 0x00F4 
- 0x00F43FFF (1024M used)
[0.660607] amdgpu :09:00.0: GTT: 1024M 0x00F5 - 
0x00F53FFF

[0.660689] [drm] Detected VRAM RAM=1024M, BAR=256M
[0.660756] [drm] RAM width 128bits UNKNOWN
[0.661653] [drm] amdgpu: 1024M of VRAM memory ready
[0.661720] [drm] amdgpu: 3072M of GTT memory ready.
[0.661793] [drm] GART: num cpu pages 262144, num gpu pages 262144
[0.662027] [drm] PCIE GART of 1024M enabled (table at 
0x00F40080).

[0.663093] [drm] use_doorbell being set to: [true]
[0.663229] [drm] Found VCN firmware Version: 1.73 Family ID: 18
[1.011745] 

Re: [PATCH 03/14] drm/amdgpu: Add helper to turn an existing VM into a compute VM

2018-03-05 Thread Alex Deucher
On Mon, Mar 5, 2018 at 12:14 PM, Christian König
 wrote:
> Am 05.03.2018 um 17:58 schrieb Felix Kuehling:
>>
>> On 2018-03-05 07:17 AM, Christian König wrote:
>>>
>>> Am 04.03.2018 um 03:34 schrieb Felix Kuehling:

 Signed-off-by: Felix Kuehling 
 ---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 82
 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 +
2 files changed, 83 insertions(+)

 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
 index 5afbc5e..58153ef 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
 @@ -2350,6 +2350,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
 struct amdgpu_vm *vm,
INIT_KFIFO(vm->faults);
vm->fault_credit = 16;
+vm->vm_context = vm_context;
>>>
>>> I think we should drop the vm_context parameter and all the related
>>> code in amdgpu_vm_init(). But that can be part of a later cleanup
>>> patch as well.
>>
>> Yep. It will be a prerequisite for sharing VMs between graphics and
>> compute. But then CPU vs SDMA page table update would need to be handled
>> differently, on a per-call basis and probably some synchronization
>> between them.
>>
>> Also, we'd need to fix shadow page table handling with CPU updates, or
>> get rid of shadow page tables. I'm not sure why they're needed, TBH.
>
>
> The idea behind shadow page tables is that you have the current state of the
> pipeline if the GPU crashes. Since with CPU updates there is no pipeline
> they doesn't make sense at all with them.

They are also to protect against vram loss in the case of a GPU reset.

Alex

>
>
>>
 +
return 0;
  error_free_root:
 @@ -2364,6 +2366,86 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
 struct amdgpu_vm *vm,
}
  /**
 + * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
 + *
 + * This only works on GFX VMs that don't have any BOs added and no
 + * page tables allocated yet.
 + *
 + * Changes the following VM parameters:
 + * - vm_context
 + * - use_cpu_for_update
 + * - pte_supports_ats
 + * - pasid (old PASID is released, because compute manages its own
 PASIDs)
 + *
 + * Reinitializes the page directory to reflect the changed ATS
 + * setting. May leave behind an unused shadow BO for the page
 + * directory when switching from SDMA updates to CPU updates.
 + *
 + * Returns 0 for success, -errno for errors.
 + */
 +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct
 amdgpu_vm *vm)
 +{
 +bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
 +int r;
 +
 +r = amdgpu_bo_reserve(vm->root.base.bo, true);
 +if (r)
 +return r;
 +
 +/* Sanity checks */
 +if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
 +/* Can happen if ioctl is interrupted by a signal after
 + * this function already completed. Just return success.
 + */
 +r = 0;
 +goto error;
 +}
>>>
>>> Ok, that is actually a show stopper. An interrupted IOCTL should never
>>> have a visible effect.
>>>
>>> Is that just a theoretical issue or did you run into this in practice?
>>
>> It's theoretical. init_kfd_vm in patch 4 reserves the page directory
>> again, validates it and waits for the validation to complete. Both the
>> reservation and the waiting can be interrupted by signals.
>
>
> No, both have parameters to control if they are interruptible or not for
> exactly this reason.
>
>>   This happens
>> after amdgpu_vm_make_compute has completed.
>>
>> If we wanted to achieve "no visible effect", we'd need to roll back the
>> conversion to a compute VM, which may involve more waiting.
>>
>> We have a similar issue in map_memory_to_gpu. We map on all GPUs and
>> wait for a sync object in the end. If SDMA is used, this allows the SDMA
>> on all GPUs to work concurrently. The wait in the end can be
>> interrupted. Instead of rolling back the mapping on all GPUs (which
>> would require more waiting), we just let the system call return
>> -ERESTARTSYS and make sure the restart doesn't cause any trouble.
>
>
> If waits are interruptible or not is controllable by parameters. At least
> for importing the VM it is probably ok to disable this.
>
>> I've been looking for documentation about the expected behaviour for
>> system calls interrupted by signals. All I can find is this statement in
>> Documentation/kernel-hacking/hacking.rst:
>>>
>>> After you slept you should check if a signal occurred: the Unix/Linux
>>> way of handling signals is to temporarily exit the system call with the
>>> ``-ERESTARTSYS`` error. The system call entry code will switch back to
>>> user context, process the signal 

Re: [PATCH 03/14] drm/amdgpu: Add helper to turn an existing VM into a compute VM

2018-03-05 Thread Felix Kuehling

On 2018-03-05 12:14 PM, Christian König wrote:
>> Also, we'd need to fix shadow page table handling with CPU updates, or
>> get rid of shadow page tables. I'm not sure why they're needed, TBH.
>
> The idea behind shadow page tables is that you have the current state
> of the pipeline if the GPU crashes.

I see. So a GPU can be reset in the middle of a page table update
without losing anything and without having to recreate the page table
from scratch.

> Since with CPU updates there is no pipeline they doesn't make sense at
> all with them. 

That's true when all page table updates are done by the CPU. If we start
mixing CPU and SDMA updates in the same VM, then CPU updates need to
update the shadow page table as well.

Regards,
  Felix
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/radeon: fix KV harvesting

2018-03-05 Thread Christian König

Am 05.03.2018 um 19:30 schrieb Alex Deucher:

Always set the graphics values to the max for the
asic type.  E.g., some 1 RB chips are actually 1 RB chips,
others are actually harvested 2 RB chips.

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=99353
Signed-off-by: Alex Deucher 
Cc: sta...@vger.kernel.org


Reviewed-by: Christian König  for the series.


---
  drivers/gpu/drm/radeon/cik.c | 31 ++-
  1 file changed, 2 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index d3045a371a55..7c73bc7e2f85 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -3221,35 +3221,8 @@ static void cik_gpu_init(struct radeon_device *rdev)
case CHIP_KAVERI:
rdev->config.cik.max_shader_engines = 1;
rdev->config.cik.max_tile_pipes = 4;
-   if ((rdev->pdev->device == 0x1304) ||
-   (rdev->pdev->device == 0x1305) ||
-   (rdev->pdev->device == 0x130C) ||
-   (rdev->pdev->device == 0x130F) ||
-   (rdev->pdev->device == 0x1310) ||
-   (rdev->pdev->device == 0x1311) ||
-   (rdev->pdev->device == 0x131C)) {
-   rdev->config.cik.max_cu_per_sh = 8;
-   rdev->config.cik.max_backends_per_se = 2;
-   } else if ((rdev->pdev->device == 0x1309) ||
-  (rdev->pdev->device == 0x130A) ||
-  (rdev->pdev->device == 0x130D) ||
-  (rdev->pdev->device == 0x1313) ||
-  (rdev->pdev->device == 0x131D)) {
-   rdev->config.cik.max_cu_per_sh = 6;
-   rdev->config.cik.max_backends_per_se = 2;
-   } else if ((rdev->pdev->device == 0x1306) ||
-  (rdev->pdev->device == 0x1307) ||
-  (rdev->pdev->device == 0x130B) ||
-  (rdev->pdev->device == 0x130E) ||
-  (rdev->pdev->device == 0x1315) ||
-  (rdev->pdev->device == 0x1318) ||
-  (rdev->pdev->device == 0x131B)) {
-   rdev->config.cik.max_cu_per_sh = 4;
-   rdev->config.cik.max_backends_per_se = 1;
-   } else {
-   rdev->config.cik.max_cu_per_sh = 3;
-   rdev->config.cik.max_backends_per_se = 1;
-   }
+   rdev->config.cik.max_cu_per_sh = 8;
+   rdev->config.cik.max_backends_per_se = 2;
rdev->config.cik.max_sh_per_se = 1;
rdev->config.cik.max_texture_channel_caches = 4;
rdev->config.cik.max_gprs = 256;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: fix KV harvesting

2018-03-05 Thread Alex Deucher
Always set the graphics values to the max for the
asic type.  E.g., some 1 RB chips are actually 1 RB chips,
others are actually harvested 2 RB chips.

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=99353
Signed-off-by: Alex Deucher 
Cc: sta...@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 30 ++
 1 file changed, 2 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 972d421caada..e13d9d83767b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4358,34 +4358,8 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device 
*adev)
case CHIP_KAVERI:
adev->gfx.config.max_shader_engines = 1;
adev->gfx.config.max_tile_pipes = 4;
-   if ((adev->pdev->device == 0x1304) ||
-   (adev->pdev->device == 0x1305) ||
-   (adev->pdev->device == 0x130C) ||
-   (adev->pdev->device == 0x130F) ||
-   (adev->pdev->device == 0x1310) ||
-   (adev->pdev->device == 0x1311) ||
-   (adev->pdev->device == 0x131C)) {
-   adev->gfx.config.max_cu_per_sh = 8;
-   adev->gfx.config.max_backends_per_se = 2;
-   } else if ((adev->pdev->device == 0x1309) ||
-  (adev->pdev->device == 0x130A) ||
-  (adev->pdev->device == 0x130D) ||
-  (adev->pdev->device == 0x1313) ||
-  (adev->pdev->device == 0x131D)) {
-   adev->gfx.config.max_cu_per_sh = 6;
-   adev->gfx.config.max_backends_per_se = 2;
-   } else if ((adev->pdev->device == 0x1306) ||
-  (adev->pdev->device == 0x1307) ||
-  (adev->pdev->device == 0x130B) ||
-  (adev->pdev->device == 0x130E) ||
-  (adev->pdev->device == 0x1315) ||
-  (adev->pdev->device == 0x131B)) {
-   adev->gfx.config.max_cu_per_sh = 4;
-   adev->gfx.config.max_backends_per_se = 1;
-   } else {
-   adev->gfx.config.max_cu_per_sh = 3;
-   adev->gfx.config.max_backends_per_se = 1;
-   }
+   adev->gfx.config.max_cu_per_sh = 8;
+   adev->gfx.config.max_backends_per_se = 2;
adev->gfx.config.max_sh_per_se = 1;
adev->gfx.config.max_texture_channel_caches = 4;
adev->gfx.config.max_gprs = 256;
-- 
2.13.6

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/radeon: fix KV harvesting

2018-03-05 Thread Alex Deucher
Always set the graphics values to the max for the
asic type.  E.g., some 1 RB chips are actually 1 RB chips,
others are actually harvested 2 RB chips.

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=99353
Signed-off-by: Alex Deucher 
Cc: sta...@vger.kernel.org
---
 drivers/gpu/drm/radeon/cik.c | 31 ++-
 1 file changed, 2 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index d3045a371a55..7c73bc7e2f85 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -3221,35 +3221,8 @@ static void cik_gpu_init(struct radeon_device *rdev)
case CHIP_KAVERI:
rdev->config.cik.max_shader_engines = 1;
rdev->config.cik.max_tile_pipes = 4;
-   if ((rdev->pdev->device == 0x1304) ||
-   (rdev->pdev->device == 0x1305) ||
-   (rdev->pdev->device == 0x130C) ||
-   (rdev->pdev->device == 0x130F) ||
-   (rdev->pdev->device == 0x1310) ||
-   (rdev->pdev->device == 0x1311) ||
-   (rdev->pdev->device == 0x131C)) {
-   rdev->config.cik.max_cu_per_sh = 8;
-   rdev->config.cik.max_backends_per_se = 2;
-   } else if ((rdev->pdev->device == 0x1309) ||
-  (rdev->pdev->device == 0x130A) ||
-  (rdev->pdev->device == 0x130D) ||
-  (rdev->pdev->device == 0x1313) ||
-  (rdev->pdev->device == 0x131D)) {
-   rdev->config.cik.max_cu_per_sh = 6;
-   rdev->config.cik.max_backends_per_se = 2;
-   } else if ((rdev->pdev->device == 0x1306) ||
-  (rdev->pdev->device == 0x1307) ||
-  (rdev->pdev->device == 0x130B) ||
-  (rdev->pdev->device == 0x130E) ||
-  (rdev->pdev->device == 0x1315) ||
-  (rdev->pdev->device == 0x1318) ||
-  (rdev->pdev->device == 0x131B)) {
-   rdev->config.cik.max_cu_per_sh = 4;
-   rdev->config.cik.max_backends_per_se = 1;
-   } else {
-   rdev->config.cik.max_cu_per_sh = 3;
-   rdev->config.cik.max_backends_per_se = 1;
-   }
+   rdev->config.cik.max_cu_per_sh = 8;
+   rdev->config.cik.max_backends_per_se = 2;
rdev->config.cik.max_sh_per_se = 1;
rdev->config.cik.max_texture_channel_caches = 4;
rdev->config.cik.max_gprs = 256;
-- 
2.13.6

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/4] drm/amdgpu: remove useless variable

2018-03-05 Thread Alex Deucher
On Sun, Mar 4, 2018 at 11:09 PM, Monk Liu  wrote:
> Change-Id: Iec96c7762d791164c719ec64ec410e00a8cf5d90
> Signed-off-by: Monk Liu 

Already fixed in amd-staging-drm-next.

Alex

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 43ce2e5..42c1401 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -141,7 +141,7 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, 
> uint32_t reg)
>  {
> signed long r, cnt = 0;
> unsigned long flags;
> -   uint32_t val, seq;
> +   uint32_t seq;
> struct amdgpu_kiq *kiq = >gfx.kiq;
> struct amdgpu_ring *ring = >ring;
>
> --
> 2.7.4
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 3/3] drm/amd/pp: Implement update_dpm_settings on CI

2018-03-05 Thread Alex Deucher
On Mon, Mar 5, 2018 at 2:55 AM, Rex Zhu  wrote:
> use SW method to update DPM settings by updating SRAM
> directly on CI.
>
> Change-Id: Ie9ed6c3a0e1c327cc9a9b06bec47b1cede87278d
> Signed-off-by: Rex Zhu 

Series is:
Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 97 
> 
>  1 file changed, 97 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c 
> b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
> index 76f700f..179d00c 100644
> --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
> @@ -2819,6 +2819,102 @@ static int ci_start_smu(struct pp_hwmgr *hwmgr)
> return 0;
>  }
>
> +static int ci_update_dpm_settings(struct pp_hwmgr *hwmgr,
> +   void *profile_setting)
> +{
> +   struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
> +   struct ci_smumgr *smu_data = (struct ci_smumgr *)
> +   (hwmgr->smu_backend);
> +   struct profile_mode_setting *setting;
> +   struct SMU7_Discrete_GraphicsLevel *levels =
> +   smu_data->smc_state_table.GraphicsLevel;
> +   uint32_t array = smu_data->dpm_table_start +
> +   offsetof(SMU7_Discrete_DpmTable, GraphicsLevel);
> +
> +   uint32_t mclk_array = smu_data->dpm_table_start +
> +   offsetof(SMU7_Discrete_DpmTable, MemoryLevel);
> +   struct SMU7_Discrete_MemoryLevel *mclk_levels =
> +   smu_data->smc_state_table.MemoryLevel;
> +   uint32_t i;
> +   uint32_t offset, up_hyst_offset, down_hyst_offset, 
> clk_activity_offset, tmp;
> +
> +   if (profile_setting == NULL)
> +   return -EINVAL;
> +
> +   setting = (struct profile_mode_setting *)profile_setting;
> +
> +   if (setting->bupdate_sclk) {
> +   if (!data->sclk_dpm_key_disabled)
> +   smum_send_msg_to_smc(hwmgr, 
> PPSMC_MSG_SCLKDPM_FreezeLevel);
> +   for (i = 0; i < 
> smu_data->smc_state_table.GraphicsDpmLevelCount; i++) {
> +   if (levels[i].ActivityLevel !=
> +   cpu_to_be16(setting->sclk_activity)) {
> +   levels[i].ActivityLevel = 
> cpu_to_be16(setting->sclk_activity);
> +
> +   clk_activity_offset = array + 
> (sizeof(SMU7_Discrete_GraphicsLevel) * i)
> +   + 
> offsetof(SMU7_Discrete_GraphicsLevel, ActivityLevel);
> +   offset = clk_activity_offset & ~0x3;
> +   tmp = 
> PP_HOST_TO_SMC_UL(cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, 
> offset));
> +   tmp = 
> phm_set_field_to_u32(clk_activity_offset, tmp, levels[i].ActivityLevel, 
> sizeof(uint16_t));
> +   cgs_write_ind_register(hwmgr->device, 
> CGS_IND_REG__SMC, offset, PP_HOST_TO_SMC_UL(tmp));
> +
> +   }
> +   if (levels[i].UpH != setting->sclk_up_hyst ||
> +   levels[i].DownH != setting->sclk_down_hyst) {
> +   levels[i].UpH = setting->sclk_up_hyst;
> +   levels[i].DownH = setting->sclk_down_hyst;
> +   up_hyst_offset = array + 
> (sizeof(SMU7_Discrete_GraphicsLevel) * i)
> +   + 
> offsetof(SMU7_Discrete_GraphicsLevel, UpH);
> +   down_hyst_offset = array + 
> (sizeof(SMU7_Discrete_GraphicsLevel) * i)
> +   + 
> offsetof(SMU7_Discrete_GraphicsLevel, DownH);
> +   offset = up_hyst_offset & ~0x3;
> +   tmp = 
> PP_HOST_TO_SMC_UL(cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, 
> offset));
> +   tmp = phm_set_field_to_u32(up_hyst_offset, 
> tmp, levels[i].UpH, sizeof(uint8_t));
> +   tmp = phm_set_field_to_u32(down_hyst_offset, 
> tmp, levels[i].DownH, sizeof(uint8_t));
> +   cgs_write_ind_register(hwmgr->device, 
> CGS_IND_REG__SMC, offset, PP_HOST_TO_SMC_UL(tmp));
> +   }
> +   }
> +   if (!data->sclk_dpm_key_disabled)
> +   smum_send_msg_to_smc(hwmgr, 
> PPSMC_MSG_SCLKDPM_UnfreezeLevel);
> +   }
> +
> +   if (setting->bupdate_mclk) {
> +   if (!data->mclk_dpm_key_disabled)
> +   smum_send_msg_to_smc(hwmgr, 
> PPSMC_MSG_MCLKDPM_FreezeLevel);
> +   for (i = 0; i < 
> smu_data->smc_state_table.MemoryDpmLevelCount; i++) {
> +   if (mclk_levels[i].ActivityLevel !=
> +   

Re: [PATCH 3/3] drm/amdgpu: further mitigate workaround for i915

2018-03-05 Thread Alex Deucher
On Mon, Mar 5, 2018 at 7:00 AM, Christian König
 wrote:
> Disable the workaround on imported BOs as well.
>
> Signed-off-by: Christian König 

Series is:
Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 12 +---
>  1 file changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> index 8ce74a1d9966..fb66b45548d3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> @@ -107,12 +107,18 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
> ww_mutex_lock(>lock, NULL);
> ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false,
>AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, );
> -   ww_mutex_unlock(>lock);
> if (ret)
> -   return ERR_PTR(ret);
> +   goto error;
> +
> +   if (attach->dmabuf->ops != _dmabuf_ops)
> +   bo->prime_shared_count = 1;
>
> -   bo->prime_shared_count = 1;
> +   ww_mutex_unlock(>lock);
> return >gem_base;
> +
> +error:
> +   ww_mutex_unlock(>lock);
> +   return ERR_PTR(ret);
>  }
>
>  static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
> --
> 2.14.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/3] drm/amd/pp: Delete the wrap layer of smu_allocate/free_memory

2018-03-05 Thread Alex Deucher
On Mon, Mar 5, 2018 at 5:43 AM, Rex Zhu  wrote:
> use amdgpu_bo_create/free_kernel directly.
>
> Change-Id: I74f20353edd4e0df6328db66914cd9eabb60e1d7
> Signed-off-by: Rex Zhu 
> ---
>  drivers/gpu/drm/amd/powerplay/inc/smumgr.h |   7 -
>  drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c   |  39 +++---
>  drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.h   |  11 +-
>  drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c   |  53 +++
>  drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h   |  11 +-
>  drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c |  52 +++
>  drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h |  11 +-
>  drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c  |  51 ---
>  .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c   | 155 
> ++---
>  .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h   |  11 +-
>  10 files changed, 176 insertions(+), 225 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h 
> b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
> index e1f6e83..8872c5c 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
> @@ -106,13 +106,6 @@ enum SMU_MAC_DEFINITION {
>  extern int smum_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr,
> uint16_t msg, uint32_t parameter);
>
> -extern int smu_allocate_memory(void *device, uint32_t size,
> -enum cgs_gpu_mem_type type,
> -uint32_t byte_align, uint64_t *mc_addr,
> -void **kptr, void *handle);
> -
> -extern int smu_free_memory(void *device, void *handle);
> -
>  extern int smum_update_sclk_threshold(struct pp_hwmgr *hwmgr);
>
>  extern int smum_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type);
> diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c 
> b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
> index 7fe4c11..0d2892d 100644
> --- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
> @@ -750,7 +750,6 @@ static int cz_start_smu(struct pp_hwmgr *hwmgr)
>
>  static int cz_smu_init(struct pp_hwmgr *hwmgr)
>  {
> -   uint64_t mc_addr = 0;
> int ret = 0;
> struct cz_smumgr *cz_smu;
>
> @@ -768,31 +767,31 @@ static int cz_smu_init(struct pp_hwmgr *hwmgr)
> ALIGN(sizeof(struct SMU8_MultimediaPowerLogData), 32) +
> ALIGN(sizeof(struct SMU8_Fusion_ClkTable), 32);
>
> -   ret = smu_allocate_memory(hwmgr->device,
> +   ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev,
> cz_smu->toc_buffer.data_size,
> -   CGS_GPU_MEM_TYPE__GART_CACHEABLE,
> PAGE_SIZE,
> -   _addr,
> -   _smu->toc_buffer.kaddr,
> -   _smu->toc_buffer.handle);
> +   AMDGPU_GEM_DOMAIN_VRAM,
> +   _smu->toc_buffer.handle,
> +   _smu->toc_buffer.mc_addr,
> +   _smu->toc_buffer.kaddr);
> if (ret != 0)
> return -1;
>
> -   cz_smu->toc_buffer.mc_addr_high = smu_upper_32_bits(mc_addr);
> -   cz_smu->toc_buffer.mc_addr_low = smu_lower_32_bits(mc_addr);
> +   cz_smu->toc_buffer.mc_addr_high = 
> smu_upper_32_bits(cz_smu->toc_buffer.mc_addr);
> +   cz_smu->toc_buffer.mc_addr_low = 
> smu_lower_32_bits(cz_smu->toc_buffer.mc_addr);

Another potential future patch would be to replace
smu_upper_32_bits/smu_lower_32_bits with the standard kernel macros.

Series is:
Reviewed-by: Alex Deucher 

Alex

>
> -   ret = smu_allocate_memory(hwmgr->device,
> +   ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev,
> cz_smu->smu_buffer.data_size,
> -   CGS_GPU_MEM_TYPE__GART_CACHEABLE,
> PAGE_SIZE,
> -   _addr,
> -   _smu->smu_buffer.kaddr,
> -   _smu->smu_buffer.handle);
> +   AMDGPU_GEM_DOMAIN_VRAM,
> +   _smu->smu_buffer.handle,
> +   _smu->toc_buffer.mc_addr,
> +   _smu->smu_buffer.kaddr);
> if (ret != 0)
> return -1;
>
> -   cz_smu->smu_buffer.mc_addr_high = smu_upper_32_bits(mc_addr);
> -   cz_smu->smu_buffer.mc_addr_low = smu_lower_32_bits(mc_addr);
> +   cz_smu->smu_buffer.mc_addr_high = 
> smu_upper_32_bits(cz_smu->toc_buffer.mc_addr);
> +   cz_smu->smu_buffer.mc_addr_low = 
> smu_lower_32_bits(cz_smu->toc_buffer.mc_addr);
>
> if (0 != cz_smu_populate_single_scratch_entry(hwmgr,
> 

Re: [PATCH] Revert "drm/amd/pp: Add a pp feature mask bit for AutoWattman feature"

2018-03-05 Thread Alex Deucher
On Mon, Mar 5, 2018 at 1:46 AM, Rex Zhu  wrote:
> This reverts commit e429ea87b2939c4cce1b439baf6d76535a0767f2.
>
> Implement Workload Aware Dynamic power management instead of
> AutoWattman feature in Linux.
>
> Change-Id: I619ba5695b14e1d165829c98d2776e5173f2737e
> Signed-off-by: Rex Zhu 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h 
> b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
> index 94df6ee..b151ad85 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
> @@ -85,7 +85,6 @@ enum PP_FEATURE_MASK {
> PP_SOCCLK_DPM_MASK = 0x1000,
> PP_DCEFCLK_DPM_MASK = 0x2000,
> PP_OVERDRIVE_MASK = 0x4000,
> -   PP_AUTOWATTMAN_MASK = 0x8000,
>  };
>
>  enum PHM_BackEnd_Magic {
> --
> 1.9.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/pp: Fix incorrect return value in smu7_check_clk_voltage_valid

2018-03-05 Thread Alex Deucher
On Fri, Mar 2, 2018 at 5:49 AM, Rex Zhu  wrote:
> Change-Id: I248982c5e0100b7a975d1a19781749baeadd71f4
> Signed-off-by: Rex Zhu 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c 
> b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
> index dbfe139..10fb3ec 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
> @@ -4692,7 +4692,7 @@ static bool smu7_check_clk_voltage_valid(struct 
> pp_hwmgr *hwmgr,
> struct phm_ppt_v1_clock_voltage_dependency_table *dep_sclk_table;
>
> if (table_info == NULL)
> -   return -EINVAL;
> +   return false;
>
> dep_sclk_table = table_info->vdd_dep_on_sclk;
> min_vddc = dep_sclk_table->entries[0].vddc;
> --
> 1.9.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH xf86-video-ati] Only change Set/MoveCursor hooks from what we expect

2018-03-05 Thread Michel Dänzer
From: Michel Dänzer 

Since xf86CursorCloseScreen runs after RADEONCloseScreen_KMS,
PointPriv->spriteFuncs doesn't point to the same struct in the latter as
in RADEONCursorInit_KMS. So we were restoring info->Set/MoveCursor to
the wrong struct. Then in the next server generation,
info->Set/MoveCursor would end up pointing to
drmmode_sprite_set/move_cursor, resulting in an infinite loop if one of
them was called.

To avoid this, only change the Set/MoveCursor hooks if their values
match our expectations, otherwise leave them as is. This is kind of a
hack, but the alternative would be invasive and thus risky changes to
the way we're wrapping CloseScreen, and it's not even clear that can
work without changing xserver code.

Fixes: 1fe8ca75974c ("Keep track of how many SW cursors are visible on
  each screen")
Signed-off-by: Michel Dänzer 
---
 src/radeon_kms.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index 85390e306..790d4be16 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -2017,10 +2017,12 @@ static Bool RADEONCursorInit_KMS(ScreenPtr pScreen)
return FALSE;
}
 
-   info->SetCursor = PointPriv->spriteFuncs->SetCursor;
-   info->MoveCursor = PointPriv->spriteFuncs->MoveCursor;
-   PointPriv->spriteFuncs->SetCursor = drmmode_sprite_set_cursor;
-   PointPriv->spriteFuncs->MoveCursor = drmmode_sprite_move_cursor;
+   if (PointPriv->spriteFuncs->SetCursor != drmmode_sprite_set_cursor) {
+   info->SetCursor = PointPriv->spriteFuncs->SetCursor;
+   info->MoveCursor = PointPriv->spriteFuncs->MoveCursor;
+   PointPriv->spriteFuncs->SetCursor = drmmode_sprite_set_cursor;
+   PointPriv->spriteFuncs->MoveCursor = drmmode_sprite_move_cursor;
+   }
 }
 
 if (xf86ReturnOptValBool(info->Options, OPTION_SW_CURSOR, FALSE))
@@ -2184,8 +2186,10 @@ static Bool RADEONCloseScreen_KMS(ScreenPtr pScreen)
miPointerScreenPtr PointPriv =
dixLookupPrivate(>devPrivates, miPointerScreenKey);
 
-   PointPriv->spriteFuncs->SetCursor = info->SetCursor;
-   PointPriv->spriteFuncs->MoveCursor = info->MoveCursor;
+   if (PointPriv->spriteFuncs->SetCursor == drmmode_sprite_set_cursor) {
+   PointPriv->spriteFuncs->SetCursor = info->SetCursor;
+   PointPriv->spriteFuncs->MoveCursor = info->MoveCursor;
+   }
 }
 
 pScreen->BlockHandler = info->BlockHandler;
-- 
2.16.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 03/14] drm/amdgpu: Add helper to turn an existing VM into a compute VM

2018-03-05 Thread Felix Kuehling

On 2018-03-05 12:14 PM, Christian König wrote:
> Am 05.03.2018 um 17:58 schrieb Felix Kuehling:
>> On 2018-03-05 07:17 AM, Christian König wrote:
>>> Am 04.03.2018 um 03:34 schrieb Felix Kuehling:
 Signed-off-by: Felix Kuehling 
 ---
    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 82
 ++
    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 +
    2 files changed, 83 insertions(+)

 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
 index 5afbc5e..58153ef 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
 @@ -2350,6 +2350,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
 struct amdgpu_vm *vm,
    INIT_KFIFO(vm->faults);
    vm->fault_credit = 16;
    +    vm->vm_context = vm_context;
>>> I think we should drop the vm_context parameter and all the related
>>> code in amdgpu_vm_init(). But that can be part of a later cleanup
>>> patch as well.
>> Yep. It will be a prerequisite for sharing VMs between graphics and
>> compute. But then CPU vs SDMA page table update would need to be handled
>> differently, on a per-call basis and probably some synchronization
>> between them.
>>
>> Also, we'd need to fix shadow page table handling with CPU updates, or
>> get rid of shadow page tables. I'm not sure why they're needed, TBH.
>
> The idea behind shadow page tables is that you have the current state
> of the pipeline if the GPU crashes. Since with CPU updates there is no
> pipeline they don't make sense at all with them.
>
>>
 +
    return 0;
      error_free_root:
 @@ -2364,6 +2366,86 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
 struct amdgpu_vm *vm,
    }
      /**
 + * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
 + *
 + * This only works on GFX VMs that don't have any BOs added and no
 + * page tables allocated yet.
 + *
 + * Changes the following VM parameters:
 + * - vm_context
 + * - use_cpu_for_update
 + * - pte_supports_ats
 + * - pasid (old PASID is released, because compute manages its own
 PASIDs)
 + *
 + * Reinitializes the page directory to reflect the changed ATS
 + * setting. May leave behind an unused shadow BO for the page
 + * directory when switching from SDMA updates to CPU updates.
 + *
 + * Returns 0 for success, -errno for errors.
 + */
 +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct
 amdgpu_vm *vm)
 +{
 +    bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
 +    int r;
 +
 +    r = amdgpu_bo_reserve(vm->root.base.bo, true);
 +    if (r)
 +    return r;
 +
 +    /* Sanity checks */
 +    if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
 +    /* Can happen if ioctl is interrupted by a signal after
 + * this function already completed. Just return success.
 + */
 +    r = 0;
 +    goto error;
 +    }
>>> Ok, that is actually a show stopper. An interrupted IOCTL should never
>>> have a visible effect.
>>>
>>> Is that just a theoretical issue or did you run into this in practice?
>> It's theoretical. init_kfd_vm in patch 4 reserves the page directory
>> again, validates it and waits for the validation to complete. Both the
>> reservation and the waiting can be interrupted by signals.
>
> No, both have parameters to control if they are interruptible or not
> for exactly this reason.
>
>>   This happens
>> after amdgpu_vm_make_compute has completed.
>>
>> If we wanted to achieve "no visible effect", we'd need to roll back the
>> conversion to a compute VM, which may involve more waiting.
>>
>> We have a similar issue in map_memory_to_gpu. We map on all GPUs and
>> wait for a sync object in the end. If SDMA is used, this allows the SDMA
>> on all GPUs to work concurrently. The wait in the end can be
>> interrupted. Instead of rolling back the mapping on all GPUs (which
>> would require more waiting), we just let the system call return
>> -ERESTARTSYS and make sure the restart doesn't cause any trouble.
>
> If waits are interruptible or not is controllable by parameters. At
> least for importing the VM it is probably ok to disable this.

Sure, I can change that.

>
>> I've been looking for documentation about the expected behaviour for
>> system calls interrupted by signals. All I can find is this statement in
>> Documentation/kernel-hacking/hacking.rst:
>>> After you slept you should check if a signal occurred: the Unix/Linux
>>> way of handling signals is to temporarily exit the system call with the
>>> ``-ERESTARTSYS`` error. The system call entry code will switch back to
>>> user context, process the signal handler and then your system call will
>>> be restarted (unless the user disabled that). So you should be prepared
>>> to 

Re: [PATCH 03/14] drm/amdgpu: Add helper to turn an existing VM into a compute VM

2018-03-05 Thread Christian König

Am 05.03.2018 um 17:58 schrieb Felix Kuehling:

On 2018-03-05 07:17 AM, Christian König wrote:

Am 04.03.2018 um 03:34 schrieb Felix Kuehling:

Signed-off-by: Felix Kuehling 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 82
++
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 +
   2 files changed, 83 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5afbc5e..58153ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2350,6 +2350,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
   INIT_KFIFO(vm->faults);
   vm->fault_credit = 16;
   +    vm->vm_context = vm_context;

I think we should drop the vm_context parameter and all the related
code in amdgpu_vm_init(). But that can be part of a later cleanup
patch as well.

Yep. It will be a prerequisite for sharing VMs between graphics and
compute. But then CPU vs SDMA page table update would need to be handled
differently, on a per-call basis and probably some synchronization
between them.

Also, we'd need to fix shadow page table handling with CPU updates, or
get rid of shadow page tables. I'm not sure why they're needed, TBH.


The idea behind shadow page tables is that you have the current state of 
the pipeline if the GPU crashes. Since with CPU updates there is no 
pipeline they don't make sense at all with them.





+
   return 0;
     error_free_root:
@@ -2364,6 +2366,86 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
   }
     /**
+ * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
+ *
+ * This only works on GFX VMs that don't have any BOs added and no
+ * page tables allocated yet.
+ *
+ * Changes the following VM parameters:
+ * - vm_context
+ * - use_cpu_for_update
+ * - pte_supports_ats
+ * - pasid (old PASID is released, because compute manages its own
PASIDs)
+ *
+ * Reinitializes the page directory to reflect the changed ATS
+ * setting. May leave behind an unused shadow BO for the page
+ * directory when switching from SDMA updates to CPU updates.
+ *
+ * Returns 0 for success, -errno for errors.
+ */
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct
amdgpu_vm *vm)
+{
+    bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
+    int r;
+
+    r = amdgpu_bo_reserve(vm->root.base.bo, true);
+    if (r)
+    return r;
+
+    /* Sanity checks */
+    if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
+    /* Can happen if ioctl is interrupted by a signal after
+ * this function already completed. Just return success.
+ */
+    r = 0;
+    goto error;
+    }

Ok, that is actually a show stopper. An interrupted IOCTL should never
have a visible effect.

Is that just a theoretical issue or did you run into this in practice?

It's theoretical. init_kfd_vm in patch 4 reserves the page directory
again, validates it and waits for the validation to complete. Both the
reservation and the waiting can be interrupted by signals.


No, both have parameters to control if they are interruptible or not for 
exactly this reason.



  This happens
after amdgpu_vm_make_compute has completed.

If we wanted to achieve "no visible effect", we'd need to roll back the
conversion to a compute VM, which may involve more waiting.

We have a similar issue in map_memory_to_gpu. We map on all GPUs and
wait for a sync object in the end. If SDMA is used, this allows the SDMA
on all GPUs to work concurrently. The wait in the end can be
interrupted. Instead of rolling back the mapping on all GPUs (which
would require more waiting), we just let the system call return
-ERESTARTSYS and make sure the restart doesn't cause any trouble.


If waits are interruptible or not is controllable by parameters. At 
least for importing the VM it is probably ok to disable this.



I've been looking for documentation about the expected behaviour for
system calls interrupted by signals. All I can find is this statement in
Documentation/kernel-hacking/hacking.rst:

After you slept you should check if a signal occurred: the Unix/Linux
way of handling signals is to temporarily exit the system call with the
``-ERESTARTSYS`` error. The system call entry code will switch back to
user context, process the signal handler and then your system call will
be restarted (unless the user disabled that). So you should be prepared
to process the restart, e.g. if you're in the middle of manipulating
some data structure.

The same paragraph is also copied in
https://www.kernel.org/doc/htmldocs/kernel-hacking/ioctls.html.

The key sentence is "should be prepared to process the restart, e.g. if
you're in the middle of manipulating some data structure". Rolling back
everything would achieve that, but it's not the only way and not the
most efficient way.

In this case, I'm handling the restart by checking whether the VM is
already a compute 

Re: [PATCH 03/14] drm/amdgpu: Add helper to turn an existing VM into a compute VM

2018-03-05 Thread Felix Kuehling
On 2018-03-05 07:17 AM, Christian König wrote:
> Am 04.03.2018 um 03:34 schrieb Felix Kuehling:
>> Signed-off-by: Felix Kuehling 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 82
>> ++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 +
>>   2 files changed, 83 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 5afbc5e..58153ef 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -2350,6 +2350,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
>> struct amdgpu_vm *vm,
>>   INIT_KFIFO(vm->faults);
>>   vm->fault_credit = 16;
>>   +    vm->vm_context = vm_context;
>
> I think we should drop the vm_context parameter and all the related
> code in amdgpu_vm_init(). But that can be part of a later cleanup
> patch as well.

Yep. It will be a prerequisite for sharing VMs between graphics and
compute. But then CPU vs SDMA page table update would need to be handled
differently, on a per-call basis and probably some synchronization
between them.

Also, we'd need to fix shadow page table handling with CPU updates, or
get rid of shadow page tables. I'm not sure why they're needed, TBH.

>
>> +
>>   return 0;
>>     error_free_root:
>> @@ -2364,6 +2366,86 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
>> struct amdgpu_vm *vm,
>>   }
>>     /**
>> + * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
>> + *
>> + * This only works on GFX VMs that don't have any BOs added and no
>> + * page tables allocated yet.
>> + *
>> + * Changes the following VM parameters:
>> + * - vm_context
>> + * - use_cpu_for_update
>> + * - pte_supports_ats
>> + * - pasid (old PASID is released, because compute manages its own
>> PASIDs)
>> + *
>> + * Reinitializes the page directory to reflect the changed ATS
>> + * setting. May leave behind an unused shadow BO for the page
>> + * directory when switching from SDMA updates to CPU updates.
>> + *
>> + * Returns 0 for success, -errno for errors.
>> + */
>> +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct
>> amdgpu_vm *vm)
>> +{
>> +    bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
>> +    int r;
>> +
>> +    r = amdgpu_bo_reserve(vm->root.base.bo, true);
>> +    if (r)
>> +    return r;
>> +
>> +    /* Sanity checks */
>> +    if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
>> +    /* Can happen if ioctl is interrupted by a signal after
>> + * this function already completed. Just return success.
>> + */
>> +    r = 0;
>> +    goto error;
>> +    }
>
> Ok, that is actually a show stopper. An interrupted IOCTL should never
> have a visible effect.
>
> Is that just a theoretical issue or did you run into this in practice?

It's theoretical. init_kfd_vm in patch 4 reserves the page directory
again, validates it and waits for the validation to complete. Both the
reservation and the waiting can be interrupted by signals. This happens
after amdgpu_vm_make_compute has completed.

If we wanted to achieve "no visible effect", we'd need to roll back the
conversion to a compute VM, which may involve more waiting.

We have a similar issue in map_memory_to_gpu. We map on all GPUs and
wait for a sync object in the end. If SDMA is used, this allows the SDMA
on all GPUs to work concurrently. The wait in the end can be
interrupted. Instead of rolling back the mapping on all GPUs (which
would require more waiting), we just let the system call return
-ERESTARTSYS and make sure the restart doesn't cause any trouble.

I've been looking for documentation about the expected behaviour for
system calls interrupted by signals. All I can find is this statement in
Documentation/kernel-hacking/hacking.rst:
> After you slept you should check if a signal occurred: the Unix/Linux
> way of handling signals is to temporarily exit the system call with the
> ``-ERESTARTSYS`` error. The system call entry code will switch back to
> user context, process the signal handler and then your system call will
> be restarted (unless the user disabled that). So you should be prepared
> to process the restart, e.g. if you're in the middle of manipulating
> some data structure.
The same paragraph is also copied in
https://www.kernel.org/doc/htmldocs/kernel-hacking/ioctls.html.

The key sentence is "should be prepared to process the restart, e.g. if
you're in the middle of manipulating some data structure". Rolling back
everything would achieve that, but it's not the only way and not the
most efficient way.

In this case, I'm handling the restart by checking whether the VM is
already a compute VM. So on the restart the conversion to a compute VM
becomes a no-op.

Similarly, in the map_memory_to_gpu ioctl, mapping on a GPU where the
memory is already mapped becomes a no-op, which handles the restart
case. In this case we even added a specific test for this condition in

Re:

2018-03-05 Thread Keith Packard
Michel Dänzer  writes:

> On 2018-03-03 05:49 AM, Keith Packard wrote:
>> Here are the patches to the modesetting driver amended for the amdgpu
>> driver.
>
> Thanks for the patches. Unfortunately, since this driver still has to
> compile and work with xserver >= 1.13, at least patches 1 & 3 cannot be
> applied as is.
>
> I was going to port these and take care of that anyway, though I might
> not get around to it before April. If it can't wait that long, I can
> give you details about what needs to be done.

I'm good with that -- I needed this to test amdgpu vs modesetting for
some applications, and just having the patches with support is good
enough for me.

-- 
-keith


signature.asc
Description: PGP signature
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: Enable scatter gather display support

2018-03-05 Thread Samuel Li
There are three major options when SG capability is enabled,
1) Allocate everything when possible from GTT memory
2) Allocate everything when possible from VRAM
3) Allow both VRAM/GTT to be available
Each has its own pros/cons, and it has not been decided which direction is 
going to be used for all PCs. So it seems beneficial to provide all the options.

Sam

On 2018-03-05 05:17 AM, Michel Dänzer wrote:
> On 2018-03-03 12:25 AM, Samuel Li wrote:
>> It's enabled by default. -1 is auto, to allow both vram and gtt
>> memory be available, for testing purpose only.
> 
> Do we really need a module parameter for this? There's already too many
> of them. The driver should know which to use in which cases, and
> developers can easily tweak the code. Users should never need to change
> this.
> 
> 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: Expose scatter gather display to user space.

2018-03-05 Thread Samuel Li

>> +if (amdgpu_display_framebuffer_domains(adev) == 
>> AMDGPU_GEM_DOMAIN_GTT)
> 
> This check should probably be & instead of ==.

Thought about that as well. For mixed VRAM/GTT display case, since it has some 
complications, I prefer to do it later.

Sam


On 2018-03-03 08:42 AM, Christian König wrote:
> Am 03.03.2018 um 00:26 schrieb Samuel Li:
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++
>>   include/uapi/drm/amdgpu_drm.h   | 1 +
>>   2 files changed, 4 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index 51a4b08..2cb344f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -31,6 +31,7 @@
>>   #include "amdgpu_sched.h"
>>   #include "amdgpu_uvd.h"
>>   #include "amdgpu_vce.h"
>> +#include "amdgpu_display.h"
>>     #include 
>>   #include 
>> @@ -578,6 +579,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, 
>> void *data, struct drm_file
>>   dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
>>   if (amdgpu_sriov_vf(adev))
>>   dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
>> +    if (amdgpu_display_framebuffer_domains(adev) == 
>> AMDGPU_GEM_DOMAIN_GTT)
> 
> This check should probably be & instead of ==.
> 
> Christian.
> 
>> +    dev_info.ids_flags |= AMDGPU_IDS_FLAGS_SGDISPLAY;
>>     vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
>>   vm_size -= AMDGPU_VA_RESERVED_SIZE;
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index fe17b67..2f30b98 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -584,6 +584,7 @@ struct drm_amdgpu_cs_chunk_data {
>>    */
>>   #define AMDGPU_IDS_FLAGS_FUSION 0x1
>>   #define AMDGPU_IDS_FLAGS_PREEMPTION 0x2
>> +#define AMDGPU_IDS_FLAGS_SGDISPLAY  0x4
>>     /* indicate if acceleration can be working */
>>   #define AMDGPU_INFO_ACCEL_WORKING    0x00
> 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/3] drm/amd/pp: Delete the wrap layer of smu_allocate/free_memory

2018-03-05 Thread Zhu, Rex
will remove repeated assignment to mc_addr in version 2.


Best Regards

Rex



From: amd-gfx  on behalf of Rex Zhu 

Sent: Monday, March 5, 2018 6:43 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhu, Rex
Subject: [PATCH 2/3] drm/amd/pp: Delete the wrap layer of 
smu_allocate/free_memory

use amdgpu_bo_create/free_kernel directly.

Change-Id: I74f20353edd4e0df6328db66914cd9eabb60e1d7
Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/powerplay/inc/smumgr.h |   7 -
 drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c   |  39 +++---
 drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.h   |  11 +-
 drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c   |  53 +++
 drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h   |  11 +-
 drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c |  52 +++
 drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h |  11 +-
 drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c  |  51 ---
 .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c   | 155 ++---
 .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h   |  11 +-
 10 files changed, 176 insertions(+), 225 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h 
b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
index e1f6e83..8872c5c 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
@@ -106,13 +106,6 @@ enum SMU_MAC_DEFINITION {
 extern int smum_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr,
 uint16_t msg, uint32_t parameter);

-extern int smu_allocate_memory(void *device, uint32_t size,
-enum cgs_gpu_mem_type type,
-uint32_t byte_align, uint64_t *mc_addr,
-void **kptr, void *handle);
-
-extern int smu_free_memory(void *device, void *handle);
-
 extern int smum_update_sclk_threshold(struct pp_hwmgr *hwmgr);

 extern int smum_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type);
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
index 7fe4c11..0d2892d 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
@@ -750,7 +750,6 @@ static int cz_start_smu(struct pp_hwmgr *hwmgr)

 static int cz_smu_init(struct pp_hwmgr *hwmgr)
 {
-   uint64_t mc_addr = 0;
 int ret = 0;
 struct cz_smumgr *cz_smu;

@@ -768,31 +767,31 @@ static int cz_smu_init(struct pp_hwmgr *hwmgr)
 ALIGN(sizeof(struct SMU8_MultimediaPowerLogData), 32) +
 ALIGN(sizeof(struct SMU8_Fusion_ClkTable), 32);

-   ret = smu_allocate_memory(hwmgr->device,
+   ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev,
 cz_smu->toc_buffer.data_size,
-   CGS_GPU_MEM_TYPE__GART_CACHEABLE,
 PAGE_SIZE,
-   &mc_addr,
-   &cz_smu->toc_buffer.kaddr,
-   &cz_smu->toc_buffer.handle);
+   AMDGPU_GEM_DOMAIN_VRAM,
+   &cz_smu->toc_buffer.handle,
+   &cz_smu->toc_buffer.mc_addr,
+   &cz_smu->toc_buffer.kaddr);
 if (ret != 0)
 return -1;

-   cz_smu->toc_buffer.mc_addr_high = smu_upper_32_bits(mc_addr);
-   cz_smu->toc_buffer.mc_addr_low = smu_lower_32_bits(mc_addr);
+   cz_smu->toc_buffer.mc_addr_high = 
smu_upper_32_bits(cz_smu->toc_buffer.mc_addr);
+   cz_smu->toc_buffer.mc_addr_low = 
smu_lower_32_bits(cz_smu->toc_buffer.mc_addr);

-   ret = smu_allocate_memory(hwmgr->device,
+   ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev,
 cz_smu->smu_buffer.data_size,
-   CGS_GPU_MEM_TYPE__GART_CACHEABLE,
 PAGE_SIZE,
-   &mc_addr,
-   &cz_smu->smu_buffer.kaddr,
-   &cz_smu->smu_buffer.handle);
+   AMDGPU_GEM_DOMAIN_VRAM,
+   &cz_smu->smu_buffer.handle,
+   &cz_smu->toc_buffer.mc_addr,
+   &cz_smu->smu_buffer.kaddr);
 if (ret != 0)
 return -1;

-   cz_smu->smu_buffer.mc_addr_high = smu_upper_32_bits(mc_addr);
-   cz_smu->smu_buffer.mc_addr_low = smu_lower_32_bits(mc_addr);
+   cz_smu->smu_buffer.mc_addr_high = 
smu_upper_32_bits(cz_smu->toc_buffer.mc_addr);
+   cz_smu->smu_buffer.mc_addr_low = 
smu_lower_32_bits(cz_smu->toc_buffer.mc_addr);

 if (0 != cz_smu_populate_single_scratch_entry(hwmgr,
 CZ_SCRATCH_ENTRY_UCODE_ID_RLC_SCRATCH,
@@ -845,10 +844,12 @@ static int 

Re: [PATCH 03/14] drm/amdgpu: Add helper to turn an existing VM into a compute VM

2018-03-05 Thread Christian König

Am 04.03.2018 um 03:34 schrieb Felix Kuehling:

Signed-off-by: Felix Kuehling 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 82 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 +
  2 files changed, 83 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5afbc5e..58153ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2350,6 +2350,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
INIT_KFIFO(vm->faults);
vm->fault_credit = 16;
  
+	vm->vm_context = vm_context;


I think we should drop the vm_context parameter and all the related code 
in amdgpu_vm_init(). But that can be part of a later cleanup patch as well.



+
return 0;
  
  error_free_root:

@@ -2364,6 +2366,86 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
  }
  
  /**

+ * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
+ *
+ * This only works on GFX VMs that don't have any BOs added and no
+ * page tables allocated yet.
+ *
+ * Changes the following VM parameters:
+ * - vm_context
+ * - use_cpu_for_update
+ * - pte_supports_ats
+ * - pasid (old PASID is released, because compute manages its own PASIDs)
+ *
+ * Reinitializes the page directory to reflect the changed ATS
+ * setting. May leave behind an unused shadow BO for the page
+ * directory when switching from SDMA updates to CPU updates.
+ *
+ * Returns 0 for success, -errno for errors.
+ */
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+   bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
+   int r;
+
+   r = amdgpu_bo_reserve(vm->root.base.bo, true);
+   if (r)
+   return r;
+
+   /* Sanity checks */
+   if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
+   /* Can happen if ioctl is interrupted by a signal after
+* this function already completed. Just return success.
+*/
+   r = 0;
+   goto error;
+   }


Ok, that is actually a show stopper. An interrupted IOCTL should never 
have a visible effect.


Is that just a theoretical issue or did you run into this in practice?

Regards,
Christian.


+   if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
+   r = -EINVAL;
+   goto error;
+   }
+
+   /* Check if PD needs to be reinitialized and do it before
+* changing any other state, in case it fails.
+*/
+   if (pte_support_ats != vm->pte_support_ats) {
+   /* TODO: This depends on a patch series by Christian.
+* It's only needed for GFX9 GPUs, which aren't
+* supported by upstream KFD yet.
+   r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+  adev->vm_manager.root_level,
+  pte_support_ats);
+   if (r)
+   goto error;
+   */
+   }
+
+   /* Update VM state */
+   vm->vm_context = AMDGPU_VM_CONTEXT_COMPUTE;
+   vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+   AMDGPU_VM_USE_CPU_FOR_COMPUTE);
+   vm->pte_support_ats = pte_support_ats;
+   DRM_DEBUG_DRIVER("VM update mode is %s\n",
+vm->use_cpu_for_update ? "CPU" : "SDMA");
+   WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
+ "CPU update of VM recommended only for large BAR system\n");
+
+   if (vm->pasid) {
+   unsigned long flags;
+
+   spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+   idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
+   spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+
+   vm->pasid = 0;
+   }
+
+error:
+   amdgpu_bo_unreserve(vm->root.base.bo);
+   return r;
+}
+
+/**
   * amdgpu_vm_free_levels - free PD/PT levels
   *
   * @adev: amdgpu device structure
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 40b4e09..7f50a38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -263,6 +263,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev);
  void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
  int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
   int vm_context, unsigned int pasid);
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
  void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
  bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
  unsigned int pasid);


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org

Re: [PATCH 4/5] drm/ttm: add ttm_sg_tt_init

2018-03-05 Thread Christian König

Ping?

Am 27.02.2018 um 13:07 schrieb Christian König:

Hi guys,

at least on amdgpu and radeon the page array allocated by 
ttm_dma_tt_init is completely unused in the case of DMA-buf sharing. 
So I'm trying to get rid of that by only allocating the DMA address 
array.


Now the only other user of DMA-buf together with ttm_dma_tt_init is 
Nouveau. So my question is are you guys using the page array anywhere 
in your kernel driver in case of a DMA-buf sharing?


If no then I could just make this the default behavior for all drivers 
and save quite a bit of memory for everybody.


Thanks,
Christian.

Am 27.02.2018 um 12:49 schrieb Christian König:

This allows drivers to only allocate dma addresses, but not a page
array.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/ttm/ttm_tt.c | 54 


  include/drm/ttm/ttm_tt.h |  2 ++
  2 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 8e0b525cda00..971133106ec2 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -108,6 +108,16 @@ static int 
ttm_dma_tt_alloc_page_directory(struct ttm_dma_tt *ttm)

  return 0;
  }
  +static int ttm_sg_tt_alloc_page_directory(struct ttm_dma_tt *ttm)
+{
+    ttm->dma_address = kvmalloc_array(ttm->ttm.num_pages,
+  sizeof(*ttm->dma_address),
+  GFP_KERNEL | __GFP_ZERO);
+    if (!ttm->dma_address)
+    return -ENOMEM;
+    return 0;
+}
+
  #ifdef CONFIG_X86
  static inline int ttm_tt_set_page_caching(struct page *p,
    enum ttm_caching_state c_old,
@@ -227,8 +237,8 @@ void ttm_tt_destroy(struct ttm_tt *ttm)
  ttm->func->destroy(ttm);
  }
  -int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev,
-    unsigned long size, uint32_t page_flags)
+void ttm_tt_init_fields(struct ttm_tt *ttm, struct ttm_bo_device *bdev,
+    unsigned long size, uint32_t page_flags)
  {
  ttm->bdev = bdev;
  ttm->num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -236,6 +246,12 @@ int ttm_tt_init(struct ttm_tt *ttm, struct 
ttm_bo_device *bdev,

  ttm->page_flags = page_flags;
  ttm->state = tt_unpopulated;
  ttm->swap_storage = NULL;
+}
+
+int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev,
+    unsigned long size, uint32_t page_flags)
+{
+    ttm_tt_init_fields(ttm, bdev, size, page_flags);
    if (ttm_tt_alloc_page_directory(ttm)) {
  ttm_tt_destroy(ttm);
@@ -258,12 +274,7 @@ int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, 
struct ttm_bo_device *bdev,

  {
   struct ttm_tt *ttm = &ttm_dma->ttm;
  -    ttm->bdev = bdev;
-    ttm->num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-    ttm->caching_state = tt_cached;
-    ttm->page_flags = page_flags;
-    ttm->state = tt_unpopulated;
-    ttm->swap_storage = NULL;
+    ttm_tt_init_fields(ttm, bdev, size, page_flags);
     INIT_LIST_HEAD(&ttm_dma->pages_list);
  if (ttm_dma_tt_alloc_page_directory(ttm_dma)) {
@@ -275,11 +286,36 @@ int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, 
struct ttm_bo_device *bdev,

  }
  EXPORT_SYMBOL(ttm_dma_tt_init);
  +int ttm_sg_tt_init(struct ttm_dma_tt *ttm_dma, struct 
ttm_bo_device *bdev,

+   unsigned long size, uint32_t page_flags)
+{
+    struct ttm_tt *ttm = &ttm_dma->ttm;
+    int ret;
+
+    ttm_tt_init_fields(ttm, bdev, size, page_flags);
+
+    INIT_LIST_HEAD(&ttm_dma->pages_list);
+    if (page_flags & TTM_PAGE_FLAG_SG)
+    ret = ttm_sg_tt_alloc_page_directory(ttm_dma);
+    else
+    ret = ttm_dma_tt_alloc_page_directory(ttm_dma);
+    if (ret) {
+    ttm_tt_destroy(ttm);
+    pr_err("Failed allocating page table\n");
+    return -ENOMEM;
+    }
+    return 0;
+}
+EXPORT_SYMBOL(ttm_sg_tt_init);
+
  void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma)
  {
   struct ttm_tt *ttm = &ttm_dma->ttm;
  -    kvfree(ttm->pages);
+    if (ttm->pages)
+    kvfree(ttm->pages);
+    else
+    kvfree(ttm_dma->dma_address);
  ttm->pages = NULL;
  ttm_dma->dma_address = NULL;
  }
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index 9c78556b488e..1cf316a4257c 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -163,6 +163,8 @@ int ttm_tt_init(struct ttm_tt *ttm, struct 
ttm_bo_device *bdev,

  unsigned long size, uint32_t page_flags);
  int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct 
ttm_bo_device *bdev,

  unsigned long size, uint32_t page_flags);
+int ttm_sg_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_bo_device 
*bdev,

+   unsigned long size, uint32_t page_flags);
    /**
   * ttm_tt_fini




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/5] drm/prime: fix potential race in drm_gem_map_detach

2018-03-05 Thread Christian König

Ping? Anybody who could give me an review on those changes?

The first one is just nice to have, the rest is a nice memory usage 
reduction in some cases.


Christian.

Am 27.02.2018 um 12:49 schrieb Christian König:

Unpin the GEM object only after freeing the sg table.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/drm_prime.c | 32 
  1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index e82a976f0fba..c38dacda6119 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -230,26 +230,26 @@ void drm_gem_map_detach(struct dma_buf *dma_buf,
struct drm_prime_attachment *prime_attach = attach->priv;
struct drm_gem_object *obj = dma_buf->priv;
struct drm_device *dev = obj->dev;
-   struct sg_table *sgt;
  
-	if (dev->driver->gem_prime_unpin)

-   dev->driver->gem_prime_unpin(obj);
+   if (prime_attach) {
+   struct sg_table *sgt = prime_attach->sgt;
  
-	if (!prime_attach)

-   return;
-
-   sgt = prime_attach->sgt;
-   if (sgt) {
-   if (prime_attach->dir != DMA_NONE)
-   dma_unmap_sg_attrs(attach->dev, sgt->sgl, sgt->nents,
-  prime_attach->dir,
-  DMA_ATTR_SKIP_CPU_SYNC);
-   sg_free_table(sgt);
+   if (sgt) {
+   if (prime_attach->dir != DMA_NONE)
+   dma_unmap_sg_attrs(attach->dev, sgt->sgl,
+  sgt->nents,
+  prime_attach->dir,
+  DMA_ATTR_SKIP_CPU_SYNC);
+   sg_free_table(sgt);
+   }
+
+   kfree(sgt);
+   kfree(prime_attach);
+   attach->priv = NULL;
}
  
-	kfree(sgt);

-   kfree(prime_attach);
-   attach->priv = NULL;
+   if (dev->driver->gem_prime_unpin)
+   dev->driver->gem_prime_unpin(obj);
  }
  EXPORT_SYMBOL(drm_gem_map_detach);
  


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/3] drm/amdgpu: add amdgpu_evict_gtt debugfs entry

2018-03-05 Thread Christian König
Allow evicting all BOs from the GTT domain.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index ee76b468774a..369beb5041a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -767,10 +767,21 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, 
void *data)
return 0;
 }
 
+static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
+{
+   struct drm_info_node *node = (struct drm_info_node *)m->private;
+   struct drm_device *dev = node->minor->dev;
+   struct amdgpu_device *adev = dev->dev_private;
+
+   seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT));
+   return 0;
+}
+
 static const struct drm_info_list amdgpu_debugfs_list[] = {
{"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump},
	{"amdgpu_test_ib", &amdgpu_debugfs_test_ib},
-   {"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram}
+   {"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram},
+   {"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt},
 };
 
 int amdgpu_debugfs_init(struct amdgpu_device *adev)
-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/3] drm/amdgpu: further mitigate workaround for i915

2018-03-05 Thread Christian König
Disable the workaround on imported BOs as well.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 8ce74a1d9966..fb66b45548d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -107,12 +107,18 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
	ww_mutex_lock(&resv->lock, NULL);
	ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false,
   AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, &bo);
-   ww_mutex_unlock(&resv->lock);
if (ret)
-   return ERR_PTR(ret);
+   goto error;
+
+   if (attach->dmabuf->ops != _dmabuf_ops)
+   bo->prime_shared_count = 1;
 
-   bo->prime_shared_count = 1;
+   ww_mutex_unlock(&resv->lock);
	return &bo->gem_base;
+
+error:
+   ww_mutex_unlock(&resv->lock);
+   return ERR_PTR(ret);
 }
 
 static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/3] drm/amdgpu: drop gtt->adev

2018-03-05 Thread Christian König
We can use ttm->bdev instead.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 614811061d3d..b19a4f7a845e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -679,7 +679,6 @@ struct amdgpu_ttm_gup_task_list {
 
 struct amdgpu_ttm_tt {
struct ttm_dma_tt   ttm;
-   struct amdgpu_device*adev;
u64 offset;
uint64_tuserptr;
struct mm_struct*usermm;
@@ -837,6 +836,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
   struct ttm_mem_reg *bo_mem)
 {
+   struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void*)ttm;
uint64_t flags;
int r = 0;
@@ -863,9 +863,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
return 0;
}
 
-   flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
+   flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
-   r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
+   r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
ttm->pages, gtt->ttm.dma_address, flags);
 
if (r)
@@ -942,6 +942,7 @@ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
 
 static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 {
+   struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
int r;
 
@@ -952,7 +953,7 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
return 0;
 
/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
-   r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
+   r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
if (r)
DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
  gtt->ttm.ttm.num_pages, gtt->offset);
@@ -986,7 +987,6 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct 
ttm_bo_device *bdev,
return NULL;
}
gtt->ttm.ttm.func = _backend_func;
-   gtt->adev = adev;
if (ttm_dma_tt_init(>ttm, bdev, size, page_flags)) {
kfree(gtt);
return NULL;
-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] dma-buf/reservation: should keep the new fence in add_shared_inplace

2018-03-05 Thread Monk Liu
Change-Id: If6a979ba9fd6c923b82212f35f07a9ff31c86767
Signed-off-by: Monk Liu 
---
 drivers/dma-buf/reservation.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index 375de41..9b875267 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -118,7 +118,7 @@ reservation_object_add_shared_inplace(struct 
reservation_object *obj,
old_fence = rcu_dereference_protected(fobj->shared[i],
reservation_object_held(obj));
 
-   if (old_fence->context == fence->context) {
+   if (dma_fence_is_later(fence, old_fence)) {
/* memory barrier is added by write_seqcount_begin */
RCU_INIT_POINTER(fobj->shared[i], fence);
			write_seqcount_end(&obj->seq);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2)

2018-03-05 Thread Christian König

Hi Monk,

then that was written differently. Maybe "might_sleep()" or something 
like that?


It's just a checker which raises a nice warning when you try to sleep in 
atomic context on debug kernels.


Christian.

Am 05.03.2018 um 12:27 schrieb Liu, Monk:

Hi Christian

I couldn't find this "may_sleep()" in my 4.13 kernel, did I miss something ??

Thanks
/Monk

-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
Sent: 2018年3月5日 19:21
To: Liu, Monk ; Koenig, Christian ; 
Kuehling, Felix ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2)

Am 05.03.2018 um 09:08 schrieb Liu, Monk:

To better approach this issue I suggest to do the following:
1. Revert the original patch.

2. Stop waiting to long for writes. E.g. use a separate timeout (20ms
maybe?) to wait for the write. Then do a WARN_ON_ONCE when we timeout.
Cannot do that 20ms is not enough, sometimes you need 10 seconds since
other VFs may doing bad things like occupying GPU intentionally or
they are doing TDR, so I don't think separate read and write is good
idea, they should be treated equally

Well the question is if separating read would actually help.


3. To the read function add a "if (!in_intterupt()) may_sleep();" and then 
retest. That should at least print a nice warning when called from atomic context.
Sorry what is may_sleep() ??

4. Test the whole thing and try to fix all warnings about atomic
contexts from the may_sleep();

5. Reapply the original patch, but this time only for the read function, not 
the write function.


  From current LKG code, the only one spin lock may wrapping the
kiq_rreg/wreg() is the pcie_idx_lock, and this lock is only used during init(), 
Since init() is run under the case of exclusive mode for SRIOV, which means:
1)  register access is not go through KIQ (see admgpu_mm_reg)
2)  those functions are only in bif_medium_grain_xxx part (vi.c and
nbio_v6.c) , and they won't hit under SRIOV ( we return in the head if SRIOV 
detect) So I don' think this spin_lock may cause trouble...

Ok in this case let's keep the patch for now, but please provide a new patch which adds 
"if (!in_intterupt()) may_sleep();" in both the read and write function.

This way we should at least catch problems early on.

Christian.


/Monk



-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
Sent: 2018年3月5日 15:57
To: Liu, Monk ; Koenig, Christian
; Kuehling, Felix ;
amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in
IRQ(v2)


otherwise I don't see how it is better by reverting it

Well it's better to revert it for now because it seems to create more problems 
than it solves.

To better approach this issue I suggest to do the following:
1. Revert the original patch.

2. Stop waiting to long for writes. E.g. use a separate timeout (20ms
maybe?) to wait for the write. Then do a WARN_ON_ONCE when we timeout.

3. To the read function add a "if (!in_intterupt()) may_sleep();" and then 
retest. That should at least print a nice warning when called from atomic context.

4. Test the whole thing and try to fix all warnings about atomic
contexts from the may_sleep();

5. Reapply the original patch, but this time only for the read function, not 
the write function.

Regards,
Christian.

Am 05.03.2018 um 05:20 schrieb Liu, Monk:

When there are 16 VF/VM on one GPU, we can easily hit "sys lockup to
22s" kernel error/warning introduced by kiq_rreg/wreg routine That's
why I must use this patch to let thread sleep a while and try again,

If you insist reverting this patch please give me a solution,
otherwise I don't see how it is better by reverting it

/Monk

-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
Sent: 2018年3月3日 21:38
To: Kuehling, Felix ; Liu, Monk
; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in
IRQ(v2)

Am 02.03.2018 um 21:47 schrieb Felix Kuehling:

On 2018-03-02 04:29 AM, Liu, Monk wrote:

In_atomic() is not encouraged to be used to judge if sleep is
possible, see the macros of it

#define in_atomic() (preempt_count() != 0)

OK. But my point is still that you're not testing the right thing
when you check in_interrupt(). The comment before the in_atomic
macro definition states the limitations and says "do not use in driver code".
Unfortunately it doesn't suggest any alternative. I think
in_interrupt is actually worse, because it misses even more cases than 
in_atomic.

Thinking about this, Felix seems to be absolutely right.

So we need to revert this patch since you can't reliable detect in a driver if 
sleeping is allowed or not.

Regards,
Christian.


Regards,
  Felix


/Monk

-Original Message-

RE: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2)

2018-03-05 Thread Liu, Monk
Hi Christian

I couldn't find this "may_sleep()" in my 4.13 kernel, did I miss something ??

Thanks 
/Monk

-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: 2018年3月5日 19:21
To: Liu, Monk ; Koenig, Christian ; 
Kuehling, Felix ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2)

Am 05.03.2018 um 09:08 schrieb Liu, Monk:
> To better approach this issue I suggest to do the following:
> 1. Revert the original patch.
>
> 2. Stop waiting to long for writes. E.g. use a separate timeout (20ms
> maybe?) to wait for the write. Then do a WARN_ON_ONCE when we timeout.
> Cannot do that 20ms is not enough, sometimes you need 10 seconds since 
> other VFs may doing bad things like occupying GPU intentionally or 
> they are doing TDR, so I don't think separate read and write is good 
> idea, they should be treated equally

Well the question is if separating read would actually help.

>
> 3. To the read function add a "if (!in_intterupt()) may_sleep();" and then 
> retest. That should at least print a nice warning when called from atomic 
> context.
> Sorry what is may_sleep() ??
>
> 4. Test the whole thing and try to fix all warnings about atomic 
> contexts from the may_sleep();
>
> 5. Reapply the original patch, but this time only for the read function, not 
> the write function.
>
>
>  From current LKG code, the only one spin lock may wrapping the 
> kiq_rreg/wreg() is the pcie_idx_lock, and this lock is only used during 
> init(), Since init() is run under the case of exclusive mode for SRIOV, which 
> means:
> 1)  register access is not go through KIQ (see admgpu_mm_reg)
> 2)  those functions are only in bif_medium_grain_xxx part (vi.c and 
> nbio_v6.c) , and they won't hit under SRIOV ( we return in the head if SRIOV 
> detect) So I don't think this spin_lock may cause trouble...

Ok in this case let's keep the patch for now, but please provide a new patch 
which adds "if (!in_intterupt()) may_sleep();" in both the read and write 
function.

This way we should at least catch problems early on.

Christian.

>
> /Monk
>
>
>
> -Original Message-
> From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
> Sent: 2018年3月5日 15:57
> To: Liu, Monk ; Koenig, Christian 
> ; Kuehling, Felix ; 
> amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in 
> IRQ(v2)
>
>> otherwise I don't see how it is better by reverting it
> Well it's better to revert it for now because it seems to create more 
> problems than it solves.
>
> To better approach this issue I suggest to do the following:
> 1. Revert the original patch.
>
> 2. Stop waiting to long for writes. E.g. use a separate timeout (20ms
> maybe?) to wait for the write. Then do a WARN_ON_ONCE when we timeout.
>
> 3. To the read function add a "if (!in_intterupt()) may_sleep();" and then 
> retest. That should at least print a nice warning when called from atomic 
> context.
>
> 4. Test the whole thing and try to fix all warnings about atomic 
> contexts from the may_sleep();
>
> 5. Reapply the original patch, but this time only for the read function, not 
> the write function.
>
> Regards,
> Christian.
>
> Am 05.03.2018 um 05:20 schrieb Liu, Monk:
>> When there are 16 VF/VM on one GPU, we can easily hit "sys lockup to 
>> 22s" kernel error/warning introduced by kiq_rreg/wreg routine That's 
>> why I must use this patch to let thread sleep a while and try again,
>>
>> If you insist reverting this patch please give me a solution, 
>> otherwise I don't see how it is better by reverting it
>>
>> /Monk
>>
>> -Original Message-
>> From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
>> Sent: 2018年3月3日 21:38
>> To: Kuehling, Felix ; Liu, Monk 
>> ; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in
>> IRQ(v2)
>>
>> Am 02.03.2018 um 21:47 schrieb Felix Kuehling:
>>> On 2018-03-02 04:29 AM, Liu, Monk wrote:
 In_atomic() is not encouraged to be used to judge if sleep is 
 possible, see the macros of it

 #define in_atomic() (preempt_count() != 0)
>>> OK. But my point is still that you're not testing the right thing 
>>> when you check in_interrupt(). The comment before the in_atomic 
>>> macro definition states the limitations and says "do not use in driver 
>>> code".
>>> Unfortunately it doesn't suggest any alternative. I think 
>>> in_interrupt is actually worse, because it misses even more cases than 
>>> in_atomic.
>> Thinking about this, Felix seems to be absolutely right.
>>
>> So we need to revert this patch since you can't reliable detect in a driver 
>> if sleeping is allowed or not.
>>
>> Regards,
>> Christian.
>>
>>> Regards,
>>>  Felix
>>>
 /Monk

 

Re: [PATCH 1/3] drm/amd/pp: Remove cgs wrap interface for temperature update

2018-03-05 Thread Christian König

Acked-by: Christian König  for the series.

Am 05.03.2018 um 11:43 schrieb Rex Zhu:

Change-Id: I742e82315910ce57aeb4a391fe2ac1cdfccdb8ea
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c   | 13 -
  drivers/gpu/drm/amd/include/cgs_common.h  |  6 --
  drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c |  4 +++-
  3 files changed, 3 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index f37482c..76f4758 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -905,18 +905,6 @@ static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device 
*cgs_device, bool ena
return 0;
  }
  
-static int amdgpu_cgs_set_temperature_range(struct cgs_device *cgs_device,

-   int min_temperature,
-   int max_temperature)
-{
-   CGS_FUNC_ADEV;
-
-   adev->pm.dpm.thermal.min_temp = min_temperature;
-   adev->pm.dpm.thermal.max_temp = max_temperature;
-
-   return 0;
-}
-
  static const struct cgs_ops amdgpu_cgs_ops = {
.alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem,
.free_gpu_mem = amdgpu_cgs_free_gpu_mem,
@@ -941,7 +929,6 @@ static int amdgpu_cgs_set_temperature_range(struct 
cgs_device *cgs_device,
.is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled,
.enter_safe_mode = amdgpu_cgs_enter_safe_mode,
.lock_grbm_idx = amdgpu_cgs_lock_grbm_idx,
-   .set_temperature_range = amdgpu_cgs_set_temperature_range,
  };
  
  static const struct cgs_os_ops amdgpu_cgs_os_ops = {

diff --git a/drivers/gpu/drm/amd/include/cgs_common.h 
b/drivers/gpu/drm/amd/include/cgs_common.h
index 113ba6f..7f26f20 100644
--- a/drivers/gpu/drm/amd/include/cgs_common.h
+++ b/drivers/gpu/drm/amd/include/cgs_common.h
@@ -354,9 +354,6 @@ typedef int(*cgs_get_active_displays_info)(
  
  typedef void (*cgs_lock_grbm_idx)(struct cgs_device *cgs_device, bool lock);
  
-typedef int (*cgs_set_temperature_range)(struct cgs_device *cgs_device,

-   int min_temperature,
-   int max_temperature);
  struct cgs_ops {
/* memory management calls (similar to KFD interface) */
cgs_alloc_gpu_mem_t alloc_gpu_mem;
@@ -389,7 +386,6 @@ struct cgs_ops {
cgs_is_virtualization_enabled_t is_virtualization_enabled;
cgs_enter_safe_mode enter_safe_mode;
cgs_lock_grbm_idx lock_grbm_idx;
-   cgs_set_temperature_range set_temperature_range;
  };
  
  struct cgs_os_ops; /* To be define in OS-specific CGS header */

@@ -465,7 +461,5 @@ struct cgs_device
  #define cgs_lock_grbm_idx(cgs_device, lock) \
CGS_CALL(lock_grbm_idx, cgs_device, lock)
  
-#define cgs_set_temperature_range(dev, min_temp, max_temp)	\

-   CGS_CALL(set_temperature_range, dev, min_temp, max_temp)
  
  #endif /* _CGS_COMMON_H */

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index f06f8f4..b784131 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -222,6 +222,7 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
  {
int ret = 0;
struct PP_TemperatureRange range = {TEMP_RANGE_MIN, TEMP_RANGE_MAX};
+   struct amdgpu_device *adev = hwmgr->adev;
  
  	if (hwmgr->hwmgr_func->get_thermal_temperature_range)

hwmgr->hwmgr_func->get_thermal_temperature_range(
@@ -232,7 +233,8 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
&& hwmgr->hwmgr_func->start_thermal_controller != NULL)
ret = hwmgr->hwmgr_func->start_thermal_controller(hwmgr, 
);
  
-	cgs_set_temperature_range(hwmgr->device, range.min, range.max);

+   adev->pm.dpm.thermal.min_temp = range.min;
+   adev->pm.dpm.thermal.max_temp = range.max;
  
  	return ret;

  }


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2)

2018-03-05 Thread Christian König

Am 05.03.2018 um 09:08 schrieb Liu, Monk:

To better approach this issue I suggest to do the following:
1. Revert the original patch.

2. Stop waiting to long for writes. E.g. use a separate timeout (20ms
maybe?) to wait for the write. Then do a WARN_ON_ONCE when we timeout.
Cannot do that 20ms is not enough, sometimes you need 10 seconds since other 
VFs may doing bad things like occupying GPU intentionally or they are doing 
TDR, so
I don't think separate read and write is good idea, they should be treated 
equally


Well the question is if separating read would actually help.



3. To the read function add a "if (!in_intterupt()) may_sleep();" and then 
retest. That should at least print a nice warning when called from atomic context.
Sorry what is may_sleep() ??

4. Test the whole thing and try to fix all warnings about atomic contexts from 
the may_sleep();

5. Reapply the original patch, but this time only for the read function, not 
the write function.


 From current LKG code, the only one spin lock may wrapping the kiq_rreg/wreg() 
is the pcie_idx_lock, and this lock is only used during init(),
Since init() is run under the case of exclusive mode for SRIOV, which means:
1)  register access is not go through KIQ (see admgpu_mm_reg)
2)  those functions are only in bif_medium_grain_xxx part (vi.c and nbio_v6.c) 
, and they won't hit under SRIOV ( we return in the head if SRIOV detect)
So I don't think this spin_lock may cause trouble...


Ok in this case let's keep the patch for now, but please provide a new 
patch which adds "if (!in_intterupt()) may_sleep();" in both the read 
and write function.


This way we should at least catch problems early on.

Christian.



/Monk



-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
Sent: 2018年3月5日 15:57
To: Liu, Monk ; Koenig, Christian ; 
Kuehling, Felix ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2)


otherwise I don't see how it is better by reverting it

Well it's better to revert it for now because it seems to create more problems 
than it solves.

To better approach this issue I suggest to do the following:
1. Revert the original patch.

2. Stop waiting to long for writes. E.g. use a separate timeout (20ms
maybe?) to wait for the write. Then do a WARN_ON_ONCE when we timeout.

3. To the read function add a "if (!in_intterupt()) may_sleep();" and then 
retest. That should at least print a nice warning when called from atomic context.

4. Test the whole thing and try to fix all warnings about atomic contexts from 
the may_sleep();

5. Reapply the original patch, but this time only for the read function, not 
the write function.

Regards,
Christian.

Am 05.03.2018 um 05:20 schrieb Liu, Monk:

When there are 16 VF/VM on one GPU, we can easily hit "sys lockup to
22s" kernel error/warning introduced by kiq_rreg/wreg routine That's
why I must use this patch to let thread sleep a while and try again,

If you insist reverting this patch please give me a solution,
otherwise I don't see how it is better by reverting it

/Monk

-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
Sent: 2018年3月3日 21:38
To: Kuehling, Felix ; Liu, Monk
; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in
IRQ(v2)

Am 02.03.2018 um 21:47 schrieb Felix Kuehling:

On 2018-03-02 04:29 AM, Liu, Monk wrote:

In_atomic() is not encouraged to be used to judge if sleep is
possible, see the macros of it

#define in_atomic() (preempt_count() != 0)

OK. But my point is still that you're not testing the right thing
when you check in_interrupt(). The comment before the in_atomic macro
definition states the limitations and says "do not use in driver code".
Unfortunately it doesn't suggest any alternative. I think
in_interrupt is actually worse, because it misses even more cases than 
in_atomic.

Thinking about this, Felix seems to be absolutely right.

So we need to revert this patch since you can't reliable detect in a driver if 
sleeping is allowed or not.

Regards,
Christian.


Regards,
     Felix


/Monk

-Original Message-
From: Kuehling, Felix
Sent: 2018年3月1日 23:50
To: amd-gfx@lists.freedesktop.org; Liu, Monk 
Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in
IRQ(v2)

On 2018-02-28 02:27 AM, Monk Liu wrote:

sometimes GPU is switched to other VFs and won't switch back soon,
so the kiq reg access will not signal within a short period,
instead of busy waiting a long time(MAX_KIQ_REG_WAIT) and returning
TMO we can instead sleep 5ms and try again later (non irq context)

And since the waiting in kiq_r/weg is busy wait, so
MAX_KIQ_REG_WAIT shouldn't set to a long time, set it to 10ms is more 
appropriate.

if gpu already in reset state, don't retry the KIQ reg access
otherwise 

[PATCH 3/3] drm/amdgpu: Delete cgs interface for gpu memory allocate/free

2018-03-05 Thread Rex Zhu
Change-Id: I9085b42b3ffac925aebd8000760d456a4f8ec03c
Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c  | 131 ---
 drivers/gpu/drm/amd/include/cgs_common.h | 115 ---
 2 files changed, 246 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 76f4758..f2dd98d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -42,131 +42,6 @@ struct amdgpu_cgs_device {
((struct amdgpu_cgs_device *)cgs_device)->adev
 
 
-static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
-   enum cgs_gpu_mem_type type,
-   uint64_t size, uint64_t align,
-   cgs_handle_t *handle)
-{
-   CGS_FUNC_ADEV;
-   uint16_t flags = 0;
-   int ret = 0;
-   uint32_t domain = 0;
-   struct amdgpu_bo *obj;
-
-   /* fail if the alignment is not a power of 2 */
-   if (((align != 1) && (align & (align - 1)))
-   || size == 0 || align == 0)
-   return -EINVAL;
-
-
-   switch(type) {
-   case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB:
-   case CGS_GPU_MEM_TYPE__VISIBLE_FB:
-   flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-   AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-   domain = AMDGPU_GEM_DOMAIN_VRAM;
-   break;
-   case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB:
-   case CGS_GPU_MEM_TYPE__INVISIBLE_FB:
-   flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-   AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-   domain = AMDGPU_GEM_DOMAIN_VRAM;
-   break;
-   case CGS_GPU_MEM_TYPE__GART_CACHEABLE:
-   domain = AMDGPU_GEM_DOMAIN_GTT;
-   break;
-   case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE:
-   flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-   domain = AMDGPU_GEM_DOMAIN_GTT;
-   break;
-   default:
-   return -EINVAL;
-   }
-
-
-   *handle = 0;
-
-   ret = amdgpu_bo_create(adev, size, align, true, domain, flags,
-  NULL, NULL, );
-   if (ret) {
-   DRM_ERROR("(%d) bo create failed\n", ret);
-   return ret;
-   }
-   *handle = (cgs_handle_t)obj;
-
-   return ret;
-}
-
-static int amdgpu_cgs_free_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t 
handle)
-{
-   struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-
-   if (obj) {
-   int r = amdgpu_bo_reserve(obj, true);
-   if (likely(r == 0)) {
-   amdgpu_bo_kunmap(obj);
-   amdgpu_bo_unpin(obj);
-   amdgpu_bo_unreserve(obj);
-   }
-   amdgpu_bo_unref();
-
-   }
-   return 0;
-}
-
-static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t 
handle,
-  uint64_t *mcaddr)
-{
-   int r;
-   struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-
-   WARN_ON_ONCE(obj->placement.num_placement > 1);
-
-   r = amdgpu_bo_reserve(obj, true);
-   if (unlikely(r != 0))
-   return r;
-   r = amdgpu_bo_pin(obj, obj->preferred_domains, mcaddr);
-   amdgpu_bo_unreserve(obj);
-   return r;
-}
-
-static int amdgpu_cgs_gunmap_gpu_mem(struct cgs_device *cgs_device, 
cgs_handle_t handle)
-{
-   int r;
-   struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-   r = amdgpu_bo_reserve(obj, true);
-   if (unlikely(r != 0))
-   return r;
-   r = amdgpu_bo_unpin(obj);
-   amdgpu_bo_unreserve(obj);
-   return r;
-}
-
-static int amdgpu_cgs_kmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t 
handle,
-  void **map)
-{
-   int r;
-   struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-   r = amdgpu_bo_reserve(obj, true);
-   if (unlikely(r != 0))
-   return r;
-   r = amdgpu_bo_kmap(obj, map);
-   amdgpu_bo_unreserve(obj);
-   return r;
-}
-
-static int amdgpu_cgs_kunmap_gpu_mem(struct cgs_device *cgs_device, 
cgs_handle_t handle)
-{
-   int r;
-   struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-   r = amdgpu_bo_reserve(obj, true);
-   if (unlikely(r != 0))
-   return r;
-   amdgpu_bo_kunmap(obj);
-   amdgpu_bo_unreserve(obj);
-   return r;
-}
-
 static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, 
unsigned offset)
 {
CGS_FUNC_ADEV;
@@ -906,12 +781,6 @@ static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device 
*cgs_device, bool ena
 }
 
 static const struct cgs_ops amdgpu_cgs_ops = {
-   .alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem,
-   .free_gpu_mem = amdgpu_cgs_free_gpu_mem,
-   .gmap_gpu_mem = 

[PATCH 2/3] drm/amd/pp: Delete the wrap layer of smu_allocate/free_memory

2018-03-05 Thread Rex Zhu
use amdgpu_bo_create/free_kernel directly.

Change-Id: I74f20353edd4e0df6328db66914cd9eabb60e1d7
Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/powerplay/inc/smumgr.h |   7 -
 drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c   |  39 +++---
 drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.h   |  11 +-
 drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c   |  53 +++
 drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h   |  11 +-
 drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c |  52 +++
 drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h |  11 +-
 drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c  |  51 ---
 .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c   | 155 ++---
 .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h   |  11 +-
 10 files changed, 176 insertions(+), 225 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h 
b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
index e1f6e83..8872c5c 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
@@ -106,13 +106,6 @@ enum SMU_MAC_DEFINITION {
 extern int smum_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr,
uint16_t msg, uint32_t parameter);
 
-extern int smu_allocate_memory(void *device, uint32_t size,
-enum cgs_gpu_mem_type type,
-uint32_t byte_align, uint64_t *mc_addr,
-void **kptr, void *handle);
-
-extern int smu_free_memory(void *device, void *handle);
-
 extern int smum_update_sclk_threshold(struct pp_hwmgr *hwmgr);
 
 extern int smum_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type);
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
index 7fe4c11..0d2892d 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
@@ -750,7 +750,6 @@ static int cz_start_smu(struct pp_hwmgr *hwmgr)
 
 static int cz_smu_init(struct pp_hwmgr *hwmgr)
 {
-   uint64_t mc_addr = 0;
int ret = 0;
struct cz_smumgr *cz_smu;
 
@@ -768,31 +767,31 @@ static int cz_smu_init(struct pp_hwmgr *hwmgr)
ALIGN(sizeof(struct SMU8_MultimediaPowerLogData), 32) +
ALIGN(sizeof(struct SMU8_Fusion_ClkTable), 32);
 
-   ret = smu_allocate_memory(hwmgr->device,
+   ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev,
cz_smu->toc_buffer.data_size,
-   CGS_GPU_MEM_TYPE__GART_CACHEABLE,
PAGE_SIZE,
-   _addr,
-   _smu->toc_buffer.kaddr,
-   _smu->toc_buffer.handle);
+   AMDGPU_GEM_DOMAIN_VRAM,
+   _smu->toc_buffer.handle,
+   _smu->toc_buffer.mc_addr,
+   _smu->toc_buffer.kaddr);
if (ret != 0)
return -1;
 
-   cz_smu->toc_buffer.mc_addr_high = smu_upper_32_bits(mc_addr);
-   cz_smu->toc_buffer.mc_addr_low = smu_lower_32_bits(mc_addr);
+   cz_smu->toc_buffer.mc_addr_high = 
smu_upper_32_bits(cz_smu->toc_buffer.mc_addr);
+   cz_smu->toc_buffer.mc_addr_low = 
smu_lower_32_bits(cz_smu->toc_buffer.mc_addr);
 
-   ret = smu_allocate_memory(hwmgr->device,
+   ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev,
cz_smu->smu_buffer.data_size,
-   CGS_GPU_MEM_TYPE__GART_CACHEABLE,
PAGE_SIZE,
-   _addr,
-   _smu->smu_buffer.kaddr,
-   _smu->smu_buffer.handle);
+   AMDGPU_GEM_DOMAIN_VRAM,
+   _smu->smu_buffer.handle,
+   _smu->toc_buffer.mc_addr,
+   _smu->smu_buffer.kaddr);
if (ret != 0)
return -1;
 
-   cz_smu->smu_buffer.mc_addr_high = smu_upper_32_bits(mc_addr);
-   cz_smu->smu_buffer.mc_addr_low = smu_lower_32_bits(mc_addr);
+   cz_smu->smu_buffer.mc_addr_high = 
smu_upper_32_bits(cz_smu->toc_buffer.mc_addr);
+   cz_smu->smu_buffer.mc_addr_low = 
smu_lower_32_bits(cz_smu->toc_buffer.mc_addr);
 
if (0 != cz_smu_populate_single_scratch_entry(hwmgr,
CZ_SCRATCH_ENTRY_UCODE_ID_RLC_SCRATCH,
@@ -845,10 +844,12 @@ static int cz_smu_fini(struct pp_hwmgr *hwmgr)
 
cz_smu = (struct cz_smumgr *)hwmgr->smu_backend;
if (cz_smu) {
-   cgs_free_gpu_mem(hwmgr->device,
-   cz_smu->toc_buffer.handle);
-   cgs_free_gpu_mem(hwmgr->device,
-   cz_smu->smu_buffer.handle);
+   

[PATCH 1/3] drm/amd/pp: Remove cgs wrap interface for temperature update

2018-03-05 Thread Rex Zhu
Change-Id: I742e82315910ce57aeb4a391fe2ac1cdfccdb8ea
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c   | 13 -
 drivers/gpu/drm/amd/include/cgs_common.h  |  6 --
 drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c |  4 +++-
 3 files changed, 3 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index f37482c..76f4758 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -905,18 +905,6 @@ static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device 
*cgs_device, bool ena
return 0;
 }
 
-static int amdgpu_cgs_set_temperature_range(struct cgs_device *cgs_device,
-   int min_temperature,
-   int max_temperature)
-{
-   CGS_FUNC_ADEV;
-
-   adev->pm.dpm.thermal.min_temp = min_temperature;
-   adev->pm.dpm.thermal.max_temp = max_temperature;
-
-   return 0;
-}
-
 static const struct cgs_ops amdgpu_cgs_ops = {
.alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem,
.free_gpu_mem = amdgpu_cgs_free_gpu_mem,
@@ -941,7 +929,6 @@ static int amdgpu_cgs_set_temperature_range(struct 
cgs_device *cgs_device,
.is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled,
.enter_safe_mode = amdgpu_cgs_enter_safe_mode,
.lock_grbm_idx = amdgpu_cgs_lock_grbm_idx,
-   .set_temperature_range = amdgpu_cgs_set_temperature_range,
 };
 
 static const struct cgs_os_ops amdgpu_cgs_os_ops = {
diff --git a/drivers/gpu/drm/amd/include/cgs_common.h 
b/drivers/gpu/drm/amd/include/cgs_common.h
index 113ba6f..7f26f20 100644
--- a/drivers/gpu/drm/amd/include/cgs_common.h
+++ b/drivers/gpu/drm/amd/include/cgs_common.h
@@ -354,9 +354,6 @@ typedef int(*cgs_get_active_displays_info)(
 
 typedef void (*cgs_lock_grbm_idx)(struct cgs_device *cgs_device, bool lock);
 
-typedef int (*cgs_set_temperature_range)(struct cgs_device *cgs_device,
-   int min_temperature,
-   int max_temperature);
 struct cgs_ops {
/* memory management calls (similar to KFD interface) */
cgs_alloc_gpu_mem_t alloc_gpu_mem;
@@ -389,7 +386,6 @@ struct cgs_ops {
cgs_is_virtualization_enabled_t is_virtualization_enabled;
cgs_enter_safe_mode enter_safe_mode;
cgs_lock_grbm_idx lock_grbm_idx;
-   cgs_set_temperature_range set_temperature_range;
 };
 
 struct cgs_os_ops; /* To be define in OS-specific CGS header */
@@ -465,7 +461,5 @@ struct cgs_device
 #define cgs_lock_grbm_idx(cgs_device, lock) \
CGS_CALL(lock_grbm_idx, cgs_device, lock)
 
-#define cgs_set_temperature_range(dev, min_temp, max_temp) \
-   CGS_CALL(set_temperature_range, dev, min_temp, max_temp)
 
 #endif /* _CGS_COMMON_H */
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index f06f8f4..b784131 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -222,6 +222,7 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
 {
int ret = 0;
struct PP_TemperatureRange range = {TEMP_RANGE_MIN, TEMP_RANGE_MAX};
+   struct amdgpu_device *adev = hwmgr->adev;
 
if (hwmgr->hwmgr_func->get_thermal_temperature_range)
hwmgr->hwmgr_func->get_thermal_temperature_range(
@@ -232,7 +233,8 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
&& hwmgr->hwmgr_func->start_thermal_controller != NULL)
ret = hwmgr->hwmgr_func->start_thermal_controller(hwmgr, 
);
 
-   cgs_set_temperature_range(hwmgr->device, range.min, range.max);
+   adev->pm.dpm.thermal.min_temp = range.min;
+   adev->pm.dpm.thermal.max_temp = range.max;
 
return ret;
 }
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr for VF"

2018-03-05 Thread Liu, Monk
You can tag my RB

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Deng, 
Emily
Sent: 2018年3月5日 18:20
To: Deng, Emily ; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr for 
VF"

Ping
Hi all,
Please help review this, the regression introduced by the commit 
2ffe31deb27579e2f2c9444e01f4d8abf385d145 is gating the sriov sanity test.

Best Wishes,
Emily Deng


> -Original Message-
> From: Emily Deng [mailto:emily.d...@amd.com]
> Sent: Friday, March 02, 2018 11:32 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deng, Emily 
> Subject: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr 
> for VF"
> 
> This reverts commit 2ffe31deb27579e2f2c9444e01f4d8abf385d145.
> The sdma wptr poll memory doesn't have the same efficiency as 
> doorbell, and it will make sdma hang when running tests.
> 
> Change-Id: I6e334430b309b0c21aa18a08764320c7ff51e353
> Signed-off-by: Emily Deng 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 -
>  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   | 27 ---
>  2 files changed, 8 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 102dad3..5dcf98b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -186,7 +186,6 @@ struct amdgpu_ring {
>   uint64_teop_gpu_addr;
>   u32 doorbell_index;
>   booluse_doorbell;
> - booluse_pollmem;
>   unsignedwptr_offs;
>   unsignedfence_offs;
>   uint64_tcurrent_ctx;
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index 521978c..d3fb3ca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -355,7 +355,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct 
> amdgpu_ring *ring)
>   struct amdgpu_device *adev = ring->adev;
>   u32 wptr;
> 
> - if (ring->use_doorbell || ring->use_pollmem) {
> + if (ring->use_doorbell) {
>   /* XXX check if swapping is necessary on BE */
>   wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2;
>   } else {
> @@ -380,13 +380,10 @@ static void sdma_v3_0_ring_set_wptr(struct 
> amdgpu_ring *ring)
> 
>   if (ring->use_doorbell) {
>   u32 *wb = (u32 *)>wb.wb[ring->wptr_offs];
> +
>   /* XXX check if swapping is necessary on BE */
>   WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
>   WDOORBELL32(ring->doorbell_index, lower_32_bits(ring-
> >wptr) << 2);
> - } else if (ring->use_pollmem) {
> - u32 *wb = (u32 *)>wb.wb[ring->wptr_offs];
> -
> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
>   } else {
>   int me = (ring == >adev->sdma.instance[0].ring) ? 0 : 1;
> 
> @@ -719,14 +716,10 @@ static int sdma_v3_0_gfx_resume(struct 
> amdgpu_device *adev)
>   WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i],
>  upper_32_bits(wptr_gpu_addr));
>   wptr_poll_cntl =
> RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]);
> - if (ring->use_pollmem)
> - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
> -
> SDMA0_GFX_RB_WPTR_POLL_CNTL,
> -ENABLE, 1);
> + if (amdgpu_sriov_vf(adev))
> + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, 
> +SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
>   else
> - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
> -
> SDMA0_GFX_RB_WPTR_POLL_CNTL,
> -ENABLE, 0);
> + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, 
> +SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
>   WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], 
> wptr_poll_cntl);
> 
>   /* enable DMA RB */
> @@ -1208,13 +1201,9 @@ static int sdma_v3_0_sw_init(void *handle)
>   for (i = 0; i < adev->sdma.num_instances; i++) {
>   ring = >sdma.instance[i].ring;
>   ring->ring_obj = NULL;
> - if (!amdgpu_sriov_vf(adev)) {
> - ring->use_doorbell = true;
> - ring->doorbell_index = (i == 0) ?
> - AMDGPU_DOORBELL_sDMA_ENGINE0 :
> AMDGPU_DOORBELL_sDMA_ENGINE1;
> - } else {
> - ring->use_pollmem = true;
> - }
> + ring->use_doorbell = true;
> + ring->doorbell_index = (i == 0) ?
> + AMDGPU_DOORBELL_sDMA_ENGINE0 :
> 

Re: [PATCH 1/1] amdgpu: add scatter gather display flag

2018-03-05 Thread Michel Dänzer
On 2018-03-03 12:27 AM, Samuel Li wrote:
> ---
>  include/drm/amdgpu_drm.h | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
> index d9aa4a3..dc4d8bd 100644
> --- a/include/drm/amdgpu_drm.h
> +++ b/include/drm/amdgpu_drm.h
> @@ -526,6 +526,7 @@ struct drm_amdgpu_cs_chunk_data {
>   */
>  #define AMDGPU_IDS_FLAGS_FUSION 0x1
>  #define AMDGPU_IDS_FLAGS_PREEMPTION 0x2
> +#define AMDGPU_IDS_FLAGS_SGDISPLAY  0x4
>  
>  /* indicate if acceleration can be working */
>  #define AMDGPU_INFO_ACCEL_WORKING0x00
> 

See include/drm/README on how to synchronize these headers with the kernel.


P.S. Please put this into .git/config in your libdrm tree:

[format]
subjectprefix = "PATCH libdrm"

-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr for VF"

2018-03-05 Thread Deng, Emily
Ping
Hi all,
Please help review this, the regression introduced by the commit 
2ffe31deb27579e2f2c9444e01f4d8abf385d145 is gating the sriov sanity test.

Best Wishes,
Emily Deng


> -Original Message-
> From: Emily Deng [mailto:emily.d...@amd.com]
> Sent: Friday, March 02, 2018 11:32 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deng, Emily 
> Subject: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr
> for VF"
> 
> This reverts commit 2ffe31deb27579e2f2c9444e01f4d8abf385d145.
> The sdma wptr poll memory doesn't have the same efficiency as doorbell,
> and it will make sdma hang when running tests.
> 
> Change-Id: I6e334430b309b0c21aa18a08764320c7ff51e353
> Signed-off-by: Emily Deng 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 -
>  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   | 27 ---
>  2 files changed, 8 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 102dad3..5dcf98b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -186,7 +186,6 @@ struct amdgpu_ring {
>   uint64_teop_gpu_addr;
>   u32 doorbell_index;
>   booluse_doorbell;
> - booluse_pollmem;
>   unsignedwptr_offs;
>   unsignedfence_offs;
>   uint64_tcurrent_ctx;
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index 521978c..d3fb3ca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -355,7 +355,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct
> amdgpu_ring *ring)
>   struct amdgpu_device *adev = ring->adev;
>   u32 wptr;
> 
> - if (ring->use_doorbell || ring->use_pollmem) {
> + if (ring->use_doorbell) {
>   /* XXX check if swapping is necessary on BE */
>   wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2;
>   } else {
> @@ -380,13 +380,10 @@ static void sdma_v3_0_ring_set_wptr(struct
> amdgpu_ring *ring)
> 
>   if (ring->use_doorbell) {
>   u32 *wb = (u32 *)>wb.wb[ring->wptr_offs];
> +
>   /* XXX check if swapping is necessary on BE */
>   WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
>   WDOORBELL32(ring->doorbell_index, lower_32_bits(ring-
> >wptr) << 2);
> - } else if (ring->use_pollmem) {
> - u32 *wb = (u32 *)>wb.wb[ring->wptr_offs];
> -
> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
>   } else {
>   int me = (ring == >adev->sdma.instance[0].ring) ? 0 : 1;
> 
> @@ -719,14 +716,10 @@ static int sdma_v3_0_gfx_resume(struct
> amdgpu_device *adev)
>   WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI +
> sdma_offsets[i],
>  upper_32_bits(wptr_gpu_addr));
>   wptr_poll_cntl =
> RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]);
> - if (ring->use_pollmem)
> - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
> -
> SDMA0_GFX_RB_WPTR_POLL_CNTL,
> -ENABLE, 1);
> + if (amdgpu_sriov_vf(adev))
> + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
> +SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
>   else
> - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
> -
> SDMA0_GFX_RB_WPTR_POLL_CNTL,
> -ENABLE, 0);
> + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
> +SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
>   WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL +
> sdma_offsets[i], wptr_poll_cntl);
> 
>   /* enable DMA RB */
> @@ -1208,13 +1201,9 @@ static int sdma_v3_0_sw_init(void *handle)
>   for (i = 0; i < adev->sdma.num_instances; i++) {
>   ring = >sdma.instance[i].ring;
>   ring->ring_obj = NULL;
> - if (!amdgpu_sriov_vf(adev)) {
> - ring->use_doorbell = true;
> - ring->doorbell_index = (i == 0) ?
> - AMDGPU_DOORBELL_sDMA_ENGINE0 :
> AMDGPU_DOORBELL_sDMA_ENGINE1;
> - } else {
> - ring->use_pollmem = true;
> - }
> + ring->use_doorbell = true;
> + ring->doorbell_index = (i == 0) ?
> + AMDGPU_DOORBELL_sDMA_ENGINE0 :
> AMDGPU_DOORBELL_sDMA_ENGINE1;
> 
>   sprintf(ring->name, "sdma%d", i);
>   r = amdgpu_ring_init(adev, ring, 1024,
> --
> 2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: Enable scatter gather display support

2018-03-05 Thread Michel Dänzer
On 2018-03-03 12:25 AM, Samuel Li wrote:
> It's enabled by default. -1 is auto, to allow both vram and gtt
> memory be available, for testing purpose only.

Do we really need a module parameter for this? There's already too many
of them. The driver should know which to use in which cases, and
developers can easily tweak the code. Users should never need to change
this.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re:

2018-03-05 Thread Michel Dänzer
On 2018-03-03 05:49 AM, Keith Packard wrote:
> Here are the patches to the modesetting driver amended for the amdgpu
> driver.

Thanks for the patches. Unfortunately, since this driver still has to
compile and work with xserver >= 1.13, at least patches 1 & 3 cannot be
applied as is.

I was going to port these and take care of that anyway, though I might
not get around to it before April. If it can't wait that long, I can
give you details about what needs to be done.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [ANNOUNCE] xf86-video-amdgpu 18.0.0

2018-03-05 Thread Michel Dänzer
On 2018-03-04 08:25 AM, Mario Kleiner wrote:
> Hi Michel,
> 
> wrt. our conversation from ~ 1 month ago:
> 
> On 01/22/2018 07:01 PM, Michel Dänzer wrote:
>> On 2018-01-22 03:14 AM, Mario Kleiner wrote:
>>
>>> When testing against current master in dual-x-screen mode,
>>> i found out that the current ati-ddx commit at the top 1fe8ca75974c52
>>> "Keep track of how many SW cursors are visible on each screen"
>>> breaks multi x-screen setups by screwing up cursor handling.
>>
>> I stumbled over this myself the other day. There's an issue with the
>> (un)wrapping of miPointerSpriteFuncRec entries. Not sure yet what
>> exactly's wrong with it though. I'll look into it.
>>
> 
> Has the multi-x-screen breakage been fixed for amdgpu 18.0.0? I can't
> see a commit hinting at a fix, and i assume it was broken by the new HW
> cursor + pageflip logic, just as in the xf86-video-ati case? I can't
> test anything myself atm. as my only  amdgpu capable gpu has left me :(
> 
> Broken multi-x-screen from an official driver release ending in a distro
> would be a big problem for many vision scientists, as they depend on AMD
> + dual-x-screen support for their work.

Crap, I forgot about this issue. :(

I'll try fixing it ASAP and making a point release by the end of next
week. Apologies for any inconvenience.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 4/4] drm/amd/pp: Add auto power profiling switch based on workloads

2018-03-05 Thread Quan, Evan
Reviewed-by: Evan Quan 

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Rex 
Zhu
Sent: Friday, March 02, 2018 8:44 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhu, Rex 
Subject: [PATCH 4/4] drm/amd/pp: Add auto power profiling switch based on 
workloads

Add power profiling mode dynamic switch based on the workloads.
Currently, it supports Compute, VR, Video, 3D, and power saving, with Compute 
having the highest priority and power saving having the lowest priority.

In manual dpm mode, the driver will stop auto switching and just save the 
clients' requests. Users can set the power profiling mode through sysfs.

When exiting manual dpm mode, the driver will respond to the clients' requests 
and switch based on the clients' priority.

Change-Id: Ia97b679f4f11a7c3cabd350462101897909c8115
Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h|  4 +--
 drivers/gpu/drm/amd/include/kgd_pp_interface.h | 18 +---
 drivers/gpu/drm/amd/powerplay/amd_powerplay.c  | 32 ++
 drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c| 16 +++
 drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c   | 11 
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c   |  6 ++--
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  8 +-  
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h |  3 --
 drivers/gpu/drm/amd/powerplay/inc/hwmgr.h  |  5 
 9 files changed, 66 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 9c373f8f..643d008 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -341,9 +341,9 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->reset_power_profile_state(\
(adev)->powerplay.pp_handle, request))
 
-#define amdgpu_dpm_switch_power_profile(adev, type) \
+#define amdgpu_dpm_switch_power_profile(adev, type, en) \
((adev)->powerplay.pp_funcs->switch_power_profile(\
-   (adev)->powerplay.pp_handle, type))
+   (adev)->powerplay.pp_handle, type, en))
 
 #define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \
((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 7dfba2d..ec54584 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -83,20 +83,6 @@ enum amd_vce_level {
AMD_VCE_LEVEL_DC_GP_HIGH = 5, /* DC, general purpose queue, 1080 >= res 
> 720 */  };
 
-enum amd_pp_profile_type {
-   AMD_PP_GFX_PROFILE,
-   AMD_PP_COMPUTE_PROFILE,
-};
-
-struct amd_pp_profile {
-   enum amd_pp_profile_type type;
-   uint32_t min_sclk;
-   uint32_t min_mclk;
-   uint16_t activity_threshold;
-   uint8_t up_hyst;
-   uint8_t down_hyst;
-};
-
 enum amd_fan_ctrl_mode {
AMD_FAN_CTRL_NONE = 0,
AMD_FAN_CTRL_MANUAL = 1,
@@ -151,7 +137,6 @@ enum PP_SMC_POWER_PROFILE {
PP_SMC_POWER_PROFILE_VR   = 0x3,
PP_SMC_POWER_PROFILE_COMPUTE  = 0x4,
PP_SMC_POWER_PROFILE_CUSTOM   = 0x5,
-   PP_SMC_POWER_PROFILE_AUTO = 0x6,
 };
 
 enum {
@@ -260,8 +245,7 @@ struct amd_pm_funcs {
int (*get_pp_table)(void *handle, char **table);
int (*set_pp_table)(void *handle, const char *buf, size_t size);
void (*debugfs_print_current_performance_level)(void *handle, struct 
seq_file *m);
-   int (*switch_power_profile)(void *handle,
-   enum amd_pp_profile_type type);
+   int (*switch_power_profile)(void *handle, enum PP_SMC_POWER_PROFILE 
+type, bool en);
 /* export to amdgpu */
void (*powergate_uvd)(void *handle, bool gate);
void (*powergate_vce)(void *handle, bool gate); diff --git 
a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c 
b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 4876871..3bd2fad 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -1073,22 +1073,44 @@ static int pp_odn_edit_dpm_table(void *handle, uint32_t 
type, long *input, uint3  }
 
 static int pp_dpm_switch_power_profile(void *handle,
-   enum amd_pp_profile_type type)
+   enum PP_SMC_POWER_PROFILE type, bool en)
 {
struct pp_hwmgr *hwmgr;
-   struct amd_pp_profile request = {0};
struct pp_instance *pp_handle = (struct pp_instance *)handle;
+   long *workload;
+   uint32_t index;
 
if (pp_check(pp_handle))
return -EINVAL;
 
hwmgr = pp_handle->hwmgr;
 
-   if (hwmgr->current_power_profile != type) {
-   request.type = type;
-   pp_dpm_set_power_profile_state(handle, );
+   if 

RE: [PATCH 3/4] drm/amd/pp: Revert gfx/compute profile switch sysfs

2018-03-05 Thread Quan, Evan
Reviewed-by: Evan Quan 

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Rex 
Zhu
Sent: Friday, March 02, 2018 8:44 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhu, Rex 
Subject: [PATCH 3/4] drm/amd/pp: Revert gfx/compute profile switch sysfs

The gfx/compute profiling mode switch was only for internal testing. It is not 
a complete solution and was unexpectedly upstreamed,
so revert it.

Change-Id: I1af1b64a63b6fc12c24cf73df03b083b3661ca02
Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h|   8 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 180 ---
 drivers/gpu/drm/amd/amdgpu/ci_dpm.c| 256 -
 drivers/gpu/drm/amd/amdgpu/ci_dpm.h|   7 -
 drivers/gpu/drm/amd/include/kgd_pp_interface.h |   7 -
 drivers/gpu/drm/amd/powerplay/amd_powerplay.c  | 114 -
 .../gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c  |  17 --
 drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c   |   2 -
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c   |  77 ---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  86 ---
 .../gpu/drm/amd/powerplay/inc/hardwaremanager.h|   1 -
 drivers/gpu/drm/amd/powerplay/inc/hwmgr.h  |   9 -
 drivers/gpu/drm/amd/powerplay/inc/smumgr.h |   3 -
 drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c   |  27 ---
 drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c |  66 --
 .../drm/amd/powerplay/smumgr/polaris10_smumgr.c|  64 --
 drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c  |  10 -
 .../gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c|  64 --
 18 files changed, 998 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index bd745a4..9c373f8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -341,14 +341,6 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->reset_power_profile_state(\
(adev)->powerplay.pp_handle, request))
 
-#define amdgpu_dpm_get_power_profile_state(adev, query) \
-   ((adev)->powerplay.pp_funcs->get_power_profile_state(\
-   (adev)->powerplay.pp_handle, query))
-
-#define amdgpu_dpm_set_power_profile_state(adev, request) \
-   ((adev)->powerplay.pp_funcs->set_power_profile_state(\
-   (adev)->powerplay.pp_handle, request))
-
 #define amdgpu_dpm_switch_power_profile(adev, type) \
((adev)->powerplay.pp_funcs->switch_power_profile(\
(adev)->powerplay.pp_handle, type))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 9e73cbc..632b186 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -734,161 +734,6 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct 
device *dev,
return -EINVAL;
 }
 
-static ssize_t amdgpu_get_pp_power_profile(struct device *dev,
-   char *buf, struct amd_pp_profile *query)
-{
-   struct drm_device *ddev = dev_get_drvdata(dev);
-   struct amdgpu_device *adev = ddev->dev_private;
-   int ret = 0xff;
-
-   if (adev->powerplay.pp_funcs->get_power_profile_state)
-   ret = amdgpu_dpm_get_power_profile_state(
-   adev, query);
-
-   if (ret)
-   return ret;
-
-   return snprintf(buf, PAGE_SIZE,
-   "%d %d %d %d %d\n",
-   query->min_sclk / 100,
-   query->min_mclk / 100,
-   query->activity_threshold,
-   query->up_hyst,
-   query->down_hyst);
-}
-
-static ssize_t amdgpu_get_pp_gfx_power_profile(struct device *dev,
-   struct device_attribute *attr,
-   char *buf)
-{
-   struct amd_pp_profile query = {0};
-
-   query.type = AMD_PP_GFX_PROFILE;
-
-   return amdgpu_get_pp_power_profile(dev, buf, );
-}
-
-static ssize_t amdgpu_get_pp_compute_power_profile(struct device *dev,
-   struct device_attribute *attr,
-   char *buf)
-{
-   struct amd_pp_profile query = {0};
-
-   query.type = AMD_PP_COMPUTE_PROFILE;
-
-   return amdgpu_get_pp_power_profile(dev, buf, );
-}
-
-static ssize_t amdgpu_set_pp_power_profile(struct device *dev,
-   const char *buf,
-   size_t count,
-   struct amd_pp_profile *request)
-{
-   struct drm_device *ddev = dev_get_drvdata(dev);
-   struct amdgpu_device *adev = ddev->dev_private;
-   uint32_t loop = 0;
-   char *sub_str, buf_cpy[128], *tmp_str;
-   const char delimiter[3] = {' ', '\n', '\0'};
-   long int value;
-   int ret = 0xff;
-
-   if (strncmp("reset", buf, strlen("reset")) == 0) {
-   if 

RE: [PATCH 2/4] drm/amd/pp: Fix sclk in highest two levels in compute mode on smu7

2018-03-05 Thread Quan, Evan
Reviewed-by: Evan Quan 

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Rex 
Zhu
Sent: Friday, March 02, 2018 8:44 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhu, Rex 
Subject: [PATCH 2/4] drm/amd/pp: Fix sclk in highest two levels in compute mode 
on smu7

Compute workloads tend to be "bursty". Only tuning the behavior of the native 
dpm doesn't work well for most such workloads. From test results, fixing sclk 
at the highest two levels gets better performance,
so add a min sclk setting into the default compute workload policy on smu7.

Users can still change the sclk range through sysfs pp_dpm_sclk for better perf/watt.

Change-Id: I0ffd19a5d3c9e57f22aebd9ff1e18b980a111384
Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index c1e32f4..ba114482 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -5005,6 +5005,26 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr 
*hwmgr, char *buf)
return size;
 }
 
+static void smu7_patch_compute_profile_mode(struct pp_hwmgr *hwmgr,
+   enum PP_SMC_POWER_PROFILE requst)
+{
+   struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+   uint32_t tmp, level;
+
+   if (requst == PP_SMC_POWER_PROFILE_COMPUTE) {
+   if (data->dpm_level_enable_mask.sclk_dpm_enable_mask) {
+   level = 0;
+   tmp = data->dpm_level_enable_mask.sclk_dpm_enable_mask;
+   while (tmp >>= 1)
+   level++;
+   if (level > 0)
+   smu7_force_clock_level(hwmgr, PP_SCLK, 3 << 
(level-1));
+   }
+   } else if (hwmgr->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) {
+   smu7_force_clock_level(hwmgr, PP_SCLK, 
data->dpm_level_enable_mask.sclk_dpm_enable_mask);
+   }
+}
+
 static int smu7_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, 
uint32_t size)  {
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); @@ 
-5055,6 +5075,7 @@ static int smu7_set_power_profile_mode(struct pp_hwmgr 
*hwmgr, long *input, uint
data->current_profile_setting.mclk_down_hyst = 
tmp.mclk_down_hyst;
data->current_profile_setting.mclk_activity = 
tmp.mclk_activity;
}
+   smu7_patch_compute_profile_mode(hwmgr, mode);
hwmgr->power_profile_mode = mode;
}
break;
--
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/3] drm/amd/pp: Implement update_dpm_settings on CI

2018-03-05 Thread Rex Zhu
use SW method to update DPM settings by updating SRAM
directly on CI.

Change-Id: Ie9ed6c3a0e1c327cc9a9b06bec47b1cede87278d
Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 97 
 1 file changed, 97 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
index 76f700f..179d00c 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
@@ -2819,6 +2819,102 @@ static int ci_start_smu(struct pp_hwmgr *hwmgr)
return 0;
 }
 
+static int ci_update_dpm_settings(struct pp_hwmgr *hwmgr,
+   void *profile_setting)
+{
+   struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+   struct ci_smumgr *smu_data = (struct ci_smumgr *)
+   (hwmgr->smu_backend);
+   struct profile_mode_setting *setting;
+   struct SMU7_Discrete_GraphicsLevel *levels =
+   smu_data->smc_state_table.GraphicsLevel;
+   uint32_t array = smu_data->dpm_table_start +
+   offsetof(SMU7_Discrete_DpmTable, GraphicsLevel);
+
+   uint32_t mclk_array = smu_data->dpm_table_start +
+   offsetof(SMU7_Discrete_DpmTable, MemoryLevel);
+   struct SMU7_Discrete_MemoryLevel *mclk_levels =
+   smu_data->smc_state_table.MemoryLevel;
+   uint32_t i;
+   uint32_t offset, up_hyst_offset, down_hyst_offset, clk_activity_offset, 
tmp;
+
+   if (profile_setting == NULL)
+   return -EINVAL;
+
+   setting = (struct profile_mode_setting *)profile_setting;
+
+   if (setting->bupdate_sclk) {
+   if (!data->sclk_dpm_key_disabled)
+   smum_send_msg_to_smc(hwmgr, 
PPSMC_MSG_SCLKDPM_FreezeLevel);
+   for (i = 0; i < 
smu_data->smc_state_table.GraphicsDpmLevelCount; i++) {
+   if (levels[i].ActivityLevel !=
+   cpu_to_be16(setting->sclk_activity)) {
+   levels[i].ActivityLevel = 
cpu_to_be16(setting->sclk_activity);
+
+   clk_activity_offset = array + 
(sizeof(SMU7_Discrete_GraphicsLevel) * i)
+   + 
offsetof(SMU7_Discrete_GraphicsLevel, ActivityLevel);
+   offset = clk_activity_offset & ~0x3;
+   tmp = 
PP_HOST_TO_SMC_UL(cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, 
offset));
+   tmp = phm_set_field_to_u32(clk_activity_offset, 
tmp, levels[i].ActivityLevel, sizeof(uint16_t));
+   cgs_write_ind_register(hwmgr->device, 
CGS_IND_REG__SMC, offset, PP_HOST_TO_SMC_UL(tmp));
+
+   }
+   if (levels[i].UpH != setting->sclk_up_hyst ||
+   levels[i].DownH != setting->sclk_down_hyst) {
+   levels[i].UpH = setting->sclk_up_hyst;
+   levels[i].DownH = setting->sclk_down_hyst;
+   up_hyst_offset = array + 
(sizeof(SMU7_Discrete_GraphicsLevel) * i)
+   + 
offsetof(SMU7_Discrete_GraphicsLevel, UpH);
+   down_hyst_offset = array + 
(sizeof(SMU7_Discrete_GraphicsLevel) * i)
+   + 
offsetof(SMU7_Discrete_GraphicsLevel, DownH);
+   offset = up_hyst_offset & ~0x3;
+   tmp = 
PP_HOST_TO_SMC_UL(cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, 
offset));
+   tmp = phm_set_field_to_u32(up_hyst_offset, tmp, 
levels[i].UpH, sizeof(uint8_t));
+   tmp = phm_set_field_to_u32(down_hyst_offset, 
tmp, levels[i].DownH, sizeof(uint8_t));
+   cgs_write_ind_register(hwmgr->device, 
CGS_IND_REG__SMC, offset, PP_HOST_TO_SMC_UL(tmp));
+   }
+   }
+   if (!data->sclk_dpm_key_disabled)
+   smum_send_msg_to_smc(hwmgr, 
PPSMC_MSG_SCLKDPM_UnfreezeLevel);
+   }
+
+   if (setting->bupdate_mclk) {
+   if (!data->mclk_dpm_key_disabled)
+   smum_send_msg_to_smc(hwmgr, 
PPSMC_MSG_MCLKDPM_FreezeLevel);
+   for (i = 0; i < smu_data->smc_state_table.MemoryDpmLevelCount; 
i++) {
+   if (mclk_levels[i].ActivityLevel !=
+   cpu_to_be16(setting->mclk_activity)) {
+   mclk_levels[i].ActivityLevel = 
cpu_to_be16(setting->mclk_activity);
+
+   clk_activity_offset = mclk_array + 
(sizeof(SMU7_Discrete_MemoryLevel) * i)
+   + 
offsetof(SMU7_Discrete_MemoryLevel, 

[PATCH 2/3] drm/amd/pp: Implement update_dpm_settings on Tonga

2018-03-05 Thread Rex Zhu
use SW method to update DPM settings by updating SRAM
directly on Tonga.

Change-Id: I21b1f5caee85587e30ff4d37b040d3ba36b843ee
Signed-off-by: Rex Zhu 
---
 .../gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c| 97 ++
 1 file changed, 97 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
index a268b98..86fb770 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
@@ -3277,6 +3277,102 @@ static int 
tonga_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr,
array_size, SMC_RAM_END);
 }
 
+static int tonga_update_dpm_settings(struct pp_hwmgr *hwmgr,
+   void *profile_setting)
+{
+   struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+   struct tonga_smumgr *smu_data = (struct tonga_smumgr *)
+   (hwmgr->smu_backend);
+   struct profile_mode_setting *setting;
+   struct SMU72_Discrete_GraphicsLevel *levels =
+   smu_data->smc_state_table.GraphicsLevel;
+   uint32_t array = smu_data->smu7_data.dpm_table_start +
+   offsetof(SMU72_Discrete_DpmTable, GraphicsLevel);
+
+   uint32_t mclk_array = smu_data->smu7_data.dpm_table_start +
+   offsetof(SMU72_Discrete_DpmTable, MemoryLevel);
+   struct SMU72_Discrete_MemoryLevel *mclk_levels =
+   smu_data->smc_state_table.MemoryLevel;
+   uint32_t i;
+   uint32_t offset, up_hyst_offset, down_hyst_offset, clk_activity_offset, 
tmp;
+
+   if (profile_setting == NULL)
+   return -EINVAL;
+
+   setting = (struct profile_mode_setting *)profile_setting;
+
+   if (setting->bupdate_sclk) {
+   if (!data->sclk_dpm_key_disabled)
+   smum_send_msg_to_smc(hwmgr, 
PPSMC_MSG_SCLKDPM_FreezeLevel);
+   for (i = 0; i < 
smu_data->smc_state_table.GraphicsDpmLevelCount; i++) {
+   if (levels[i].ActivityLevel !=
+   cpu_to_be16(setting->sclk_activity)) {
+   levels[i].ActivityLevel = 
cpu_to_be16(setting->sclk_activity);
+
+   clk_activity_offset = array + 
(sizeof(SMU72_Discrete_GraphicsLevel) * i)
+   + 
offsetof(SMU72_Discrete_GraphicsLevel, ActivityLevel);
+   offset = clk_activity_offset & ~0x3;
+   tmp = 
PP_HOST_TO_SMC_UL(cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, 
offset));
+   tmp = phm_set_field_to_u32(clk_activity_offset, 
tmp, levels[i].ActivityLevel, sizeof(uint16_t));
+   cgs_write_ind_register(hwmgr->device, 
CGS_IND_REG__SMC, offset, PP_HOST_TO_SMC_UL(tmp));
+
+   }
+   if (levels[i].UpHyst != setting->sclk_up_hyst ||
+   levels[i].DownHyst != setting->sclk_down_hyst) {
+   levels[i].UpHyst = setting->sclk_up_hyst;
+   levels[i].DownHyst = setting->sclk_down_hyst;
+   up_hyst_offset = array + 
(sizeof(SMU72_Discrete_GraphicsLevel) * i)
+   + 
offsetof(SMU72_Discrete_GraphicsLevel, UpHyst);
+   down_hyst_offset = array + 
(sizeof(SMU72_Discrete_GraphicsLevel) * i)
+   + 
offsetof(SMU72_Discrete_GraphicsLevel, DownHyst);
+   offset = up_hyst_offset & ~0x3;
+   tmp = 
PP_HOST_TO_SMC_UL(cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, 
offset));
+   tmp = phm_set_field_to_u32(up_hyst_offset, tmp, 
levels[i].UpHyst, sizeof(uint8_t));
+   tmp = phm_set_field_to_u32(down_hyst_offset, 
tmp, levels[i].DownHyst, sizeof(uint8_t));
+   cgs_write_ind_register(hwmgr->device, 
CGS_IND_REG__SMC, offset, PP_HOST_TO_SMC_UL(tmp));
+   }
+   }
+   if (!data->sclk_dpm_key_disabled)
+   smum_send_msg_to_smc(hwmgr, 
PPSMC_MSG_SCLKDPM_UnfreezeLevel);
+   }
+
+   if (setting->bupdate_mclk) {
+   if (!data->mclk_dpm_key_disabled)
+   smum_send_msg_to_smc(hwmgr, 
PPSMC_MSG_MCLKDPM_FreezeLevel);
+   for (i = 0; i < smu_data->smc_state_table.MemoryDpmLevelCount; 
i++) {
+   if (mclk_levels[i].ActivityLevel !=
+   cpu_to_be16(setting->mclk_activity)) {
+   mclk_levels[i].ActivityLevel = 
cpu_to_be16(setting->mclk_activity);
+
+   clk_activity_offset = 

[PATCH 1/3] drm/amd/pp: Implement update_dpm_settings on Fiji

2018-03-05 Thread Rex Zhu
use SW method to update DPM settings by updating SRAM
directly on Fiji.

Change-Id: Ic485e526332a0f1a169d395dfe810d71e5bdfea8
Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c | 97 ++
 1 file changed, 97 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
index 047b2df..fc556f0 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
@@ -2719,6 +2719,102 @@ static int 
fiji_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr,
array_size, SMC_RAM_END);
 }
 
+static int fiji_update_dpm_settings(struct pp_hwmgr *hwmgr,
+   void *profile_setting)
+{
+   struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+   struct fiji_smumgr *smu_data = (struct fiji_smumgr *)
+   (hwmgr->smu_backend);
+   struct profile_mode_setting *setting;
+   struct SMU73_Discrete_GraphicsLevel *levels =
+   smu_data->smc_state_table.GraphicsLevel;
+   uint32_t array = smu_data->smu7_data.dpm_table_start +
+   offsetof(SMU73_Discrete_DpmTable, GraphicsLevel);
+
+   uint32_t mclk_array = smu_data->smu7_data.dpm_table_start +
+   offsetof(SMU73_Discrete_DpmTable, MemoryLevel);
+   struct SMU73_Discrete_MemoryLevel *mclk_levels =
+   smu_data->smc_state_table.MemoryLevel;
+   uint32_t i;
+   uint32_t offset, up_hyst_offset, down_hyst_offset, clk_activity_offset, 
tmp;
+
+   if (profile_setting == NULL)
+   return -EINVAL;
+
+   setting = (struct profile_mode_setting *)profile_setting;
+
+   if (setting->bupdate_sclk) {
+   if (!data->sclk_dpm_key_disabled)
+   smum_send_msg_to_smc(hwmgr, 
PPSMC_MSG_SCLKDPM_FreezeLevel);
+   for (i = 0; i < 
smu_data->smc_state_table.GraphicsDpmLevelCount; i++) {
+   if (levels[i].ActivityLevel !=
+   cpu_to_be16(setting->sclk_activity)) {
+   levels[i].ActivityLevel = 
cpu_to_be16(setting->sclk_activity);
+
+   clk_activity_offset = array + 
(sizeof(SMU73_Discrete_GraphicsLevel) * i)
+   + 
offsetof(SMU73_Discrete_GraphicsLevel, ActivityLevel);
+   offset = clk_activity_offset & ~0x3;
+   tmp = 
PP_HOST_TO_SMC_UL(cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, 
offset));
+   tmp = phm_set_field_to_u32(clk_activity_offset, 
tmp, levels[i].ActivityLevel, sizeof(uint16_t));
+   cgs_write_ind_register(hwmgr->device, 
CGS_IND_REG__SMC, offset, PP_HOST_TO_SMC_UL(tmp));
+
+   }
+   if (levels[i].UpHyst != setting->sclk_up_hyst ||
+   levels[i].DownHyst != setting->sclk_down_hyst) {
+   levels[i].UpHyst = setting->sclk_up_hyst;
+   levels[i].DownHyst = setting->sclk_down_hyst;
+   up_hyst_offset = array + 
(sizeof(SMU73_Discrete_GraphicsLevel) * i)
+   + 
offsetof(SMU73_Discrete_GraphicsLevel, UpHyst);
+   down_hyst_offset = array + 
(sizeof(SMU73_Discrete_GraphicsLevel) * i)
+   + 
offsetof(SMU73_Discrete_GraphicsLevel, DownHyst);
+   offset = up_hyst_offset & ~0x3;
+   tmp = 
PP_HOST_TO_SMC_UL(cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, 
offset));
+   tmp = phm_set_field_to_u32(up_hyst_offset, tmp, 
levels[i].UpHyst, sizeof(uint8_t));
+   tmp = phm_set_field_to_u32(down_hyst_offset, 
tmp, levels[i].DownHyst, sizeof(uint8_t));
+   cgs_write_ind_register(hwmgr->device, 
CGS_IND_REG__SMC, offset, PP_HOST_TO_SMC_UL(tmp));
+   }
+   }
+   if (!data->sclk_dpm_key_disabled)
+   smum_send_msg_to_smc(hwmgr, 
PPSMC_MSG_SCLKDPM_UnfreezeLevel);
+   }
+
+   if (setting->bupdate_mclk) {
+   if (!data->mclk_dpm_key_disabled)
+   smum_send_msg_to_smc(hwmgr, 
PPSMC_MSG_MCLKDPM_FreezeLevel);
+   for (i = 0; i < smu_data->smc_state_table.MemoryDpmLevelCount; 
i++) {
+   if (mclk_levels[i].ActivityLevel !=
+   cpu_to_be16(setting->mclk_activity)) {
+   mclk_levels[i].ActivityLevel = 
cpu_to_be16(setting->mclk_activity);
+
+   clk_activity_offset = mclk_array 

RE: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2)

2018-03-05 Thread Liu, Monk
To better approach this issue I suggest to do the following:
1. Revert the original patch.

2. Stop waiting to long for writes. E.g. use a separate timeout (20ms
maybe?) to wait for the write. Then do a WARN_ON_ONCE when we timeout.
Cannot do that: 20ms is not enough. Sometimes you need 10 seconds, since other 
VFs may be doing bad things like occupying the GPU intentionally, or they are 
doing TDR. So 
I don't think separating read and write is a good idea; they should be treated 
equally 

3. To the read function add a "if (!in_intterupt()) may_sleep();" and then 
retest. That should at least print a nice warning when called from atomic 
context.
Sorry what is may_sleep() ??

4. Test the whole thing and try to fix all warnings about atomic contexts from 
the may_sleep();

5. Reapply the original patch, but this time only for the read function, not 
the write function.


From current LKG code, the only one spin lock may wrapping the kiq_rreg/wreg() 
is the pcie_idx_lock, and this lock is only used during init(),
Since init() is run under the case of exclusive mode for SRIOV, which means:
1)  register access is not go through KIQ (see admgpu_mm_reg)
2)  those functions are only in bif_medium_grain_xxx part (vi.c and nbio_v6.c) 
, and they won't hit under SRIOV ( we return in the head if SRIOV detect)
So I don' think this spin_lock may cause trouble...

/Monk



-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: 2018年3月5日 15:57
To: Liu, Monk ; Koenig, Christian ; 
Kuehling, Felix ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2)

> otherwise I don't see how it is better by reverting it
Well it's better to revert it for now because it seems to create more problems 
than it solves.

To better approach this issue I suggest to do the following:
1. Revert the original patch.

2. Stop waiting to long for writes. E.g. use a separate timeout (20ms
maybe?) to wait for the write. Then do a WARN_ON_ONCE when we timeout.

3. To the read function add a "if (!in_interrupt()) may_sleep();" and then 
retest. That should at least print a nice warning when called from atomic 
context.

4. Test the whole thing and try to fix all warnings about atomic contexts from 
the may_sleep();

5. Reapply the original patch, but this time only for the read function, not 
the write function.

Regards,
Christian.

Am 05.03.2018 um 05:20 schrieb Liu, Monk:
> When there are 16 VF/VM on one GPU, we can easily hit "sys lockup to 
> 22s" kernel error/warning introduced by kiq_rreg/wreg routine That's 
> why I must use this patch to let thread sleep a while and try again,
>
> If you insist reverting this patch please give me a solution, 
> otherwise I don't see how it is better by reverting it
>
> /Monk
>
> -Original Message-
> From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
> Sent: 2018年3月3日 21:38
> To: Kuehling, Felix ; Liu, Monk 
> ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in 
> IRQ(v2)
>
> Am 02.03.2018 um 21:47 schrieb Felix Kuehling:
>> On 2018-03-02 04:29 AM, Liu, Monk wrote:
>>> In_atomic() isnot encouraged to be used to judge if sleep is 
>>> possible, see the macros of it
>>>
>>> #define in_atomic() (preempt_count() != 0)
>> OK. But my point is still that you're not testing the right thing 
>> when you check in_interrupt(). The comment before the in_atomic macro 
>> definition states the limitations and says "do not use in driver code".
>> Unfortunately it doesn't suggest any alternative. I think 
>> in_interrupt is actually worse, because it misses even more cases than 
>> in_atomic.
> Thinking about this, Felix seems to be absolutely right.
>
> So we need to revert this patch since you can't reliable detect in a driver 
> if sleeping is allowed or not.
>
> Regards,
> Christian.
>
>> Regards,
>>     Felix
>>
>>> /Monk
>>>
>>> -Original Message-
>>> From: Kuehling, Felix
>>> Sent: 2018年3月1日 23:50
>>> To: amd-gfx@lists.freedesktop.org; Liu, Monk 
>>> Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in
>>> IRQ(v2)
>>>
>>> On 2018-02-28 02:27 AM, Monk Liu wrote:
 sometimes GPU is switched to other VFs and won't switch back soon, 
 so the kiq reg access will not signal within a short period, 
 instead of busy waiting a long time (MAX_KIQ_REG_WAIT) and returning 
 TMO we can instead sleep 5ms and try again later (non irq context)

 And since the waiting in kiq_r/weg is busy wait, so 
 MAX_KIQ_REG_WAIT shouldn't set to a long time, set it to 10ms is more 
 appropriate.

 if the gpu is already in reset state, don't retry the KIQ reg access 
 otherwise it would always hang because KIQ is usually already dead.

 v2:
 replace schedule() with msleep() for the wait

 Change-Id: 

RE: [PATCH 1/4] drm/amd/pp: Implement get/set_power_profile_mode on smu7

2018-03-05 Thread Quan, Evan
Comment inline.

Regards
Evan

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Rex 
Zhu
Sent: Friday, March 02, 2018 8:44 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhu, Rex 
Subject: [PATCH 1/4] drm/amd/pp: Implement get/set_power_profile_mode on smu7

It shows what parameters can be configured to tune the behavior of natural dpm 
for perf/watt on smu7.

user can select the mode per workload, but even the default per workload 
settings are not bulletproof.

user can configure custom settings per different use case for better perf or 
better perf/watt.

cat pp_power_profile_mode
NUMMODE_NAME SCLK_UP_HYST   SCLK_DOWN_HYST SCLK_ACTIVE_LEVEL 
MCLK_UP_HYST   MCLK_DOWN_HYST MCLK_ACTIVE_LEVEL
  0   3D_FULL_SCREEN:0  100   30
0  100   10
  1 POWER_SAVING:   100   30
---
  2VIDEO:---   
10   16   31
  3   VR:0   11   50
0  100   10
  4  COMPUTE:05   30
---
  5   CUSTOM:000
000
  *  CURRENT:0  100   30
0  100   10

Under manual dpm level,

user can echo "0/1/2/3/4">pp_power_profile_mode to select 
3D_FULL_SCREEN/POWER_SAVING/VIDEO/VR/COMPUTE
mode.

echo "5 * * * * * * * *">pp_power_profile_mode to set custom settings.
"5 * * * * * * * *" mean "CUSTOM enable_sclk SCLK_UP_HYST SCLK_DOWN_HYST 
SCLK_ACTIVE_LEVEL enable_mclk MCLK_UP_HYST MCLK_DOWN_HYST MCLK_ACTIVE_LEVEL"

if the parameter enable_sclk/enable_mclk is true, driver will update the 
following parameters to dpm table.
if false, ignore the following parameters.

Reviewed-by: Alex Deucher 
Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 143 +++
 1 file changed, 143 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 731475b..c1e32f4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -81,6 +81,13 @@
 #define PCIE_BUS_CLK1
 #define TCLK(PCIE_BUS_CLK / 10)
 
+static const struct profile_mode_setting smu7_profiling[5] =
+   {{1, 0, 100, 30, 1, 0, 100, 10},
+{1, 10, 0, 30, 0, 0, 0, 0},
+{0, 0, 0, 0, 1, 10, 16, 31},
+{1, 0, 11, 50, 1, 0, 100, 10},
+{1, 0, 5, 30, 0, 0, 0, 0},
+   };
 
 /** Values for the CG_THERMAL_CTRL::DPM_EVENT_SRC field. */  enum 
DPM_EVENT_SRC { @@ -2475,6 +2482,9 @@ static int smu7_hwmgr_backend_init(struct 
pp_hwmgr *hwmgr)
smu7_patch_voltage_workaround(hwmgr);
smu7_init_dpm_defaults(hwmgr);
 
+   hwmgr->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+   hwmgr->default_power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+
/* Get leakage voltage based on leakage ID. */
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_EVV)) {
@@ -4923,6 +4933,137 @@ static int smu7_odn_edit_dpm_table(struct pp_hwmgr 
*hwmgr,
return 0;
 }
 
+static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char 
+*buf) {
+   struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+   uint32_t i, size = 0;
+   uint32_t len;
+
+   static const char *profile_name[6] = {"3D_FULL_SCREEN",
+   "POWER_SAVING",
+   "VIDEO",
+   "VR",
+   "COMPUTE",
+   "CUSTOM"};
+
+   static const char *title[8] = {"NUM",
+   "MODE_NAME",
+   "SCLK_UP_HYST",
+   "SCLK_DOWN_HYST",
+   "SCLK_ACTIVE_LEVEL",
+   "MCLK_UP_HYST",
+   "MCLK_DOWN_HYST",
+   "MCLK_ACTIVE_LEVEL"};
+
+   if (!buf)
+   return -EINVAL;
+
+   size += sprintf(buf + size, "%s %16s %16s %16s %16s %16s %16s %16s\n",
+   title[0], title[1], title[2], title[3],
+   title[4], title[5], title[6], title[7]);
+
+   len =