Re: [PATCH 2/2] drm/amdkfd: enable KFD support for navi14

2019-07-30 Thread Yuan, Xiaojie
Reviewed-by: Xiaojie Yuan 

BR,
Xiaojie


From: amd-gfx on behalf of Alex Deucher
Sent: Saturday, July 27, 2019 3:16 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: [PATCH 2/2] drm/amdkfd: enable KFD support for navi14

Same as navi10.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index f052c70e4659..97f7c5235cc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -91,6 +91,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
kfd2kgd = amdgpu_amdkfd_arcturus_get_functions();
break;
case CHIP_NAVI10:
+   case CHIP_NAVI14:
kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions();
break;
default:
--
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/2] drm/amd/powerplay: support power profile retrieval and setting on arcturus

2019-07-30 Thread Kevin Wang

On 7/31/19 11:39 AM, Evan Quan wrote:
> Enable arcturus power profile retrieval and setting.
>
> Change-Id: I85447ba9ca7de8e197840f76ce3745363c4133a6
> Signed-off-by: Evan Quan 
> ---
>   drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 69 
>   1 file changed, 69 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
> index c3f9487276c0..47d015035906 100644
> --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
> +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
> @@ -1503,6 +1503,73 @@ static int arcturus_get_power_limit(struct smu_context *smu,
>   return 0;
>   }
>   
> +static int arcturus_get_power_profile_mode(struct smu_context *smu,
> +char *buf)
> +{
> + static const char *profile_name[] = {
> + "BOOTUP_DEFAULT",
> + "3D_FULL_SCREEN",
> + "POWER_SAVING",
> + "VIDEO",
> + "VR",
> + "COMPUTE",
> + "CUSTOM"};
> + uint32_t i, size = 0;
> + int16_t workload_type = 0;
> +
> + if (!smu->pm_enabled || !buf)
> + return -EINVAL;
> +
> + for (i = 0; i <= PP_SMC_POWER_PROFILE_CUSTOM; i++) {
> + /*
> +  * Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT
> +  * Not all profile modes are supported on arcturus.
> +  */
> + workload_type = smu_workload_get_type(smu, i);
> + if (workload_type < 0)
> + continue;
> +
> + size += sprintf(buf + size, "%2d %14s%s\n",
> + i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " ");
> + }
> +
> + return size;
> +}
> +
> +static int arcturus_set_power_profile_mode(struct smu_context *smu,
> +long *input,
> +uint32_t size)
> +{
> + int workload_type = 0;
> + uint32_t profile_mode = input[size];
> +
> + if (!smu->pm_enabled)
> + return -EINVAL;
> +
> + if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
> + pr_err("Invalid power profile mode %d\n", profile_mode);
> + return -EINVAL;
> + }
> +
> + /*
> +  * Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT
> +  * Not all profile modes are supported on arcturus.
> +  */
> + workload_type = smu_workload_get_type(smu, profile_mode);
> + if (workload_type < 0) {
> + pr_err("Unsupported power profile mode %d on arcturus\n", profile_mode);
> + return -EINVAL;
> + }
> +
> + smu_send_smc_msg_with_param(smu,
> + SMU_MSG_SetWorkloadMask,
> + 1 << workload_type);

[kevin]:

please check the return value: modify the power_profile_mode value only
after the SetWorkloadMask message has been sent successfully.

If sending the message fails, you can't update the power_profile_mode value.
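
Something like this (an untested sketch of the suggested change; assumes a
local 'int ret' in arcturus_set_power_profile_mode):

	ret = smu_send_smc_msg_with_param(smu,
					  SMU_MSG_SetWorkloadMask,
					  1 << workload_type);
	if (ret) {
		pr_err("Failed to set workload mask %d\n", workload_type);
		return ret;
	}

	/* commit the new mode only after the message succeeded */
	smu->power_profile_mode = profile_mode;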

With that fixed:

Reviewed-by: Kevin Wang 

> +
> + smu->power_profile_mode = profile_mode;
> +
> + return 0;
> +}
> +
>   static void arcturus_dump_pptable(struct smu_context *smu)
>   {
>   struct smu_table_context *table_context = &smu->smu_table;
> @@ -1968,6 +2035,8 @@ static const struct pptable_funcs arcturus_ppt_funcs = {
>   .force_dpm_limit_value = arcturus_force_dpm_limit_value,
>   .unforce_dpm_levels = arcturus_unforce_dpm_levels,
>   .get_profiling_clk_mask = arcturus_get_profiling_clk_mask,
> + .get_power_profile_mode = arcturus_get_power_profile_mode,
> + .set_power_profile_mode = arcturus_set_power_profile_mode,
>   /* debug (internal used) */
>   .dump_pptable = arcturus_dump_pptable,
>   .get_power_limit = arcturus_get_power_limit,
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 2/2] drm/amd/powerplay: enable SW SMU power profile switch support in KFD

2019-07-30 Thread Kevin Wang
Reviewed-by: Kevin Wang 

BR
Kevin

On 7/31/19 11:39 AM, Evan Quan wrote:
> Hook up the SW SMU power profile switch in KFD routine.
>
> Change-Id: I41e53762cdc7504285de89f30e3e6e2bb396b953
> Signed-off-by: Evan Quan 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  8 +++--
>   drivers/gpu/drm/amd/powerplay/amdgpu_smu.c| 36 +++
>   .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h|  3 ++
>   3 files changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 43b11879713d..9c5dcaa8fa48 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -672,8 +672,12 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
>   {
>   struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>   
> - if (adev->powerplay.pp_funcs &&
> - adev->powerplay.pp_funcs->switch_power_profile)
> + if (is_support_sw_smu(adev))
> + smu_switch_power_profile(&adev->smu,
> +  PP_SMC_POWER_PROFILE_COMPUTE,
> +  !idle);
> + else if (adev->powerplay.pp_funcs &&
> +  adev->powerplay.pp_funcs->switch_power_profile)
>   amdgpu_dpm_switch_power_profile(adev,
>   PP_SMC_POWER_PROFILE_COMPUTE,
>   !idle);
> diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> index d99a8aa0defb..55ccb4e6a6fb 100644
> --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> @@ -1447,6 +1447,42 @@ int smu_handle_task(struct smu_context *smu,
>   return ret;
>   }
>   
> +int smu_switch_power_profile(struct smu_context *smu,
> +  enum PP_SMC_POWER_PROFILE type,
> +  bool en)
> +{
> + struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
> + long workload;
> + uint32_t index;
> +
> + if (!smu->pm_enabled)
> + return -EINVAL;
> +
> + if (!(type < PP_SMC_POWER_PROFILE_CUSTOM))
> + return -EINVAL;
> +
> + mutex_lock(&smu->mutex);
> +
> + if (!en) {
> + smu->workload_mask &= ~(1 << smu->workload_prority[type]);
> + index = fls(smu->workload_mask);
> + index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
> + workload = smu->workload_setting[index];
> + } else {
> + smu->workload_mask |= (1 << smu->workload_prority[type]);
> + index = fls(smu->workload_mask);
> + index = index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
> + workload = smu->workload_setting[index];
> + }
> +
> + if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
> + smu_set_power_profile_mode(smu, &workload, 0);
> +
> + mutex_unlock(&smu->mutex);
> +
> + return 0;
> +}
> +
>   enum amd_dpm_forced_level smu_get_performance_level(struct smu_context *smu)
>   {
>   struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> index 9c0a53ef93c4..1b44414cec3b 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> @@ -973,6 +973,9 @@ extern int smu_dpm_set_power_gate(struct smu_context *smu,uint32_t block_type, bool gate);
>   extern int smu_handle_task(struct smu_context *smu,
>  enum amd_dpm_forced_level level,
>  enum amd_pp_task task_id);
> +int smu_switch_power_profile(struct smu_context *smu,
> +  enum PP_SMC_POWER_PROFILE type,
> +  bool en);
>   int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t *smu_version);
>   int smu_get_dpm_freq_by_index(struct smu_context *smu, enum smu_clk_type clk_type, uint16_t level, uint32_t *value);
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/3] drm/amdgpu: fix double ucode load by PSP

2019-07-30 Thread Monk Liu
Previously the ucode loading by PSP was repeated: once in the
phase_1 init/re-init/resume path and again in the fw_loading routine.

Avoid this double loading by clearing ip_blocks.status.hw in suspend or reset
prior to the FW loading and any block's hw_init/resume.

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 79 ++
 1 file changed, 49 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6cb358c..25e721d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1673,29 +1673,35 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
 
 	if (adev->asic_type >= CHIP_VEGA10) {
 		for (i = 0; i < adev->num_ip_blocks; i++) {
-			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
-				if (adev->in_gpu_reset || adev->in_suspend) {
-					if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
-						break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
-					r = adev->ip_blocks[i].version->funcs->resume(adev);
-					if (r) {
-						DRM_ERROR("resume of IP block <%s> failed %d\n",
+			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
+				continue;
+
+			/* no need to do the fw loading again if already done*/
+			if (adev->ip_blocks[i].status.hw == true)
+				break;
+
+			if (adev->in_gpu_reset || adev->in_suspend) {
+				r = adev->ip_blocks[i].version->funcs->resume(adev);
+				if (r) {
+					DRM_ERROR("resume of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
-						return r;
-					}
-				} else {
-					r = adev->ip_blocks[i].version->funcs->hw_init(adev);
-					if (r) {
-						DRM_ERROR("hw_init of IP block <%s> failed %d\n",
-							  adev->ip_blocks[i].version->funcs->name, r);
-						return r;
-					}
+					return r;
+				}
+			} else {
+				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+				if (r) {
+					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
+						  adev->ip_blocks[i].version->funcs->name, r);
+					return r;
 				}
-				adev->ip_blocks[i].status.hw = true;
 			}
+
+			adev->ip_blocks[i].status.hw = true;
+			break;
 		}
+	} else {
+		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
 	}
-	r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
 
 	return r;
 }
@@ -2128,6 +2134,7 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
+
 		/* displays are handled separately */
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
 			/* XXX handle errors */
@@ -2136,7 +2143,9 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
 			if (r) {
 				DRM_ERROR("suspend of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
+				return r;
 			}
+			adev->ip_blocks[i].status.hw = false;
 		}
 	}
 
@@ -2176,14 +2185,16 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
 	if (is_support_sw_smu(adev)) {
 		/* todo */
 	} else if (adev->powerplay.pp_funcs &&
-		   adev->powerplay.pp_funcs->set_mp1_state) {
+		   adev->powerplay.pp_funcs->set_mp1_state) {
 

Re: [PATCH 2/2] drm/amdkfd: enable KFD support for navi14

2019-07-30 Thread Alex Deucher
Ping?

On Fri, Jul 26, 2019 at 3:16 PM Alex Deucher  wrote:
>
> Same as navi10.
>
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index f052c70e4659..97f7c5235cc9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -91,6 +91,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
> kfd2kgd = amdgpu_amdkfd_arcturus_get_functions();
> break;
> case CHIP_NAVI10:
> +   case CHIP_NAVI14:
> kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions();
> break;
> default:
> --
> 2.20.1
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/3] drm/amdgpu: cleanup vega10 SRIOV code path

2019-07-30 Thread Monk Liu
we can drop all those unnecessary functions under
SRIOV for vega10 since:
1) the PSP L1 policy is force-enabled in SRIOV
2) the original logic always set all flags, which made it
   a dummy step

besides,
1) the ih_doorbell_range setting should also be skipped
for VEGA10 SRIOV.
2) the gfx_common registers should also be skipped
for VEGA10 SRIOV.

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 45 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   | 13 -
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 17 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c| 10 +++
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c  | 15 --
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 17 ++-
 drivers/gpu/drm/amd/amdgpu/soc15.c | 11 +++-
 drivers/gpu/drm/amd/amdgpu/soc15_common.h  |  5 ++--
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 18 ++--
 11 files changed, 38 insertions(+), 118 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 127ed01..6cb358c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1571,9 +1571,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
return -EAGAIN;
-
-   /* query the reg access mode at the very beginning */
-   amdgpu_virt_init_reg_access_mode(adev);
}
 
adev->pm.pp_feature = amdgpu_pp_feature_mask;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 1d68729..f04eb1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -426,48 +426,3 @@ uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest)
 
return clk;
 }
-
-void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev)
-{
-   struct amdgpu_virt *virt = &adev->virt;
-
-   if (virt->ops && virt->ops->init_reg_access_mode)
-   virt->ops->init_reg_access_mode(adev);
-}
-
-bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev)
-{
-   bool ret = false;
-   struct amdgpu_virt *virt = &adev->virt;
-
-   if (amdgpu_sriov_vf(adev)
-   && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH))
-   ret = true;
-
-   return ret;
-}
-
-bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev)
-{
-   bool ret = false;
-   struct amdgpu_virt *virt = &adev->virt;
-
-   if (amdgpu_sriov_vf(adev)
-   && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC)
-   && !(amdgpu_sriov_runtime(adev)))
-   ret = true;
-
-   return ret;
-}
-
-bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev)
-{
-   bool ret = false;
-   struct amdgpu_virt *virt = &adev->virt;
-
-   if (amdgpu_sriov_vf(adev)
-   && (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING))
-   ret = true;
-
-   return ret;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index f510773..b0b2bdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -48,12 +48,6 @@ struct amdgpu_vf_error_buffer {
uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
 };
 
-/* According to the fw feature, some new reg access modes are supported */
-#define AMDGPU_VIRT_REG_ACCESS_LEGACY  (1 << 0) /* directly mmio */
-#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH  (1 << 1) /* by PSP */
-#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */
-#define AMDGPU_VIRT_REG_SKIP_SEETING   (1 << 3) /* Skip setting reg */
-
 /**
  * struct amdgpu_virt_ops - amdgpu device virt operations
  */
@@ -65,7 +59,6 @@ struct amdgpu_virt_ops {
void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf);
int (*force_dpm_level)(struct amdgpu_device *adev, u32 level);
-   void (*init_reg_access_mode)(struct amdgpu_device *adev);
 };
 
 /*
@@ -315,10 +308,4 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
 void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
 uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest);
 uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest);
-
-void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev);
-bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev);
-bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev);
-bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev);
-
 #endif
diff 

[PATCH 2/3] drm/amdgpu: fix incorrect judge on sos fw version

2019-07-30 Thread Monk Liu
For SRIOV, the SOS fw of the PSP is loaded in the hypervisor, so the
guest cannot tell its version; judging feature support by
reading the SOS fw version on the guest side is completely wrong.

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index ec3a056..ba32758 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -634,7 +634,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
 
 static bool psp_v3_1_support_vmr_ring(struct psp_context *psp)
 {
-   if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455)
+   if (amdgpu_sriov_vf(psp->adev))
return true;
 
return false;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 18/30] drm/amd/powerplay: init arcturus SMU metrics table on bootup

2019-07-30 Thread Alex Deucher
On Mon, Jul 29, 2019 at 10:43 PM Kevin Wang  wrote:
>
>
> On 7/30/19 4:14 AM, Alex Deucher wrote:
> > From: Evan Quan 
> >
> > Initialize arcturus SMU metrics table.
> >
> > Signed-off-by: Evan Quan 
> > Reviewed-by: Kevin Wang 
> > Reviewed-by: Alex Deucher 
> > Signed-off-by: Alex Deucher 
> > ---
> >   drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 7 +++
> >   1 file changed, 7 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
> > index a0644ef267a9..5f911f092311 100644
> > --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
> > +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
> > @@ -267,6 +267,8 @@ static int arcturus_get_workload_type(struct smu_context *smu, enum PP_SMC_POWER
> >
> >   static int arcturus_tables_init(struct smu_context *smu, struct smu_table *tables)
> >   {
> > + struct smu_table_context *smu_table = &smu->smu_table;
> > +
> >   SMU_TABLE_INIT(tables, SMU_TABLE_PPTABLE, sizeof(PPTable_t),
> >  PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
> >
> > @@ -276,6 +278,11 @@ static int arcturus_tables_init(struct smu_context *smu, struct smu_table *tables)
> >   SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t),
> >  PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
> >
> > + smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL);
> > [kevin]: where is the matching free operation in the driver code?

It's freed in smu_v11_0_fini_smc_tables()
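
Roughly, as a sketch of the matching cleanup there (not the verbatim fini code):

	kfree(smu_table->metrics_table);
	smu_table->metrics_table = NULL;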

Alex

> > + if (!smu_table->metrics_table)
> > + return -ENOMEM;
> > + smu_table->metrics_time = 0;
> > +
> >   return 0;
> >   }
> >
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH] drm/amdgpu/powerplay: return success if set_mp1_state is not set

2019-07-30 Thread Quan, Evan
Reviewed-by: Evan Quan 

> -Original Message-
> From: amd-gfx  On Behalf Of Alex
> Deucher
> Sent: Wednesday, July 31, 2019 10:31 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander 
> Subject: [PATCH] drm/amdgpu/powerplay: return success if set_mp1_state
> is not set
> 
> Some asics (APUs) don't have this callback so we want to return success.
> Avoids spurious error messages on APUs.
> 
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 8 +++-
>  1 file changed, 3 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> index 88a2ef75b7e1..2e3d9ef625bf 100644
> --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> @@ -931,12 +931,10 @@ static int pp_dpm_set_mp1_state(void *handle, enum pp_mp1_state mp1_state)
>   if (!hwmgr || !hwmgr->pm_en)
>   return -EINVAL;
> 
> - if (hwmgr->hwmgr_func->set_mp1_state == NULL) {
> - pr_info_ratelimited("%s was not implemented.\n", __func__);
> - return -EINVAL;
> - }
> + if (hwmgr->hwmgr_func->set_mp1_state)
> + return hwmgr->hwmgr_func->set_mp1_state(hwmgr, mp1_state);
> 
> - return hwmgr->hwmgr_func->set_mp1_state(hwmgr, mp1_state);
> + return 0;
>  }
> 
>  static int pp_dpm_switch_power_profile(void *handle,
> --
> 2.20.1
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/2] drm/amd/powerplay: enable SW SMU power profile switch support in KFD

2019-07-30 Thread Evan Quan
Hook up the SW SMU power profile switch in KFD routine.

Change-Id: I41e53762cdc7504285de89f30e3e6e2bb396b953
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  8 +++--
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c| 36 +++
 .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h|  3 ++
 3 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 43b11879713d..9c5dcaa8fa48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -672,8 +672,12 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-   if (adev->powerplay.pp_funcs &&
-   adev->powerplay.pp_funcs->switch_power_profile)
+   if (is_support_sw_smu(adev))
+   smu_switch_power_profile(&adev->smu,
+PP_SMC_POWER_PROFILE_COMPUTE,
+!idle);
+   else if (adev->powerplay.pp_funcs &&
+adev->powerplay.pp_funcs->switch_power_profile)
amdgpu_dpm_switch_power_profile(adev,
PP_SMC_POWER_PROFILE_COMPUTE,
!idle);
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index d99a8aa0defb..55ccb4e6a6fb 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1447,6 +1447,42 @@ int smu_handle_task(struct smu_context *smu,
return ret;
 }
 
+int smu_switch_power_profile(struct smu_context *smu,
+enum PP_SMC_POWER_PROFILE type,
+bool en)
+{
+   struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+   long workload;
+   uint32_t index;
+
+   if (!smu->pm_enabled)
+   return -EINVAL;
+
+   if (!(type < PP_SMC_POWER_PROFILE_CUSTOM))
+   return -EINVAL;
+
+   mutex_lock(&smu->mutex);
+
+   if (!en) {
+   smu->workload_mask &= ~(1 << smu->workload_prority[type]);
+   index = fls(smu->workload_mask);
+   index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
+   workload = smu->workload_setting[index];
+   } else {
+   smu->workload_mask |= (1 << smu->workload_prority[type]);
+   index = fls(smu->workload_mask);
+   index = index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
+   workload = smu->workload_setting[index];
+   }
+
+   if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
+   smu_set_power_profile_mode(smu, &workload, 0);
+
+   mutex_unlock(&smu->mutex);
+
+   return 0;
+}
+
 enum amd_dpm_forced_level smu_get_performance_level(struct smu_context *smu)
 {
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index 9c0a53ef93c4..1b44414cec3b 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -973,6 +973,9 @@ extern int smu_dpm_set_power_gate(struct smu_context *smu,uint32_t block_type, bool gate);
 extern int smu_handle_task(struct smu_context *smu,
   enum amd_dpm_forced_level level,
   enum amd_pp_task task_id);
+int smu_switch_power_profile(struct smu_context *smu,
+enum PP_SMC_POWER_PROFILE type,
+bool en);
int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t *smu_version);
int smu_get_dpm_freq_by_index(struct smu_context *smu, enum smu_clk_type clk_type, uint16_t level, uint32_t *value);
-- 
2.22.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/amd/powerplay: support power profile retrieval and setting on arcturus

2019-07-30 Thread Evan Quan
Enable arcturus power profile retrieval and setting.

Change-Id: I85447ba9ca7de8e197840f76ce3745363c4133a6
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 69 
 1 file changed, 69 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
index c3f9487276c0..47d015035906 100644
--- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
@@ -1503,6 +1503,73 @@ static int arcturus_get_power_limit(struct smu_context *smu,
return 0;
 }
 
+static int arcturus_get_power_profile_mode(struct smu_context *smu,
+  char *buf)
+{
+   static const char *profile_name[] = {
+   "BOOTUP_DEFAULT",
+   "3D_FULL_SCREEN",
+   "POWER_SAVING",
+   "VIDEO",
+   "VR",
+   "COMPUTE",
+   "CUSTOM"};
+   uint32_t i, size = 0;
+   int16_t workload_type = 0;
+
+   if (!smu->pm_enabled || !buf)
+   return -EINVAL;
+
+   for (i = 0; i <= PP_SMC_POWER_PROFILE_CUSTOM; i++) {
+   /*
+* Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT
+* Not all profile modes are supported on arcturus.
+*/
+   workload_type = smu_workload_get_type(smu, i);
+   if (workload_type < 0)
+   continue;
+
+   size += sprintf(buf + size, "%2d %14s%s\n",
+   i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " ");
+   }
+
+   return size;
+}
+
+static int arcturus_set_power_profile_mode(struct smu_context *smu,
+  long *input,
+  uint32_t size)
+{
+   int workload_type = 0;
+   uint32_t profile_mode = input[size];
+
+   if (!smu->pm_enabled)
+   return -EINVAL;
+
+   if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
+   pr_err("Invalid power profile mode %d\n", profile_mode);
+   return -EINVAL;
+   }
+
+   /*
+* Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT
+* Not all profile modes are supported on arcturus.
+*/
+   workload_type = smu_workload_get_type(smu, profile_mode);
+   if (workload_type < 0) {
+   pr_err("Unsupported power profile mode %d on arcturus\n", profile_mode);
+   return -EINVAL;
+   }
+
+   smu_send_smc_msg_with_param(smu,
+   SMU_MSG_SetWorkloadMask,
+   1 << workload_type);
+
+   smu->power_profile_mode = profile_mode;
+
+   return 0;
+}
+
 static void arcturus_dump_pptable(struct smu_context *smu)
 {
struct smu_table_context *table_context = &smu->smu_table;
@@ -1968,6 +2035,8 @@ static const struct pptable_funcs arcturus_ppt_funcs = {
.force_dpm_limit_value = arcturus_force_dpm_limit_value,
.unforce_dpm_levels = arcturus_unforce_dpm_levels,
.get_profiling_clk_mask = arcturus_get_profiling_clk_mask,
+   .get_power_profile_mode = arcturus_get_power_profile_mode,
+   .set_power_profile_mode = arcturus_set_power_profile_mode,
/* debug (internal used) */
.dump_pptable = arcturus_dump_pptable,
.get_power_limit = arcturus_get_power_limit,
-- 
2.22.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

libdrm patch merge request

2019-07-30 Thread Ma, Le
Hi Alex,

Could you help merge the following 2 reviewed patches on
https://gitlab.freedesktop.org/lema1/drm/commits/lema1/drm into the drm
master branch?

  1. tests/amdgpu: disable reset test for now
  2. tests/amdgpu: divide dispatch test into compute and gfx

Thanks.

Regards,
Ma Le
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu/powerplay: return success if set_mp1_state is not set

2019-07-30 Thread Alex Deucher
Some asics (APUs) don't have this callback so we want to return
success.  Avoids spurious error messages on APUs.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 88a2ef75b7e1..2e3d9ef625bf 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -931,12 +931,10 @@ static int pp_dpm_set_mp1_state(void *handle, enum pp_mp1_state mp1_state)
if (!hwmgr || !hwmgr->pm_en)
return -EINVAL;
 
-   if (hwmgr->hwmgr_func->set_mp1_state == NULL) {
-   pr_info_ratelimited("%s was not implemented.\n", __func__);
-   return -EINVAL;
-   }
+   if (hwmgr->hwmgr_func->set_mp1_state)
+   return hwmgr->hwmgr_func->set_mp1_state(hwmgr, mp1_state);
 
-   return hwmgr->hwmgr_func->set_mp1_state(hwmgr, mp1_state);
+   return 0;
 }
 
 static int pp_dpm_switch_power_profile(void *handle,
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [RFC PATCH 0/3] Propose new struct drm_mem_region

2019-07-30 Thread Brian Welty

On 7/30/2019 2:34 AM, Daniel Vetter wrote:
> On Tue, Jul 30, 2019 at 08:45:57AM +, Koenig, Christian wrote:
>> Yeah, that looks like a good start. Just a couple of random design 
>> comments/requirements.
>>
>> First of all please restructure the changes so that you more or less 
>> have the following:
>> 1. Adding of the new structures and functionality without any change to 
>> existing code.
>> 2. Replacing the existing functionality in TTM and all of its drivers.
>> 3. Replacing the existing functionality in i915.
>>
>> This should make it much easier to review the new functionality when it 
>> is not mixed with existing TTM stuff.

Sure, understood.  But I hope it's fair that I wouldn't be updating all
drivers in an RFC series until there is a bit of clarity/agreement on any
path forward.  But I can include amdgpu patch next time.

>>
>>
>> Second please completely drop the concept of gpu_offset or start of the 
>> memory region like here:
>>> drm_printf(p, "gpu_offset: 0x%08llX\n", man->region.start);
>> At least on AMD hardware we have the following address spaces which are 
>> sometimes even partially overlapping: VM, MC, SYSTEM, FB, AGP, XGMI, bus 
>> addresses and physical addresses.
>>
>> Pushing a concept of a general GPU address space into the memory 
>> management was a rather bad design mistake in TTM and we should not 
>> repeat that here.
>>
>> A region should only consist of a size in bytes and (internal to the
>> region manager) allocations in that region.

Got it. I was trying to include fields that seemed relevant to a base
structure and could then optionally be leveraged at the choice of device
driver.  But I see your point.

>>
>>
>> Third please don't use any CPU or architecture specific types in any 
>> data structures:
>>> +struct drm_mem_region {
>>> +   resource_size_t start; /* within GPU physical address space */
>>> +   resource_size_t io_start; /* BAR address (CPU accessible) */
>>> +   resource_size_t size;
>>
>> I knew that resource_size is mostly 64bit on modern architectures, but 
>> dGPUs are completely separate to the architecture and we always need 
>> 64bits here at least for AMD hardware.
>>
>> So this should either be always uint64_t, or something like 
>> gpu_resource_size which depends on what the compiled in drivers require 
>> if we really need that.
>>
>> And by the way: Please always use bytes for things like sizes and not 
>> number of pages, cause page size is again CPU/architecture specific and 
>> GPU drivers don't necessary care about that.

Makes sense,  will fix.

Hmm, I did hope that at least the DRM cgroup controller could leverage
struct page_counter. It encapsulates nicely much of the fields needed for
managing a memory limit. But well, this is off topic.

>>
>>
>> And here also a few direct comments on the code:
>>> +   union {
>>> +   struct drm_mm *mm;
>>> +   /* FIXME (for i915): struct drm_buddy_mm *buddy_mm; */
>>> +   void *priv;
>>> +   };
>> Maybe just always use void *mm here.
> 
> I'd say lets drop this outright, and handle private data by embedding this
> structure in the right place. That's how we handle everything in drm now
> as much as possible, and it's so much cleaner. I think ttm still loves
> priv pointers a bit too much in some places.

Okay, I'll drop it until I can show it would be useful later.

>
>>> +   spinlock_t move_lock;
>>> +   struct dma_fence *move;
>>
>> That is TTM specific and I'm not sure if we want it in the common memory 
>> management handling.
>>
>> If we want that here we should probably replace the lock with some rcu 
>> and atomic fence pointer exchange first.
> 
> Yeah  not sure we want any of these details in this shared structure
> either.
> 

Thanks for the feedback. I can remove it too.
I was unsure whether there might be a case for having it in the future.

Well, struct drm_mem_region will be quite small then if it only has a
size and type field.
Hardly seems worth introducing a new structure if these are the only fields.
I know we thought it might benefit the cgroups controller, but I still hope
to find an earlier purpose it could serve.
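
i.e. something like this, sketch only:

struct drm_mem_region {
	uint64_t size;	/* region size in bytes, per Christian's comment */
	uint32_t type;	/* DRM_MEM_* */
};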

-Brian


[snip]

>>
>> Am 30.07.19 um 02:32 schrieb Brian Welty:
>>> [ By request, resending to include amd-gfx + intel-gfx.  Since resending,
>>>I fixed the nit with ordering of header includes that Sam noted. ]
>>>
>>> This RFC series is first implementation of some ideas expressed
>>> earlier on dri-devel [1].
>>>
>>> Some of the goals (open for much debate) are:
>>>- Create common base structure (subclass) for memory regions (patch #1)
>>>- Create common memory region types (patch #2)
>>>- Create common set of memory_region function callbacks (based on
>>>  ttm_mem_type_manager_funcs and intel_memory_regions_ops)
>>>- Create common helpers that operate on drm_mem_region to be leveraged
>>>  by both TTM drivers and i915, reducing code duplication
>>>- Above might start with refactoring 

Re: [PATCH 08/13] mm: remove the mask variable in hmm_vma_walk_hugetlb_entry

2019-07-30 Thread Ralph Campbell


On 7/29/19 10:51 PM, Christoph Hellwig wrote:

The pagewalk code already passes the value as the hmask parameter.

Signed-off-by: Christoph Hellwig 
---
  mm/hmm.c | 7 ++-
  1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index f26d6abc4ed2..88b77a4a6a1e 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -771,19 +771,16 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
 				      struct mm_walk *walk)
  {
  #ifdef CONFIG_HUGETLB_PAGE
-   unsigned long addr = start, i, pfn, mask;
+   unsigned long addr = start, i, pfn;
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
struct vm_area_struct *vma = walk->vma;
-   struct hstate *h = hstate_vma(vma);
uint64_t orig_pfn, cpu_flags;
bool fault, write_fault;
spinlock_t *ptl;
pte_t entry;
int ret = 0;
  
-	mask = huge_page_size(h) - 1;
-
ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
entry = huge_ptep_get(pte);
  
@@ -799,7 +796,7 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
goto unlock;
}
  
-	pfn = pte_pfn(entry) + ((start & mask) >> PAGE_SHIFT);
+	pfn = pte_pfn(entry) + ((start & hmask) >> PAGE_SHIFT);

This needs to be "~hmask" so that the upper bits of the start address
are not added to the pfn. It's the middle bits of the address that
offset into the huge page that are needed.
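
For example, with a 2MB huge page and start = 0x7f1234567000:

	hmask  = ~(2MB - 1)  = 0xffffffffffe00000
	start & hmask  = 0x7f1234400000  (huge page base -- wrong to add to the pfn)
	start & ~hmask = 0x167000        (the offset into the huge page we want)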


for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
range->pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
 cpu_flags;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [RFC PATCH 0/3] Propose new struct drm_mem_region

2019-07-30 Thread Brian Welty

On 7/30/2019 3:45 AM, Daniel Vetter wrote:
> On Tue, Jul 30, 2019 at 12:24 PM Koenig, Christian
>  wrote:
>>
>> Am 30.07.19 um 11:38 schrieb Daniel Vetter:
>>> On Tue, Jul 30, 2019 at 08:45:57AM +, Koenig, Christian wrote:

Snipped the feedback on struct drm_mem_region.
Will be easier to have separate thread.


> +/*
> + * Memory types for drm_mem_region
> + */
 #define DRM_MEM_SWAP?
>>> btw what did you have in mind for this? Since we use shmem we kinda don't
>>> know whether the BO is actually swapped out or not, at least on the i915
>>> side. So this would be more NOT_CURRENTLY_PINNED_AND_POSSIBLY_SWAPPED_OUT.
>>
>> Yeah, the problem is not everybody can use shmem. For some use cases you
>> have to use memory allocated through dma_alloc_coherent().
>>
>> So to be able to swap this out you need a separate domain to copy it
>> from whatever is backing it currently to shmem.
>>
>> So we essentially have:
>> DRM_MEM_SYS_SWAPABLE
>> DRM_MEM_SYS_NOT_GPU_MAPPED
>> DRM_MEM_SYS_GPU_MAPPED
>>
>> Or something like that.
> 
> Yeah i915-gem is similar. We oportunistically keep the pages pinned
> sometimes even if not currently mapped into the (what ttm calls) TT.
> So I think these three for system memory make sense for us too. I
> think that's similar (at least in spirit) to the dma_alloc cache you
> have going on. Mabye instead of the somewhat cumbersome NOT_GPU_MAPPED
> we could have something like PINNED or so. Although it's not
> permanently pinned, so maybe that's confusing too.
> 

Okay, I see now I was far off the mark with what I thought TTM_PL_SYSTEM
was.  The discussion helped clear up several bits of confusion on my part.
From proposed names, I find MAPPED and PINNED slightly confusing.
In terms of backing store description, maybe these are a little better:
  DRM_MEM_SYS_UNTRANSLATED  (TTM_PL_SYSTEM)
  DRM_MEM_SYS_TRANSLATED(TTM_PL_TT or i915's SYSTEM)

Are these allowed to overlap, or must they be non-overlapping (partitioned)?
Per Christian's point about removing .start, it seems it shouldn't need to
matter.

Whatever we define for these sub-types, does it make sense for SYSTEM and
VRAM to each have them defined?

I'm unclear how DRM_MEM_SWAP (or DRM_MEM_SYS_SWAPABLE) would get
configured by the driver... is this a fixed-size partition of host memory?
Or is it a kind of dummy memory region just for the swap implementation?
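
For concreteness, my reading of the candidate type list so far, as a
sketch only (the names are clearly still unsettled):

#define DRM_MEM_SYS_SWAPABLE		0	/* shmem-backed, can be swapped out */
#define DRM_MEM_SYS_NOT_GPU_MAPPED	1	/* e.g. dma_alloc_coherent() backed */
#define DRM_MEM_SYS_GPU_MAPPED		2	/* TTM's TT / i915's SYSTEM */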


 TTM was clearly missing that resulting in a whole bunch of extra
 handling and rather complicated handling.

> +#define DRM_MEM_SYSTEM 0
> +#define DRM_MEM_STOLEN 1
 I think we need a better naming for that.

 STOLEN sounds way to much like stolen VRAM for integrated GPUs, but at
 least for TTM this is the system memory currently GPU accessible.
>>> Yup this is wrong, for i915 we use this as stolen, for ttm it's the gpu
>>> translation table window into system memory. Not the same thing at all.
>>
>> Thought so. The closest I have in mind is GTT, but everything else works
>> as well.
> 
> Would your GPU_MAPPED above work for TT? I think we'll also need
> STOLEN, I'm even hearing noises that there's going to be stolen for
> discrete vram for us ... Also if we expand I guess we need to teach
> ttm to cope with more, or maybe treat the DRM one as some kind of
> sub-flavour.
Daniel, maybe what i915 calls stolen could just be DRM_MEM_RESERVED or
DRM_MEM_PRIV. Or maybe one can argue it falls into the UNTRANSLATED type
that I suggested above; I'm not sure.

-Brian


> -Daniel
> 
>>
>> Christian.
>>
>>> -Daniel
>>>

 Thanks for looking into that,
 Christian.

 Am 30.07.19 um 02:32 schrieb Brian Welty:
> [ By request, resending to include amd-gfx + intel-gfx.  Since resending,
> I fixed the nit with ordering of header includes that Sam noted. ]
>
> This RFC series is first implementation of some ideas expressed
> earlier on dri-devel [1].
>
> Some of the goals (open for much debate) are:
> - Create common base structure (subclass) for memory regions (patch 
> #1)
> - Create common memory region types (patch #2)
> - Create common set of memory_region function callbacks (based on
>   ttm_mem_type_manager_funcs and intel_memory_regions_ops)
> - Create common helpers that operate on drm_mem_region to be leveraged
>   by both TTM drivers and i915, reducing code duplication
> - Above might start with refactoring ttm_bo_manager.c as these are
>   helpers for using drm_mm's range allocator and could be made to
>   operate on DRM structures instead of TTM ones.
> - Larger goal might be to make LRU management of GEM objects common, 
> and
>   migrate those fields into drm_mem_region and drm_gem_object 
> strucures.
>
> Patches 1-2 implement the proposed struct drm_mem_region and adds
> associated common set of definitions for memory region type.
>
> Patch #3 is update to 

Re: [PATCH 06/13] mm: remove superflous arguments from hmm_range_register

2019-07-30 Thread Jason Gunthorpe
On Tue, Jul 30, 2019 at 08:51:56AM +0300, Christoph Hellwig wrote:
> The start, end and page_shift values are all saved in the range
> structure, so we might as well use that for argument passing.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  Documentation/vm/hmm.rst|  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  7 +--
>  drivers/gpu/drm/nouveau/nouveau_svm.c   |  5 ++---
>  include/linux/hmm.h |  6 +-
>  mm/hmm.c| 20 +---
>  5 files changed, 14 insertions(+), 26 deletions(-)

I also don't really like how the API sets up some things in the struct
and some things via arguments, so:

Reviewed-by: Jason Gunthorpe 

Jason
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 03/13] nouveau: pass struct nouveau_svmm to nouveau_range_fault

2019-07-30 Thread Christoph Hellwig
On Tue, Jul 30, 2019 at 01:14:58PM +, Jason Gunthorpe wrote:
> I have a patch deleting hmm->mm, so using svmm seems cleaner churn
> here, we could defer and I can fold this into that patch?

Sounds good.  If I don't need to resend, feel free to fold it; otherwise
I'll fix it up.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 13/13] mm: allow HMM_MIRROR on all architectures with MMU

2019-07-30 Thread Jason Gunthorpe
On Tue, Jul 30, 2019 at 03:03:46PM -0300, Jason Gunthorpe wrote:
> On Tue, Jul 30, 2019 at 08:52:03AM +0300, Christoph Hellwig wrote:
> > There isn't really any architecture specific code in this page table
> > walk implementation, so drop the dependencies.
> > 
> > Signed-off-by: Christoph Hellwig 
> >  mm/Kconfig | 3 +--
> >  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> Happy to see it, lets try to get this patch + the required parts of
> this series into linux-next to be really sure
> 
> Reviewed-by: Jason Gunthorpe 

Oh, can we make this into a non-user selectable option now? 

ie have the drivers that use the API select it?

Thanks,
Jason
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 07/13] mm: remove the page_shift member from struct hmm_range

2019-07-30 Thread Jason Gunthorpe
On Tue, Jul 30, 2019 at 03:14:30PM +0200, Christoph Hellwig wrote:
> On Tue, Jul 30, 2019 at 12:55:17PM +, Jason Gunthorpe wrote:
> > I suspect this was added for the ODP conversion that does use both
> > page sizes. I think the ODP code for this is kind of broken, but I
> > haven't delved into that..
> > 
> > The challenge is that the driver needs to know what page size to
> > configure the hardware before it does any range stuff.
> > 
> > The other challenge is that the HW is configured to do only one page
> > size, and if the underlying CPU page side changes it goes south.
> > 
> > What I would prefer is if the driver could somehow dynamically adjust
> > the the page size after each dma map, but I don't know if ODP HW can
> > do that.
> > 
> > Since this is all driving toward making ODP use this maybe we should
> > keep this API? 
> > 
> > I'm not sure I can lose the crappy huge page support in ODP.
> 
> The problem is that I see no way how to use the current API.  To know
> the huge page size you need to have the vma, and the current API
> doesn't require a vma to be passed in.

The way ODP seems to work is once in hugetlb mode the dma addresses
must give huge pages or the page fault will be failed. I think that is
a terrible design, but this is how the driver is ..

So, from this HMM perspective if the caller asked for huge pages then
the results have to be all huge pages or a hard failure.

It is not negotiated as an optimization like you are thinking.

[note, I haven't yet checked carefully how this works in ODP, every
 time I look at parts of it the thing seems crazy]

> That's why I suggested an api where we pass in a flag that huge pages
> are ok into hmm_range_fault, and it then could pass the shift out, and
> limits itself to a single vma (which it normally doesn't, that is an
> additional complication).  But all this seems really awkward in terms
> of an API still.  AFAIK ODP is only used by mlx5, and mlx5 unlike other
> IB HCAs can use scatterlist style MRs with variable length per entry,
> so even if we pass multiple pages per entry from hmm it could coalesce
> them.  

When the driver takes faults it has to repair the MR mapping, and
fixing a page in the middle of a variable length SGL would be pretty
complicated. Even so, I don't think the SG_GAPs feature and ODP are
compatible - I'm pretty sure ODP has to be page lists not SGL..

However, what ODP can maybe do is represent a full multi-level page
table, so we could have 2M entries that map to a single DMA or to
another page table w/ 4k pages (have to check on this)

But the driver isn't set up to do that right now.

> The best API for mlx4 would of course be to pass a biovec-style
> variable length structure that hmm_fault could fill out, but that would
> be a major restructure.

It would work, but the driver has to expand that into a page list
right away anyhow.

We can't even dma map the biovec with today's dma API as it needs the
ability to remap on a page granularity.

Jason
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v5 13/29] compat_ioctl: move more drivers to compat_ptr_ioctl

2019-07-30 Thread Dan Williams
On Tue, Jul 30, 2019 at 12:59 PM Arnd Bergmann  wrote:
>
> The .ioctl and .compat_ioctl file operations have the same prototype so
> they can both point to the same function, which works great almost all
> the time when all the commands are compatible.
>
> One exception is the s390 architecture, where a compat pointer is only
> 31 bit wide, and converting it into a 64-bit pointer requires calling
> compat_ptr(). Most drivers here will never run in s390, but since we now
> have a generic helper for it, it's easy enough to use it consistently.
>
> I double-checked all these drivers to ensure that all ioctl arguments
> are used as pointers or are ignored, but are not interpreted as integer
> values.
>
> Acked-by: Jason Gunthorpe 
> Acked-by: Daniel Vetter 
> Acked-by: Mauro Carvalho Chehab 
> Acked-by: Greg Kroah-Hartman 
> Acked-by: David Sterba 
> Acked-by: Darren Hart (VMware) 
> Acked-by: Jonathan Cameron 
> Acked-by: Bjorn Andersson 
> Signed-off-by: Arnd Bergmann 
> ---
>  drivers/nvdimm/bus.c| 4 ++--
[..]
> diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
> index 798c5c4aea9c..6ca142d833ab 100644
> --- a/drivers/nvdimm/bus.c
> +++ b/drivers/nvdimm/bus.c
> @@ -1229,7 +1229,7 @@ static const struct file_operations nvdimm_bus_fops = {
> .owner = THIS_MODULE,
> .open = nd_open,
> .unlocked_ioctl = bus_ioctl,
> -   .compat_ioctl = bus_ioctl,
> +   .compat_ioctl = compat_ptr_ioctl,
> .llseek = noop_llseek,
>  };
>
> @@ -1237,7 +1237,7 @@ static const struct file_operations nvdimm_fops = {
> .owner = THIS_MODULE,
> .open = nd_open,
> .unlocked_ioctl = dimm_ioctl,
> -   .compat_ioctl = dimm_ioctl,
> +   .compat_ioctl = compat_ptr_ioctl,
> .llseek = noop_llseek,
>  };

Acked-by: Dan Williams 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 08/13] mm: remove the mask variable in hmm_vma_walk_hugetlb_entry

2019-07-30 Thread Jason Gunthorpe
On Tue, Jul 30, 2019 at 08:51:58AM +0300, Christoph Hellwig wrote:
> The pagewalk code already passes the value as the hmask parameter.
> 
> Signed-off-by: Christoph Hellwig 
>  mm/hmm.c | 7 ++-
>  1 file changed, 2 insertions(+), 5 deletions(-)
> 
> diff --git a/mm/hmm.c b/mm/hmm.c
> index f26d6abc4ed2..88b77a4a6a1e 100644
> +++ b/mm/hmm.c
> @@ -771,19 +771,16 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
> 				      struct mm_walk *walk)
>  {
>  #ifdef CONFIG_HUGETLB_PAGE
> - unsigned long addr = start, i, pfn, mask;
> + unsigned long addr = start, i, pfn;
>   struct hmm_vma_walk *hmm_vma_walk = walk->private;
>   struct hmm_range *range = hmm_vma_walk->range;
>   struct vm_area_struct *vma = walk->vma;
> - struct hstate *h = hstate_vma(vma);
>   uint64_t orig_pfn, cpu_flags;
>   bool fault, write_fault;
>   spinlock_t *ptl;
>   pte_t entry;
>   int ret = 0;
>  
> - mask = huge_page_size(h) - 1;
> -
>   ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
>   entry = huge_ptep_get(pte);
>  
> @@ -799,7 +796,7 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
>   goto unlock;
>   }
>  
> - pfn = pte_pfn(entry) + ((start & mask) >> PAGE_SHIFT);
> + pfn = pte_pfn(entry) + ((start & hmask) >> PAGE_SHIFT);

I don't know this hstate stuff, but this doesn't look the same?

static int walk_hugetlb_range(unsigned long addr, unsigned long end, struct mm_walk *walk) {
struct hstate *h = hstate_vma(vma);
unsigned long hmask = huge_page_mask(h); // aka h->mask

err = walk->hugetlb_entry(pte, hmask, addr, next, walk);

And the first place I found setting h->mask is:

void __init hugetlb_add_hstate(unsigned int order) {
h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);

Compared with
mask = huge_page_size(h) - 1;
 = ((unsigned long)PAGE_SIZE << h->order) - 1

Looks like hmask == ~mask

?
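
Plugging in a 2MB huge page (order 9 with 4K PAGE_SIZE) as a sanity check:

	mask  = huge_page_size(h) - 1     = (4096 << 9) - 1 = 0x1fffff
	hmask = ~((1ULL << (9 + 12)) - 1) = ~0x1fffff

so hmask == ~mask, i.e. (start & hmask) keeps the page-number bits where
(start & mask) kept the in-page offset.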

Jason
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 12/13] mm: cleanup the hmm_vma_walk_hugetlb_entry stub

2019-07-30 Thread Jason Gunthorpe
On Tue, Jul 30, 2019 at 08:52:02AM +0300, Christoph Hellwig wrote:
> Stub out the whole function and assign NULL to the .hugetlb_entry method
> if CONFIG_HUGETLB_PAGE is not set, as the method won't ever be called in
> that case.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  mm/hmm.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)

Reviewed-by: Jason Gunthorpe 

Jason
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH v5 13/29] compat_ioctl: move more drivers to compat_ptr_ioctl

2019-07-30 Thread Arnd Bergmann
The .ioctl and .compat_ioctl file operations have the same prototype so
they can both point to the same function, which works great almost all
the time when all the commands are compatible.

One exception is the s390 architecture, where a compat pointer is only
31 bit wide, and converting it into a 64-bit pointer requires calling
compat_ptr(). Most drivers here will never run in s390, but since we now
have a generic helper for it, it's easy enough to use it consistently.

I double-checked all these drivers to ensure that all ioctl arguments
are used as pointers or are ignored, but are not interpreted as integer
values.
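
For reference, the generic helper everything here points at is essentially
this (a sketch of the helper introduced earlier in the series):

long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	if (!file->f_op->unlocked_ioctl)
		return -ENOIOCTLCMD;

	/* widen the 31-bit compat pointer before handing it to the
	 * native ioctl handler */
	return file->f_op->unlocked_ioctl(file, cmd,
					  (unsigned long)compat_ptr(arg));
}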

Acked-by: Jason Gunthorpe 
Acked-by: Daniel Vetter 
Acked-by: Mauro Carvalho Chehab 
Acked-by: Greg Kroah-Hartman 
Acked-by: David Sterba 
Acked-by: Darren Hart (VMware) 
Acked-by: Jonathan Cameron 
Acked-by: Bjorn Andersson 
Signed-off-by: Arnd Bergmann 
---
 drivers/android/binder.c| 2 +-
 drivers/crypto/qat/qat_common/adf_ctl_drv.c | 2 +-
 drivers/dma-buf/dma-buf.c   | 4 +---
 drivers/dma-buf/sw_sync.c   | 2 +-
 drivers/dma-buf/sync_file.c | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c| 2 +-
 drivers/hid/hidraw.c| 4 +---
 drivers/iio/industrialio-core.c | 2 +-
 drivers/infiniband/core/uverbs_main.c   | 4 ++--
 drivers/media/rc/lirc_dev.c | 4 +---
 drivers/mfd/cros_ec_dev.c   | 4 +---
 drivers/misc/vmw_vmci/vmci_host.c   | 2 +-
 drivers/nvdimm/bus.c| 4 ++--
 drivers/nvme/host/core.c| 2 +-
 drivers/pci/switch/switchtec.c  | 2 +-
 drivers/platform/x86/wmi.c  | 2 +-
 drivers/rpmsg/rpmsg_char.c  | 4 ++--
 drivers/sbus/char/display7seg.c | 2 +-
 drivers/sbus/char/envctrl.c | 4 +---
 drivers/scsi/3w-.c  | 4 +---
 drivers/scsi/cxlflash/main.c| 2 +-
 drivers/scsi/esas2r/esas2r_main.c   | 2 +-
 drivers/scsi/pmcraid.c  | 4 +---
 drivers/staging/android/ion/ion.c   | 4 +---
 drivers/staging/vme/devices/vme_user.c  | 2 +-
 drivers/tee/tee_core.c  | 2 +-
 drivers/usb/class/cdc-wdm.c | 2 +-
 drivers/usb/class/usbtmc.c  | 4 +---
 drivers/virt/fsl_hypervisor.c   | 2 +-
 fs/btrfs/super.c| 2 +-
 fs/fuse/dev.c   | 2 +-
 fs/notify/fanotify/fanotify_user.c  | 2 +-
 fs/userfaultfd.c| 2 +-
 net/rfkill/core.c   | 2 +-
 34 files changed, 37 insertions(+), 55 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index dc1c83eafc22..79955e82544a 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -6043,7 +6043,7 @@ const struct file_operations binder_fops = {
.owner = THIS_MODULE,
.poll = binder_poll,
.unlocked_ioctl = binder_ioctl,
-   .compat_ioctl = binder_ioctl,
+   .compat_ioctl = compat_ptr_ioctl,
.mmap = binder_mmap,
.open = binder_open,
.flush = binder_flush,
diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
index abc7a7f64d64..ef0e482ee04f 100644
--- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c
+++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
@@ -68,7 +68,7 @@ static long adf_ctl_ioctl(struct file *fp, unsigned int cmd, unsigned long arg);
 static const struct file_operations adf_ctl_ops = {
.owner = THIS_MODULE,
.unlocked_ioctl = adf_ctl_ioctl,
-   .compat_ioctl = adf_ctl_ioctl,
+   .compat_ioctl = compat_ptr_ioctl,
 };
 
 struct adf_ctl_drv_info {
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index f45bfb29ef96..f6d9047b7a69 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -415,9 +415,7 @@ static const struct file_operations dma_buf_fops = {
.llseek = dma_buf_llseek,
.poll   = dma_buf_poll,
.unlocked_ioctl = dma_buf_ioctl,
-#ifdef CONFIG_COMPAT
-   .compat_ioctl   = dma_buf_ioctl,
-#endif
+   .compat_ioctl   = compat_ptr_ioctl,
.show_fdinfo= dma_buf_show_fdinfo,
 };
 
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 051f6c2873c7..51026cb08801 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -410,5 +410,5 @@ const struct file_operations sw_sync_debugfs_fops = {
.open   = sw_sync_debugfs_open,
.release= sw_sync_debugfs_release,
.unlocked_ioctl = sw_sync_ioctl,
-   .compat_ioctl   = sw_sync_ioctl,
+   .compat_ioctl   = compat_ptr_ioctl,
 };
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index ee4d1a96d779..85b96757fc76 100644
--- a/drivers/dma-buf/sync_file.c
+++ 

Re: [PATCH 13/13] mm: allow HMM_MIRROR on all architectures with MMU

2019-07-30 Thread Jason Gunthorpe
On Tue, Jul 30, 2019 at 08:52:03AM +0300, Christoph Hellwig wrote:
> There isn't really any architecture specific code in this page table
> walk implementation, so drop the dependencies.
> 
> Signed-off-by: Christoph Hellwig 
>  mm/Kconfig | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)

Happy to see it, lets try to get this patch + the required parts of
this series into linux-next to be really sure

Reviewed-by: Jason Gunthorpe 

Thanks,
Jason
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 10/13] mm: only define hmm_vma_walk_pud if needed

2019-07-30 Thread Jason Gunthorpe
On Tue, Jul 30, 2019 at 08:52:00AM +0300, Christoph Hellwig wrote:
> We only need the special pud_entry walker if PUD-sized hugepages and
> pte mappings are supported, else the common pagewalk code will take
> care of the iteration.  Not implementing this callback reduces the
> amount of code compiled for non-x86 platforms, and also fixes compile
> failures with other architectures when helpers like pud_pfn are not
> implemented.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  mm/hmm.c | 29 -
>  1 file changed, 16 insertions(+), 13 deletions(-)

Reviewed-by: Jason Gunthorpe 

Jason
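To make the mechanism concrete: the generic pagewalk skips a callback that is
left NULL, so the walker only needs hmm_vma_walk_pud when huge PUDs can
actually occur. A rough sketch; the guard names are assumptions, not a quote
of the patch:

	#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
	    defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
	static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start,
				    unsigned long end, struct mm_walk *walk);
	#else
	#define hmm_vma_walk_pud	NULL	/* core walk descends to PMDs */
	#endif

	/* later, when filling in the walk: */
	mm_walk.pud_entry = hmm_vma_walk_pud;	/* NULL: not implemented */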

Re: [PATCH 11/13] mm: cleanup the hmm_vma_handle_pmd stub

2019-07-30 Thread Jason Gunthorpe
On Tue, Jul 30, 2019 at 08:52:01AM +0300, Christoph Hellwig wrote:
> Stub out the whole function when CONFIG_TRANSPARENT_HUGEPAGE is not set
> to make the function easier to read.
> 
> Signed-off-by: Christoph Hellwig 
>  mm/hmm.c | 18 +-
>  1 file changed, 9 insertions(+), 9 deletions(-)
> 
> diff --git a/mm/hmm.c b/mm/hmm.c
> index 4d3bd41b6522..f4e90ea5779f 100644
> +++ b/mm/hmm.c
> @@ -455,13 +455,10 @@ static inline uint64_t pmd_to_hmm_pfn_flags(struct 
> hmm_range *range, pmd_t pmd)
>   range->flags[HMM_PFN_VALID];
>  }
>  
> -static int hmm_vma_handle_pmd(struct mm_walk *walk,
> -   unsigned long addr,
> -   unsigned long end,
> -   uint64_t *pfns,
> -   pmd_t pmd)
> -{
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
> + unsigned long end, uint64_t *pfns, pmd_t pmd)
> +{
>   struct hmm_vma_walk *hmm_vma_walk = walk->private;
>   struct hmm_range *range = hmm_vma_walk->range;
>   struct dev_pagemap *pgmap = NULL;
> @@ -490,11 +487,14 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk,
>   put_dev_pagemap(pgmap);
>   hmm_vma_walk->last = end;
>   return 0;
> -#else
> - /* If THP is not enabled then we should never reach this 

This old comment says we should never get here

> +}
> +#else /* CONFIG_TRANSPARENT_HUGEPAGE */
> +static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
> + unsigned long end, uint64_t *pfns, pmd_t pmd)
> +{
>   return -EINVAL;

So could we just do
   #define hmm_vma_handle_pmd NULL

?

At the very least this seems like a WARN_ON too?

Jason
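For illustration, the WARN_ON variant suggested above could look like this
hypothetical stub (not merged code): without CONFIG_TRANSPARENT_HUGEPAGE the
walker should never hand us a huge PMD, so reaching the stub ought to be loud.

	#else /* CONFIG_TRANSPARENT_HUGEPAGE */
	static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
				      unsigned long end, uint64_t *pfns, pmd_t pmd)
	{
		WARN_ON_ONCE(1);	/* unreachable when THP is compiled out */
		return -EINVAL;
	}
	#endif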

Re: between commits d7d170a8e357 and 22051d9c4a57 begins issue page allocation failure in gnome-shell process

2019-07-30 Thread Mikhail Gavrilov
Is anyone here? Is everyone so busy, or is something wrong?
RC2 is still affected by this issue and unusable for everyday work because
opening a video in the totem player causes the DE to hang with a lot of
messages:

[ 1072.283518] amdgpu :0b:00.0: [gfxhub] retry page fault
(src_id:0 ring:0 vmid:3 pasid:32769, for process gnome-shell pid 1948
thread gnome-shel:cs0 pid 1983)
[ 1072.283530] amdgpu :0b:00.0:   in page starting at address
0x0001c3385000 from 27
[ 1072.283533] amdgpu :0b:00.0: VM_L2_PROTECTION_FAULT_STATUS:0x00301031
[ 1072.283539] amdgpu :0b:00.0: [gfxhub] retry page fault
(src_id:0 ring:0 vmid:3 pasid:32769, for process gnome-shell pid 1948
thread gnome-shel:cs0 pid 1983)
[ 1072.283541] amdgpu :0b:00.0:   in page starting at address
0x0001c3389000 from 27
[ 1072.283543] amdgpu :0b:00.0: VM_L2_PROTECTION_FAULT_STATUS:0x00301031
[ 1072.283548] amdgpu :0b:00.0: [gfxhub] retry page fault
(src_id:0 ring:0 vmid:3 pasid:32769, for process gnome-shell pid 1948
thread gnome-shel:cs0 pid 1983)
[ 1072.283551] amdgpu :0b:00.0:   in page starting at address
0x0001c3387000 from 27
[ 1072.283552] amdgpu :0b:00.0: VM_L2_PROTECTION_FAULT_STATUS:0x00301031
[ 1072.283557] amdgpu :0b:00.0: [gfxhub] retry page fault
(src_id:0 ring:0 vmid:3 pasid:32769, for process gnome-shell pid 1948
thread gnome-shel:cs0 pid 1983)
[ 1072.283559] amdgpu :0b:00.0:   in page starting at address
0x0001c3388000 from 27
[ 1072.283560] amdgpu :0b:00.0: VM_L2_PROTECTION_FAULT_STATUS:0x00301031
[ 1072.283565] amdgpu :0b:00.0: [gfxhub] retry page fault
(src_id:0 ring:0 vmid:3 pasid:32769, for process gnome-shell pid 1948
thread gnome-shel:cs0 pid 1983)
[ 1072.283566] amdgpu :0b:00.0:   in page starting at address
0x0001c338c000 from 27
[ 1072.283568] amdgpu :0b:00.0: VM_L2_PROTECTION_FAULT_STATUS:0x00301031

All Vega GPUs are affected: Vega 56, Vega 64, Radeon VII

--
Best Regards,
Mike Gavrilov.

Re: [RFC PATCH 0/3] Propose new struct drm_mem_region

2019-07-30 Thread Daniel Vetter
On Tue, Jul 30, 2019 at 4:30 PM Michel Dänzer  wrote:
> On 2019-07-30 12:45 p.m., Daniel Vetter wrote:
> > On Tue, Jul 30, 2019 at 12:24 PM Koenig, Christian
> >  wrote:
> >> On 30.07.19 at 11:38, Daniel Vetter wrote:
> >>> On Tue, Jul 30, 2019 at 08:45:57AM +, Koenig, Christian wrote:
> >
> > +#define DRM_MEM_SYSTEM 0
> > +#define DRM_MEM_STOLEN 1
>  I think we need a better naming for that.
> 
 STOLEN sounds way too much like stolen VRAM for integrated GPUs, but at
>  least for TTM this is the system memory currently GPU accessible.
> >>> Yup this is wrong, for i915 we use this as stolen, for ttm it's the gpu
> >>> translation table window into system memory. Not the same thing at all.
> >>
> >> Thought so. The closest I have in mind is GTT, but everything else works
> >> as well.
> >
> > Would your GPU_MAPPED above work for TT? I think we'll also need
> > STOLEN, I'm even hearing noises that there's going to be stolen for
> > discrete vram for us ...
>
> Could i915 use DRM_MEM_PRIV for stolen? Or is there other hardware with
> something similar?

I don't think it matters much what we name it ... _PRIV sounds as good
as anything else. As long as we make it clear that userspace bo also
might end up in there I think it's all good.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

Re: [RFC PATCH 0/3] Propose new struct drm_mem_region

2019-07-30 Thread Michel Dänzer
On 2019-07-30 12:45 p.m., Daniel Vetter wrote:
> On Tue, Jul 30, 2019 at 12:24 PM Koenig, Christian
>  wrote:
>> On 30.07.19 at 11:38, Daniel Vetter wrote:
>>> On Tue, Jul 30, 2019 at 08:45:57AM +, Koenig, Christian wrote:
> 
> +#define DRM_MEM_SYSTEM 0
> +#define DRM_MEM_STOLEN 1
 I think we need a better naming for that.

 STOLEN sounds way too much like stolen VRAM for integrated GPUs, but at
 least for TTM this is the system memory currently GPU accessible.
>>> Yup this is wrong, for i915 we use this as stolen, for ttm it's the gpu
>>> translation table window into system memory. Not the same thing at all.
>>
>> Thought so. The closest I have in mind is GTT, but everything else works
>> as well.
> 
> Would your GPU_MAPPED above work for TT? I think we'll also need
> STOLEN, I'm even hearing noises that there's going to be stolen for
> discrete vram for us ...

Could i915 use DRM_MEM_PRIV for stolen? Or is there other hardware with
something similar?


-- 
Earthling Michel Dänzer   |  https://www.amd.com
Libre software enthusiast | Mesa and X developer

Re: [PATCH 1/2] drm/amd/display: Embed DCN2 SOC bounding box

2019-07-30 Thread Ernst Sjöstrand
Isn't NV10 a family of chips in Nouveau?

Regards
//Ernst

On Tue, 30 Jul 2019 at 15:57, Nicholas Kazlauskas wrote:
>
> [Why]
> In order to support uclk switching on NV10 the SOC bounding box
> needs to be updated.
>
> [How]
> We currently read the constants from the gpu info FW, but supporting
> workarounds in DC for different versions of the FW adds additional
> complexity to the codebase.
>
> NV10 has been released so it's cleanest to keep the bounding box and
> source code in sync by embedding the bounding box like we do for
> other ASICs.
>
> Fixes: 02316e963a5a ("drm/amd/display: Force uclk to max for every state")
>
> Cc: Alex Deucher 
> Cc: Harry Wentland 
> Signed-off-by: Nicholas Kazlauskas 
> ---
>  .../drm/amd/display/dc/dcn20/dcn20_resource.c | 114 +-
>  1 file changed, 112 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 
> b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
> index 44537651f0a1..ff30f5cc4981 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
> @@ -80,7 +80,7 @@
>
>  #include "amdgpu_socbb.h"
>
> -#define SOC_BOUNDING_BOX_VALID false
> +#define SOC_BOUNDING_BOX_VALID true
>  #define DC_LOGGER_INIT(logger)
>
>  struct _vcs_dpi_ip_params_st dcn2_0_ip = {
> @@ -154,7 +154,117 @@ struct _vcs_dpi_ip_params_st dcn2_0_ip = {
> .xfc_fill_constant_bytes = 0,
>  };
>
> -struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = { 0 };
> +struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = {
> +   /* Defaults that get patched on driver load from firmware. */
> +   .clock_limits = {
> +   {
> +   .state = 0,
> +   .dcfclk_mhz = 560.0,
> +   .fabricclk_mhz = 560.0,
> +   .dispclk_mhz = 513.0,
> +   .dppclk_mhz = 513.0,
> +   .phyclk_mhz = 540.0,
> +   .socclk_mhz = 560.0,
> +   .dscclk_mhz = 171.0,
> +   .dram_speed_mts = 8960.0,
> +   },
> +   {
> +   .state = 1,
> +   .dcfclk_mhz = 694.0,
> +   .fabricclk_mhz = 694.0,
> +   .dispclk_mhz = 642.0,
> +   .dppclk_mhz = 642.0,
> +   .phyclk_mhz = 600.0,
> +   .socclk_mhz = 694.0,
> +   .dscclk_mhz = 214.0,
> +   .dram_speed_mts = 11104.0,
> +   },
> +   {
> +   .state = 2,
> +   .dcfclk_mhz = 875.0,
> +   .fabricclk_mhz = 875.0,
> +   .dispclk_mhz = 734.0,
> +   .dppclk_mhz = 734.0,
> +   .phyclk_mhz = 810.0,
> +   .socclk_mhz = 875.0,
> +   .dscclk_mhz = 245.0,
> +   .dram_speed_mts = 14000.0,
> +   },
> +   {
> +   .state = 3,
> +   .dcfclk_mhz = 1000.0,
> +   .fabricclk_mhz = 1000.0,
> +   .dispclk_mhz = 1100.0,
> +   .dppclk_mhz = 1100.0,
> +   .phyclk_mhz = 810.0,
> +   .socclk_mhz = 1000.0,
> +   .dscclk_mhz = 367.0,
> +   .dram_speed_mts = 16000.0,
> +   },
> +   {
> +   .state = 4,
> +   .dcfclk_mhz = 1200.0,
> +   .fabricclk_mhz = 1200.0,
> +   .dispclk_mhz = 1284.0,
> +   .dppclk_mhz = 1284.0,
> +   .phyclk_mhz = 810.0,
> +   .socclk_mhz = 1200.0,
> +   .dscclk_mhz = 428.0,
> +   .dram_speed_mts = 16000.0,
> +   },
> +   /*Extra state, no dispclk ramping*/
> +   {
> +   .state = 5,
> +   .dcfclk_mhz = 1200.0,
> +   .fabricclk_mhz = 1200.0,
> +   .dispclk_mhz = 1284.0,
> +   .dppclk_mhz = 1284.0,
> +   .phyclk_mhz = 810.0,
> +   .socclk_mhz = 1200.0,
> +   .dscclk_mhz = 428.0,
> + 

Re: [PATCH 2/2] drm/amd/display: Support uclk switching for DCN2

2019-07-30 Thread Alex Deucher
Series is:
Acked-by: Alex Deucher 

On Tue, Jul 30, 2019 at 9:58 AM Nicholas Kazlauskas
 wrote:
>
> [Why]
> We were previously forcing the uclk for every state to max and reducing
> the switch time to prevent uclk switching from occurring. This workaround
> was previously needed in order to avoid hangs + underflow under certain
> display configurations.
>
> Now that DC has the proper fix complete we can drop the hacks and
> improve power for most display configurations.
>
> [How]
> We still need the function pointers hooked up to grab the real uclk
> states from pplib. The rest of the prior hack can be reverted.
>
> The key requirements here are really just DC support, updated firmware,
> and support for disabling p-state support when needed in pplib/smu.
>
> When these requirements are met uclk switching works without underflow
> or hangs.
>
> Fixes: 02316e963a5a ("drm/amd/display: Force uclk to max for every state")
>
> Cc: Leo Li 
> Cc: Harry Wentland 
> Signed-off-by: Nicholas Kazlauskas 
> ---
>  drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 10 --
>  1 file changed, 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 
> b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
> index ff30f5cc4981..42d3666f2037 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
> @@ -2817,9 +2817,6 @@ static void cap_soc_clocks(
> && max_clocks.uClockInKhz != 
> 0)
> bb->clock_limits[i].dram_speed_mts = 
> (max_clocks.uClockInKhz / 1000) * 16;
>
> -   // HACK: Force every uclk to max for now to "disable" uclk 
> switching.
> -   bb->clock_limits[i].dram_speed_mts = (max_clocks.uClockInKhz 
> / 1000) * 16;
> -
> if ((bb->clock_limits[i].fabricclk_mhz > 
> (max_clocks.fabricClockInKhz / 1000))
> && 
> max_clocks.fabricClockInKhz != 0)
> bb->clock_limits[i].fabricclk_mhz = 
> (max_clocks.fabricClockInKhz / 1000);
> @@ -3035,8 +3032,6 @@ static bool init_soc_bounding_box(struct dc *dc,
> le32_to_cpu(bb->vmm_page_size_bytes);
> dcn2_0_soc.dram_clock_change_latency_us =
> 
> fixed16_to_double_to_cpu(bb->dram_clock_change_latency_us);
> -   // HACK!! Lower uclock latency switch time so we don't switch
> -   dcn2_0_soc.dram_clock_change_latency_us = 10;
> dcn2_0_soc.writeback_dram_clock_change_latency_us =
> 
> fixed16_to_double_to_cpu(bb->writeback_dram_clock_change_latency_us);
> dcn2_0_soc.return_bus_width_bytes =
> @@ -3078,7 +3073,6 @@ static bool init_soc_bounding_box(struct dc *dc,
> struct pp_smu_nv_clock_table max_clocks = {0};
> unsigned int uclk_states[8] = {0};
> unsigned int num_states = 0;
> -   int i;
> enum pp_smu_status status;
> bool clock_limits_available = false;
> bool uclk_states_available = false;
> @@ -3100,10 +3094,6 @@ static bool init_soc_bounding_box(struct dc *dc,
> clock_limits_available = (status == PP_SMU_RESULT_OK);
> }
>
> -   // HACK: Use the max uclk_states value for all elements.
> -   for (i = 0; i < num_states; i++)
> -   uclk_states[i] = uclk_states[num_states - 1];
> -
> if (clock_limits_available && uclk_states_available && 
> num_states)
> update_bounding_box(dc, &dcn2_0_soc, &max_clocks, 
> uclk_states, num_states);
> else if (clock_limits_available)
> --
> 2.17.1
>

[PATCH 1/2] drm/amd/display: Embed DCN2 SOC bounding box

2019-07-30 Thread Nicholas Kazlauskas
[Why]
In order to support uclk switching on NV10 the SOC bounding box
needs to be updated.

[How]
We currently read the constants from the gpu info FW, but supporting
workarounds in DC for different versions of the FW adds additional
complexity to the codebase.

NV10 has been released so it's cleanest to keep the bounding box and
source code in sync by embedding the bounding box like we do for
other ASICs.

Fixes: 02316e963a5a ("drm/amd/display: Force uclk to max for every state")

Cc: Alex Deucher 
Cc: Harry Wentland 
Signed-off-by: Nicholas Kazlauskas 
---
 .../drm/amd/display/dc/dcn20/dcn20_resource.c | 114 +-
 1 file changed, 112 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 44537651f0a1..ff30f5cc4981 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -80,7 +80,7 @@
 
 #include "amdgpu_socbb.h"
 
-#define SOC_BOUNDING_BOX_VALID false
+#define SOC_BOUNDING_BOX_VALID true
 #define DC_LOGGER_INIT(logger)
 
 struct _vcs_dpi_ip_params_st dcn2_0_ip = {
@@ -154,7 +154,117 @@ struct _vcs_dpi_ip_params_st dcn2_0_ip = {
.xfc_fill_constant_bytes = 0,
 };
 
-struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = { 0 };
+struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = {
+   /* Defaults that get patched on driver load from firmware. */
+   .clock_limits = {
+   {
+   .state = 0,
+   .dcfclk_mhz = 560.0,
+   .fabricclk_mhz = 560.0,
+   .dispclk_mhz = 513.0,
+   .dppclk_mhz = 513.0,
+   .phyclk_mhz = 540.0,
+   .socclk_mhz = 560.0,
+   .dscclk_mhz = 171.0,
+   .dram_speed_mts = 8960.0,
+   },
+   {
+   .state = 1,
+   .dcfclk_mhz = 694.0,
+   .fabricclk_mhz = 694.0,
+   .dispclk_mhz = 642.0,
+   .dppclk_mhz = 642.0,
+   .phyclk_mhz = 600.0,
+   .socclk_mhz = 694.0,
+   .dscclk_mhz = 214.0,
+   .dram_speed_mts = 11104.0,
+   },
+   {
+   .state = 2,
+   .dcfclk_mhz = 875.0,
+   .fabricclk_mhz = 875.0,
+   .dispclk_mhz = 734.0,
+   .dppclk_mhz = 734.0,
+   .phyclk_mhz = 810.0,
+   .socclk_mhz = 875.0,
+   .dscclk_mhz = 245.0,
+   .dram_speed_mts = 14000.0,
+   },
+   {
+   .state = 3,
+   .dcfclk_mhz = 1000.0,
+   .fabricclk_mhz = 1000.0,
+   .dispclk_mhz = 1100.0,
+   .dppclk_mhz = 1100.0,
+   .phyclk_mhz = 810.0,
+   .socclk_mhz = 1000.0,
+   .dscclk_mhz = 367.0,
+   .dram_speed_mts = 16000.0,
+   },
+   {
+   .state = 4,
+   .dcfclk_mhz = 1200.0,
+   .fabricclk_mhz = 1200.0,
+   .dispclk_mhz = 1284.0,
+   .dppclk_mhz = 1284.0,
+   .phyclk_mhz = 810.0,
+   .socclk_mhz = 1200.0,
+   .dscclk_mhz = 428.0,
+   .dram_speed_mts = 16000.0,
+   },
+   /*Extra state, no dispclk ramping*/
+   {
+   .state = 5,
+   .dcfclk_mhz = 1200.0,
+   .fabricclk_mhz = 1200.0,
+   .dispclk_mhz = 1284.0,
+   .dppclk_mhz = 1284.0,
+   .phyclk_mhz = 810.0,
+   .socclk_mhz = 1200.0,
+   .dscclk_mhz = 428.0,
+   .dram_speed_mts = 16000.0,
+   },
+   },
+   .num_states = 5,
+   .sr_exit_time_us = 8.6,
+   .sr_enter_plus_exit_time_us = 10.9,
+   .urgent_latency_us = 4.0,
+   .urgent_latency_pixel_data_only_us = 4.0,
+   

AMDGPU breaks suspend after kernel 5.0

2019-07-30 Thread Paul Gover
Hi Likun,

Sorry if you don't want emails like this.  I added info. to
https://bugs.freedesktop.org/show_bug.cgi?id=110258
but people on Gentoo forums said email would be better.

Git bisect led me to you:
---
106c7d6148e5aadd394e6701f7e498df49b869d1 is the first bad commit
commit 106c7d6148e5aadd394e6701f7e498df49b869d1
Author: Likun Gao 
Date:   Thu Nov 8 20:19:54 2018 +0800

drm/amdgpu: abstract the function of enter/exit safe mode for RLC

Abstract the function of amdgpu_gfx_rlc_enter/exit_safe_mode and some part 
of rlc_init to improve the reusability of RLC.

Signed-off-by: Likun Gao 
Acked-by: Christian König 
Reviewed-by: Alex Deucher 
Signed-off-by: Alex Deucher 

:04 04 8f3b365496f3bbd380a62032f20642ace51c8fef 
e14ec968011019e3f601df3f15682bb9ae0bafc6 M  drivers
-
Symptoms: when resuming after pm-suspend, the screen is blank or corrupt,
the keyboard is dead, and syslog shows

kernel: [   81.09] [drm:amdgpu_job_timedout] *ERROR* ring gfx timeout, 
signaled seq=51, emitted seq=52
kernel: [   81.096671] [drm] IP block:gfx_v8_0 is hung!
kernel: [   81.096734] [drm] GPU recovery disabled.
-
or similar.  The problem occurs with all kernels since 5.0 up to and including 
5.3-rc2.  My laptop is:

HP 15-bw0xx
cpu:AMD A9-9420 RADEON R5, 5 COMPUTE CORES 2C+3G
with integrated graphics:
Stoney [Radeon R2/R3/R4/R5 Graphics] [1002:98E4]

There are several similar reports on the web, most or all for Stoney hardware, 
but that might be a coincidence as laptop users are more concerned about 
suspend, and there are a lot of laptops with similar integrated graphics 
motherboards.

I'm running Gentoo with a custom kernel, the most relevant bits of the config
CONFIG_DRM_AMDGPU=y
# CONFIG_DRM_AMDGPU_SI is not set
# CONFIG_DRM_AMDGPU_CIK is not set
# CONFIG_DRM_AMDGPU_USERPTR is not set

If you tell me how, I'm willing to try to collect traces etc.

Paul Gover



Re: [PATCH 01/13] amdgpu: remove -EAGAIN handling for hmm_range_fault

2019-07-30 Thread Jason Gunthorpe
On Tue, Jul 30, 2019 at 08:51:51AM +0300, Christoph Hellwig wrote:
> hmm_range_fault can only return -EAGAIN if called with the block
> argument set to false, so remove the special handling for it.
> 
> Signed-off-by: Christoph Hellwig 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 23 +++
>  1 file changed, 3 insertions(+), 20 deletions(-)

Reviewed-by: Jason Gunthorpe 

Jason

Re: [PATCH 03/13] nouveau: pass struct nouveau_svmm to nouveau_range_fault

2019-07-30 Thread Christoph Hellwig
On Tue, Jul 30, 2019 at 12:35:59PM +, Jason Gunthorpe wrote:
> On Tue, Jul 30, 2019 at 08:51:53AM +0300, Christoph Hellwig wrote:
> > This avoids having to abuse the vma field in struct hmm_range to unlock
> > the mmap_sem.
> 
> I think the change inside hmm_range_fault got lost on rebase, it is
> now using:
> 
> up_read(&range->hmm->mm->mmap_sem);
> 
> But, yes, let's change it to use svmm->mm and try to keep struct hmm
> opaque to drivers

It got lost somewhat intentionally as I didn't want the churn, but I
forgot to update the changelog.  But if you are fine with changing it
over I can bring it back.

Re: [PATCH 03/13] nouveau: pass struct nouveau_svmm to nouveau_range_fault

2019-07-30 Thread Jason Gunthorpe
On Tue, Jul 30, 2019 at 03:10:38PM +0200, Christoph Hellwig wrote:
> On Tue, Jul 30, 2019 at 12:35:59PM +, Jason Gunthorpe wrote:
> > On Tue, Jul 30, 2019 at 08:51:53AM +0300, Christoph Hellwig wrote:
> > > This avoids having to abuse the vma field in struct hmm_range to unlock
> > > the mmap_sem.
> > 
> > I think the change inside hmm_range_fault got lost on rebase, it is
> > now using:
> > 
> > up_read(&range->hmm->mm->mmap_sem);
> > 
> > But, yes, let's change it to use svmm->mm and try to keep struct hmm
> > opaque to drivers
> 
> It got lost somewhat intentionally as I didn't want the churn, but I
> forgot to update the changelog.  But if you are fine with changing it
> over I can bring it back.

I have a patch deleting hmm->mm, so using svmm seems cleaner churn
here, we could defer and I can fold this into that patch?

Jason

Re: [PATCH 07/13] mm: remove the page_shift member from struct hmm_range

2019-07-30 Thread Christoph Hellwig
On Tue, Jul 30, 2019 at 12:55:17PM +, Jason Gunthorpe wrote:
> I suspect this was added for the ODP conversion that does use both
> page sizes. I think the ODP code for this is kind of broken, but I
> haven't delved into that..
> 
> The challenge is that the driver needs to know what page size to
> configure the hardware before it does any range stuff.
> 
> The other challenge is that the HW is configured to do only one page
> size, and if the underlying CPU page side changes it goes south.
> 
> What I would prefer is if the driver could somehow dynamically adjust
> the the page size after each dma map, but I don't know if ODP HW can
> do that.
> 
> Since this is all driving toward making ODP use this maybe we should
> keep this API? 
> 
> I'm not sure I can loose the crappy huge page support in ODP.

The problem is that I see no way to use the current API.  To know
the huge page size you need to have the vma, and the current API
doesn't require a vma to be passed in.

That's why I suggested an api where we pass in a flag that huge pages
are ok into hmm_range_fault, and it then could pass the shift out and
limit itself to a single vma (which it normally doesn't; that is an
additional complication).  But all this seems really awkward in terms
of an API still.  AFAIK ODP is only used by mlx5, and mlx5 unlike other
IB HCAs can use scatterlist style MRs with variable length per entry,
so even if we pass multiple pages per entry from hmm it could coalesce
them.  The best API for mlx5 would of course be to pass a biovec-style
variable length structure that hmm_fault could fill out, but that would
be a major restructure.
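To make the trade-off concrete, the interface shape being debated would look
roughly like this; the flag and the extra parameter are hypothetical, not
upstream API:

	/* caller opts in to huge pages; the core reports the shift it used */
	#define HMM_FAULT_ALLOW_HUGEPAGE	(1UL << 0)	/* hypothetical */

	long hmm_range_fault(struct hmm_range *range, unsigned long flags,
			     unsigned int *page_shift);

with the caveat noted above: such a walk would have to limit itself to a
single vma, since the huge page size is a property of the vma.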

amdgpu: BUG: NULL pointer dereference and memory leak

2019-07-30 Thread 亿一
Hi all,
While analyzing the source code, I noticed that the function
amdgpu_cs_process_fence_dep() may contain a NULL pointer dereference and
a memory leak in the following code fragment:


fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);

if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
	struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
	struct dma_fence *old = fence;

	fence = dma_fence_get(&s_fence->scheduled);
	dma_fence_put(old);
}

if (IS_ERR(fence)) {
	r = PTR_ERR(fence);
	amdgpu_ctx_put(ctx);
	return r;
} else if (fence) {
	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence,
			      true);
	dma_fence_put(fence);
	amdgpu_ctx_put(ctx);
	if (r)
		return r;
}

The function amdgpu_ctx_get_fence() may return a NULL pointer, which
will cause a NULL pointer dereference here. What's more, IS_ERR() does
not return true when the pointer is NULL, so the ctx reference is
leaked. I don't know how to fix it myself, so I'm reporting it to you
all.

Best Regards.
Lin Yi.
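For the record, the safe ordering implied by this report (and what the
"fix error handling in amdgpu_cs_process_fence_dep" patch later in this
digest implements) is roughly:

	fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
	amdgpu_ctx_put(ctx);		/* drop the ctx ref unconditionally */

	if (IS_ERR(fence))
		return PTR_ERR(fence);	/* error pointer: nothing to put */
	else if (!fence)
		continue;		/* NULL: no fence to wait on */

	/* only now is it safe to dereference the fence */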

Re: [PATCH 03/13] nouveau: pass struct nouveau_svmm to nouveau_range_fault

2019-07-30 Thread Jason Gunthorpe
On Tue, Jul 30, 2019 at 08:51:53AM +0300, Christoph Hellwig wrote:
> This avoids having to abuse the vma field in struct hmm_range to unlock
> the mmap_sem.

I think the change inside hmm_range_fault got lost on rebase, it is
now using:

up_read(&range->hmm->mm->mmap_sem);

But, yes, let's change it to use svmm->mm and try to keep struct hmm
opaque to drivers

Jason
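Concretely, the suggested change is tiny. A sketch, assuming svmm->mm caches
the mm_struct the driver already tracks:

	/* before: reaches through struct hmm internals */
	up_read(&range->hmm->mm->mmap_sem);

	/* after: use the driver's own pointer, keep struct hmm opaque */
	up_read(&svmm->mm->mmap_sem);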

Re: [PATCH 02/13] amdgpu: don't initialize range->list in amdgpu_hmm_init_range

2019-07-30 Thread Jason Gunthorpe
On Tue, Jul 30, 2019 at 08:51:52AM +0300, Christoph Hellwig wrote:
> The list is used to add the range to another list as an entry in the
> core hmm code, so there is no need to initialize it in a driver.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 1 -
>  1 file changed, 1 deletion(-)

Reviewed-by: Jason Gunthorpe 

Jason

Re: [PATCH v6 16/24] drm: sti: Provide ddc symlink in hdmi connector sysfs directory

2019-07-30 Thread Benjamin Gaignard
On Fri, 26 Jul 2019 at 19:27, Andrzej Pietrasiewicz wrote:
>
> Use the ddc pointer provided by the generic connector.
>
> Signed-off-by: Andrzej Pietrasiewicz 

Reviewed-by: Benjamin Gaignard 

> ---
>  drivers/gpu/drm/sti/sti_hdmi.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c
> index f03d617edc4c..33d06e0a9168 100644
> --- a/drivers/gpu/drm/sti/sti_hdmi.c
> +++ b/drivers/gpu/drm/sti/sti_hdmi.c
> @@ -1284,8 +1284,10 @@ static int sti_hdmi_bind(struct device *dev, struct 
> device *master, void *data)
>
> drm_connector->polled = DRM_CONNECTOR_POLL_HPD;
>
> -	drm_connector_init(drm_dev, drm_connector,
> -			   &sti_hdmi_connector_funcs, DRM_MODE_CONNECTOR_HDMIA);
> +	drm_connector_init_with_ddc(drm_dev, drm_connector,
> +				    &sti_hdmi_connector_funcs,
> +				    DRM_MODE_CONNECTOR_HDMIA,
> +				    hdmi->ddc_adapt);
> 	drm_connector_helper_add(drm_connector,
> 				 &sti_hdmi_connector_helper_funcs);
>
> --
> 2.17.1
>
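For readers following the series: drm_connector_init_with_ddc() is the
generic-connector helper all of these conversions switch to. Its signature,
as added by the series (shown here for reference, reconstructed from the
posted patches):

	int drm_connector_init_with_ddc(struct drm_device *dev,
					struct drm_connector *connector,
					const struct drm_connector_funcs *funcs,
					int connector_type,
					struct i2c_adapter *ddc);

It behaves like drm_connector_init() but also records the ddc adapter on the
connector, which is what lets the core create the sysfs symlink.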

Re: [PATCH v6 21/24] drm/bridge: ti-tfp410: Provide ddc symlink in connector sysfs directory

2019-07-30 Thread Neil Armstrong
On 26/07/2019 19:23, Andrzej Pietrasiewicz wrote:
> Use the ddc pointer provided by the generic connector.
> 
> Signed-off-by: Andrzej Pietrasiewicz 
> ---
>  drivers/gpu/drm/bridge/ti-tfp410.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c 
> b/drivers/gpu/drm/bridge/ti-tfp410.c
> index dbf35c7bc85e..61cc2354ef1b 100644
> --- a/drivers/gpu/drm/bridge/ti-tfp410.c
> +++ b/drivers/gpu/drm/bridge/ti-tfp410.c
> @@ -134,8 +134,10 @@ static int tfp410_attach(struct drm_bridge *bridge)
>  
>  	drm_connector_helper_add(&dvi->connector,
>  				 &tfp410_con_helper_funcs);
> -	ret = drm_connector_init(bridge->dev, &dvi->connector,
> -				 &tfp410_con_funcs, dvi->connector_type);
> +	ret = drm_connector_init_with_ddc(bridge->dev, &dvi->connector,
> +					  &tfp410_con_funcs,
> +					  dvi->connector_type,
> +					  dvi->ddc);
>   if (ret) {
>   dev_err(dvi->dev, "drm_connector_init() failed: %d\n", ret);
>   return ret;
> 

Reviewed-by: Neil Armstrong 

Re: [PATCH v6 19/24] drm/bridge: dumb-vga-dac: Provide ddc symlink in connector sysfs directory

2019-07-30 Thread Neil Armstrong
On 26/07/2019 19:23, Andrzej Pietrasiewicz wrote:
> Use the ddc pointer provided by the generic connector.
> 
> Signed-off-by: Andrzej Pietrasiewicz 
> ---
>  drivers/gpu/drm/bridge/dumb-vga-dac.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c 
> b/drivers/gpu/drm/bridge/dumb-vga-dac.c
> index d32885b906ae..8ef6539ae78a 100644
> --- a/drivers/gpu/drm/bridge/dumb-vga-dac.c
> +++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c
> @@ -111,8 +111,10 @@ static int dumb_vga_attach(struct drm_bridge *bridge)
>  
>  	drm_connector_helper_add(&vga->connector,
>  				 &dumb_vga_con_helper_funcs);
> -	ret = drm_connector_init(bridge->dev, &vga->connector,
> -				 &dumb_vga_con_funcs, DRM_MODE_CONNECTOR_VGA);
> +	ret = drm_connector_init_with_ddc(bridge->dev, &vga->connector,
> +					  &dumb_vga_con_funcs,
> +					  DRM_MODE_CONNECTOR_VGA,
> +					  vga->ddc);
>   if (ret) {
>   DRM_ERROR("Failed to initialize connector\n");
>   return ret;
> 

Reviewed-by: Neil Armstrong 

Re: [PATCH v4 20/23] drm/bridge: ti-tfp410: Provide ddc symlink in connector sysfs directory

2019-07-30 Thread Neil Armstrong
On 11/07/2019 13:26, Andrzej Pietrasiewicz wrote:
> Use the ddc pointer provided by the generic connector.
> 
> Signed-off-by: Andrzej Pietrasiewicz 
> ---
>  drivers/gpu/drm/bridge/ti-tfp410.c | 19 +--
>  1 file changed, 9 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c 
> b/drivers/gpu/drm/bridge/ti-tfp410.c
> index dbf35c7bc85e..e55358f0a5ba 100644
> --- a/drivers/gpu/drm/bridge/ti-tfp410.c
> +++ b/drivers/gpu/drm/bridge/ti-tfp410.c
> @@ -26,7 +26,6 @@ struct tfp410 {
>   unsigned intconnector_type;
>  
>   u32 bus_format;
> - struct i2c_adapter  *ddc;
>   struct gpio_desc*hpd;
>   int hpd_irq;
>   struct delayed_work hpd_work;
> @@ -55,10 +54,10 @@ static int tfp410_get_modes(struct drm_connector 
> *connector)
>   struct edid *edid;
>   int ret;
>  
> - if (!dvi->ddc)
> + if (!dvi->connector.ddc)
>   goto fallback;
>  
> - edid = drm_get_edid(connector, dvi->ddc);
> + edid = drm_get_edid(connector, dvi->connector.ddc);
>   if (!edid) {
>   DRM_INFO("EDID read failed. Fallback to standard modes\n");
>   goto fallback;
> @@ -98,8 +97,8 @@ tfp410_connector_detect(struct drm_connector *connector, 
> bool force)
>   return connector_status_disconnected;
>   }
>  
> - if (dvi->ddc) {
> - if (drm_probe_ddc(dvi->ddc))
> + if (dvi->connector.ddc) {
> + if (drm_probe_ddc(dvi->connector.ddc))
>   return connector_status_connected;
>   else
>   return connector_status_disconnected;
> @@ -297,8 +296,8 @@ static int tfp410_get_connector_properties(struct tfp410 
> *dvi)
>   if (!ddc_phandle)
>   goto fail;
>  
> - dvi->ddc = of_get_i2c_adapter_by_node(ddc_phandle);
> - if (dvi->ddc)
> + dvi->connector.ddc = of_get_i2c_adapter_by_node(ddc_phandle);
> + if (dvi->connector.ddc)
>   dev_info(dvi->dev, "Connector's ddc i2c bus found\n");
>   else
>   ret = -EPROBE_DEFER;
> @@ -367,7 +366,7 @@ static int tfp410_init(struct device *dev, bool i2c)
>  
>   return 0;
>  fail:
> - i2c_put_adapter(dvi->ddc);
> + i2c_put_adapter(dvi->connector.ddc);
>   if (dvi->hpd)
>   gpiod_put(dvi->hpd);
>   return ret;
> @@ -382,8 +381,8 @@ static int tfp410_fini(struct device *dev)
>  
> 	drm_bridge_remove(&dvi->bridge);
>  
> - if (dvi->ddc)
> - i2c_put_adapter(dvi->ddc);
> + if (dvi->connector.ddc)
> + i2c_put_adapter(dvi->connector.ddc);
>   if (dvi->hpd)
>   gpiod_put(dvi->hpd);
>  
> 

Reviewed-by: Neil Armstrong 

Re: [PATCH v4 19/23] drm/bridge: dw-hdmi: Provide ddc symlink in connector sysfs directory

2019-07-30 Thread Neil Armstrong
On 11/07/2019 13:26, Andrzej Pietrasiewicz wrote:
> Use the ddc pointer provided by the generic connector.
> 
> Signed-off-by: Andrzej Pietrasiewicz 
> ---
>  drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 40 +++
>  1 file changed, 20 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c 
> b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
> index c6490949d9db..0b9c9f2619da 100644
> --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
> +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
> @@ -161,7 +161,6 @@ struct dw_hdmi {
>  
>   struct drm_display_mode previous_mode;
>  
> - struct i2c_adapter *ddc;
>   void __iomem *regs;
>   bool sink_is_hdmi;
>   bool sink_has_audio;
> @@ -1118,7 +1117,7 @@ static bool dw_hdmi_support_scdc(struct dw_hdmi *hdmi)
>   return false;
>  
>   /* Disable if no DDC bus */
> - if (!hdmi->ddc)
> + if (!hdmi->connector.ddc)
>   return false;
>  
>   /* Disable if SCDC is not supported, or if an HF-VSDB block is absent */
> @@ -1156,10 +1155,11 @@ void dw_hdmi_set_high_tmds_clock_ratio(struct dw_hdmi 
> *hdmi)
>  
>   /* Control for TMDS Bit Period/TMDS Clock-Period Ratio */
>   if (dw_hdmi_support_scdc(hdmi)) {
> + struct i2c_adapter *ddc = hdmi->connector.ddc;
>   if (mtmdsclock > HDMI14_MAX_TMDSCLK)
> - drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 1);
> + drm_scdc_set_high_tmds_clock_ratio(ddc, 1);
>   else
> - drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 0);
> + drm_scdc_set_high_tmds_clock_ratio(ddc, 0);
>   }
>  }
>  EXPORT_SYMBOL_GPL(dw_hdmi_set_high_tmds_clock_ratio);
> @@ -1750,6 +1750,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
>   if (dw_hdmi_support_scdc(hdmi)) {
>   if (vmode->mtmdsclock > HDMI14_MAX_TMDSCLK ||
>   hdmi_info->scdc.scrambling.low_rates) {
> + struct i2c_adapter *ddc = hdmi->connector.ddc;
>   /*
>* HDMI2.0 Specifies the following procedure:
>* After the Source Device has determined that
> @@ -1759,13 +1760,12 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
>* Source Devices compliant shall set the
>* Source Version = 1.
>*/
> -		drm_scdc_readb(hdmi->ddc, SCDC_SINK_VERSION,
> -			       &bytes);
> +		drm_scdc_readb(ddc, SCDC_SINK_VERSION, &bytes);
> + drm_scdc_readb(ddc, SCDC_SINK_VERSION, );
> + drm_scdc_writeb(ddc, SCDC_SOURCE_VERSION,
>   min_t(u8, bytes, SCDC_MIN_SOURCE_VERSION));
>  
>   /* Enabled Scrambling in the Sink */
> - drm_scdc_set_scrambling(hdmi->ddc, 1);
> + drm_scdc_set_scrambling(hdmi->connector.ddc, 1);
>  
>   /*
>* To activate the scrambler feature, you must ensure
> @@ -1781,7 +1781,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
>   hdmi_writeb(hdmi, 0, HDMI_FC_SCRAMBLER_CTRL);
>   hdmi_writeb(hdmi, (u8)~HDMI_MC_SWRSTZ_TMDSSWRST_REQ,
>   HDMI_MC_SWRSTZ);
> - drm_scdc_set_scrambling(hdmi->ddc, 0);
> + drm_scdc_set_scrambling(hdmi->connector.ddc, 0);
>   }
>   }
>  
> @@ -2127,10 +2127,10 @@ static int dw_hdmi_connector_get_modes(struct 
> drm_connector *connector)
>   struct edid *edid;
>   int ret = 0;
>  
> - if (!hdmi->ddc)
> + if (!hdmi->connector.ddc)
>   return 0;
>  
> - edid = drm_get_edid(connector, hdmi->ddc);
> + edid = drm_get_edid(connector, hdmi->connector.ddc);
>   if (edid) {
>   dev_dbg(hdmi->dev, "got edid: width[%d] x height[%d]\n",
>   edid->width_cm, edid->height_cm);
> @@ -2548,9 +2548,9 @@ __dw_hdmi_probe(struct platform_device *pdev,
>  
>   ddc_node = of_parse_phandle(np, "ddc-i2c-bus", 0);
>   if (ddc_node) {
> - hdmi->ddc = of_get_i2c_adapter_by_node(ddc_node);
> + hdmi->connector.ddc = of_get_i2c_adapter_by_node(ddc_node);
>   of_node_put(ddc_node);
> - if (!hdmi->ddc) {
> + if (!hdmi->connector.ddc) {
>   dev_dbg(hdmi->dev, "failed to read ddc node\n");
>   return ERR_PTR(-EPROBE_DEFER);
>   }
> @@ -2689,7 +2689,7 @@ __dw_hdmi_probe(struct platform_device *pdev,
>   hdmi_init_clk_regenerator(hdmi);
>  
>   /* If DDC bus is not specified, try to register HDMI I2C bus */
> - if (!hdmi->ddc) {
> + if (!hdmi->connector.ddc) {
>   /* Look for (optional) stuff related to unwedging */
>  

Re: [PATCH v4 18/23] drm/bridge: dumb-vga-dac: Provide ddc symlink in connector sysfs directory

2019-07-30 Thread Neil Armstrong
On 11/07/2019 13:26, Andrzej Pietrasiewicz wrote:
> Use the ddc pointer provided by the generic connector.
> 
> Signed-off-by: Andrzej Pietrasiewicz 
> ---
>  drivers/gpu/drm/bridge/dumb-vga-dac.c | 19 +--
>  1 file changed, 9 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c 
> b/drivers/gpu/drm/bridge/dumb-vga-dac.c
> index d32885b906ae..b4cc3238400a 100644
> --- a/drivers/gpu/drm/bridge/dumb-vga-dac.c
> +++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c
> @@ -20,7 +20,6 @@ struct dumb_vga {
>   struct drm_bridge   bridge;
>   struct drm_connectorconnector;
>  
> - struct i2c_adapter  *ddc;
>   struct regulator*vdd;
>  };
>  
> @@ -42,10 +41,10 @@ static int dumb_vga_get_modes(struct drm_connector 
> *connector)
>   struct edid *edid;
>   int ret;
>  
> - if (IS_ERR(vga->ddc))
> + if (IS_ERR(vga->connector.ddc))
>   goto fallback;
>  
> - edid = drm_get_edid(connector, vga->ddc);
> + edid = drm_get_edid(connector, vga->connector.ddc);
>   if (!edid) {
>   DRM_INFO("EDID readout failed, falling back to standard 
> modes\n");
>   goto fallback;
> @@ -84,7 +83,7 @@ dumb_vga_connector_detect(struct drm_connector *connector, 
> bool force)
>* wire the DDC pins, or the I2C bus might not be working at
>* all.
>*/
> - if (!IS_ERR(vga->ddc) && drm_probe_ddc(vga->ddc))
> + if (!IS_ERR(vga->connector.ddc) && drm_probe_ddc(vga->connector.ddc))
>   return connector_status_connected;
>  
>   return connector_status_unknown;
> @@ -190,14 +189,14 @@ static int dumb_vga_probe(struct platform_device *pdev)
> 		dev_dbg(&pdev->dev, "No vdd regulator found: %d\n", ret);
> 	}
>  
> -	vga->ddc = dumb_vga_retrieve_ddc(&pdev->dev);
> -	if (IS_ERR(vga->ddc)) {
> -		if (PTR_ERR(vga->ddc) == -ENODEV) {
> +	vga->connector.ddc = dumb_vga_retrieve_ddc(&pdev->dev);
> +	if (IS_ERR(vga->connector.ddc)) {
> +		if (PTR_ERR(vga->connector.ddc) == -ENODEV) {
> 			dev_dbg(&pdev->dev,
> 				"No i2c bus specified. Disabling EDID readout\n");
> 		} else {
> 			dev_err(&pdev->dev, "Couldn't retrieve i2c bus\n");
> - return PTR_ERR(vga->ddc);
> + return PTR_ERR(vga->connector.ddc);
>   }
>   }
>  
> @@ -216,8 +215,8 @@ static int dumb_vga_remove(struct platform_device *pdev)
>  
> 	drm_bridge_remove(&vga->bridge);
>  
> - if (!IS_ERR(vga->ddc))
> - i2c_put_adapter(vga->ddc);
> + if (!IS_ERR(vga->connector.ddc))
> + i2c_put_adapter(vga->connector.ddc);
>  
>   return 0;
>  }
> 

Reviewed-by: Neil Armstrong 

[GIT PULL] Please pull hmm changes

2019-07-30 Thread Jason Gunthorpe
Hi Linus,

Locking fix for nouveau's use of HMM

This small series was posted by Christoph before the merge window, but didn't
make it in time for the PR. It fixes various locking errors in the nouveau
driver's use of the hmm_range_* functions.

The diffstat is a bit big as Christoph did a comprehensive job to move the
obsolete API from the core header and into the driver before fixing its flow,
but the risk of regression from this code motion is low.

I don't intend to often send -rc patches for hmm, but this is entangled with
other changes already, so it is simpler to keep it on the hmm git branch.

Thanks,
Jason

The following changes since commit 5f9e832c137075045d15cd6899ab0505cfb2ca4b:

  Linux 5.3-rc1 (2019-07-21 14:05:38 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git tags/for-linus-hmm

for you to fetch changes up to de4ee728465f7c0c29241550e083139b2ce9159c:

  nouveau: unlock mmap_sem on all errors from nouveau_range_fault (2019-07-25 
16:14:40 -0300)


HMM patches for 5.3-rc

Fix the locking around nouveau's use of the hmm_range_* APIs. It works
correctly in the success case, but many of the edge cases have missing
unlocks or double unlocks.


Christoph Hellwig (4):
  mm/hmm: always return EBUSY for invalid ranges in 
hmm_range_{fault,snapshot}
  mm/hmm: move hmm_vma_range_done and hmm_vma_fault to nouveau
  nouveau: remove the block parameter to nouveau_range_fault
  nouveau: unlock mmap_sem on all errors from nouveau_range_fault

 Documentation/vm/hmm.rst  |  2 +-
 drivers/gpu/drm/nouveau/nouveau_svm.c | 47 --
 include/linux/hmm.h   | 54 ---
 mm/hmm.c  | 10 +++
 4 files changed, 49 insertions(+), 64 deletions(-)




Re: [PATCH] drm/amdgpu: fix a potential information leaking bug

2019-07-30 Thread Christian König

On 27.07.19 at 11:37, Chunming Zhou wrote:

On 2019/7/27 17:30, Wang Xiayang wrote:

Coccinelle reports a path on which the array "data" is never initialized.
The path skips the checks in the conditional branches when either
of callback functions, read_wave_vgprs and read_wave_sgprs, is not
registered. Later, the uninitialized "data" array is read
in the while-loop below and passed to put_user().

Fix the path by allocating the array with kcalloc().

The patch is simpler than adding a fall-back branch that explicitly
calls memset(data, 0, ...). Also it does not need the multiplication
1024*sizeof(*data) as the size parameter for memset() though there is
no risk of integer overflow.

Signed-off-by: Wang Xiayang 

Reviewed-by: Chunming Zhou 


Picked that up, reviewed it and pushed it into our internal branch.

Christian.



-David


---
   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 6d54decef7f8..5652cc72ed3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -707,7 +707,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char 
__user *buf,
thread = (*pos & GENMASK_ULL(59, 52)) >> 52;
bank = (*pos & GENMASK_ULL(61, 60)) >> 60;
   
-	data = kmalloc_array(1024, sizeof(*data), GFP_KERNEL);

+   data = kcalloc(1024, sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
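In short, kcalloc(n, size, flags) returns zeroed memory and checks the
n * size multiplication for overflow in one call, so the uninitialized-read
path Coccinelle found cannot happen:

	u32 *data = kcalloc(1024, sizeof(*data), GFP_KERNEL);	/* zeroed */
	if (!data)
		return -ENOMEM;
	/* any element never written later reads back as 0 */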
   


Re: [RFC PATCH 0/3] Propose new struct drm_mem_region

2019-07-30 Thread Daniel Vetter
On Tue, Jul 30, 2019 at 12:24 PM Koenig, Christian
 wrote:
>
> > On 30.07.19 at 11:38, Daniel Vetter wrote:
> > On Tue, Jul 30, 2019 at 08:45:57AM +, Koenig, Christian wrote:
> >> Yeah, that looks like a good start. Just a couple of random design
> >> comments/requirements.
> >>
> >> First of all please restructure the changes so that you more or less
> >> have the following:
> >> 1. Adding of the new structures and functionality without any change to
> >> existing code.
> >> 2. Replacing the existing functionality in TTM and all of its drivers.
> >> 3. Replacing the existing functionality in i915.
> >>
> >> This should make it much easier to review the new functionality when it
> >> is not mixed with existing TTM stuff.
> >>
> >>
> >> Second please completely drop the concept of gpu_offset or start of the
> >> memory region like here:
> >>> drm_printf(p, "gpu_offset: 0x%08llX\n", man->region.start);
> >> At least on AMD hardware we have the following address spaces which are
> >> sometimes even partially overlapping: VM, MC, SYSTEM, FB, AGP, XGMI, bus
> >> addresses and physical addresses.
> >>
> >> Pushing a concept of a general GPU address space into the memory
> >> management was a rather bad design mistake in TTM and we should not
> >> repeat that here.
> >>
> >> A region should only consist of a size in bytes and (internal to the
> >> region manager) allocations in that region.
> >>
> >>
> >> Third please don't use any CPU or architecture specific types in any
> >> data structures:
> >>> +struct drm_mem_region {
> >>> +   resource_size_t start; /* within GPU physical address space */
> >>> +   resource_size_t io_start; /* BAR address (CPU accessible) */
> >>> +   resource_size_t size;
> >> I knew that resource_size is mostly 64bit on modern architectures, but
> >> dGPUs are completely separate to the architecture and we always need
> >> 64bits here at least for AMD hardware.
> >>
> >> So this should either be always uint64_t, or something like
> >> gpu_resource_size which depends on what the compiled in drivers require
> >> if we really need that.
> >>
> >> And by the way: Please always use bytes for things like sizes and not
> >> number of pages, cause page size is again CPU/architecture specific and
> >> GPU drivers don't necessarily care about that.
> >>
> >>
> >> And here also a few direct comments on the code:
> >>> +   union {
> >>> +   struct drm_mm *mm;
> >>> +   /* FIXME (for i915): struct drm_buddy_mm *buddy_mm; */
> >>> +   void *priv;
> >>> +   };
> >> Maybe just always use void *mm here.
> >>
> >>> +   spinlock_t move_lock;
> >>> +   struct dma_fence *move;
> >> That is TTM specific and I'm not sure if we want it in the common memory
> >> management handling.
> >>
> >> If we want that here we should probably replace the lock with some rcu
> >> and atomic fence pointer exchange first.
> >>
> >>> +/*
> >>> + * Memory types for drm_mem_region
> >>> + */
> >> #define DRM_MEM_SWAP?
> > btw what did you have in mind for this? Since we use shmem we kinda don't
> > know whether the BO is actually swapped out or not, at least on the i915
> > side. So this would be more NOT_CURRENTLY_PINNED_AND_POSSIBLY_SWAPPED_OUT.
>
> Yeah, the problem is not everybody can use shmem. For some use cases you
> have to use memory allocated through dma_alloc_coherent().
>
> So to be able to swap this out you need a separate domain to copy it
> from whatever is backing it currently to shmem.
>
> So we essentially have:
> DRM_MEM_SYS_SWAPABLE
> DRM_MEM_SYS_NOT_GPU_MAPPED
> DRM_MEM_SYS_GPU_MAPPED
>
> Or something like that.

Yeah i915-gem is similar. We opportunistically keep the pages pinned
sometimes even if not currently mapped into the (what ttm calls) TT.
So I think these three for system memory make sense for us too. I
think that's similar (at least in spirit) to the dma_alloc cache you
have going on. Maybe instead of the somewhat cumbersome NOT_GPU_MAPPED
we could have something like PINNED or so. Although it's not
permanently pinned, so maybe that's confusing too.
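To make the proposal concrete, the three system-memory domains listed above
might be encoded like this (names and values purely illustrative):

	#define DRM_MEM_SYS_SWAPABLE		0	/* shmem-backed, may be swapped out */
	#define DRM_MEM_SYS_NOT_GPU_MAPPED	1	/* e.g. dma_alloc_coherent() backing */
	#define DRM_MEM_SYS_GPU_MAPPED		2	/* pinned and currently GPU-visible */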

> >> TTM was clearly missing that resulting in a whole bunch of extra
> >> handling and rather complicated handling.
> >>
> >>> +#define DRM_MEM_SYSTEM 0
> >>> +#define DRM_MEM_STOLEN 1
> >> I think we need a better naming for that.
> >>
> >> STOLEN sounds way too much like stolen VRAM for integrated GPUs, but at
> >> least for TTM this is the system memory currently GPU accessible.
> > Yup this is wrong, for i915 we use this as stolen, for ttm it's the gpu
> > translation table window into system memory. Not the same thing at all.
>
> Thought so. The closest I have in mind is GTT, but everything else works
> as well.

Would your GPU_MAPPED above work for TT? I think we'll also need
STOLEN, I'm even hearing noises that there's going to be stolen for
discrete vram for us ... Also if we expand I guess we need to teach
ttm to cope with more, or maybe treat the DRM one as some kind of
sub-flavour.
-Daniel

>
> 

Re: [RFC PATCH 0/3] Propose new struct drm_mem_region

2019-07-30 Thread Daniel Vetter
On Tue, Jul 30, 2019 at 08:45:57AM +, Koenig, Christian wrote:
> Yeah, that looks like a good start. Just a couple of random design 
> comments/requirements.
> 
> First of all please restructure the changes so that you more or less 
> have the following:
> 1. Adding of the new structures and functionality without any change to 
> existing code.
> 2. Replacing the existing functionality in TTM and all of its drivers.
> 3. Replacing the existing functionality in i915.
> 
> This should make it much easier to review the new functionality when it 
> is not mixed with existing TTM stuff.
> 
> 
> Second please completely drop the concept of gpu_offset or start of the 
> memory region like here:
> > drm_printf(p, "gpu_offset: 0x%08llX\n", man->region.start);
> At least on AMD hardware we have the following address spaces which are 
> sometimes even partially overlapping: VM, MC, SYSTEM, FB, AGP, XGMI, bus 
> addresses and physical addresses.
> 
> Pushing a concept of a general GPU address space into the memory 
> management was a rather bad design mistake in TTM and we should not 
> repeat that here.
> 
> A region should only consist of a size in bytes and (internal to the 
> region manager) allocations in that region.
> 
> 
> Third please don't use any CPU or architecture specific types in any 
> data structures:
> > +struct drm_mem_region {
> > +   resource_size_t start; /* within GPU physical address space */
> > +   resource_size_t io_start; /* BAR address (CPU accessible) */
> > +   resource_size_t size;
> 
> I knew that resource_size is mostly 64bit on modern architectures, but 
> dGPUs are completely separate to the architecture and we always need 
> 64bits here at least for AMD hardware.
> 
> So this should either be always uint64_t, or something like 
> gpu_resource_size which depends on what the compiled in drivers require 
> if we really need that.
> 
> And by the way: Please always use bytes for things like sizes and not 
> number of pages, cause page size is again CPU/architecture specific and 
> GPU drivers don't necessarily care about that.
> 
> 
> And here also a few direct comments on the code:
> > +   union {
> > +   struct drm_mm *mm;
> > +   /* FIXME (for i915): struct drm_buddy_mm *buddy_mm; */
> > +   void *priv;
> > +   };
> Maybe just always use void *mm here.
> 
> > +   spinlock_t move_lock;
> > +   struct dma_fence *move;
> 
> That is TTM specific and I'm not sure if we want it in the common memory 
> management handling.
> 
> If we want that here we should probably replace the lock with some rcu 
> and atomic fence pointer exchange first.
> 
> > +/*
> > + * Memory types for drm_mem_region
> > + */
> 
> #define DRM_MEM_SWAP    ?

btw what did you have in mind for this? Since we use shmem we kinda don't
know whether the BO is actually swapped out or not, at least on the i915
side. So this would be more NOT_CURRENTLY_PINNED_AND_POSSIBLY_SWAPPED_OUT.

> TTM was clearly missing that resulting in a whole bunch of extra 
> handling and rather complicated handling.
> 
> > +#define DRM_MEM_SYSTEM 0
> > +#define DRM_MEM_STOLEN 1
> 
> I think we need a better naming for that.
> 
> STOLEN sounds way too much like stolen VRAM for integrated GPUs, but at 
> least for TTM this is the system memory currently GPU accessible.

Yup this is wrong, for i915 we use this as stolen, for ttm it's the gpu
translation table window into system memory. Not the same thing at all.
-Daniel

> 
> 
> Thanks for looking into that,
> Christian.
> 
> On 30.07.19 at 02:32, Brian Welty wrote:
> > [ By request, resending to include amd-gfx + intel-gfx.  Since resending,
> >I fixed the nit with ordering of header includes that Sam noted. ]
> >
> > This RFC series is first implementation of some ideas expressed
> > earlier on dri-devel [1].
> >
> > Some of the goals (open for much debate) are:
> >- Create common base structure (subclass) for memory regions (patch #1)
> >- Create common memory region types (patch #2)
> >- Create common set of memory_region function callbacks (based on
> >  ttm_mem_type_manager_funcs and intel_memory_regions_ops)
> >- Create common helpers that operate on drm_mem_region to be leveraged
> >  by both TTM drivers and i915, reducing code duplication
> >- Above might start with refactoring ttm_bo_manager.c as these are
> >  helpers for using drm_mm's range allocator and could be made to
> >  operate on DRM structures instead of TTM ones.
> >- Larger goal might be to make LRU management of GEM objects common, and
> >  migrate those fields into drm_mem_region and drm_gem_object strucures.
> >
> > Patches 1-2 implement the proposed struct drm_mem_region and adds
> > associated common set of definitions for memory region type.
> >
> > Patch #3 is update to i915 and is based upon another series which is
> > in progress to add vram support to i915 [2].
> >
> > [1] 

Re: [RFC PATCH 0/3] Propose new struct drm_mem_region

2019-07-30 Thread Daniel Vetter
On Tue, Jul 30, 2019 at 08:45:57AM +, Koenig, Christian wrote:
> Yeah, that looks like a good start. Just a couple of random design 
> comments/requirements.
> 
> First of all please restructure the changes so that you more or less 
> have the following:
> 1. Adding of the new structures and functionality without any change to 
> existing code.
> 2. Replacing the existing functionality in TTM and all of its drivers.
> 3. Replacing the existing functionality in i915.
> 
> This should make it much easier to review the new functionality when it 
> is not mixed with existing TTM stuff.
> 
> 
> Second please completely drop the concept of gpu_offset or start of the 
> memory region like here:
> > drm_printf(p, "gpu_offset: 0x%08llX\n", man->region.start);
> At least on AMD hardware we have the following address spaces which are 
> sometimes even partially overlapping: VM, MC, SYSTEM, FB, AGP, XGMI, bus 
> addresses and physical addresses.
> 
> Pushing a concept of a general GPU address space into the memory 
> management was a rather bad design mistake in TTM and we should not 
> repeat that here.
> 
> A region should only consist of a size in bytes and (internal to the 
> region manager) allocations in that region.
> 
> 
> Third please don't use any CPU or architecture specific types in any 
> data structures:
> > +struct drm_mem_region {
> > +   resource_size_t start; /* within GPU physical address space */
> > +   resource_size_t io_start; /* BAR address (CPU accessible) */
> > +   resource_size_t size;
> 
> I knew that resource_size is mostly 64bit on modern architectures, but 
> dGPUs are completely separate to the architecture and we always need 
> 64bits here at least for AMD hardware.
> 
> So this should either be always uint64_t, or something like 
> gpu_resource_size which depends on what the compiled in drivers require 
> if we really need that.
> 
> And by the way: Please always use bytes for things like sizes and not 
> number of pages, cause page size is again CPU/architecture specific and 
> GPU drivers don't necessarily care about that.
> 
> 
> And here also a few direct comments on the code:
> > +   union {
> > +   struct drm_mm *mm;
> > +   /* FIXME (for i915): struct drm_buddy_mm *buddy_mm; */
> > +   void *priv;
> > +   };
> Maybe just always use void *mm here.

I'd say lets drop this outright, and handle private data by embedding this
structure in the right place. That's how we handle everything in drm now
as much as possible, and it's so much cleaner. I think ttm still loves
priv pointers a bit too much in some places.
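A sketch of the embedding pattern meant here, using a hypothetical TTM-side
wrapper: the common struct sits inside the driver's own type and is recovered
with container_of(), so no priv pointer is needed.

	struct ttm_mem_region {			/* hypothetical subclass */
		struct drm_mem_region base;
		struct drm_mm mm;		/* allocator state lives here */
	};

	static inline struct ttm_mem_region *
	to_ttm_mem_region(struct drm_mem_region *region)
	{
		return container_of(region, struct ttm_mem_region, base);
	}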

> > +   spinlock_t move_lock;
> > +   struct dma_fence *move;
> 
> That is TTM specific and I'm not sure if we want it in the common memory 
> management handling.
> 
> If we want that here we should probably replace the lock with some rcu 
> and atomic fence pointer exchange first.

Yeah  not sure we want any of these details in this shared structure
either.

> 
> > +/*
> > + * Memory types for drm_mem_region
> > + */
> 
> #define DRM_MEM_SWAP    ?
> 
> TTM was clearly missing that resulting in a whole bunch of extra 
> handling and rather complicated handling.
> 
> > +#define DRM_MEM_SYSTEM 0
> > +#define DRM_MEM_STOLEN 1
> 
> I think we need a better naming for that.
> 
> STOLEN sounds way too much like stolen VRAM for integrated GPUs, but at 
> least for TTM this is the system memory currently GPU accessible.

Yeah I think the crux here of having a common drm_mem_region is how do we
name stuff. I think what Brian didn't mention is that the goal here is to
have something we can use for managing using cgroups.

> Thanks for looking into that,
> Christian.
> 
> On 30.07.19 at 02:32, Brian Welty wrote:
> > [ By request, resending to include amd-gfx + intel-gfx.  Since resending,
> >I fixed the nit with ordering of header includes that Sam noted. ]
> >
> > This RFC series is first implementation of some ideas expressed
> > earlier on dri-devel [1].
> >
> > Some of the goals (open for much debate) are:
> >- Create common base structure (subclass) for memory regions (patch #1)
> >- Create common memory region types (patch #2)
> >- Create common set of memory_region function callbacks (based on
> >  ttm_mem_type_manager_funcs and intel_memory_regions_ops)
> >- Create common helpers that operate on drm_mem_region to be leveraged
> >  by both TTM drivers and i915, reducing code duplication
> >- Above might start with refactoring ttm_bo_manager.c as these are
> >  helpers for using drm_mm's range allocator and could be made to
> >  operate on DRM structures instead of TTM ones.
> >- Larger goal might be to make LRU management of GEM objects common, and
> >  migrate those fields into drm_mem_region and drm_gem_object structures.

I'm not sure how much of all that we really want in a drm_mem_region ...
Otherwise we just reimplement the same midlayer we have already, but with
a drm_ prefix instead of ttm_.

Re: [PATCH] drm/amdgpu: fix error handling in amdgpu_cs_process_fence_dep

2019-07-30 Thread zhoucm1

Looks very clean, Reviewed-by: Chunming Zhou 


On 2019-07-30 17:18, Christian König wrote:

We always need to drop the ctx reference and should check
for errors first and then dereference the fence pointer.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 26 --
  1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index c691df6f7a57..def029ab5657 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1042,29 +1042,27 @@ static int amdgpu_cs_process_fence_dep(struct 
amdgpu_cs_parser *p,
return r;
}
  
-		fence = amdgpu_ctx_get_fence(ctx, entity,
-					     deps[i].handle);
+   fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
+   amdgpu_ctx_put(ctx);
+
+   if (IS_ERR(fence))
+   return PTR_ERR(fence);
+   else if (!fence)
+   continue;
  
 		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
-			struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
+			struct drm_sched_fence *s_fence;
 			struct dma_fence *old = fence;
 
+			s_fence = to_drm_sched_fence(fence);
 			fence = dma_fence_get(&s_fence->scheduled);
 			dma_fence_put(old);
 		}
  
-		if (IS_ERR(fence)) {
-			r = PTR_ERR(fence);
-			amdgpu_ctx_put(ctx);
+		r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+		dma_fence_put(fence);
+		if (r)
 			return r;
-		} else if (fence) {
-			r = amdgpu_sync_fence(p->adev, &p->job->sync, fence,
-					      true);
-			dma_fence_put(fence);
-			amdgpu_ctx_put(ctx);
-			if (r)
-				return r;
-		}
}
return 0;
  }


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: amdgpf: BUG: NULL pointer dereference and memory leak

2019-07-30 Thread Koenig, Christian
On 30.07.19 at 11:14, zhoucm1 wrote:
>
>
> On 2019-07-30 17:04, Koenig, Christian wrote:
>> On 30.07.19 at 10:47, 亿一 wrote:
>>> Hi all,
>>>    While analyzing the source code, I noticed that the function
>>> amdgpu_cs_process_fence_dep() may have a NULL pointer dereference and
>>> a memory leak in the following code fragment:
>>>
>>>
>>> fence = amdgpu_ctx_get_fence(ctx, entity,
>>>   deps[i].handle);
>>>
>>> if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
>>>   struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
>>>   struct dma_fence *old = fence;
>>>
>>>   fence = dma_fence_get(&s_fence->scheduled);
>>>   dma_fence_put(old);
>>> }
>>>
>>> if (IS_ERR(fence)) {
>>>    r = PTR_ERR(fence);
>>>    amdgpu_ctx_put(ctx);
>>>    return r;
>>>     } else if (fence) {
>>>     r = amdgpu_sync_fence(p->adev, &p->job->sync, fence,
>>>    true);
>>>     dma_fence_put(fence);
>>>  amdgpu_ctx_put(ctx);
>>>  if (r)
>>>  return r;
>>>  }
>>>
>>> The function amdgpu_ctx_get_fence() may return a NULL pointer, which will
>>> cause a NULL pointer dereference. What's more, IS_ERR() does not
>>> return true when the pointer is NULL, which leaks the ctx
>>> reference.
>> That handling is actually correct.
>>
>> The problem is the "if (chunk->chunk_id ==
>> AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES)" stuff above.
>>
>> That comes too early and needs to be moved below checking the fence for
>> errors. Going to send a fix for this to the mailing list in a minute.
> Lin Yi is right, I think: we leak the ctx reference when the fence is NULL.

Indeed, but what I meant was that a NULL fence here is not an error.

Just sent out a patch to fix that stuff up, please review.

Christian.

>
> -David
>>
>> Thanks for the notice,
>> Christian.
>>
>>> But I don't know how to fix it, so I'm reporting it to you all.
>>>
>>> Best Regards.
>>> Lin Yi.
>

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: fix error handling in amdgpu_cs_process_fence_dep

2019-07-30 Thread Christian König
We always need to drop the ctx reference and should check
for errors first and then dereference the fence pointer.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 26 --
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index c691df6f7a57..def029ab5657 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1042,29 +1042,27 @@ static int amdgpu_cs_process_fence_dep(struct 
amdgpu_cs_parser *p,
return r;
}
 
-		fence = amdgpu_ctx_get_fence(ctx, entity,
-					     deps[i].handle);
+   fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
+   amdgpu_ctx_put(ctx);
+
+   if (IS_ERR(fence))
+   return PTR_ERR(fence);
+   else if (!fence)
+   continue;
 
if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
-   struct drm_sched_fence *s_fence = 
to_drm_sched_fence(fence);
+   struct drm_sched_fence *s_fence;
struct dma_fence *old = fence;
 
+   s_fence = to_drm_sched_fence(fence);
		fence = dma_fence_get(&s_fence->scheduled);
dma_fence_put(old);
}
 
-   if (IS_ERR(fence)) {
-   r = PTR_ERR(fence);
-   amdgpu_ctx_put(ctx);
+   r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+   dma_fence_put(fence);
+   if (r)
return r;
-   } else if (fence) {
-   r = amdgpu_sync_fence(p->adev, >job->sync, fence,
-   true);
-   dma_fence_put(fence);
-   amdgpu_ctx_put(ctx);
-   if (r)
-   return r;
-   }
}
return 0;
 }
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH] drm/amd/powerplay: guard consistency between CPU copy and local VRAM

2019-07-30 Thread Feng, Kenneth
Reviewed-by: Kenneth Feng 


-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Evan 
Quan
Sent: Tuesday, July 30, 2019 4:59 PM
To: amd-gfx@lists.freedesktop.org
Cc: Quan, Evan 
Subject: [PATCH] drm/amd/powerplay: guard consistency between CPU copy and 
local VRAM


This prevents the CPU from using an out-dated copy.

Change-Id: Ia18e89a923e3522e01717aa4d5ba35f8f4f20763
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c   | 4 
 drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c  | 4 
 drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c | 4 
 drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c | 4 
 drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c | 8 
 5 files changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c 
b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 9b5661dc10da..d99a8aa0defb 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -314,6 +314,7 @@ int smu_update_table(struct smu_context *smu, enum smu_table_id table_index,
 		     void *table_data, bool drv2smu)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
+	struct amdgpu_device *adev = smu->adev;
 	struct smu_table *table = NULL;
 	int ret = 0;
 	int table_id = smu_table_get_index(smu, table_index);
@@ -341,6 +342,9 @@ int smu_update_table(struct smu_context *smu, enum smu_table_id table_index,
 	if (ret)
 		return ret;
 
+	/* flush hdp cache */
+	adev->nbio_funcs->hdp_flush(adev, NULL);
+
 	if (!drv2smu)
 		memcpy(table_data, table->cpu_addr, table->size);
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
index ca660351a363..59b11ac5b53b 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
@@ -116,6 +116,7 @@ static int smu10_copy_table_from_smc(struct pp_hwmgr *hwmgr,
 {
 	struct smu10_smumgr *priv =
 			(struct smu10_smumgr *)(hwmgr->smu_backend);
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	PP_ASSERT_WITH_CODE(table_id < MAX_SMU_TABLE,
 			"Invalid SMU Table ID!", return -EINVAL;);
@@ -133,6 +134,9 @@ static int smu10_copy_table_from_smc(struct pp_hwmgr *hwmgr,
 			PPSMC_MSG_TransferTableSmu2Dram,
 			priv->smu_tables.entry[table_id].table_id);
 
+	/* flush hdp cache */
+	adev->nbio_funcs->hdp_flush(adev, NULL);
+
 	memcpy(table, (uint8_t *)priv->smu_tables.entry[table_id].table,
 			priv->smu_tables.entry[table_id].size);
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
index 7bfef8d85cda..8e07fc1fb9ce 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
@@ -37,6 +37,7 @@ static int vega10_copy_table_from_smc(struct pp_hwmgr *hwmgr,
 		uint8_t *table, int16_t table_id)
 {
 	struct vega10_smumgr *priv = hwmgr->smu_backend;
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	PP_ASSERT_WITH_CODE(table_id < MAX_SMU_TABLE,
 			"Invalid SMU Table ID!", return -EINVAL);
@@ -54,6 +55,9 @@ static int vega10_copy_table_from_smc(struct pp_hwmgr *hwmgr,
 			PPSMC_MSG_TransferTableSmu2Dram,
 			priv->smu_tables.entry[table_id].table_id);
 
+	/* flush hdp cache */
+	adev->nbio_funcs->hdp_flush(adev, NULL);
+
 	memcpy(table, priv->smu_tables.entry[table_id].table,
 			priv->smu_tables.entry[table_id].size);
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
index 9ad07a91c38b..c11dae720a35 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
@@ -42,6 +42,7 @@ static int vega12_copy_table_from_smc(struct pp_hwmgr *hwmgr,
 {
 	struct vega12_smumgr *priv =
 			(struct vega12_smumgr *)(hwmgr->smu_backend);
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	PP_ASSERT_WITH_CODE(table_id < TABLE_COUNT,
 			"Invalid SMU Table ID!", return -EINVAL);
@@ -64,6 +65,9 @@ static int vega12_copy_table_from_smc(struct pp_hwmgr *hwmgr,
 			"[CopyTableFromSMC] Attempt to Transfer Table From SMU Failed!",
 			return -EINVAL);
 
+	/* flush hdp cache */
+	adev->nbio_funcs->hdp_flush(adev, NULL);
+
 	memcpy(table, priv->smu_tables.entry[table_id].table,
 			priv->smu_tables.entry[table_id].size);
 
diff --git 

Re: amdgpf: BUG: NULL pointer dereference and memory leak

2019-07-30 Thread zhoucm1



On 2019-07-30 17:04, Koenig, Christian wrote:

On 30.07.19 at 10:47, 亿一 wrote:

Hi all,
   While analyzing the source code, I noticed that the function
amdgpu_cs_process_fence_dep() may have a NULL pointer dereference and
a memory leak in the following code fragment:


fence = amdgpu_ctx_get_fence(ctx, entity,
  deps[i].handle);

if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
  struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
  struct dma_fence *old = fence;

  fence = dma_fence_get(&s_fence->scheduled);
  dma_fence_put(old);
}

if (IS_ERR(fence)) {
   r = PTR_ERR(fence);
   amdgpu_ctx_put(ctx);
   return r;
} else if (fence) {
r = amdgpu_sync_fence(p->adev, &p->job->sync, fence,
   true);
dma_fence_put(fence);
 amdgpu_ctx_put(ctx);
 if (r)
 return r;
 }

The function amdgpu_ctx_get_fence() may return a NULL pointer, which will
cause a NULL pointer dereference. What's more, IS_ERR() does not
return true when the pointer is NULL, which leaks the ctx
reference.

That handling is actually correct.

The problem is the "if (chunk->chunk_id ==
AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES)" stuff above.

That comes too early and needs to be moved below checking the fence for
errors. Going to send a fix for this to the mailing list in a minute.

Lin Yi is right, I think: we leak the ctx reference when the fence is NULL.

-David


Thanks for the notice,
Christian.


But I don't know how to fix it, so I'm reporting it to you all.

Best Regards.
Lin Yi.


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: amdgpf: BUG: NULL pointer dereference and memory leak

2019-07-30 Thread Koenig, Christian
On 30.07.19 at 10:47, 亿一 wrote:
> Hi all,
>   While analyzing the source code, I noticed that the function
> amdgpu_cs_process_fence_dep() may have a NULL pointer dereference and
> a memory leak in the following code fragment:
>
>
> fence = amdgpu_ctx_get_fence(ctx, entity,
>  deps[i].handle);
>
> if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
>  struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
>  struct dma_fence *old = fence;
>
>  fence = dma_fence_get(&s_fence->scheduled);
>  dma_fence_put(old);
> }
>
> if (IS_ERR(fence)) {
>   r = PTR_ERR(fence);
>   amdgpu_ctx_put(ctx);
>   return r;
>} else if (fence) {
>r = amdgpu_sync_fence(p->adev, &p->job->sync, fence,
>   true);
>dma_fence_put(fence);
> amdgpu_ctx_put(ctx);
> if (r)
> return r;
> }
>
> The function amdgpu_ctx_get_fence() may return a NULL pointer, which will
> cause a NULL pointer dereference. What's more, IS_ERR() does not
> return true when the pointer is NULL, which leaks the ctx
> reference.

That handling is actually correct.

The problem is the "if (chunk->chunk_id == 
AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES)" stuff above.

That comes too early and needs to be moved below checking the fence for 
errors. Going to send a fix for this to the mailing list in a minute.
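
To make the ordering concrete, here is a minimal sketch of the intended
flow (illustrative only -- see the actual patch for the real change):

	fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
	amdgpu_ctx_put(ctx);		/* always drop the ctx reference */

	if (IS_ERR(fence))		/* 1. check for errors first */
		return PTR_ERR(fence);
	if (!fence)			/* 2. NULL is not an error, just skip */
		continue;

	/* 3. only now is it safe to dereference the fence, e.g. for
	 * the AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES handling. */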

Thanks for the notice,
Christian.

> But I don't know how to fix it, so I'm reporting it to you all.
>
> Best Regards.
> Lin Yi.

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amd/powerplay: guard consistency between CPU copy and local VRAM

2019-07-30 Thread Evan Quan
This prevents the CPU from using an out-dated copy.

Change-Id: Ia18e89a923e3522e01717aa4d5ba35f8f4f20763
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c   | 4 
 drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c  | 4 
 drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c | 4 
 drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c | 4 
 drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c | 8 
 5 files changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c 
b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 9b5661dc10da..d99a8aa0defb 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -314,6 +314,7 @@ int smu_update_table(struct smu_context *smu, enum 
smu_table_id table_index,
 void *table_data, bool drv2smu)
 {
	struct smu_table_context *smu_table = &smu->smu_table;
+   struct amdgpu_device *adev = smu->adev;
struct smu_table *table = NULL;
int ret = 0;
int table_id = smu_table_get_index(smu, table_index);
@@ -341,6 +342,9 @@ int smu_update_table(struct smu_context *smu, enum 
smu_table_id table_index,
if (ret)
return ret;
 
+   /* flush hdp cache */
+   adev->nbio_funcs->hdp_flush(adev, NULL);
+
if (!drv2smu)
memcpy(table_data, table->cpu_addr, table->size);
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
index ca660351a363..59b11ac5b53b 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
@@ -116,6 +116,7 @@ static int smu10_copy_table_from_smc(struct pp_hwmgr *hwmgr,
 {
struct smu10_smumgr *priv =
(struct smu10_smumgr *)(hwmgr->smu_backend);
+   struct amdgpu_device *adev = hwmgr->adev;
 
PP_ASSERT_WITH_CODE(table_id < MAX_SMU_TABLE,
"Invalid SMU Table ID!", return -EINVAL;);
@@ -133,6 +134,9 @@ static int smu10_copy_table_from_smc(struct pp_hwmgr *hwmgr,
PPSMC_MSG_TransferTableSmu2Dram,
priv->smu_tables.entry[table_id].table_id);
 
+   /* flush hdp cache */
+   adev->nbio_funcs->hdp_flush(adev, NULL);
+
memcpy(table, (uint8_t *)priv->smu_tables.entry[table_id].table,
priv->smu_tables.entry[table_id].size);
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
index 7bfef8d85cda..8e07fc1fb9ce 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
@@ -37,6 +37,7 @@ static int vega10_copy_table_from_smc(struct pp_hwmgr *hwmgr,
uint8_t *table, int16_t table_id)
 {
struct vega10_smumgr *priv = hwmgr->smu_backend;
+   struct amdgpu_device *adev = hwmgr->adev;
 
PP_ASSERT_WITH_CODE(table_id < MAX_SMU_TABLE,
"Invalid SMU Table ID!", return -EINVAL);
@@ -54,6 +55,9 @@ static int vega10_copy_table_from_smc(struct pp_hwmgr *hwmgr,
PPSMC_MSG_TransferTableSmu2Dram,
priv->smu_tables.entry[table_id].table_id);
 
+   /* flush hdp cache */
+   adev->nbio_funcs->hdp_flush(adev, NULL);
+
memcpy(table, priv->smu_tables.entry[table_id].table,
priv->smu_tables.entry[table_id].size);
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
index 9ad07a91c38b..c11dae720a35 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
@@ -42,6 +42,7 @@ static int vega12_copy_table_from_smc(struct pp_hwmgr *hwmgr,
 {
struct vega12_smumgr *priv =
(struct vega12_smumgr *)(hwmgr->smu_backend);
+   struct amdgpu_device *adev = hwmgr->adev;
 
PP_ASSERT_WITH_CODE(table_id < TABLE_COUNT,
"Invalid SMU Table ID!", return -EINVAL);
@@ -64,6 +65,9 @@ static int vega12_copy_table_from_smc(struct pp_hwmgr *hwmgr,
"[CopyTableFromSMC] Attempt to Transfer Table From SMU 
Failed!",
return -EINVAL);
 
+   /* flush hdp cache */
+   adev->nbio_funcs->hdp_flush(adev, NULL);
+
memcpy(table, priv->smu_tables.entry[table_id].table,
priv->smu_tables.entry[table_id].size);
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
index 957446cf467e..3e97b83950dc 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
@@ -163,6 +163,7 @@ static int vega20_copy_table_from_smc(struct pp_hwmgr 
*hwmgr,
 {
struct 

Re: [RFC PATCH 0/3] Propose new struct drm_mem_region

2019-07-30 Thread Koenig, Christian
Yeah, that looks like a good start. Just a couple of random design 
comments/requirements.

First of all please restructure the changes so that you more or less 
have the following:
1. Adding of the new structures and functionality without any change to 
existing code.
2. Replacing the existing functionality in TTM and all of its drivers.
3. Replacing the existing functionality in i915.

This should make it much easier to review the new functionality when it 
is not mixed with existing TTM stuff.


Second please completely drop the concept of gpu_offset or start of the 
memory region like here:
> drm_printf(p, "gpu_offset: 0x%08llX\n", man->region.start);
At least on AMD hardware we have the following address spaces which are 
sometimes even partially overlapping: VM, MC, SYSTEM, FB, AGP, XGMI, bus 
addresses and physical addresses.

Pushing a concept of a general GPU address space into the memory 
management was a rather bad design mistake in TTM and we should not 
repeat that here.

A region should only consist of a size in bytes and (internal to the 
region manager) allocations in that region.


Third please don't use any CPU or architecture specific types in any 
data structures:
> +struct drm_mem_region {
> + resource_size_t start; /* within GPU physical address space */
> + resource_size_t io_start; /* BAR address (CPU accessible) */
> + resource_size_t size;

I know that resource_size_t is mostly 64bit on modern architectures, but 
dGPUs are completely separate from the architecture and we always need 
64 bits here, at least for AMD hardware.

So this should either be always uint64_t, or something like 
gpu_resource_size which depends on what the compiled in drivers require 
if we really need that.

And by the way: Please always use bytes for things like sizes and not 
number of pages, because page size is again CPU/architecture specific and 
GPU drivers don't necessarily care about that.
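
Taken together, a region stripped down along those lines would be little
more than this (illustrative sketch with hypothetical names, not the
RFC's actual code):

	struct example_mem_region {
		uint64_t size;		/* always bytes, always 64 bit,
					 * independent of the CPU arch */
		void *mgr_private;	/* allocation state, internal to
					 * the region manager */
	};

No gpu_offset, no start address, no resource_size_t, no page counts.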


And here also a few direct comments on the code:
> + union {
> + struct drm_mm *mm;
> + /* FIXME (for i915): struct drm_buddy_mm *buddy_mm; */
> + void *priv;
> + };
Maybe just always use void *mm here.

> + spinlock_t move_lock;
> + struct dma_fence *move;

That is TTM specific and I'm not sure if we want it in the common memory 
management handling.

If we want that here we should probably replace the lock with some rcu 
and atomic fence pointer exchange first.
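
Something like the following would be the direction (untested sketch,
hypothetical names, assuming <linux/dma-fence.h>; it relies on dma_fence
objects already being freed RCU-safely, which is what makes the lockless
reader valid -- sparse annotations/casts omitted for brevity):

	struct example_region {
		/* hypothetical replacement for move_lock + move */
		struct dma_fence __rcu *move;
	};

	/* Reader: take a reference without holding any lock. */
	static struct dma_fence *
	example_region_get_move(struct example_region *reg)
	{
		return dma_fence_get_rcu_safe(&reg->move);
	}

	/* Writer: publish a new fence (may be NULL) and drop the old
	 * reference. xchg() provides the atomic pointer exchange. */
	static void example_region_set_move(struct example_region *reg,
					    struct dma_fence *fence)
	{
		struct dma_fence *old;

		if (fence)
			dma_fence_get(fence);
		old = xchg(&reg->move, fence);
		dma_fence_put(old);	/* dma_fence_put() accepts NULL */
	}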

> +/*
> + * Memory types for drm_mem_region
> + */

#define DRM_MEM_SWAP    ?

TTM was clearly missing that, resulting in a whole bunch of extra 
and rather complicated handling.

> +#define DRM_MEM_SYSTEM   0
> +#define DRM_MEM_STOLEN   1

I think we need a better naming for that.

STOLEN sounds way too much like stolen VRAM for integrated GPUs, but at 
least for TTM this is the system memory that is currently GPU accessible.


Thanks for looking into that,
Christian.

On 30.07.19 at 02:32, Brian Welty wrote:
> [ By request, resending to include amd-gfx + intel-gfx.  Since resending,
>I fixed the nit with ordering of header includes that Sam noted. ]
>
> This RFC series is first implementation of some ideas expressed
> earlier on dri-devel [1].
>
> Some of the goals (open for much debate) are:
>- Create common base structure (subclass) for memory regions (patch #1)
>- Create common memory region types (patch #2)
>- Create common set of memory_region function callbacks (based on
>  ttm_mem_type_manager_funcs and intel_memory_regions_ops)
>- Create common helpers that operate on drm_mem_region to be leveraged
>  by both TTM drivers and i915, reducing code duplication
>- Above might start with refactoring ttm_bo_manager.c as these are
>  helpers for using drm_mm's range allocator and could be made to
>  operate on DRM structures instead of TTM ones.
>- Larger goal might be to make LRU management of GEM objects common, and
>  migrate those fields into drm_mem_region and drm_gem_object structures.
>
> Patches 1-2 implement the proposed struct drm_mem_region and adds
> associated common set of definitions for memory region type.
>
> Patch #3 is update to i915 and is based upon another series which is
> in progress to add vram support to i915 [2].
>
> [1] https://lists.freedesktop.org/archives/dri-devel/2019-June/224501.html
> [2] https://lists.freedesktop.org/archives/intel-gfx/2019-June/203649.html
>
> Brian Welty (3):
>drm: introduce new struct drm_mem_region
>drm: Introduce DRM_MEM defines for specifying type of drm_mem_region
>drm/i915: Update intel_memory_region to use nested drm_mem_region
>
>   drivers/gpu/drm/i915/gem/i915_gem_object.c|  2 +-
>   drivers/gpu/drm/i915/gem/i915_gem_shmem.c |  2 +-
>   drivers/gpu/drm/i915/i915_gem_gtt.c   | 10 ++---
>   drivers/gpu/drm/i915/i915_gpu_error.c |  2 +-
>   drivers/gpu/drm/i915/i915_query.c |  2 +-
>   drivers/gpu/drm/i915/intel_memory_region.c

[PATCH 12/13] mm: cleanup the hmm_vma_walk_hugetlb_entry stub

2019-07-30 Thread Christoph Hellwig
Stub out the whole function and assign NULL to the .hugetlb_entry method
if CONFIG_HUGETLB_PAGE is not set, as the method won't ever be called in
that case.

Signed-off-by: Christoph Hellwig 
---
 mm/hmm.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index f4e90ea5779f..2b56a4af1001 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -769,11 +769,11 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long 
start, unsigned long end,
 #define hmm_vma_walk_pud   NULL
 #endif
 
+#ifdef CONFIG_HUGETLB_PAGE
 static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
  unsigned long start, unsigned long end,
  struct mm_walk *walk)
 {
-#ifdef CONFIG_HUGETLB_PAGE
unsigned long addr = start, i, pfn;
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
@@ -812,10 +812,10 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, 
unsigned long hmask,
return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
 
return ret;
-#else /* CONFIG_HUGETLB_PAGE */
-   return -EINVAL;
-#endif
 }
+#else
+#define hmm_vma_walk_hugetlb_entry NULL
+#endif /* CONFIG_HUGETLB_PAGE */
 
 static void hmm_pfns_clear(struct hmm_range *range,
   uint64_t *pfns,
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

hmm_range_fault related fixes and legacy API removal v3

2019-07-30 Thread Christoph Hellwig

Hi Jérôme, Ben, Felix and Jason,

below is a series against the hmm tree which cleans up various minor
bits and allows HMM_MIRROR to be built on all architectures.

Diffstat:

7 files changed, 81 insertions(+), 171 deletions(-)

A git tree is also available at:

git://git.infradead.org/users/hch/misc.git hmm-cleanups

Gitweb:

http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/hmm-cleanups
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 09/13] mm: don't abuse pte_index() in hmm_vma_handle_pmd

2019-07-30 Thread Christoph Hellwig
pte_index is an internal arch helper in various architectures,
without consistent semantics.  Open code that calculation of a PMD
index based on the virtual address instead.
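
For reference, the open-coded form computes the page index within the
PMD directly from the virtual address (standalone illustration, assuming
the usual PMD_MASK/PAGE_SHIFT definitions):

	/* addr & ~PMD_MASK keeps the byte offset inside the PMD;
	 * shifting by PAGE_SHIFT turns that into a page index. */
	static unsigned long pmd_page_index(unsigned long addr)
	{
		return (addr & ~PMD_MASK) >> PAGE_SHIFT;
	}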

Signed-off-by: Christoph Hellwig 
---
 mm/hmm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 88b77a4a6a1e..e63ab7f11334 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -486,7 +486,7 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk,
if (pmd_protnone(pmd) || fault || write_fault)
return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
 
-   pfn = pmd_pfn(pmd) + pte_index(addr);
+   pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++) {
if (pmd_devmap(pmd)) {
pgmap = get_dev_pagemap(pfn, pgmap);
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 03/13] nouveau: pass struct nouveau_svmm to nouveau_range_fault

2019-07-30 Thread Christoph Hellwig
This avoids having to abuse the vma field in struct hmm_range to unlock
the mmap_sem.

Signed-off-by: Christoph Hellwig 
---
 drivers/gpu/drm/nouveau/nouveau_svm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c 
b/drivers/gpu/drm/nouveau/nouveau_svm.c
index a74530b5a523..b889d5ec4c7e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -485,14 +485,14 @@ nouveau_range_done(struct hmm_range *range)
 }
 
 static int
-nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range)
+nouveau_range_fault(struct nouveau_svmm *svmm, struct hmm_range *range)
 {
long ret;
 
range->default_flags = 0;
range->pfn_flags_mask = -1UL;
 
-   ret = hmm_range_register(range, mirror,
+   ret = hmm_range_register(range, &svmm->mirror,
 range->start, range->end,
 PAGE_SHIFT);
if (ret) {
@@ -689,7 +689,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
range.values = nouveau_svm_pfn_values;
range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT;
 again:
-   ret = nouveau_range_fault(&svmm->mirror, &range);
+   ret = nouveau_range_fault(svmm, &range);
if (ret == 0) {
mutex_lock(&svmm->mutex);
if (!nouveau_range_done(&range)) {
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 07/13] mm: remove the page_shift member from struct hmm_range

2019-07-30 Thread Christoph Hellwig
All users pass PAGE_SIZE here, and if we wanted to support single
entries for huge pages we should really just add a HMM_FAULT_HUGEPAGE
flag instead that uses the huge page size instead of having the
caller calculate that size once, just for the hmm code to verify it.

Signed-off-by: Christoph Hellwig 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  1 -
 drivers/gpu/drm/nouveau/nouveau_svm.c   |  1 -
 include/linux/hmm.h | 22 -
 mm/hmm.c| 42 ++---
 4 files changed, 9 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 71d6e7087b0b..8bf79288c4e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -818,7 +818,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, 
struct page **pages)
0 : range->flags[HMM_PFN_WRITE];
range->pfn_flags_mask = 0;
range->pfns = pfns;
-   range->page_shift = PAGE_SHIFT;
range->start = start;
range->end = start + ttm->num_pages * PAGE_SIZE;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c 
b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 40e706234554..e7068ce46949 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -680,7 +680,6 @@ nouveau_svm_fault(struct nvif_notify *notify)
 args.i.p.addr + args.i.p.size, fn - fi);
 
/* Have HMM fault pages within the fault window to the GPU. */
-   range.page_shift = PAGE_SHIFT;
range.start = args.i.p.addr;
range.end = args.i.p.addr + args.i.p.size;
range.pfns = args.phys;
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index c5b51376b453..51e18fbb8953 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -158,7 +158,6 @@ enum hmm_pfn_value_e {
  * @values: pfn value for some special case (none, special, error, ...)
  * @default_flags: default flags for the range (write, read, ... see hmm doc)
  * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter
- * @page_shift: device virtual address shift value (should be >= PAGE_SHIFT)
  * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT)
  * @valid: pfns array did not change since it has been fill by an HMM function
  */
@@ -172,31 +171,10 @@ struct hmm_range {
const uint64_t  *values;
uint64_tdefault_flags;
uint64_tpfn_flags_mask;
-   uint8_t page_shift;
uint8_t pfn_shift;
boolvalid;
 };
 
-/*
- * hmm_range_page_shift() - return the page shift for the range
- * @range: range being queried
- * Return: page shift (page size = 1 << page shift) for the range
- */
-static inline unsigned hmm_range_page_shift(const struct hmm_range *range)
-{
-   return range->page_shift;
-}
-
-/*
- * hmm_range_page_size() - return the page size for the range
- * @range: range being queried
- * Return: page size for the range in bytes
- */
-static inline unsigned long hmm_range_page_size(const struct hmm_range *range)
-{
-   return 1UL << hmm_range_page_shift(range);
-}
-
 /*
  * hmm_range_wait_until_valid() - wait for range to be valid
  * @range: range affected by invalidation to wait on
diff --git a/mm/hmm.c b/mm/hmm.c
index 926735a3aef9..f26d6abc4ed2 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -344,13 +344,12 @@ static int hmm_vma_walk_hole_(unsigned long addr, 
unsigned long end,
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
uint64_t *pfns = range->pfns;
-   unsigned long i, page_size;
+   unsigned long i;
 
hmm_vma_walk->last = addr;
-   page_size = hmm_range_page_size(range);
-   i = (addr - range->start) >> range->page_shift;
+   i = (addr - range->start) >> PAGE_SHIFT;
 
-   for (; addr < end; addr += page_size, i++) {
+   for (; addr < end; addr += PAGE_SIZE, i++) {
pfns[i] = range->values[HMM_PFN_NONE];
if (fault || write_fault) {
int ret;
@@ -772,7 +771,7 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned 
long hmask,
  struct mm_walk *walk)
 {
 #ifdef CONFIG_HUGETLB_PAGE
-   unsigned long addr = start, i, pfn, mask, size, pfn_inc;
+   unsigned long addr = start, i, pfn, mask;
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
struct vm_area_struct *vma = walk->vma;
@@ -783,24 +782,12 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, 
unsigned long hmask,
pte_t entry;
int ret = 0;
 
-   size = huge_page_size(h);
-   mask = size - 1;
-   if (range->page_shift != PAGE_SHIFT) {
-  

[PATCH 05/13] mm: remove the unused vma argument to hmm_range_dma_unmap

2019-07-30 Thread Christoph Hellwig
Signed-off-by: Christoph Hellwig 
---
 include/linux/hmm.h | 1 -
 mm/hmm.c| 2 --
 2 files changed, 3 deletions(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 82265118d94a..59be0aa2476d 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -422,7 +422,6 @@ long hmm_range_dma_map(struct hmm_range *range,
   dma_addr_t *daddrs,
   unsigned int flags);
 long hmm_range_dma_unmap(struct hmm_range *range,
-struct vm_area_struct *vma,
 struct device *device,
 dma_addr_t *daddrs,
 bool dirty);
diff --git a/mm/hmm.c b/mm/hmm.c
index d66fa29b42e0..3a3852660757 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1121,7 +1121,6 @@ EXPORT_SYMBOL(hmm_range_dma_map);
 /**
  * hmm_range_dma_unmap() - unmap range of that was map with hmm_range_dma_map()
  * @range: range being unmapped
- * @vma: the vma against which the range (optional)
  * @device: device against which dma map was done
  * @daddrs: dma address of mapped pages
  * @dirty: dirty page if it had the write flag set
@@ -1133,7 +1132,6 @@ EXPORT_SYMBOL(hmm_range_dma_map);
  * concurrent mmu notifier or sync_cpu_device_pagetables() to make progress.
  */
 long hmm_range_dma_unmap(struct hmm_range *range,
-struct vm_area_struct *vma,
 struct device *device,
 dma_addr_t *daddrs,
 bool dirty)
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 06/13] mm: remove superflous arguments from hmm_range_register

2019-07-30 Thread Christoph Hellwig
The start, end and page_shift values are all saved in the range
structure, so we might as well use that for argument passing.

Signed-off-by: Christoph Hellwig 
---
 Documentation/vm/hmm.rst|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  7 +--
 drivers/gpu/drm/nouveau/nouveau_svm.c   |  5 ++---
 include/linux/hmm.h |  6 +-
 mm/hmm.c| 20 +---
 5 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index ddcb5ca8b296..e63c11f7e0e0 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -222,7 +222,7 @@ The usage pattern is::
   range.flags = ...;
   range.values = ...;
   range.pfn_shift = ...;
-  hmm_range_register(&range);
+  hmm_range_register(&range, mirror);
 
   /*
* Just wait for range to be valid, safe to ignore return value as we
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index f0821638bbc6..71d6e7087b0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -818,8 +818,11 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, 
struct page **pages)
0 : range->flags[HMM_PFN_WRITE];
range->pfn_flags_mask = 0;
range->pfns = pfns;
-   hmm_range_register(range, mirror, start,
-  start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT);
+   range->page_shift = PAGE_SHIFT;
+   range->start = start;
+   range->end = start + ttm->num_pages * PAGE_SIZE;
+
+   hmm_range_register(range, mirror);
 
/*
 * Just wait for range to be valid, safe to ignore return value as we
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c 
b/drivers/gpu/drm/nouveau/nouveau_svm.c
index b889d5ec4c7e..40e706234554 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -492,9 +492,7 @@ nouveau_range_fault(struct nouveau_svmm *svmm, struct 
hmm_range *range)
range->default_flags = 0;
range->pfn_flags_mask = -1UL;
 
-   ret = hmm_range_register(range, &svmm->mirror,
-			     range->start, range->end,
-			     PAGE_SHIFT);
+   ret = hmm_range_register(range, &svmm->mirror);
if (ret) {
up_read(&range->hmm->mm->mmap_sem);
return (int)ret;
@@ -682,6 +680,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
 args.i.p.addr + args.i.p.size, fn - fi);
 
/* Have HMM fault pages within the fault window to the GPU. */
+   range.page_shift = PAGE_SHIFT;
range.start = args.i.p.addr;
range.end = args.i.p.addr + args.i.p.size;
range.pfns = args.phys;
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 59be0aa2476d..c5b51376b453 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -400,11 +400,7 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror);
 /*
  * Please see Documentation/vm/hmm.rst for how to use the range API.
  */
-int hmm_range_register(struct hmm_range *range,
-  struct hmm_mirror *mirror,
-  unsigned long start,
-  unsigned long end,
-  unsigned page_shift);
+int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror);
 void hmm_range_unregister(struct hmm_range *range);
 
 /*
diff --git a/mm/hmm.c b/mm/hmm.c
index 3a3852660757..926735a3aef9 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -843,35 +843,25 @@ static void hmm_pfns_clear(struct hmm_range *range,
  * hmm_range_register() - start tracking change to CPU page table over a range
  * @range: range
  * @mm: the mm struct for the range of virtual address
- * @start: start virtual address (inclusive)
- * @end: end virtual address (exclusive)
- * @page_shift: expect page shift for the range
+ *
  * Return: 0 on success, -EFAULT if the address space is no longer valid
  *
  * Track updates to the CPU page table see include/linux/hmm.h
  */
-int hmm_range_register(struct hmm_range *range,
-  struct hmm_mirror *mirror,
-  unsigned long start,
-  unsigned long end,
-  unsigned page_shift)
+int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror)
 {
-   unsigned long mask = ((1UL << page_shift) - 1UL);
+   unsigned long mask = ((1UL << range->page_shift) - 1UL);
struct hmm *hmm = mirror->hmm;
unsigned long flags;
 
range->valid = false;
range->hmm = NULL;
 
-   if ((start & mask) || (end & mask))
+   if ((range->start & mask) || (range->end & mask))
return -EINVAL;
-   if (start >= end)
+   if (range->start >= range->end)
return -EINVAL;
 
-   range->page_shift = page_shift;
-  

[PATCH 02/13] amdgpu: don't initialize range->list in amdgpu_hmm_init_range

2019-07-30 Thread Christoph Hellwig
The list is used to add the range to another list as an entry in the
core hmm code, so there is no need to initialize it in a driver.

Signed-off-by: Christoph Hellwig 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index b698b423b25d..60b9fc9561d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -484,6 +484,5 @@ void amdgpu_hmm_init_range(struct hmm_range *range)
range->flags = hmm_range_flags;
range->values = hmm_range_values;
range->pfn_shift = PAGE_SHIFT;
-   INIT_LIST_HEAD(>list);
}
 }
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 08/13] mm: remove the mask variable in hmm_vma_walk_hugetlb_entry

2019-07-30 Thread Christoph Hellwig
The pagewalk code already passes the value as the hmask parameter.

Signed-off-by: Christoph Hellwig 
---
 mm/hmm.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index f26d6abc4ed2..88b77a4a6a1e 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -771,19 +771,16 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, 
unsigned long hmask,
  struct mm_walk *walk)
 {
 #ifdef CONFIG_HUGETLB_PAGE
-   unsigned long addr = start, i, pfn, mask;
+   unsigned long addr = start, i, pfn;
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
struct vm_area_struct *vma = walk->vma;
-   struct hstate *h = hstate_vma(vma);
uint64_t orig_pfn, cpu_flags;
bool fault, write_fault;
spinlock_t *ptl;
pte_t entry;
int ret = 0;
 
-   mask = huge_page_size(h) - 1;
-
ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
entry = huge_ptep_get(pte);
 
@@ -799,7 +796,7 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned 
long hmask,
goto unlock;
}
 
-   pfn = pte_pfn(entry) + ((start & mask) >> PAGE_SHIFT);
+   pfn = pte_pfn(entry) + ((start & hmask) >> PAGE_SHIFT);
for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
range->pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
 cpu_flags;
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 04/13] mm: remove the pgmap field from struct hmm_vma_walk

2019-07-30 Thread Christoph Hellwig
There is only a single place where the pgmap is passed over a function
call, so replace it with local variables in the places where we deal
with the pgmap.

Signed-off-by: Christoph Hellwig 
---
 mm/hmm.c | 62 
 1 file changed, 27 insertions(+), 35 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 9a908902e4cc..d66fa29b42e0 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -278,7 +278,6 @@ EXPORT_SYMBOL(hmm_mirror_unregister);
 
 struct hmm_vma_walk {
struct hmm_range*range;
-   struct dev_pagemap  *pgmap;
unsigned long   last;
unsigned intflags;
 };
@@ -475,6 +474,7 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk,
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
+   struct dev_pagemap *pgmap = NULL;
unsigned long pfn, npages, i;
bool fault, write_fault;
uint64_t cpu_flags;
@@ -490,17 +490,14 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk,
pfn = pmd_pfn(pmd) + pte_index(addr);
for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++) {
if (pmd_devmap(pmd)) {
-   hmm_vma_walk->pgmap = get_dev_pagemap(pfn,
- hmm_vma_walk->pgmap);
-   if (unlikely(!hmm_vma_walk->pgmap))
+   pgmap = get_dev_pagemap(pfn, pgmap);
+   if (unlikely(!pgmap))
return -EBUSY;
}
pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags;
}
-   if (hmm_vma_walk->pgmap) {
-   put_dev_pagemap(hmm_vma_walk->pgmap);
-   hmm_vma_walk->pgmap = NULL;
-   }
+   if (pgmap)
+   put_dev_pagemap(pgmap);
hmm_vma_walk->last = end;
return 0;
 #else
@@ -520,7 +517,7 @@ static inline uint64_t pte_to_hmm_pfn_flags(struct 
hmm_range *range, pte_t pte)
 
 static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
  unsigned long end, pmd_t *pmdp, pte_t *ptep,
- uint64_t *pfn)
+ uint64_t *pfn, struct dev_pagemap **pgmap)
 {
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
@@ -591,9 +588,8 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, 
unsigned long addr,
goto fault;
 
if (pte_devmap(pte)) {
-   hmm_vma_walk->pgmap = get_dev_pagemap(pte_pfn(pte),
- hmm_vma_walk->pgmap);
-   if (unlikely(!hmm_vma_walk->pgmap))
+   *pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
+   if (unlikely(!*pgmap))
return -EBUSY;
} else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) 
{
*pfn = range->values[HMM_PFN_SPECIAL];
@@ -604,10 +600,10 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, 
unsigned long addr,
return 0;
 
 fault:
-   if (hmm_vma_walk->pgmap) {
-   put_dev_pagemap(hmm_vma_walk->pgmap);
-   hmm_vma_walk->pgmap = NULL;
-   }
+   if (*pgmap)
+   put_dev_pagemap(*pgmap);
+   *pgmap = NULL;
+
pte_unmap(ptep);
/* Fault any virtual address we were asked to fault */
return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
@@ -620,6 +616,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 {
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
+   struct dev_pagemap *pgmap = NULL;
uint64_t *pfns = range->pfns;
unsigned long addr = start, i;
pte_t *ptep;
@@ -683,23 +680,21 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
int r;
 
-   r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
+   r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i],
+			   &pgmap);
if (r) {
/* hmm_vma_handle_pte() did unmap pte directory */
hmm_vma_walk->last = addr;
return r;
}
}
-   if (hmm_vma_walk->pgmap) {
-   /*
-* We do put_dev_pagemap() here and not in hmm_vma_handle_pte()
-* so that we can leverage get_dev_pagemap() optimization which
-* will not re-take a reference on a pgmap if we already have
-* one.
-*/
-   put_dev_pagemap(hmm_vma_walk->pgmap);
-   hmm_vma_walk->pgmap = NULL;
-   }
+   /*
+* We do put_dev_pagemap() here and not in hmm_vma_handle_pte() so that
+* we 

[PATCH 13/13] mm: allow HMM_MIRROR on all architectures with MMU

2019-07-30 Thread Christoph Hellwig
There isn't really any architecture specific code in this page table
walk implementation, so drop the dependencies.

Signed-off-by: Christoph Hellwig 
---
 mm/Kconfig | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mm/Kconfig b/mm/Kconfig
index 56cec636a1fc..b18782be969c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -677,8 +677,7 @@ config DEV_PAGEMAP_OPS
 
 config HMM_MIRROR
bool "HMM mirror CPU page table into a device page table"
-   depends on (X86_64 || PPC64)
-   depends on MMU && 64BIT
+   depends on MMU
select MMU_NOTIFIER
help
  Select HMM_MIRROR if you want to mirror range of the CPU page table 
of a
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 01/13] amdgpu: remove -EAGAIN handling for hmm_range_fault

2019-07-30 Thread Christoph Hellwig
hmm_range_fault can only return -EAGAIN if called with the block
argument set to false, so remove the special handling for it.

Signed-off-by: Christoph Hellwig 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 23 +++
 1 file changed, 3 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 12a59ac83f72..f0821638bbc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -778,7 +778,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, 
struct page **pages)
struct hmm_range *range;
unsigned long i;
uint64_t *pfns;
-   int retry = 0;
int r = 0;
 
if (!mm) /* Happens during process shutdown */
@@ -822,7 +821,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, 
struct page **pages)
hmm_range_register(range, mirror, start,
   start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT);
 
-retry:
/*
 * Just wait for range to be valid, safe to ignore return value as we
 * will use the return value of hmm_range_fault() below under the
@@ -831,24 +829,12 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, 
struct page **pages)
hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
 
down_read(&mm->mmap_sem);
-
r = hmm_range_fault(range, 0);
-   if (unlikely(r < 0)) {
-   if (likely(r == -EAGAIN)) {
-   /*
-* return -EAGAIN, mmap_sem is dropped
-*/
-   if (retry++ < MAX_RETRY_HMM_RANGE_FAULT)
-   goto retry;
-   else
-   pr_err("Retry hmm fault too many times\n");
-   }
-
-   goto out_up_read;
-   }
-
up_read(&mm->mmap_sem);
 
+   if (unlikely(r < 0))
+   goto out_free_pfns;
+
for (i = 0; i < ttm->num_pages; i++) {
pages[i] = hmm_device_entry_to_page(range, pfns[i]);
if (unlikely(!pages[i])) {
@@ -864,9 +850,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, 
struct page **pages)
 
return 0;
 
-out_up_read:
-   if (likely(r != -EAGAIN))
-   up_read(>mmap_sem);
 out_free_pfns:
hmm_range_unregister(range);
kvfree(pfns);
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 10/13] mm: only define hmm_vma_walk_pud if needed

2019-07-30 Thread Christoph Hellwig
We only need the special pud_entry walker if PUD-sized hugepages and
pte mappings are supported, else the common pagewalk code will take
care of the iteration.  Not implementing this callback reduced the
amount of code compiled for non-x86 platforms, and also fixes compile
failures with other architectures when helpers like pud_pfn are not
implemented.

Signed-off-by: Christoph Hellwig 
---
 mm/hmm.c | 29 -
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index e63ab7f11334..4d3bd41b6522 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -455,15 +455,6 @@ static inline uint64_t pmd_to_hmm_pfn_flags(struct 
hmm_range *range, pmd_t pmd)
range->flags[HMM_PFN_VALID];
 }
 
-static inline uint64_t pud_to_hmm_pfn_flags(struct hmm_range *range, pud_t pud)
-{
-   if (!pud_present(pud))
-   return 0;
-   return pud_write(pud) ? range->flags[HMM_PFN_VALID] |
-   range->flags[HMM_PFN_WRITE] :
-   range->flags[HMM_PFN_VALID];
-}
-
 static int hmm_vma_handle_pmd(struct mm_walk *walk,
  unsigned long addr,
  unsigned long end,
@@ -700,10 +691,19 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
return 0;
 }
 
-static int hmm_vma_walk_pud(pud_t *pudp,
-   unsigned long start,
-   unsigned long end,
-   struct mm_walk *walk)
+#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && \
+defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+static inline uint64_t pud_to_hmm_pfn_flags(struct hmm_range *range, pud_t pud)
+{
+   if (!pud_present(pud))
+   return 0;
+   return pud_write(pud) ? range->flags[HMM_PFN_VALID] |
+   range->flags[HMM_PFN_WRITE] :
+   range->flags[HMM_PFN_VALID];
+}
+
+static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long 
end,
+   struct mm_walk *walk)
 {
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
@@ -765,6 +765,9 @@ static int hmm_vma_walk_pud(pud_t *pudp,
 
return 0;
 }
+#else
+#define hmm_vma_walk_pud   NULL
+#endif
 
 static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
  unsigned long start, unsigned long end,
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 11/13] mm: cleanup the hmm_vma_handle_pmd stub

2019-07-30 Thread Christoph Hellwig
Stub out the whole function when CONFIG_TRANSPARENT_HUGEPAGE is not set
to make the function easier to read.

Signed-off-by: Christoph Hellwig 
---
 mm/hmm.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 4d3bd41b6522..f4e90ea5779f 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -455,13 +455,10 @@ static inline uint64_t pmd_to_hmm_pfn_flags(struct 
hmm_range *range, pmd_t pmd)
range->flags[HMM_PFN_VALID];
 }
 
-static int hmm_vma_handle_pmd(struct mm_walk *walk,
- unsigned long addr,
- unsigned long end,
- uint64_t *pfns,
- pmd_t pmd)
-{
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
+   unsigned long end, uint64_t *pfns, pmd_t pmd)
+{
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
struct dev_pagemap *pgmap = NULL;
@@ -490,11 +487,14 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk,
put_dev_pagemap(pgmap);
hmm_vma_walk->last = end;
return 0;
-#else
-   /* If THP is not enabled then we should never reach this code ! */
+}
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
+static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
+   unsigned long end, uint64_t *pfns, pmd_t pmd)
+{
return -EINVAL;
-#endif
 }
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte)
 {
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v2] drm/amdgpu/powerplay: provide the interface to disable uclk switch for DAL

2019-07-30 Thread Kevin Wang
It looks fine to me.

Reviewed-by: Kevin Wang 

Best Regards,
Kevin

On 7/30/19 2:01 PM, Kenneth Feng wrote:
> Provide the interface for DAL to disable the uclk switch on navi10.
> In this case, the uclk will be fixed to the maximum.
> This is a workaround for when the display configuration causes an underflow issue.
>
> Signed-off-by: Kenneth Feng 
> ---
>   .../drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c   | 14 
>   drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h |  5 +
>   drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 25 
> ++
>   drivers/gpu/drm/amd/powerplay/smu_v11_0.c  |  7 ++
>   4 files changed, 51 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> index 7bc7abc..583f8fb 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
> @@ -802,6 +802,19 @@ enum pp_smu_status 
> pp_nv_set_hard_min_uclk_by_freq(struct pp_smu *pp, int mhz)
>   return PP_SMU_RESULT_OK;
>   }
>   
> +enum pp_smu_status pp_nv_set_pstate_handshake_support(
> + struct pp_smu *pp, BOOLEAN pstate_handshake_supported)
> +{
> + const struct dc_context *ctx = pp->dm;
> + struct amdgpu_device *adev = ctx->driver_context;
> + struct smu_context *smu = &adev->smu;
> +
> + if (smu_display_disable_memory_clock_switch(smu, 
> !pstate_handshake_supported))
> + return PP_SMU_RESULT_FAIL;
> +
> + return PP_SMU_RESULT_OK;
> +}
> +
>   enum pp_smu_status pp_nv_set_voltage_by_freq(struct pp_smu *pp,
>   enum pp_smu_nv_clock_id clock_id, int mhz)
>   {
> @@ -917,6 +930,7 @@ void dm_pp_get_funcs(
>   funcs->nv_funcs.get_maximum_sustainable_clocks = 
> pp_nv_get_maximum_sustainable_clocks;
>   /*todo  compare data with window driver */
>   funcs->nv_funcs.get_uclk_dpm_states = pp_nv_get_uclk_dpm_states;
> + funcs->nv_funcs.set_pstate_handshake_support = 
> pp_nv_set_pstate_handshake_support;
>   break;
>   #endif
>   default:
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h 
> b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> index 33d2d75..8242cd1 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> @@ -549,6 +549,8 @@ struct smu_context
>   #define WATERMARKS_EXIST(1 << 0)
>   #define WATERMARKS_LOADED   (1 << 1)
>   uint32_t watermarks_bitmap;
> + uint32_t hard_min_uclk_req_from_dal;
> + bool disable_uclk_switch;
>   
>   uint32_t workload_mask;
>   uint32_t workload_prority[WORKLOAD_POLICY_MAX];
> @@ -632,6 +634,7 @@ struct pptable_funcs {
>   int (*get_uclk_dpm_states)(struct smu_context *smu, uint32_t 
> *clocks_in_khz, uint32_t *num_states);
>   int (*set_default_od_settings)(struct smu_context *smu, bool 
> initialize);
>   int (*set_performance_level)(struct smu_context *smu, enum 
> amd_dpm_forced_level level);
> + int (*display_disable_memory_clock_switch)(struct smu_context *smu, 
> bool disable_memory_clock_switch);
>   };
>   
>   struct smu_funcs
> @@ -884,6 +887,8 @@ struct smu_funcs
>   ((smu)->ppt_funcs->get_clock_by_type_with_voltage ? 
> (smu)->ppt_funcs->get_clock_by_type_with_voltage((smu), (type), (clocks)) : 0)
>   #define smu_display_clock_voltage_request(smu, clock_req) \
>   ((smu)->funcs->display_clock_voltage_request ? 
> (smu)->funcs->display_clock_voltage_request((smu), (clock_req)) : 0)
> +#define smu_display_disable_memory_clock_switch(smu, 
> disable_memory_clock_switch) \
> + ((smu)->ppt_funcs->display_disable_memory_clock_switch ? 
> (smu)->ppt_funcs->display_disable_memory_clock_switch((smu), 
> (disable_memory_clock_switch)) : -EINVAL)
>   #define smu_get_dal_power_level(smu, clocks) \
>   ((smu)->funcs->get_dal_power_level ? 
> (smu)->funcs->get_dal_power_level((smu), (clocks)) : 0)
>   #define smu_get_perf_level(smu, designation, level) \
> diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c 
> b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
> index c873228..a8c98c4 100644
> --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
> +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
> @@ -1655,6 +1655,30 @@ static int navi10_get_thermal_temperature_range(struct 
> smu_context *smu,
>   return 0;
>   }
>   
> +static int navi10_display_disable_memory_clock_switch(struct smu_context 
> *smu,
> + bool 
> disable_memory_clock_switch)
> +{
> + int ret = 0;
> + struct smu_11_0_max_sustainable_clocks *max_sustainable_clocks =
> + (struct smu_11_0_max_sustainable_clocks *)
> + smu->smu_table.max_sustainable_clocks;
> + uint32_t min_memory_clock = smu->hard_min_uclk_req_from_dal;
> + uint32_t max_memory_clock = max_sustainable_clocks->uclock;
> +
> + 

[PATCH v2] drm/amdgpu/powerplay: provide the interface to disable uclk switch for DAL

2019-07-30 Thread Kenneth Feng
Provide an interface for DAL to disable uclk switching on navi10. In this
case, the uclk will be fixed to the maximum. This is a workaround for
display configurations that cause an underflow issue.
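
For reviewers, a minimal sketch of how a DC client could drive the new hook.
This is illustration only, not part of the patch: the caller name and the
exact pp_smu handle wiring are assumed here.

/*
 * Hypothetical caller sketch.  "P-state handshake not supported" maps to
 * "disable uclk switching" inside pp_nv_set_pstate_handshake_support():
 * navi10 then pins UCLK to its maximum sustainable clock via a hard
 * frequency range, and restores the hard minimum last requested by DAL
 * (smu->hard_min_uclk_req_from_dal) when switching is re-enabled.
 */
static void example_toggle_uclk_switch(struct pp_smu_funcs *funcs,
				       struct pp_smu *pp,
				       bool disable_uclk_switch)
{
	if (!funcs->nv_funcs.set_pstate_handshake_support)
		return;

	funcs->nv_funcs.set_pstate_handshake_support(pp,
			!disable_uclk_switch);
}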

Signed-off-by: Kenneth Feng 
---
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c   | 14 ++++++++++++++
 drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h     |  5 +++++
 drivers/gpu/drm/amd/powerplay/navi10_ppt.c         | 25 +++++++++++++++++++++
 drivers/gpu/drm/amd/powerplay/smu_v11_0.c          |  7 +++++++
 4 files changed, 51 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
index 7bc7abc..583f8fb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
@@ -802,6 +802,19 @@ enum pp_smu_status pp_nv_set_hard_min_uclk_by_freq(struct pp_smu *pp, int mhz)
return PP_SMU_RESULT_OK;
 }
 
+enum pp_smu_status pp_nv_set_pstate_handshake_support(
+   struct pp_smu *pp, BOOLEAN pstate_handshake_supported)
+{
+   const struct dc_context *ctx = pp->dm;
+   struct amdgpu_device *adev = ctx->driver_context;
+	struct smu_context *smu = &adev->smu;
+
+	if (smu_display_disable_memory_clock_switch(smu, !pstate_handshake_supported))
+   return PP_SMU_RESULT_FAIL;
+
+   return PP_SMU_RESULT_OK;
+}
+
 enum pp_smu_status pp_nv_set_voltage_by_freq(struct pp_smu *pp,
enum pp_smu_nv_clock_id clock_id, int mhz)
 {
@@ -917,6 +930,7 @@ void dm_pp_get_funcs(
 	funcs->nv_funcs.get_maximum_sustainable_clocks = pp_nv_get_maximum_sustainable_clocks;
/*todo  compare data with window driver */
funcs->nv_funcs.get_uclk_dpm_states = pp_nv_get_uclk_dpm_states;
+	funcs->nv_funcs.set_pstate_handshake_support = pp_nv_set_pstate_handshake_support;
break;
 #endif
default:
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index 33d2d75..8242cd1 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -549,6 +549,8 @@ struct smu_context
 #define WATERMARKS_EXIST   (1 << 0)
 #define WATERMARKS_LOADED  (1 << 1)
uint32_t watermarks_bitmap;
+   uint32_t hard_min_uclk_req_from_dal;
+   bool disable_uclk_switch;
 
uint32_t workload_mask;
uint32_t workload_prority[WORKLOAD_POLICY_MAX];
@@ -632,6 +634,7 @@ struct pptable_funcs {
 	int (*get_uclk_dpm_states)(struct smu_context *smu, uint32_t *clocks_in_khz, uint32_t *num_states);
 	int (*set_default_od_settings)(struct smu_context *smu, bool initialize);
 	int (*set_performance_level)(struct smu_context *smu, enum amd_dpm_forced_level level);
+	int (*display_disable_memory_clock_switch)(struct smu_context *smu, bool disable_memory_clock_switch);
 };
 
 struct smu_funcs
@@ -884,6 +887,8 @@ struct smu_funcs
 	((smu)->ppt_funcs->get_clock_by_type_with_voltage ? (smu)->ppt_funcs->get_clock_by_type_with_voltage((smu), (type), (clocks)) : 0)
 #define smu_display_clock_voltage_request(smu, clock_req) \
 	((smu)->funcs->display_clock_voltage_request ? (smu)->funcs->display_clock_voltage_request((smu), (clock_req)) : 0)
+#define smu_display_disable_memory_clock_switch(smu, disable_memory_clock_switch) \
+	((smu)->ppt_funcs->display_disable_memory_clock_switch ? (smu)->ppt_funcs->display_disable_memory_clock_switch((smu), (disable_memory_clock_switch)) : -EINVAL)
 #define smu_get_dal_power_level(smu, clocks) \
 	((smu)->funcs->get_dal_power_level ? (smu)->funcs->get_dal_power_level((smu), (clocks)) : 0)
 #define smu_get_perf_level(smu, designation, level) \
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index c873228..a8c98c4 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -1655,6 +1655,30 @@ static int navi10_get_thermal_temperature_range(struct smu_context *smu,
return 0;
 }
 
+static int navi10_display_disable_memory_clock_switch(struct smu_context *smu,
+						      bool disable_memory_clock_switch)
+{
+   int ret = 0;
+   struct smu_11_0_max_sustainable_clocks *max_sustainable_clocks =
+   (struct smu_11_0_max_sustainable_clocks *)
+   smu->smu_table.max_sustainable_clocks;
+   uint32_t min_memory_clock = smu->hard_min_uclk_req_from_dal;
+   uint32_t max_memory_clock = max_sustainable_clocks->uclock;
+
+   if(smu->disable_uclk_switch == disable_memory_clock_switch)
+   return 0;
+
+   if(disable_memory_clock_switch)
+		ret = smu_set_hard_freq_range(smu, SMU_UCLK, max_memory_clock, 0);
+   else
+   ret = smu_set_hard_freq_range(smu, SMU_UCLK,