[PATCH 3/3] drm/amd/powerplay: implement stop dpm task for vega10.

2017-04-27 Thread Rex Zhu
Change-Id: I19202f1e54ce6a1b8b54aacbc0d42dbba7605662
Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 97 ++
 .../gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c | 23 +
 .../gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h |  1 +
 .../gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c   |  2 +-
 .../gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h   |  1 +
 5 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 5e3e89b..68eae52 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -2420,6 +2420,26 @@ static int vega10_enable_thermal_protection(struct 
pp_hwmgr *hwmgr)
return 0;
 }
 
+static int vega10_disable_thermal_protection(struct pp_hwmgr *hwmgr)
+{
+   struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend);
+
+   if (data->smu_features[GNLD_THERMAL].supported) {
+   if (!data->smu_features[GNLD_THERMAL].enabled)
+   pr_info("THERMAL Feature Already disabled!");
+
+   PP_ASSERT_WITH_CODE(
+   !vega10_enable_smc_features(hwmgr->smumgr,
+   false,
+   
data->smu_features[GNLD_THERMAL].smu_feature_bitmap),
+   "disable THERMAL Feature Failed!",
+   return -1);
+   data->smu_features[GNLD_THERMAL].enabled = false;
+   }
+
+   return 0;
+}
+
 static int vega10_enable_vrhot_feature(struct pp_hwmgr *hwmgr)
 {
struct vega10_hwmgr *data =
@@ -2498,6 +2518,37 @@ static int vega10_enable_deep_sleep_master_switch(struct 
pp_hwmgr *hwmgr)
return 0;
 }
 
+static int vega10_stop_dpm(struct pp_hwmgr *hwmgr, uint32_t bitmap)
+{
+   struct vega10_hwmgr *data =
+   (struct vega10_hwmgr *)(hwmgr->backend);
+   uint32_t i, feature_mask = 0;
+
+
+   if(data->smu_features[GNLD_LED_DISPLAY].supported == true){
+   PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr,
+   true, 
data->smu_features[GNLD_LED_DISPLAY].smu_feature_bitmap),
+   "Attempt to Enable LED DPM feature Failed!", return -EINVAL);
+   data->smu_features[GNLD_LED_DISPLAY].enabled = true;
+   }
+
+   for (i = 0; i < GNLD_DPM_MAX; i++) {
+   if (data->smu_features[i].smu_feature_bitmap & bitmap) {
+   if (data->smu_features[i].supported) {
+   if (data->smu_features[i].enabled) {
+   feature_mask |= data->smu_features[i].
+   smu_feature_bitmap;
+   data->smu_features[i].enabled = false;
+   }
+   }
+   }
+   }
+
+   vega10_enable_smc_features(hwmgr->smumgr, false, feature_mask);
+
+   return 0;
+}
+
 /**
  * @brief Tell SMC to enabled the supported DPMs.
  *
@@ -4356,11 +4407,55 @@ static int vega10_check_states_equal(struct pp_hwmgr 
*hwmgr,
return is_update_required;
 }
 
+static int vega10_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
+{
+   int tmp_result, result = 0;
+
+   tmp_result = (vega10_is_dpm_running(hwmgr)) ? 0 : -1;
+   PP_ASSERT_WITH_CODE(tmp_result == 0,
+   "DPM is not running right now, no need to disable DPM!",
+   return 0);
+
+   if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+   PHM_PlatformCaps_ThermalController))
+   vega10_disable_thermal_protection(hwmgr);
+
+   tmp_result = vega10_disable_power_containment(hwmgr);
+   PP_ASSERT_WITH_CODE((tmp_result == 0),
+   "Failed to disable power containment!", result = 
tmp_result);
+
+   tmp_result = vega10_avfs_enable(hwmgr, false);
+   PP_ASSERT_WITH_CODE((tmp_result == 0),
+   "Failed to disable AVFS!", result = tmp_result);
+
+   tmp_result = vega10_stop_dpm(hwmgr, SMC_DPM_FEATURES);
+   PP_ASSERT_WITH_CODE((tmp_result == 0),
+   "Failed to stop DPM!", result = tmp_result);
+
+   return result;
+}
+
+static int vega10_power_off_asic(struct pp_hwmgr *hwmgr)
+{
+   struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend);
+   int result;
+
+   result = vega10_disable_dpm_tasks(hwmgr);
+   PP_ASSERT_WITH_CODE((0 == result),
+   "[disable_dpm_tasks] Failed to disable DPM!",
+   );
+   data->water_marks_bitmap &= ~(WaterMarksLoaded);
+
+   return result;
+}
+
+
 static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
.backend_init = vega10_hwmgr_backend_init,

[PATCH 1/3] drm/amd/powerplay: add disable_smc_ctf callback in hwmgr.

2017-04-27 Thread Rex Zhu
Export disable_smc_ctf to the eventmgr.
We need to disable the temperature alert when entering S3/S4;
otherwise, enabling the temperature alert will fail
when resuming back.

Change-Id: I6bbc85b0fc389e442a44ad4d0db3db3342ff5955
Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c | 10 ++
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c  |  1 +
 drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h   |  2 +-
 drivers/gpu/drm/amd/powerplay/inc/hwmgr.h |  1 +
 4 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index 23bba2c..fcc722e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -501,3 +501,13 @@ int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct 
amd_pp_simple_clock_i
 
return hwmgr->hwmgr_func->get_max_high_clocks(hwmgr, clocks);
 }
+
+int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr)
+{
+   PHM_FUNC_CHECK(hwmgr);
+
+   if (hwmgr->hwmgr_func->disable_smc_firmware_ctf == NULL)
+   return -EINVAL;
+
+   return hwmgr->hwmgr_func->disable_smc_firmware_ctf(hwmgr);
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 8f663ab..4ad8453 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -4695,6 +4695,7 @@ static int smu7_avfs_control(struct pp_hwmgr *hwmgr, bool 
enable)
.release_firmware = smu7_release_firmware,
.set_power_profile_state = smu7_set_power_profile_state,
.avfs_control = smu7_avfs_control,
+   .disable_smc_firmware_ctf = smu7_thermal_disable_alert,
 };
 
 uint8_t smu7_get_sleep_divider_id_from_clock(uint32_t clock,
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h 
b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
index ee58e4c..9ea3d73 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
@@ -431,6 +431,6 @@ extern int phm_display_clock_voltage_request(struct 
pp_hwmgr *hwmgr,
struct pp_display_clock_request *clock);
 
 extern int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct 
amd_pp_simple_clock_info *clocks);
-
+extern int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr);
 #endif /* _HARDWARE_MANAGER_H_ */
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h 
b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 320225d..98d38c3 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -373,6 +373,7 @@ struct pp_hwmgr_func {
int (*set_power_profile_state)(struct pp_hwmgr *hwmgr,
struct amd_pp_profile *request);
int (*avfs_control)(struct pp_hwmgr *hwmgr, bool enable);
+   int (*disable_smc_firmware_ctf)(struct pp_hwmgr *hwmgr);
 };
 
 struct pp_table_func {
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/3] drm/amd/powerplay: complete disable_smc_firmware_ctf_tasks.

2017-04-27 Thread Rex Zhu
Change-Id: I7d7f760c5996ddb00a17b34d4b0a3070a965
Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c | 2 +-
 drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c | 5 +
 drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c 
b/drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c
index 9ef2d90..b82c43a 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c
@@ -219,7 +219,7 @@
 };
 
 const pem_event_action disable_smc_firmware_ctf_tasks[] = {
-   /* PEM_Task_DisableSMCFirmwareCTF,*/
+   pem_task_disable_smc_firmware_ctf,
NULL
 };
 
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c 
b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
index e04216e..8c4ebaa 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
@@ -173,6 +173,11 @@ int pem_task_stop_asic_block_usage(struct pp_eventmgr 
*eventmgr, struct pem_even
return 0;
 }
 
+int pem_task_disable_smc_firmware_ctf(struct pp_eventmgr *eventmgr, struct 
pem_event_data *event_data)
+{
+   return phm_disable_smc_firmware_ctf(eventmgr->hwmgr);
+}
+
 int pem_task_setup_asic(struct pp_eventmgr *eventmgr, struct pem_event_data 
*event_data)
 {
return phm_setup_asic(eventmgr->hwmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h 
b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h
index 6c6297e..37e7ca5 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h
@@ -84,5 +84,6 @@
 /*thermal */
 int pem_task_initialize_thermal_controller(struct pp_eventmgr *eventmgr, 
struct pem_event_data *event_data);
 int pem_task_uninitialize_thermal_controller(struct pp_eventmgr *eventmgr, 
struct pem_event_data *event_data);
+int pem_task_disable_smc_firmware_ctf(struct pp_eventmgr *eventmgr, struct 
pem_event_data *event_data);
 
 #endif /* _EVENT_TASKS_H_ */
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: add cu info wave_front_size

2017-04-27 Thread Wang, Ken
Reviewed-by: Ken Wang 


From: Junwei Zhang 
Sent: Friday, April 28, 2017 11:10:46 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander; Wang, Ken; Zhang, Jerry
Subject: [PATCH] drm/amdgpu: add cu info wave_front_size

missed that for gfx v9 info export

Signed-off-by: Junwei Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 503010a..e330009 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -986,6 +986,7 @@ struct amdgpu_gfx_config {
 struct amdgpu_cu_info {
 uint32_t number; /* total active CU number */
 uint32_t ao_cu_mask;
+   uint32_t wave_front_size;
 uint32_t bitmap[4][4];
 };

--
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] amdgpu: add vm ioctl support

2017-04-27 Thread Zhang, Jerry
Reviewed-by: Junwei Zhang 

Regards,
Jerry (Junwei Zhang)

Linux Base Graphics
SRDC Software Development
_


> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of
> zhoucm1
> Sent: Friday, April 28, 2017 10:59
> To: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] amdgpu: add vm ioctl support
> 
> This is drm patch for dedicated vmid.
> Kernel patch has pushed to staging, anyone could have a quick review on this?
> Jerry? Guess Christian is in dream at current time.
> 
> Regards,
> David Zhou
> 
> On 2017年04月28日 10:38, Chunming Zhou wrote:
> > Change-Id: I3c1ea377dad8f6d64a992b071a6ec36a1143e416
> > Signed-off-by: Chunming Zhou 
> > ---
> >   include/drm/amdgpu_drm.h | 22 ++
> >   1 file changed, 22 insertions(+)
> >
> > diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h index
> > 26d02ba..28cda3a 100644
> > --- a/include/drm/amdgpu_drm.h
> > +++ b/include/drm/amdgpu_drm.h
> > @@ -47,6 +47,7 @@
> >   #define DRM_AMDGPU_GEM_OP 0x10
> >   #define DRM_AMDGPU_GEM_USERPTR0x11
> >   #define DRM_AMDGPU_WAIT_FENCES0x12
> > +#define DRM_AMDGPU_VM  0x13
> >   /* hybrid specific ioctls */
> >   #define DRM_AMDGPU_SEM  0x5b
> >   #define DRM_AMDGPU_GEM_DGMA   0x5c
> > @@ -66,6 +67,7 @@
> >   #define DRM_IOCTL_AMDGPU_GEM_OP
>   DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct
> drm_amdgpu_gem_op)
> >   #define DRM_IOCTL_AMDGPU_GEM_USERPTR
>   DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR,
> struct drm_amdgpu_gem_userptr)
> >   #define DRM_IOCTL_AMDGPU_WAIT_FENCES
>   DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES,
> union drm_amdgpu_wait_fences)
> > +#define DRM_IOCTL_AMDGPU_VM
>   DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union
> drm_amdgpu_vm)
> >   /* hybrid specific ioctls */
> >   #define DRM_IOCTL_AMDGPU_GEM_DGMA
>   DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_DGMA,
> struct drm_amdgpu_gem_dgma)
> >   #define DRM_IOCTL_AMDGPU_FREESYNC
>   DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FREESYNC,
> struct drm_amdgpu_freesync)
> > @@ -201,6 +203,26 @@ union drm_amdgpu_ctx {
> > union drm_amdgpu_ctx_out out;
> >   };
> >
> > +/* vm ioctl */
> > +#define AMDGPU_VM_OP_RESERVE_VMID  1
> > +#define AMDGPU_VM_OP_UNRESERVE_VMID2
> > +
> > +struct drm_amdgpu_vm_in {
> > +   /** AMDGPU_VM_OP_* */
> > +   __u32   op;
> > +   __u32   flags;
> > +};
> > +
> > +struct drm_amdgpu_vm_out {
> > +   /** For future use, no flags defined so far */
> > +   __u64   flags;
> > +};
> > +
> > +union drm_amdgpu_vm {
> > +   struct drm_amdgpu_vm_in in;
> > +   struct drm_amdgpu_vm_out out;
> > +};
> > +
> >   /* sync file related */
> >   #define AMDGPU_SEM_OP_CREATE_SEM   1
> >   #define AMDGPU_SEM_OP_WAIT_SEM 2
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] amdgpu: add vm ioctl support

2017-04-27 Thread zhoucm1

This is drm patch for dedicated vmid.
Kernel patch has pushed to staging, anyone could have a quick review on 
this? Jerry? Guess Christian is in dream at current time.


Regards,
David Zhou

On 2017年04月28日 10:38, Chunming Zhou wrote:

Change-Id: I3c1ea377dad8f6d64a992b071a6ec36a1143e416
Signed-off-by: Chunming Zhou 
---
  include/drm/amdgpu_drm.h | 22 ++
  1 file changed, 22 insertions(+)

diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index 26d02ba..28cda3a 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -47,6 +47,7 @@
  #define DRM_AMDGPU_GEM_OP 0x10
  #define DRM_AMDGPU_GEM_USERPTR0x11
  #define DRM_AMDGPU_WAIT_FENCES0x12
+#define DRM_AMDGPU_VM  0x13
  /* hybrid specific ioctls */
  #define DRM_AMDGPU_SEM  0x5b
  #define DRM_AMDGPU_GEM_DGMA   0x5c
@@ -66,6 +67,7 @@
  #define DRM_IOCTL_AMDGPU_GEM_OP   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
  #define DRM_IOCTL_AMDGPU_GEM_USERPTR  DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
  #define DRM_IOCTL_AMDGPU_WAIT_FENCES  DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
+#define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_VM, union drm_amdgpu_vm)
  /* hybrid specific ioctls */
  #define DRM_IOCTL_AMDGPU_GEM_DGMA DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_DGMA, struct drm_amdgpu_gem_dgma)
  #define DRM_IOCTL_AMDGPU_FREESYNC DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_FREESYNC, struct drm_amdgpu_freesync)
@@ -201,6 +203,26 @@ union drm_amdgpu_ctx {
union drm_amdgpu_ctx_out out;
  };
  
+/* vm ioctl */

+#define AMDGPU_VM_OP_RESERVE_VMID  1
+#define AMDGPU_VM_OP_UNRESERVE_VMID2
+
+struct drm_amdgpu_vm_in {
+   /** AMDGPU_VM_OP_* */
+   __u32   op;
+   __u32   flags;
+};
+
+struct drm_amdgpu_vm_out {
+   /** For future use, no flags defined so far */
+   __u64   flags;
+};
+
+union drm_amdgpu_vm {
+   struct drm_amdgpu_vm_in in;
+   struct drm_amdgpu_vm_out out;
+};
+
  /* sync file related */
  #define AMDGPU_SEM_OP_CREATE_SEM   1
  #define AMDGPU_SEM_OP_WAIT_SEM 2


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset v2

2017-04-27 Thread Zhang, Jerry
Nice catch!
Reviewed-by: Junwei Zhang 

Regards,
Jerry (Junwei Zhang)

Linux Base Graphics
SRDC Software Development
_


> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of
> Chunming Zhou
> Sent: Friday, April 28, 2017 10:46
> To: amd-gfx@lists.freedesktop.org
> Cc: Zhou, David(ChunMing)
> Subject: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu
> reset v2
> 
> the case could happen when gpu reset:
> 1. when gpu reset, cs can be continue until sw queue is full, then push job 
> will
> wait with holding pd reservation.
> 2. gpu_reset routine will also need pd reservation to restore page table from
> their shadow.
> 3. cs is waiting for gpu_reset complete, but gpu reset is waiting for cs 
> releases
> reservation.
> 
> v2: handle amdgpu_cs_submit error path.
> 
> Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202
> Signed-off-by: Chunming Zhou 
> Reviewed-by: Christian König 
> Reviewed-by: Junwei Zhang 
> Reviewed-by: Monk Liu 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 26168df..699f5fe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1074,6 +1074,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser
> *p,
>   cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
>   job->uf_sequence = cs->out.handle;
>   amdgpu_job_free_resources(job);
> + amdgpu_cs_parser_fini(p, 0, true);
> 
>   trace_amdgpu_cs_ioctl(job);
>   amd_sched_entity_push_job(>base);
> @@ -1129,7 +1130,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void
> *data, struct drm_file *filp)
>   goto out;
> 
>   r = amdgpu_cs_submit(, cs);
> + if (r)
> + goto out;
> 
> + return 0;
>  out:
>   amdgpu_cs_parser_fini(, r, reserved_buffers);
>   return r;
> --
> 1.9.1
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset v2

2017-04-27 Thread Chunming Zhou
The following case can happen during a gpu reset:
1. During a gpu reset, cs can continue until the sw queue is full; then push job
will wait while holding the pd reservation.
2. The gpu_reset routine will also need the pd reservation to restore the page
tables from their shadow.
3. cs is waiting for gpu_reset to complete, but gpu reset is waiting for cs to
release the reservation.

v2: handle amdgpu_cs_submit error path.

Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202
Signed-off-by: Chunming Zhou 
Reviewed-by: Christian König 
Reviewed-by: Junwei Zhang 
Reviewed-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 26168df..699f5fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1074,6 +1074,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
job->uf_sequence = cs->out.handle;
amdgpu_job_free_resources(job);
+   amdgpu_cs_parser_fini(p, 0, true);
 
trace_amdgpu_cs_ioctl(job);
amd_sched_entity_push_job(>base);
@@ -1129,7 +1130,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
goto out;
 
r = amdgpu_cs_submit(, cs);
+   if (r)
+   goto out;
 
+   return 0;
 out:
amdgpu_cs_parser_fini(, r, reserved_buffers);
return r;
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] amdgpu: add vm ioctl support

2017-04-27 Thread Chunming Zhou
Change-Id: I3c1ea377dad8f6d64a992b071a6ec36a1143e416
Signed-off-by: Chunming Zhou 
---
 include/drm/amdgpu_drm.h | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index 26d02ba..28cda3a 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -47,6 +47,7 @@
 #define DRM_AMDGPU_GEM_OP  0x10
 #define DRM_AMDGPU_GEM_USERPTR 0x11
 #define DRM_AMDGPU_WAIT_FENCES 0x12
+#define DRM_AMDGPU_VM  0x13
 /* hybrid specific ioctls */
 #define DRM_AMDGPU_SEM  0x5b
 #define DRM_AMDGPU_GEM_DGMA0x5c
@@ -66,6 +67,7 @@
 #define DRM_IOCTL_AMDGPU_GEM_OPDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
 #define DRM_IOCTL_AMDGPU_GEM_USERPTR   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
 #define DRM_IOCTL_AMDGPU_WAIT_FENCES   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
+#define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_VM, union drm_amdgpu_vm)
 /* hybrid specific ioctls */
 #define DRM_IOCTL_AMDGPU_GEM_DGMA  DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_DGMA, struct drm_amdgpu_gem_dgma)
 #define DRM_IOCTL_AMDGPU_FREESYNC  DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_FREESYNC, struct drm_amdgpu_freesync)
@@ -201,6 +203,26 @@ union drm_amdgpu_ctx {
union drm_amdgpu_ctx_out out;
 };
 
+/* vm ioctl */
+#define AMDGPU_VM_OP_RESERVE_VMID  1
+#define AMDGPU_VM_OP_UNRESERVE_VMID2
+
+struct drm_amdgpu_vm_in {
+   /** AMDGPU_VM_OP_* */
+   __u32   op;
+   __u32   flags;
+};
+
+struct drm_amdgpu_vm_out {
+   /** For future use, no flags defined so far */
+   __u64   flags;
+};
+
+union drm_amdgpu_vm {
+   struct drm_amdgpu_vm_in in;
+   struct drm_amdgpu_vm_out out;
+};
+
 /* sync file related */
 #define AMDGPU_SEM_OP_CREATE_SEM   1
 #define AMDGPU_SEM_OP_WAIT_SEM 2
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset

2017-04-27 Thread Zhang, Jerry (Junwei)

On 04/27/2017 08:04 PM, Christian König wrote:

Am 27.04.2017 um 12:23 schrieb zhoucm1:



On 2017年04月27日 18:11, Christian König wrote:

Am 27.04.2017 um 11:08 schrieb Zhang, Jerry (Junwei):

On 04/27/2017 04:25 PM, Chunming Zhou wrote:

the case could happen when gpu reset:
1. when gpu reset, cs can be continue until sw queue is full, then push
job will wait with holding pd reservation.
2. gpu_reset routine will also need pd reservation to restore page table
from their shadow.
3. cs is waiting for gpu_reset complete, but gpu reset is waiting for cs
releases reservation.

Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202
Signed-off-by: Chunming Zhou 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9edb1a4..a6722a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1076,6 +1076,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
  amdgpu_job_free_resources(job);

  trace_amdgpu_cs_ioctl(job);
+amdgpu_cs_parser_fini(p, 0, true);


Please confirm:
It will not free/release more things that may be accessed in job working
process, as cs_parse_fini free so many things related to parser.


Yeah, that indeed won't work. amdgpu_cs_parser_fini() does:


No, with my patch, parser->job already is NULL here, same as previous
sequence, just put push_job action after amdgpu_cs_parser_fini()


Ah! You add that into amdgpu_cs_submit! That was the point I was missing.


Thanks your all input.
See it too.

Feel free to add my RB, if need.
Reviewed-by: Junwei Zhang 

Jerry




  trace_amdgpu_cs_ioctl(job);
+amdgpu_cs_parser_fini(p, 0, true);
  amd_sched_entity_push_job(>base);

Can we keep the trace_amdgpu_cs_ioctl(); directly before the
amd_sched_entity_push_job()?

With that changed, the patch is Reviewed-by: Christian König
.

Regards,
Christian.


if (parser->job)
amdgpu_job_free(parser->job);

And amdgpu_cs_submit() sets the fence, so it must be called before
amdgpu_cs_parser_fini().

yes, fence is already generated, so amdgpu_cs_parser_fini before pushing job
is completely safe here.


But apart from that this is a rather nifty idea. What was the problem with
the initial patch?

Just as comments in patch, which is found by Monk.

Regards,
David Zhou




Or we can just release the pd reservation here?


We could run into problem with the reservation ticked with that. So I would
want to avoid that.

Christian.



Jerry


amd_sched_entity_push_job(>base);

  return 0;
@@ -1130,6 +1131,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void
*data, struct drm_file *filp)

  r = amdgpu_cs_submit(, cs);

+return r;
  out:
  amdgpu_cs_parser_fini(, r, reserved_buffers);
  return r;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx





___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[RFC] drm/amd/amdgpu: get rid of else branch

2017-04-27 Thread Nikola Pajkovsky
This is a super-simple elimination of the else branch, and I should
probably even use unlikely() in

if (ring->count_dw < count_dw) {

However, amdgpu_ring_write() has a similar if condition, but does not
return after DRM_ERROR, which looks suspicious. On error, we are still
adding v to the ring and decrementing count_dw below zero.

if (ring->count_dw <= 0)
DRM_ERROR("amdgpu: writing more dwords to the ring than 
expected!\n");
ring->ring[ring->wptr++] = v;
ring->wptr &= ring->ptr_mask;
ring->count_dw--;

I can obviously be totally wrong. Hmm?

8<8<8<8<8<8<8<8<8<
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c1b913541739..c6f4f874ea68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1596,28 +1596,29 @@ static inline void amdgpu_ring_write_multiple(struct 
amdgpu_ring *ring, void *sr
 
if (ring->count_dw < count_dw) {
DRM_ERROR("amdgpu: writing more dwords to the ring than 
expected!\n");
-   } else {
-   occupied = ring->wptr & ring->ptr_mask;
-   dst = (void *)>ring[occupied];
-   chunk1 = ring->ptr_mask + 1 - occupied;
-   chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
-   chunk2 = count_dw - chunk1;
-   chunk1 <<= 2;
-   chunk2 <<= 2;
-
-   if (chunk1)
-   memcpy(dst, src, chunk1);
-
-   if (chunk2) {
-   src += chunk1;
-   dst = (void *)ring->ring;
-   memcpy(dst, src, chunk2);
-   }
-
-   ring->wptr += count_dw;
-   ring->wptr &= ring->ptr_mask;
-   ring->count_dw -= count_dw;
+   return;
}
+
+   occupied = ring->wptr & ring->ptr_mask;
+   dst = (void *)>ring[occupied];
+   chunk1 = ring->ptr_mask + 1 - occupied;
+   chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
+   chunk2 = count_dw - chunk1;
+   chunk1 <<= 2;
+   chunk2 <<= 2;
+
+   if (chunk1)
+   memcpy(dst, src, chunk1);
+
+   if (chunk2) {
+   src += chunk1;
+   dst = (void *)ring->ring;
+   memcpy(dst, src, chunk2);
+   }
+
+   ring->wptr += count_dw;
+   ring->wptr &= ring->ptr_mask;
+   ring->count_dw -= count_dw;
 }
 
 static inline struct amdgpu_sdma_instance *
-- 
2.12.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH umr] Avoid opening the DRM file by default

2017-04-27 Thread Tom St Denis
Opening the DRM file (/dev/dri/card%d) triggers all sorts of KMD
work to happen which is not useful if the KMD is hung or not working.

Since --top is the only user of the file currently we simply defer
opening it until --top is invoked.

Signed-off-by: Tom St Denis 
---
 src/app/top.c  | 7 +++
 src/lib/discover.c | 3 +--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/app/top.c b/src/app/top.c
index 364180eb70f6..a4d3aa8e699d 100644
--- a/src/app/top.c
+++ b/src/app/top.c
@@ -933,8 +933,15 @@ void umr_top(struct umr_asic *asic)
time_t tt;
uint64_t ts;
char hostname[64] = { 0 };
+   char fname[64];
pthread_t sensor_thread;
 
+   // open drm file if not already open
+   if (asic->fd.drm < 0) {
+   snprintf(fname, sizeof(fname)-1, "/dev/dri/card%d", 
asic->instance);
+   asic->fd.drm = open(fname, O_RDWR);
+   }
+
if (getenv("HOSTNAME")) strcpy(hostname, getenv("HOSTNAME"));
 
// init stats
diff --git a/src/lib/discover.c b/src/lib/discover.c
index d561efafe4d4..c9c2f74a4818 100644
--- a/src/lib/discover.c
+++ b/src/lib/discover.c
@@ -127,8 +127,7 @@ struct umr_asic *umr_discover_asic(struct umr_options 
*options)
asic->fd.vram = open(fname, O_RDWR);
snprintf(fname, sizeof(fname)-1, 
"/sys/kernel/debug/dri/%d/amdgpu_gpr", asic->instance);
asic->fd.gpr = open(fname, O_RDWR);
-   snprintf(fname, sizeof(fname)-1, "/dev/dri/card%d", 
asic->instance);
-   asic->fd.drm = open(fname, O_RDWR);
+   asic->fd.drm = -1; // default to closed
// if appending to the fd list remember to update close_asic() 
and discover_by_did()...
 
if (options->use_pci) {
-- 
2.12.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 10/11] drm/amdgpu: forward operation context to ttm_bo_mem_space

2017-04-27 Thread Christian König
From: Christian König 

This way we can finally use some more stats.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 30 --
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 061fd5f..5c90601 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -364,12 +364,10 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
return r;
 }
 
-static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo,
-   bool evict, bool interruptible,
-   bool no_wait_gpu,
+static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
+   struct ttm_operation_ctx *ctx,
struct ttm_mem_reg *new_mem)
 {
-   struct ttm_operation_ctx ctx = { interruptible, no_wait_gpu };
struct amdgpu_device *adev;
struct ttm_mem_reg *old_mem = >mem;
struct ttm_mem_reg tmp_mem;
@@ -387,7 +385,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object 
*bo,
placements.fpfn = 0;
placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
-   r = ttm_bo_mem_space(bo, , _mem, );
+   r = ttm_bo_mem_space(bo, , _mem, ctx);
if (unlikely(r)) {
return r;
}
@@ -401,22 +399,20 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object 
*bo,
if (unlikely(r)) {
goto out_cleanup;
}
-   r = amdgpu_move_blit(bo, true, no_wait_gpu, _mem, old_mem);
+   r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, _mem, old_mem);
if (unlikely(r)) {
goto out_cleanup;
}
-   r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem);
+   r = ttm_bo_move_ttm(bo, ctx->interruptible, ctx->no_wait_gpu, new_mem);
 out_cleanup:
ttm_bo_mem_put(bo, _mem);
return r;
 }
 
-static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo,
-   bool evict, bool interruptible,
-   bool no_wait_gpu,
+static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
+   struct ttm_operation_ctx *ctx,
struct ttm_mem_reg *new_mem)
 {
-   struct ttm_operation_ctx ctx = { interruptible, no_wait_gpu };
struct amdgpu_device *adev;
struct ttm_mem_reg *old_mem = >mem;
struct ttm_mem_reg tmp_mem;
@@ -434,15 +430,15 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object 
*bo,
placements.fpfn = 0;
placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
-   r = ttm_bo_mem_space(bo, , _mem, );
+   r = ttm_bo_mem_space(bo, , _mem, ctx);
if (unlikely(r)) {
return r;
}
-   r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, _mem);
+   r = ttm_bo_move_ttm(bo, ctx->interruptible, ctx->no_wait_gpu, _mem);
if (unlikely(r)) {
goto out_cleanup;
}
-   r = amdgpu_move_blit(bo, true, no_wait_gpu, new_mem, old_mem);
+   r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem);
if (unlikely(r)) {
goto out_cleanup;
}
@@ -488,12 +484,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, 
bool evict,
 
if (old_mem->mem_type == TTM_PL_VRAM &&
new_mem->mem_type == TTM_PL_SYSTEM) {
-   r = amdgpu_move_vram_ram(bo, evict, ctx->interruptible,
-   ctx->no_wait_gpu, new_mem);
+   r = amdgpu_move_vram_ram(bo, evict, ctx, new_mem);
} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
   new_mem->mem_type == TTM_PL_VRAM) {
-   r = amdgpu_move_ram_vram(bo, evict, ctx->interruptible,
-   ctx->no_wait_gpu, new_mem);
+   r = amdgpu_move_ram_vram(bo, evict, ctx, new_mem);
} else {
r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu,
 new_mem, old_mem);
-- 
2.5.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 06/11] drm/ttm: use an operation context for ttm_bo_mem_space

2017-04-27 Thread Christian König
From: Christian König 

Instead of specifying interruptible and no_wait_gpu manually.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  8 
 drivers/gpu/drm/nouveau/nouveau_bo.c|  6 --
 drivers/gpu/drm/radeon/radeon_ttm.c |  8 
 drivers/gpu/drm/ttm/ttm_bo.c| 22 +++---
 include/drm/ttm/ttm_bo_driver.h |  3 +--
 5 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 58873cf..e386657 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -369,6 +369,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object 
*bo,
bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
 {
+   struct ttm_operation_ctx ctx = { interruptible, no_wait_gpu };
struct amdgpu_device *adev;
struct ttm_mem_reg *old_mem = >mem;
struct ttm_mem_reg tmp_mem;
@@ -386,8 +387,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object 
*bo,
placements.fpfn = 0;
placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
-   r = ttm_bo_mem_space(bo, , _mem,
-interruptible, no_wait_gpu);
+   r = ttm_bo_mem_space(bo, , _mem, );
if (unlikely(r)) {
return r;
}
@@ -416,6 +416,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object 
*bo,
bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
 {
+   struct ttm_operation_ctx ctx = { interruptible, no_wait_gpu };
struct amdgpu_device *adev;
struct ttm_mem_reg *old_mem = >mem;
struct ttm_mem_reg tmp_mem;
@@ -433,8 +434,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object 
*bo,
placements.fpfn = 0;
placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
-   r = ttm_bo_mem_space(bo, , _mem,
-interruptible, no_wait_gpu);
+   r = ttm_bo_mem_space(bo, , _mem, );
if (unlikely(r)) {
return r;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index ad3778b..8486da6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1126,6 +1126,7 @@ static int
 nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
  bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
+   struct ttm_operation_ctx ctx = { intr, no_wait_gpu };
struct ttm_place placement_memtype = {
.fpfn = 0,
.lpfn = 0,
@@ -1140,7 +1141,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool 
evict, bool intr,
 
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
-   ret = ttm_bo_mem_space(bo, , _mem, intr, no_wait_gpu);
+   ret = ttm_bo_mem_space(bo, , _mem, );
if (ret)
return ret;
 
@@ -1162,6 +1163,7 @@ static int
 nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
  bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
+   struct ttm_operation_ctx ctx = { intr, no_wait_gpu };
struct ttm_place placement_memtype = {
.fpfn = 0,
.lpfn = 0,
@@ -1176,7 +1178,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool 
evict, bool intr,
 
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
-   ret = ttm_bo_mem_space(bo, , _mem, intr, no_wait_gpu);
+   ret = ttm_bo_mem_space(bo, , _mem, );
if (ret)
return ret;
 
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index d07ff84..b4bcff5 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -311,6 +311,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object 
*bo,
bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
 {
+   struct ttm_operation_ctx ctx = { interruptible, no_wait_gpu };
struct radeon_device *rdev;
struct ttm_mem_reg *old_mem = >mem;
struct ttm_mem_reg tmp_mem;
@@ -328,8 +329,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object 
*bo,
placements.fpfn = 0;
placements.lpfn = 0;
placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
-   r = ttm_bo_mem_space(bo, , _mem,
-interruptible, no_wait_gpu);
+   r = ttm_bo_mem_space(bo, , _mem, );
if (unlikely(r)) {
return r;
}
@@ -358,6 +358,7 @@ static int radeon_move_ram_vram(struct ttm_buffer_object 
*bo,

[PATCH 04/11] drm/ttm: add operation ctx to ttm_bo_validate

2017-04-27 Thread Christian König
From: Christian König 

Give moving a BO into place an operation context to work with.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  9 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c|  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c|  6 --
 drivers/gpu/drm/ast/ast_ttm.c  |  9 ++---
 drivers/gpu/drm/bochs/bochs_mm.c   |  6 --
 drivers/gpu/drm/cirrus/cirrus_ttm.c|  6 --
 drivers/gpu/drm/mgag200/mgag200_ttm.c  |  9 ++---
 drivers/gpu/drm/nouveau/nouveau_bo.c   |  4 ++--
 drivers/gpu/drm/qxl/qxl_ioctl.c|  4 ++--
 drivers/gpu/drm/qxl/qxl_object.c   |  6 --
 drivers/gpu/drm/qxl/qxl_release.c  |  4 ++--
 drivers/gpu/drm/radeon/radeon_gem.c|  3 ++-
 drivers/gpu/drm/radeon/radeon_mn.c |  3 ++-
 drivers/gpu/drm/radeon/radeon_object.c | 14 +-
 drivers/gpu/drm/radeon/radeon_vm.c |  3 ++-
 drivers/gpu/drm/ttm/ttm_bo.c   | 16 +---
 drivers/gpu/drm/virtio/virtgpu_ioctl.c | 11 ++-
 drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c|  3 ++-
 drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c | 21 +
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c|  9 -
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c   |  6 --
 drivers/gpu/drm/vmwgfx/vmwgfx_shader.c |  3 ++-
 include/drm/ttm/ttm_bo_api.h   | 20 
 25 files changed, 124 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 26168df..31613ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -373,6 +373,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 struct amdgpu_bo *bo)
 {
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+   struct ttm_operation_ctx ctx = { true, false };
u64 initial_bytes_moved;
uint32_t domain;
int r;
@@ -391,7 +392,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 retry:
amdgpu_ttm_placement_from_domain(bo, domain);
initial_bytes_moved = atomic64_read(>num_bytes_moved);
-   r = ttm_bo_validate(>tbo, >placement, true, false);
+   r = ttm_bo_validate(>tbo, >placement, );
p->bytes_moved += atomic64_read(>num_bytes_moved) -
initial_bytes_moved;
 
@@ -408,6 +409,7 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
struct amdgpu_bo *validated)
 {
uint32_t domain = validated->allowed_domains;
+   struct ttm_operation_ctx ctx = { true, false };
int r;
 
if (!p->evictable)
@@ -440,7 +442,7 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
/* Good we can try to move this BO somewhere else */
amdgpu_ttm_placement_from_domain(bo, other);
initial_bytes_moved = atomic64_read(>num_bytes_moved);
-   r = ttm_bo_validate(>tbo, >placement, true, false);
+   r = ttm_bo_validate(>tbo, >placement, );
p->bytes_moved += atomic64_read(>num_bytes_moved) -
initial_bytes_moved;
 
@@ -1419,6 +1421,7 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
  */
 int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
 {
+   struct ttm_operation_ctx ctx = { false, false };
unsigned i;
int r;
 
@@ -1437,7 +1440,7 @@ int amdgpu_cs_sysvm_access_required(struct 
amdgpu_cs_parser *parser)
 
bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
amdgpu_ttm_placement_from_domain(bo, bo->allowed_domains);
-   r = ttm_bo_validate(>tbo, >placement, false, false);
+   r = ttm_bo_validate(>tbo, >placement, );
if (unlikely(r))
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 67be795..e5157b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -306,6 +306,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void 
*data,
 int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 struct drm_file *filp)
 {
+   struct ttm_operation_ctx ctx = { true, false };
struct amdgpu_device *adev = dev->dev_private;
struct drm_amdgpu_gem_userptr *args = data;
struct drm_gem_object *gobj;
@@ -362,7 +363,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void 
*data,
goto free_pages;
 
amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
-   r = 

[PATCH 11/11] drm/amdgpu: use the new TTM bytes moved counter

2017-04-27 Thread Christian König
From: Christian König 

Instead of the global statistics, use the per-context bytes-moved counter.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 7 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 +
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 31613ab..7486827 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -372,9 +372,7 @@ void amdgpu_cs_report_moved_bytes(struct amdgpu_device 
*adev, u64 num_bytes)
 static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 struct amdgpu_bo *bo)
 {
-   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { true, false };
-   u64 initial_bytes_moved;
uint32_t domain;
int r;
 
@@ -391,16 +389,13 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser 
*p,
 
 retry:
amdgpu_ttm_placement_from_domain(bo, domain);
-   initial_bytes_moved = atomic64_read(>num_bytes_moved);
r = ttm_bo_validate(>tbo, >placement, );
-   p->bytes_moved += atomic64_read(>num_bytes_moved) -
-   initial_bytes_moved;
-
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
domain = bo->allowed_domains;
goto retry;
}
 
+   p->bytes_moved += ctx.bytes_moved;
return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 364d894..8f49b85 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -323,7 +323,6 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
struct amdgpu_bo *bo;
enum ttm_bo_type type;
unsigned long page_align;
-   u64 initial_bytes_moved;
size_t acc_size;
int r;
 
@@ -395,12 +394,10 @@ int amdgpu_bo_create_restricted(struct amdgpu_device 
*adev,
amdgpu_fill_placement_to_bo(bo, placement);
/* Kernel allocation are uninterruptible */
 
-   initial_bytes_moved = atomic64_read(>num_bytes_moved);
r = ttm_bo_init_reserved(>mman.bdev, >tbo, size, type,
 >placement, page_align, , NULL,
 acc_size, sg, resv, _ttm_bo_destroy);
-   amdgpu_cs_report_moved_bytes(adev,
-   atomic64_read(>num_bytes_moved) - initial_bytes_moved);
+   amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved);
 
if (unlikely(r != 0))
return r;
-- 
2.5.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 09/11] drm/ttm: add number of bytes moved to the operation context

2017-04-27 Thread Christian König
From: Christian König 

Add some statistics on how many bytes we have moved.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 1 +
 include/drm/ttm/ttm_bo_api.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 0c3c06a..0c5bc9e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -378,6 +378,7 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object 
*bo,
else
bo->offset = 0;
 
+   ctx->bytes_moved += bo->num_pages << PAGE_SHIFT;
return 0;
 
 out_err:
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 4b4e9e0..91b4afb 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -270,6 +270,7 @@ struct ttm_bo_kmap_obj {
 struct ttm_operation_ctx {
bool interruptible;
bool no_wait_gpu;
+   uint64_t bytes_moved;
 };
 
 /**
-- 
2.5.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 07/11] drm/ttm: use the operation context inside TTM

2017-04-27 Thread Christian König
From: Christian König 

Instead of passing down the parameters manually to every function.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 69 +++-
 1 file changed, 30 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index caddcf6..c288852 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -286,9 +286,8 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, 
bool zero_alloc)
 }
 
 static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
- struct ttm_mem_reg *mem,
- bool evict, bool interruptible,
- bool no_wait_gpu)
+ struct ttm_mem_reg *mem, bool evict,
+ struct ttm_operation_ctx *ctx)
 {
struct ttm_bo_device *bdev = bo->bdev;
bool old_is_pci = ttm_mem_reg_is_pci(bdev, >mem);
@@ -342,12 +341,14 @@ static int ttm_bo_handle_move_mem(struct 
ttm_buffer_object *bo,
 
if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED))
-   ret = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, mem);
+   ret = ttm_bo_move_ttm(bo, ctx->interruptible,
+ ctx->no_wait_gpu, mem);
else if (bdev->driver->move)
-   ret = bdev->driver->move(bo, evict, interruptible,
-no_wait_gpu, mem);
+   ret = bdev->driver->move(bo, evict, ctx->interruptible,
+ctx->no_wait_gpu, mem);
else
-   ret = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, mem);
+   ret = ttm_bo_move_memcpy(bo, ctx->interruptible,
+ctx->no_wait_gpu, mem);
 
if (ret) {
if (bdev->driver->move_notify) {
@@ -662,10 +663,9 @@ void ttm_bo_unlock_delayed_workqueue(struct ttm_bo_device 
*bdev, int resched)
 }
 EXPORT_SYMBOL(ttm_bo_unlock_delayed_workqueue);
 
-static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
-   bool no_wait_gpu)
+static int ttm_bo_evict(struct ttm_buffer_object *bo,
+   struct ttm_operation_ctx *ctx)
 {
-   struct ttm_operation_ctx ctx = { interruptible, no_wait_gpu };
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_mem_reg evict_mem;
struct ttm_placement placement;
@@ -681,7 +681,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
placement.num_placement = 0;
placement.num_busy_placement = 0;
bdev->driver->evict_flags(bo, );
-   ret = ttm_bo_mem_space(bo, , _mem, );
+   ret = ttm_bo_mem_space(bo, , _mem, ctx);
if (ret) {
if (ret != -ERESTARTSYS) {
pr_err("Failed to find memory space for buffer 0x%p 
eviction\n",
@@ -691,8 +691,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
goto out;
}
 
-   ret = ttm_bo_handle_move_mem(bo, _mem, true,
-interruptible, no_wait_gpu);
+   ret = ttm_bo_handle_move_mem(bo, _mem, true, ctx);
if (unlikely(ret)) {
if (ret != -ERESTARTSYS)
pr_err("Buffer eviction failed\n");
@@ -719,10 +718,9 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 EXPORT_SYMBOL(ttm_bo_eviction_valuable);
 
 static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
-   uint32_t mem_type,
-   const struct ttm_place *place,
-   bool interruptible,
-   bool no_wait_gpu)
+  uint32_t mem_type,
+  const struct ttm_place *place,
+  struct ttm_operation_ctx *ctx)
 {
struct ttm_bo_global *glob = bdev->glob;
struct ttm_mem_type_manager *man = >man[mem_type];
@@ -759,8 +757,8 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
kref_get(>list_kref);
 
if (!list_empty(>ddestroy)) {
-   ret = ttm_bo_cleanup_refs_and_unlock(bo, interruptible,
-no_wait_gpu);
+   ret = ttm_bo_cleanup_refs_and_unlock(bo, ctx->interruptible,
+ctx->no_wait_gpu);
kref_put(>list_kref, ttm_bo_release_list);
return ret;
}
@@ -772,7 +770,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 
ttm_bo_list_ref_sub(bo, put_count, true);
 
-   ret = ttm_bo_evict(bo, interruptible, no_wait_gpu);
+   ret = ttm_bo_evict(bo, ctx);

[PATCH 02/11] drm/ttm: cleanup ttm_bo_driver.h

2017-04-27 Thread Christian König
From: Christian König 

Extern is the default for function declarations anyway.

Signed-off-by: Christian König 
---
 include/drm/ttm/ttm_bo_driver.h | 113 +++-
 1 file changed, 54 insertions(+), 59 deletions(-)

diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index cb5072a..9ce2017 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -608,12 +608,12 @@ ttm_flag_masked(uint32_t *old, uint32_t new, uint32_t 
mask)
  * Returns:
  * NULL: Out of memory.
  */
-extern int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev,
-   unsigned long size, uint32_t page_flags,
-   struct page *dummy_read_page);
-extern int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_bo_device 
*bdev,
-  unsigned long size, uint32_t page_flags,
-  struct page *dummy_read_page);
+int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev,
+   unsigned long size, uint32_t page_flags,
+   struct page *dummy_read_page);
+int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_bo_device *bdev,
+   unsigned long size, uint32_t page_flags,
+   struct page *dummy_read_page);
 
 /**
  * ttm_tt_fini
@@ -622,8 +622,8 @@ extern int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, 
struct ttm_bo_device *bde
  *
  * Free memory of ttm_tt structure
  */
-extern void ttm_tt_fini(struct ttm_tt *ttm);
-extern void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma);
+void ttm_tt_fini(struct ttm_tt *ttm);
+void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma);
 
 /**
  * ttm_ttm_bind:
@@ -633,7 +633,7 @@ extern void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma);
  *
  * Bind the pages of @ttm to an aperture location identified by @bo_mem
  */
-extern int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
+int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
 
 /**
  * ttm_ttm_destroy:
@@ -642,7 +642,7 @@ extern int ttm_tt_bind(struct ttm_tt *ttm, struct 
ttm_mem_reg *bo_mem);
  *
  * Unbind, unpopulate and destroy common struct ttm_tt.
  */
-extern void ttm_tt_destroy(struct ttm_tt *ttm);
+void ttm_tt_destroy(struct ttm_tt *ttm);
 
 /**
  * ttm_ttm_unbind:
@@ -651,7 +651,7 @@ extern void ttm_tt_destroy(struct ttm_tt *ttm);
  *
  * Unbind a struct ttm_tt.
  */
-extern void ttm_tt_unbind(struct ttm_tt *ttm);
+void ttm_tt_unbind(struct ttm_tt *ttm);
 
 /**
  * ttm_tt_swapin:
@@ -660,7 +660,7 @@ extern void ttm_tt_unbind(struct ttm_tt *ttm);
  *
  * Swap in a previously swap out ttm_tt.
  */
-extern int ttm_tt_swapin(struct ttm_tt *ttm);
+int ttm_tt_swapin(struct ttm_tt *ttm);
 
 /**
  * ttm_tt_set_placement_caching:
@@ -675,9 +675,8 @@ extern int ttm_tt_swapin(struct ttm_tt *ttm);
  * hit RAM. This function may be very costly as it involves global TLB
  * and cache flushes and potential page splitting / combining.
  */
-extern int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t 
placement);
-extern int ttm_tt_swapout(struct ttm_tt *ttm,
- struct file *persistent_swap_storage);
+int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement);
+int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistent_swap_storage);
 
 /**
  * ttm_tt_unpopulate - free pages from a ttm
@@ -686,7 +685,7 @@ extern int ttm_tt_swapout(struct ttm_tt *ttm,
  *
  * Calls the driver method to free all pages from a ttm
  */
-extern void ttm_tt_unpopulate(struct ttm_tt *ttm);
+void ttm_tt_unpopulate(struct ttm_tt *ttm);
 
 /*
  * ttm_bo.c
@@ -701,8 +700,7 @@ extern void ttm_tt_unpopulate(struct ttm_tt *ttm);
  * Returns true if the memory described by @mem is PCI memory,
  * false otherwise.
  */
-extern bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev,
-  struct ttm_mem_reg *mem);
+bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem);
 
 /**
  * ttm_bo_mem_space
@@ -723,21 +721,20 @@ extern bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev,
  * fragmentation or concurrent allocators.
  * -ERESTARTSYS: An interruptible sleep was interrupted by a signal.
  */
-extern int ttm_bo_mem_space(struct ttm_buffer_object *bo,
-   struct ttm_placement *placement,
-   struct ttm_mem_reg *mem,
-   bool interruptible,
-   bool no_wait_gpu);
-
-extern void ttm_bo_mem_put(struct ttm_buffer_object *bo,
+int ttm_bo_mem_space(struct ttm_buffer_object *bo,
+struct ttm_placement *placement,
+struct ttm_mem_reg *mem,
+bool interruptible,
+bool no_wait_gpu);
+
+void ttm_bo_mem_put(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem);
+void ttm_bo_mem_put_locked(struct ttm_buffer_object *bo,

[PATCH 05/11] drm/ttm: use an operation ctx for ttm_bo_init_reserved

2017-04-27 Thread Christian König
From: Christian König 

Instead of specifying if sleeping should be interruptible.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  3 ++-
 drivers/gpu/drm/ttm/ttm_bo.c   | 12 +---
 include/drm/ttm/ttm_bo_api.h   |  5 ++---
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 5ef9be6..364d894 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -319,6 +319,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
struct reservation_object *resv,
struct amdgpu_bo **bo_ptr)
 {
+   struct ttm_operation_ctx ctx = { !kernel, false };
struct amdgpu_bo *bo;
enum ttm_bo_type type;
unsigned long page_align;
@@ -396,7 +397,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 
initial_bytes_moved = atomic64_read(>num_bytes_moved);
r = ttm_bo_init_reserved(>mman.bdev, >tbo, size, type,
->placement, page_align, !kernel, NULL,
+>placement, page_align, , NULL,
 acc_size, sg, resv, _ttm_bo_destroy);
amdgpu_cs_report_moved_bytes(adev,
atomic64_read(>num_bytes_moved) - initial_bytes_moved);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 10707923..f1e8833 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1130,7 +1130,7 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
 enum ttm_bo_type type,
 struct ttm_placement *placement,
 uint32_t page_alignment,
-bool interruptible,
+struct ttm_operation_ctx *ctx,
 struct file *persistent_swap_storage,
 size_t acc_size,
 struct sg_table *sg,
@@ -1216,11 +1216,8 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
WARN_ON(!locked);
}
 
-   if (likely(!ret)) {
-   struct ttm_operation_ctx ctx = { interruptible, false };
-
-   ret = ttm_bo_validate(bo, placement, );
-   }
+   if (likely(!ret))
+   ret = ttm_bo_validate(bo, placement, ctx);
 
if (unlikely(ret)) {
if (!resv)
@@ -1253,10 +1250,11 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
struct reservation_object *resv,
void (*destroy) (struct ttm_buffer_object *))
 {
+   struct ttm_operation_ctx ctx = { interruptible, false };
int ret;
 
ret = ttm_bo_init_reserved(bdev, bo, size, type, placement,
-  page_alignment, interruptible,
+  page_alignment, ,
   persistent_swap_storage, acc_size,
   sg, resv, destroy);
if (ret)
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 25c1ca8..4b4e9e0 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -468,8 +468,7 @@ size_t ttm_bo_dma_acc_size(struct ttm_bo_device *bdev,
  * @type: Requested type of buffer object.
  * @flags: Initial placement flags.
  * @page_alignment: Data alignment in pages.
- * @interruptible: If needing to sleep to wait for GPU resources,
- * sleep interruptible.
+ * @ctx: TTM operation context for memory allocation.
  * @persistent_swap_storage: Usually the swap storage is deleted for buffers
  * pinned in physical memory. If this behaviour is not desired, this member
  * holds a pointer to a persistent shmem object. Typically, this would
@@ -506,7 +505,7 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
 enum ttm_bo_type type,
 struct ttm_placement *placement,
 uint32_t page_alignment,
-bool interrubtible,
+struct ttm_operation_ctx *ctx,
 struct file *persistent_swap_storage,
 size_t acc_size,
 struct sg_table *sg,
-- 
2.5.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 01/11] drm/ttm: cleanup coding style in ttm_bo_api.h

2017-04-27 Thread Christian König
From: Christian König 

Extern is the default for function declarations anyway, and this
solves a bunch of 80-characters-per-line issues.

Signed-off-by: Christian König 
---
 include/drm/ttm/ttm_bo_api.h | 135 ++-
 1 file changed, 56 insertions(+), 79 deletions(-)

diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 3b302a5..425f3b4 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -288,8 +288,7 @@ ttm_bo_reference(struct ttm_buffer_object *bo)
  * Returns -EBUSY if no_wait is true and the buffer is busy.
  * Returns -ERESTARTSYS if interrupted by a signal.
  */
-extern int ttm_bo_wait(struct ttm_buffer_object *bo,
-  bool interruptible, bool no_wait);
+int ttm_bo_wait(struct ttm_buffer_object *bo, bool interruptible, bool 
no_wait);
 
 /**
  * ttm_bo_mem_compat - Check if proposed placement is compatible with a bo
@@ -300,9 +299,8 @@ extern int ttm_bo_wait(struct ttm_buffer_object *bo,
  *
  * Returns true if the placement is compatible
  */
-extern bool ttm_bo_mem_compat(struct ttm_placement *placement,
- struct ttm_mem_reg *mem,
- uint32_t *new_flags);
+bool ttm_bo_mem_compat(struct ttm_placement *placement, struct ttm_mem_reg 
*mem,
+  uint32_t *new_flags);
 
 /**
  * ttm_bo_validate
@@ -320,10 +318,10 @@ extern bool ttm_bo_mem_compat(struct ttm_placement 
*placement,
  * -EBUSY if no_wait is true and buffer busy.
  * -ERESTARTSYS if interrupted by a signal.
  */
-extern int ttm_bo_validate(struct ttm_buffer_object *bo,
-   struct ttm_placement *placement,
-   bool interruptible,
-   bool no_wait_gpu);
+int ttm_bo_validate(struct ttm_buffer_object *bo,
+   struct ttm_placement *placement,
+   bool interruptible,
+   bool no_wait_gpu);
 
 /**
  * ttm_bo_unref
@@ -332,7 +330,7 @@ extern int ttm_bo_validate(struct ttm_buffer_object *bo,
  *
  * Unreference and clear a pointer to a buffer object.
  */
-extern void ttm_bo_unref(struct ttm_buffer_object **bo);
+void ttm_bo_unref(struct ttm_buffer_object **bo);
 
 
 /**
@@ -344,8 +342,8 @@ extern void ttm_bo_unref(struct ttm_buffer_object **bo);
  *
  * Release @count lru list references to this buffer object.
  */
-extern void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count,
-   bool never_free);
+void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count,
+bool never_free);
 
 /**
  * ttm_bo_add_to_lru
@@ -357,7 +355,7 @@ extern void ttm_bo_list_ref_sub(struct ttm_buffer_object 
*bo, int count,
  * This function must be called with struct ttm_bo_global::lru_lock held, and
  * is typically called immediately prior to unreserving a bo.
  */
-extern void ttm_bo_add_to_lru(struct ttm_buffer_object *bo);
+void ttm_bo_add_to_lru(struct ttm_buffer_object *bo);
 
 /**
  * ttm_bo_del_from_lru
@@ -369,7 +367,7 @@ extern void ttm_bo_add_to_lru(struct ttm_buffer_object *bo);
  * and is usually called just immediately after the bo has been reserved to
  * avoid recursive reservation from lru lists.
  */
-extern int ttm_bo_del_from_lru(struct ttm_buffer_object *bo);
+int ttm_bo_del_from_lru(struct ttm_buffer_object *bo);
 
 /**
  * ttm_bo_move_to_lru_tail
@@ -380,7 +378,7 @@ extern int ttm_bo_del_from_lru(struct ttm_buffer_object 
*bo);
  * object. This function must be called with struct ttm_bo_global::lru_lock
  * held, and is used to make a BO less likely to be considered for eviction.
  */
-extern void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo);
+void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo);
 
 /**
  * ttm_bo_lock_delayed_workqueue
@@ -389,15 +387,14 @@ extern void ttm_bo_move_to_lru_tail(struct 
ttm_buffer_object *bo);
  * Returns
  * True if the workqueue was queued at the time
  */
-extern int ttm_bo_lock_delayed_workqueue(struct ttm_bo_device *bdev);
+int ttm_bo_lock_delayed_workqueue(struct ttm_bo_device *bdev);
 
 /**
  * ttm_bo_unlock_delayed_workqueue
  *
  * Allows the delayed workqueue to run.
  */
-extern void ttm_bo_unlock_delayed_workqueue(struct ttm_bo_device *bdev,
-   int resched);
+void ttm_bo_unlock_delayed_workqueue(struct ttm_bo_device *bdev, int resched);
 
 /**
  * ttm_bo_eviction_valuable
@@ -424,8 +421,7 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
  * -EBUSY if the buffer is busy and no_wait is true.
  * -ERESTARTSYS if interrupted by a signal.
  */
-extern int
-ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait);
+int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait);
 
 /**
  * ttm_bo_synccpu_write_release:
@@ -434,7 +430,7 @@ ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, 
bool 

[PATCH 08/11] drm/ttm: add context to driver move callback as well

2017-04-27 Thread Christian König
From: Christian König 

Instead of passing the parameters manually.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 21 +++--
 drivers/gpu/drm/nouveau/nouveau_bo.c| 27 ---
 drivers/gpu/drm/qxl/qxl_ttm.c   |  9 -
 drivers/gpu/drm/radeon/radeon_ttm.c | 23 ---
 drivers/gpu/drm/ttm/ttm_bo.c|  3 +--
 drivers/gpu/drm/virtio/virtgpu_ttm.c|  7 +++
 include/drm/ttm/ttm_bo_driver.h |  6 ++
 7 files changed, 49 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e386657..061fd5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -451,10 +451,9 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object 
*bo,
return r;
 }
 
-static int amdgpu_bo_move(struct ttm_buffer_object *bo,
-   bool evict, bool interruptible,
-   bool no_wait_gpu,
-   struct ttm_mem_reg *new_mem)
+static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
+ struct ttm_operation_ctx *ctx,
+ struct ttm_mem_reg *new_mem)
 {
struct amdgpu_device *adev;
struct amdgpu_bo *abo;
@@ -489,19 +488,21 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
 
if (old_mem->mem_type == TTM_PL_VRAM &&
new_mem->mem_type == TTM_PL_SYSTEM) {
-   r = amdgpu_move_vram_ram(bo, evict, interruptible,
-   no_wait_gpu, new_mem);
+   r = amdgpu_move_vram_ram(bo, evict, ctx->interruptible,
+   ctx->no_wait_gpu, new_mem);
} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
   new_mem->mem_type == TTM_PL_VRAM) {
-   r = amdgpu_move_ram_vram(bo, evict, interruptible,
-   no_wait_gpu, new_mem);
+   r = amdgpu_move_ram_vram(bo, evict, ctx->interruptible,
+   ctx->no_wait_gpu, new_mem);
} else {
-   r = amdgpu_move_blit(bo, evict, no_wait_gpu, new_mem, old_mem);
+   r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu,
+new_mem, old_mem);
}
 
if (r) {
 memcpy:
-   r = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem);
+   r = ttm_bo_move_memcpy(bo, ctx->interruptible,
+  ctx->no_wait_gpu, new_mem);
if (r) {
return r;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 8486da6..39e328e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1253,8 +1253,9 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
 }
 
 static int
-nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
-   bool no_wait_gpu, struct ttm_mem_reg *new_mem)
+nouveau_bo_move(struct ttm_buffer_object *bo, bool evict,
+   struct ttm_operation_ctx *ctx,
+   struct ttm_mem_reg *new_mem)
 {
struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
struct nouveau_bo *nvbo = nouveau_bo(bo);
@@ -1262,7 +1263,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, 
bool intr,
struct nouveau_drm_tile *new_tile = NULL;
int ret = 0;
 
-   ret = ttm_bo_wait(bo, intr, no_wait_gpu);
+   ret = ttm_bo_wait(bo, ctx->interruptible, ctx->no_wait_gpu);
if (ret)
return ret;
 
@@ -1286,22 +1287,26 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool 
evict, bool intr,
/* Hardware assisted copy. */
if (drm->ttm.move) {
if (new_mem->mem_type == TTM_PL_SYSTEM)
-   ret = nouveau_bo_move_flipd(bo, evict, intr,
-   no_wait_gpu, new_mem);
+   ret = nouveau_bo_move_flipd(bo, evict,
+   ctx->interruptible,
+   ctx->no_wait_gpu, new_mem);
else if (old_mem->mem_type == TTM_PL_SYSTEM)
-   ret = nouveau_bo_move_flips(bo, evict, intr,
-   no_wait_gpu, new_mem);
+   ret = nouveau_bo_move_flips(bo, evict,
+   ctx->interruptible,
+   ctx->no_wait_gpu, new_mem);
else
-   ret = nouveau_bo_move_m2mf(bo, evict, intr,
-  no_wait_gpu, new_mem);
+  

[PATCH 03/11] drm/ttm: remove cur_placement

2017-04-27 Thread Christian König
From: Christian König 

Not used any more.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 5 ++---
 include/drm/ttm/ttm_bo_api.h | 1 -
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 04cbecc..70335a3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -372,11 +372,10 @@ static int ttm_bo_handle_move_mem(struct 
ttm_buffer_object *bo,
bo->evicted = false;
}
 
-   if (bo->mem.mm_node) {
+   if (bo->mem.mm_node)
bo->offset = (bo->mem.start << PAGE_SHIFT) +
bdev->man[bo->mem.mem_type].gpu_offset;
-   bo->cur_placement = bo->mem.placement;
-   } else
+   else
bo->offset = 0;
 
return 0;
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 425f3b4..1d99cc1 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -224,7 +224,6 @@ struct ttm_buffer_object {
 */
 
uint64_t offset; /* GPU address space is independent of CPU word size */
-   uint32_t cur_placement;
 
struct sg_table *sg;
 
-- 
2.5.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: remove unused and mostly unimplemented CGS functions

2017-04-27 Thread Kuehling, Felix
KFD doesn't use CGS.

Regards,
  Felix

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Deucher, Alexander
Sent: Wednesday, April 26, 2017 12:36 PM
To: 'Christian König'; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: remove unused and mostly unimplemented CGS 
functions

> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf 
> Of Christian König
> Sent: Wednesday, April 26, 2017 9:14 AM
> To: amd-gfx@lists.freedesktop.org
> Subject: [PATCH] drm/amdgpu: remove unused and mostly unimplemented 
> CGS functions
> 
> From: Christian König 
> 
> Those functions are all unused and some not even implemented.
> 
> Signed-off-by: Christian König 

I think some of this is used by the ACP driver (cgs_get_pci_resource for 
example).  Make sure that is enabled in your build.  You might also want to 
check with Felix for KFD.

Alex

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c  | 233 
>   drivers/gpu/drm/amd/include/cgs_common.h | 
> 303 -
> --
>  2 files changed, 536 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
> index a1a2d44..93f5dcc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
> @@ -42,82 +42,6 @@ struct amdgpu_cgs_device {
>   struct amdgpu_device *adev =\
>   ((struct amdgpu_cgs_device *)cgs_device)->adev
> 
> -static int amdgpu_cgs_gpu_mem_info(struct cgs_device *cgs_device, 
> enum cgs_gpu_mem_type type,
> -uint64_t *mc_start, uint64_t *mc_size,
> -uint64_t *mem_size)
> -{
> - CGS_FUNC_ADEV;
> - switch(type) {
> - case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB:
> - case CGS_GPU_MEM_TYPE__VISIBLE_FB:
> - *mc_start = 0;
> - *mc_size = adev->mc.visible_vram_size;
> - *mem_size = adev->mc.visible_vram_size - adev-
> >vram_pin_size;
> - break;
> - case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB:
> - case CGS_GPU_MEM_TYPE__INVISIBLE_FB:
> - *mc_start = adev->mc.visible_vram_size;
> - *mc_size = adev->mc.real_vram_size - adev-
> >mc.visible_vram_size;
> - *mem_size = *mc_size;
> - break;
> - case CGS_GPU_MEM_TYPE__GART_CACHEABLE:
> - case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE:
> - *mc_start = adev->mc.gtt_start;
> - *mc_size = adev->mc.gtt_size;
> - *mem_size = adev->mc.gtt_size - adev->gart_pin_size;
> - break;
> - default:
> - return -EINVAL;
> - }
> -
> - return 0;
> -}
> -
> -static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void 
> *kmem,
> - uint64_t size,
> - uint64_t min_offset, uint64_t max_offset,
> - cgs_handle_t *kmem_handle, uint64_t
> *mcaddr)
> -{
> - CGS_FUNC_ADEV;
> - int ret;
> - struct amdgpu_bo *bo;
> - struct page *kmem_page = vmalloc_to_page(kmem);
> - int npages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT;
> -
> - struct sg_table *sg = drm_prime_pages_to_sg(_page,
> npages);
> - ret = amdgpu_bo_create(adev, size, PAGE_SIZE, false,
> -AMDGPU_GEM_DOMAIN_GTT, 0, sg, NULL,
> );
> - if (ret)
> - return ret;
> - ret = amdgpu_bo_reserve(bo, false);
> - if (unlikely(ret != 0))
> - return ret;
> -
> - /* pin buffer into GTT */
> - ret = amdgpu_bo_pin_restricted(bo,
> AMDGPU_GEM_DOMAIN_GTT,
> -min_offset, max_offset, mcaddr);
> - amdgpu_bo_unreserve(bo);
> -
> - *kmem_handle = (cgs_handle_t)bo;
> - return ret;
> -}
> -
> -static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device, 
> cgs_handle_t kmem_handle) -{
> - struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle;
> -
> - if (obj) {
> - int r = amdgpu_bo_reserve(obj, false);
> - if (likely(r == 0)) {
> - amdgpu_bo_unpin(obj);
> - amdgpu_bo_unreserve(obj);
> - }
> - amdgpu_bo_unref();
> -
> - }
> - return 0;
> -}
> -
>  static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
>   enum cgs_gpu_mem_type type,
>   uint64_t size, uint64_t align, @@ -349,96 
> +273,6 @@ static 
> void amdgpu_cgs_write_ind_register(struct
> cgs_device *cgs_device,
>   WARN(1, "Invalid indirect register space");  }
> 
> -static uint8_t amdgpu_cgs_read_pci_config_byte(struct cgs_device 
> *cgs_device, unsigned addr) -{
> - CGS_FUNC_ADEV;
> - uint8_t val;
> - int ret = 

Re: [PATCH v2] drm/radeon: only warn once in radeon_ttm_bo_destroy if va list not empty

2017-04-27 Thread Christian König

Am 27.04.2017 um 16:10 schrieb Julien Isorce:

Encountered a dozen of the exact same backtraces when mesa's
pb_cache_release_all_buffers is called after a gpu reset failed.

v2: Remove superfluous error message added in v1.

bug: https://bugs.freedesktop.org/show_bug.cgi?id=96271

Signed-off-by: Julien Isorce 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/radeon/radeon_object.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index bec2ec0..8b72229 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -81,7 +81,7 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object 
*tbo)
list_del_init(>list);
mutex_unlock(>rdev->gem.mutex);
radeon_bo_clear_surface_reg(bo);
-   WARN_ON(!list_empty(>va));
+   WARN_ON_ONCE(!list_empty(>va));
drm_gem_object_release(>gem_base);
kfree(bo);
  }



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2] drm/radeon: only warn once in radeon_ttm_bo_destroy if va list not empty

2017-04-27 Thread Julien Isorce
Encountered a dozen of the exact same backtraces when mesa's
pb_cache_release_all_buffers is called after a gpu reset failed.

v2: Remove superfluous error message added in v1.

bug: https://bugs.freedesktop.org/show_bug.cgi?id=96271

Signed-off-by: Julien Isorce 
---
 drivers/gpu/drm/radeon/radeon_object.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index bec2ec0..8b72229 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -81,7 +81,7 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object 
*tbo)
list_del_init(>list);
mutex_unlock(>rdev->gem.mutex);
radeon_bo_clear_surface_reg(bo);
-   WARN_ON(!list_empty(>va));
+   WARN_ON_ONCE(!list_empty(>va));
drm_gem_object_release(>gem_base);
kfree(bo);
 }
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/amdgpu: bump version for exporting gpu info for gfx9

2017-04-27 Thread Deucher, Alexander
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
> Of Junwei Zhang
> Sent: Thursday, April 27, 2017 4:31 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander; Wang, Ken; Zhang, Jerry
> Subject: [PATCH 2/2] drm/amdgpu: bump version for exporting gpu info for
> gfx9
> 
> Signed-off-by: Junwei Zhang 

Series is:
Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index ead00d7..857c3be 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -64,9 +64,10 @@
>   * - 3.12.0 - Add query for double offchip LDS buffers
>   * - 3.13.0 - Add PRT support
>   * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality
> + * - 3.15.0 - Export more gpu info for gfx9
>   */
>  #define KMS_DRIVER_MAJOR 3
> -#define KMS_DRIVER_MINOR 14
> +#define KMS_DRIVER_MINOR 15
>  #define KMS_DRIVER_PATCHLEVEL0
> 
>  int amdgpu_vram_limit = 0;
> --
> 1.9.1
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset

2017-04-27 Thread Liu, Monk
A really good improvement 

Reviewed-by: Monk Liu 

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
zhoucm1
Sent: Thursday, April 27, 2017 6:24 PM
To: Christian König ; Zhang, Jerry 
; amd-gfx@lists.freedesktop.org
Cc: Liu, Monk 
Subject: Re: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu 
reset



On 2017年04月27日 18:11, Christian König wrote:
> Am 27.04.2017 um 11:08 schrieb Zhang, Jerry (Junwei):
>> On 04/27/2017 04:25 PM, Chunming Zhou wrote:
>>> the case could happen when gpu reset:
>>> 1. when a gpu reset happens, cs can continue until the sw queue is full, then 
>>> push job will wait while holding the pd reservation.
>>> 2. gpu_reset routine will also need pd reservation to restore page 
>>> table from their shadow.
>>> 3. cs is waiting for gpu_reset complete, but gpu reset is waiting 
>>> for cs releases reservation.
>>>
>>> Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202
>>> Signed-off-by: Chunming Zhou 
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++
>>>   1 file changed, 2 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index 9edb1a4..a6722a7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -1076,6 +1076,7 @@ static int amdgpu_cs_submit(struct 
>>> amdgpu_cs_parser *p,
>>>   amdgpu_job_free_resources(job);
>>>
>>>   trace_amdgpu_cs_ioctl(job);
>>> +amdgpu_cs_parser_fini(p, 0, true);
>>
>> Please confirm:
>> It will not free/release more things that may be accessed in job 
>> working process, as cs_parse_fini free so many things related to parser.
>
> Yeah, that indeed won't work. amdgpu_cs_parser_fini() does:
>
No, with my patch, parser->job already is NULL here, same as previous sequence, 
just put push_job action after amdgpu_cs_parser_fini()
> if (parser->job)
> amdgpu_job_free(parser->job);
>
> And amdgpu_cs_submit() sets the fence, so it must be called before 
> amdgpu_cs_parser_fini().
yes, fence is already generated, so amdgpu_cs_parser_fini before pushing job is 
completely safe here.
>
> But apart from that this is a rather nifty idea. What was the problem 
> with the initial patch?
Just as comments in patch, which is found by Monk.

Regards,
David Zhou
>
>>
>> Or we can just release the pd reservation here?
>
> We could run into problem with the reservation ticked with that. So I 
> would want to avoid that.
>
> Christian.
>
>>
>> Jerry
>>
>>> amd_sched_entity_push_job(>base);
>>>
>>>   return 0;
>>> @@ -1130,6 +1131,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, 
>>> void *data, struct drm_file *filp)
>>>
>>>   r = amdgpu_cs_submit(, cs);
>>>
>>> +return r;
>>>   out:
>>>   amdgpu_cs_parser_fini(, r, reserved_buffers);
>>>   return r;
>>>
>> ___
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: remove unused and mostly unimplemented CGS functions v2

2017-04-27 Thread Alex Deucher
On Thu, Apr 27, 2017 at 9:22 AM, Christian König
 wrote:
> From: Christian König 
>
> Those functions are all unused and some not even implemented.
>
> v2: keep cgs_get_pci_resource it is used by the ACP driver.
>
> Signed-off-by: Christian König 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c  | 198 ---
>  drivers/gpu/drm/amd/include/cgs_common.h | 270 
> ---
>  2 files changed, 468 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
> index a1a2d44..9d22ebd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
> @@ -42,82 +42,6 @@ struct amdgpu_cgs_device {
> struct amdgpu_device *adev =\
> ((struct amdgpu_cgs_device *)cgs_device)->adev
>
> -static int amdgpu_cgs_gpu_mem_info(struct cgs_device *cgs_device, enum 
> cgs_gpu_mem_type type,
> -  uint64_t *mc_start, uint64_t *mc_size,
> -  uint64_t *mem_size)
> -{
> -   CGS_FUNC_ADEV;
> -   switch(type) {
> -   case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB:
> -   case CGS_GPU_MEM_TYPE__VISIBLE_FB:
> -   *mc_start = 0;
> -   *mc_size = adev->mc.visible_vram_size;
> -   *mem_size = adev->mc.visible_vram_size - adev->vram_pin_size;
> -   break;
> -   case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB:
> -   case CGS_GPU_MEM_TYPE__INVISIBLE_FB:
> -   *mc_start = adev->mc.visible_vram_size;
> -   *mc_size = adev->mc.real_vram_size - 
> adev->mc.visible_vram_size;
> -   *mem_size = *mc_size;
> -   break;
> -   case CGS_GPU_MEM_TYPE__GART_CACHEABLE:
> -   case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE:
> -   *mc_start = adev->mc.gtt_start;
> -   *mc_size = adev->mc.gtt_size;
> -   *mem_size = adev->mc.gtt_size - adev->gart_pin_size;
> -   break;
> -   default:
> -   return -EINVAL;
> -   }
> -
> -   return 0;
> -}
> -
> -static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void *kmem,
> -   uint64_t size,
> -   uint64_t min_offset, uint64_t max_offset,
> -   cgs_handle_t *kmem_handle, uint64_t *mcaddr)
> -{
> -   CGS_FUNC_ADEV;
> -   int ret;
> -   struct amdgpu_bo *bo;
> -   struct page *kmem_page = vmalloc_to_page(kmem);
> -   int npages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT;
> -
> -   struct sg_table *sg = drm_prime_pages_to_sg(_page, npages);
> -   ret = amdgpu_bo_create(adev, size, PAGE_SIZE, false,
> -  AMDGPU_GEM_DOMAIN_GTT, 0, sg, NULL, );
> -   if (ret)
> -   return ret;
> -   ret = amdgpu_bo_reserve(bo, false);
> -   if (unlikely(ret != 0))
> -   return ret;
> -
> -   /* pin buffer into GTT */
> -   ret = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_GTT,
> -  min_offset, max_offset, mcaddr);
> -   amdgpu_bo_unreserve(bo);
> -
> -   *kmem_handle = (cgs_handle_t)bo;
> -   return ret;
> -}
> -
> -static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device, 
> cgs_handle_t kmem_handle)
> -{
> -   struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle;
> -
> -   if (obj) {
> -   int r = amdgpu_bo_reserve(obj, false);
> -   if (likely(r == 0)) {
> -   amdgpu_bo_unpin(obj);
> -   amdgpu_bo_unreserve(obj);
> -   }
> -   amdgpu_bo_unref();
> -
> -   }
> -   return 0;
> -}
> -
>  static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
> enum cgs_gpu_mem_type type,
> uint64_t size, uint64_t align,
> @@ -349,62 +273,6 @@ static void amdgpu_cgs_write_ind_register(struct 
> cgs_device *cgs_device,
> WARN(1, "Invalid indirect register space");
>  }
>
> -static uint8_t amdgpu_cgs_read_pci_config_byte(struct cgs_device 
> *cgs_device, unsigned addr)
> -{
> -   CGS_FUNC_ADEV;
> -   uint8_t val;
> -   int ret = pci_read_config_byte(adev->pdev, addr, );
> -   if (WARN(ret, "pci_read_config_byte error"))
> -   return 0;
> -   return val;
> -}
> -
> -static uint16_t amdgpu_cgs_read_pci_config_word(struct cgs_device 
> *cgs_device, unsigned addr)
> -{
> -   CGS_FUNC_ADEV;
> -   uint16_t val;
> -   int ret = pci_read_config_word(adev->pdev, addr, );
> -   if (WARN(ret, "pci_read_config_word error"))
> -   return 0;
> -   return val;
> -}
> -
> -static uint32_t 

Re: [PATCH] drm/amdgpu: remove unused and mostly unimplemented CGS functions

2017-04-27 Thread Christian König

Am 26.04.2017 um 18:36 schrieb Deucher, Alexander:

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
Of Christian König
Sent: Wednesday, April 26, 2017 9:14 AM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH] drm/amdgpu: remove unused and mostly unimplemented
CGS functions

From: Christian König 

Those functions are all unused and some not even implemented.

Signed-off-by: Christian König 

I think some of this is used by the ACP driver (cgs_get_pci_resource for 
example).  Make sure that is enabled in your build.  You might also want to 
check with Felix for KFD.


Good point. I was only searching outside of the amdgpu subdirectory for 
users. Why is a file which is part of amdgpu even using this?


v2 is on the list.

Christian.



Alex


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c  | 233 
  drivers/gpu/drm/amd/include/cgs_common.h | 303 -
--
  2 files changed, 536 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index a1a2d44..93f5dcc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -42,82 +42,6 @@ struct amdgpu_cgs_device {
struct amdgpu_device *adev =\
((struct amdgpu_cgs_device *)cgs_device)->adev

-static int amdgpu_cgs_gpu_mem_info(struct cgs_device *cgs_device, enum
cgs_gpu_mem_type type,
-  uint64_t *mc_start, uint64_t *mc_size,
-  uint64_t *mem_size)
-{
-   CGS_FUNC_ADEV;
-   switch(type) {
-   case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB:
-   case CGS_GPU_MEM_TYPE__VISIBLE_FB:
-   *mc_start = 0;
-   *mc_size = adev->mc.visible_vram_size;
-   *mem_size = adev->mc.visible_vram_size - adev-

vram_pin_size;

-   break;
-   case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB:
-   case CGS_GPU_MEM_TYPE__INVISIBLE_FB:
-   *mc_start = adev->mc.visible_vram_size;
-   *mc_size = adev->mc.real_vram_size - adev-

mc.visible_vram_size;

-   *mem_size = *mc_size;
-   break;
-   case CGS_GPU_MEM_TYPE__GART_CACHEABLE:
-   case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE:
-   *mc_start = adev->mc.gtt_start;
-   *mc_size = adev->mc.gtt_size;
-   *mem_size = adev->mc.gtt_size - adev->gart_pin_size;
-   break;
-   default:
-   return -EINVAL;
-   }
-
-   return 0;
-}
-
-static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void
*kmem,
-   uint64_t size,
-   uint64_t min_offset, uint64_t max_offset,
-   cgs_handle_t *kmem_handle, uint64_t
*mcaddr)
-{
-   CGS_FUNC_ADEV;
-   int ret;
-   struct amdgpu_bo *bo;
-   struct page *kmem_page = vmalloc_to_page(kmem);
-   int npages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT;
-
-   struct sg_table *sg = drm_prime_pages_to_sg(_page,
npages);
-   ret = amdgpu_bo_create(adev, size, PAGE_SIZE, false,
-  AMDGPU_GEM_DOMAIN_GTT, 0, sg, NULL,
);
-   if (ret)
-   return ret;
-   ret = amdgpu_bo_reserve(bo, false);
-   if (unlikely(ret != 0))
-   return ret;
-
-   /* pin buffer into GTT */
-   ret = amdgpu_bo_pin_restricted(bo,
AMDGPU_GEM_DOMAIN_GTT,
-  min_offset, max_offset, mcaddr);
-   amdgpu_bo_unreserve(bo);
-
-   *kmem_handle = (cgs_handle_t)bo;
-   return ret;
-}
-
-static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device,
cgs_handle_t kmem_handle)
-{
-   struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle;
-
-   if (obj) {
-   int r = amdgpu_bo_reserve(obj, false);
-   if (likely(r == 0)) {
-   amdgpu_bo_unpin(obj);
-   amdgpu_bo_unreserve(obj);
-   }
-   amdgpu_bo_unref();
-
-   }
-   return 0;
-}
-
  static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
enum cgs_gpu_mem_type type,
uint64_t size, uint64_t align,
@@ -349,96 +273,6 @@ static void amdgpu_cgs_write_ind_register(struct
cgs_device *cgs_device,
WARN(1, "Invalid indirect register space");
  }

-static uint8_t amdgpu_cgs_read_pci_config_byte(struct cgs_device
*cgs_device, unsigned addr)
-{
-   CGS_FUNC_ADEV;
-   uint8_t val;
-   int ret = pci_read_config_byte(adev->pdev, addr, );
-   if (WARN(ret, "pci_read_config_byte error"))
-   return 0;
-   return val;
-}
-
-static uint16_t amdgpu_cgs_read_pci_config_word(struct cgs_device
*cgs_device, unsigned addr)
-{
-   CGS_FUNC_ADEV;
-   

[PATCH] drm/amdgpu: remove unused and mostly unimplemented CGS functions v2

2017-04-27 Thread Christian König
From: Christian König 

Those functions are all unused and some not even implemented.

v2: keep cgs_get_pci_resource it is used by the ACP driver.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c  | 198 ---
 drivers/gpu/drm/amd/include/cgs_common.h | 270 ---
 2 files changed, 468 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index a1a2d44..9d22ebd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -42,82 +42,6 @@ struct amdgpu_cgs_device {
struct amdgpu_device *adev =\
((struct amdgpu_cgs_device *)cgs_device)->adev
 
-static int amdgpu_cgs_gpu_mem_info(struct cgs_device *cgs_device, enum 
cgs_gpu_mem_type type,
-  uint64_t *mc_start, uint64_t *mc_size,
-  uint64_t *mem_size)
-{
-   CGS_FUNC_ADEV;
-   switch(type) {
-   case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB:
-   case CGS_GPU_MEM_TYPE__VISIBLE_FB:
-   *mc_start = 0;
-   *mc_size = adev->mc.visible_vram_size;
-   *mem_size = adev->mc.visible_vram_size - adev->vram_pin_size;
-   break;
-   case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB:
-   case CGS_GPU_MEM_TYPE__INVISIBLE_FB:
-   *mc_start = adev->mc.visible_vram_size;
-   *mc_size = adev->mc.real_vram_size - adev->mc.visible_vram_size;
-   *mem_size = *mc_size;
-   break;
-   case CGS_GPU_MEM_TYPE__GART_CACHEABLE:
-   case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE:
-   *mc_start = adev->mc.gtt_start;
-   *mc_size = adev->mc.gtt_size;
-   *mem_size = adev->mc.gtt_size - adev->gart_pin_size;
-   break;
-   default:
-   return -EINVAL;
-   }
-
-   return 0;
-}
-
-static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void *kmem,
-   uint64_t size,
-   uint64_t min_offset, uint64_t max_offset,
-   cgs_handle_t *kmem_handle, uint64_t *mcaddr)
-{
-   CGS_FUNC_ADEV;
-   int ret;
-   struct amdgpu_bo *bo;
-   struct page *kmem_page = vmalloc_to_page(kmem);
-   int npages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT;
-
-   struct sg_table *sg = drm_prime_pages_to_sg(_page, npages);
-   ret = amdgpu_bo_create(adev, size, PAGE_SIZE, false,
-  AMDGPU_GEM_DOMAIN_GTT, 0, sg, NULL, );
-   if (ret)
-   return ret;
-   ret = amdgpu_bo_reserve(bo, false);
-   if (unlikely(ret != 0))
-   return ret;
-
-   /* pin buffer into GTT */
-   ret = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_GTT,
-  min_offset, max_offset, mcaddr);
-   amdgpu_bo_unreserve(bo);
-
-   *kmem_handle = (cgs_handle_t)bo;
-   return ret;
-}
-
-static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device, cgs_handle_t 
kmem_handle)
-{
-   struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle;
-
-   if (obj) {
-   int r = amdgpu_bo_reserve(obj, false);
-   if (likely(r == 0)) {
-   amdgpu_bo_unpin(obj);
-   amdgpu_bo_unreserve(obj);
-   }
-   amdgpu_bo_unref();
-
-   }
-   return 0;
-}
-
 static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
enum cgs_gpu_mem_type type,
uint64_t size, uint64_t align,
@@ -349,62 +273,6 @@ static void amdgpu_cgs_write_ind_register(struct 
cgs_device *cgs_device,
WARN(1, "Invalid indirect register space");
 }
 
-static uint8_t amdgpu_cgs_read_pci_config_byte(struct cgs_device *cgs_device, 
unsigned addr)
-{
-   CGS_FUNC_ADEV;
-   uint8_t val;
-   int ret = pci_read_config_byte(adev->pdev, addr, );
-   if (WARN(ret, "pci_read_config_byte error"))
-   return 0;
-   return val;
-}
-
-static uint16_t amdgpu_cgs_read_pci_config_word(struct cgs_device *cgs_device, 
unsigned addr)
-{
-   CGS_FUNC_ADEV;
-   uint16_t val;
-   int ret = pci_read_config_word(adev->pdev, addr, );
-   if (WARN(ret, "pci_read_config_word error"))
-   return 0;
-   return val;
-}
-
-static uint32_t amdgpu_cgs_read_pci_config_dword(struct cgs_device *cgs_device,
-unsigned addr)
-{
-   CGS_FUNC_ADEV;
-   uint32_t val;
-   int ret = pci_read_config_dword(adev->pdev, addr, );
-   if (WARN(ret, "pci_read_config_dword error"))
-   return 0;
-   return val;
-}
-
-static void amdgpu_cgs_write_pci_config_byte(struct cgs_device *cgs_device, 

Re: [PATCH 0/6 v5] *** Dedicated vmid per process v5 ***

2017-04-27 Thread Christian König

Reviewed-by: Christian König  for patch #3 and #5.

Am 27.04.2017 um 15:09 schrieb Chunming Zhou:

The current kernel implementation, which grabs the idle VMID from pool when 
emitting the job may:

 The back-to-back submissions from one process could use different VMIDs.
 The submissions to different queues from a single process could use different 
VMIDs

It works well in most case but cannot work for the SQ thread trace capture.

The VMID for the submission that set the {SQTT}_BASE, which refers to the 
address of the trace buffer, is stored in shader engine.

If the profiling application has to use different VMIDs to submit IBs in its 
life cycle:

 Some traces are not captured since the process actually uses a different VMID to submit 
jobs.
 Some parts of the captured trace may come from a different application since it 
accidentally uses the owner’s VMID to submit jobs.

V2:
1. address Christian's comments:
a. drop context flags for tag process, instead, add vm ioctl.
b. change order of patches.
c. sync waiting only when vm flush needs.

2. address Alex's comments;
bump module version

V3:
   address Jerry and Christian's comments.
   and only reserve gfxhub vmid

v4:
   address Jerry and Christian's comments.
   fix some race condistions.

v5:
   patch#5: peek_fence instead of get_fence.
   fix potential context starved.

Chunming Zhou (6):
   drm/amdgpu: add vm ioctl
   drm/amdgpu: add reserved vmid field in vm struct v2
   drm/amdgpu: reserve/unreserve vmid by vm ioctl v4
   drm/amdgpu: add limitation for dedicated vm number v4
   drm/amdgpu: implement grab reserved vmid V4
   drm/amdgpu: bump module verion for reserved vmid

  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   3 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 159 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   6 ++
  include/uapi/drm/amdgpu_drm.h   |  22 +
  5 files changed, 185 insertions(+), 6 deletions(-)



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/6] drm/amdgpu: reserve/unreserve vmid by vm ioctl v4

2017-04-27 Thread Chunming Zhou
add reserve/unreserve vmid funtions.
v3:
only reserve vmid from gfxhub
v4:
fix racy condition

Change-Id: I5f80dc39dc9d44660a96a2b710b0dbb4d3b9039d
Signed-off-by: Chunming Zhou 
Reviewed-by: Junwei Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 62 +++---
 1 file changed, 49 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 261aaea..e37421e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -546,6 +546,45 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct 
amdgpu_ring *ring,
return r;
 }
 
+static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ unsigned vmhub)
+{
+   struct amdgpu_vm_id_manager *id_mgr = >vm_manager.id_mgr[vmhub];
+
+   mutex_lock(_mgr->lock);
+   if (vm->reserved_vmid[vmhub]) {
+   list_add(>reserved_vmid[vmhub]->list,
+   _mgr->ids_lru);
+   vm->reserved_vmid[vmhub] = NULL;
+   }
+   mutex_unlock(_mgr->lock);
+}
+
+static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev,
+struct amdgpu_vm *vm,
+unsigned vmhub)
+{
+   struct amdgpu_vm_id_manager *id_mgr;
+   struct amdgpu_vm_id *idle;
+   int r = 0;
+
+   id_mgr = >vm_manager.id_mgr[vmhub];
+   mutex_lock(_mgr->lock);
+   if (vm->reserved_vmid[vmhub])
+   goto unlock;
+   /* Select the first entry VMID */
+   idle = list_first_entry(_mgr->ids_lru, struct amdgpu_vm_id, list);
+   list_del_init(>list);
+   vm->reserved_vmid[vmhub] = idle;
+   mutex_unlock(_mgr->lock);
+
+   return 0;
+unlock:
+   mutex_unlock(_mgr->lock);
+   return r;
+}
+
 static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
 {
struct amdgpu_device *adev = ring->adev;
@@ -2316,18 +2355,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
 
amdgpu_vm_free_levels(>root);
fence_put(vm->last_dir_update);
-   for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
-   struct amdgpu_vm_id_manager *id_mgr =
-   >vm_manager.id_mgr[i];
-
-   mutex_lock(_mgr->lock);
-   if (vm->reserved_vmid[i]) {
-   list_add(>reserved_vmid[i]->list,
-_mgr->ids_lru);
-   vm->reserved_vmid[i] = NULL;
-   }
-   mutex_unlock(_mgr->lock);
-   }
+   for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
+   amdgpu_vm_free_reserved_vmid(adev, vm, i);
 }
 
 /**
@@ -2400,11 +2429,18 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
union drm_amdgpu_vm *args = data;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
+   int r;
 
switch (args->in.op) {
case AMDGPU_VM_OP_RESERVE_VMID:
+   /* current, we only have requirement to reserve vmid from 
gfxhub */
+   r = amdgpu_vm_alloc_reserved_vmid(adev, >vm,
+ AMDGPU_GFXHUB);
+   if (r)
+   return r;
+   break;
case AMDGPU_VM_OP_UNRESERVE_VMID:
-   return -EINVAL;
+   amdgpu_vm_free_reserved_vmid(adev, >vm, AMDGPU_GFXHUB);
break;
default:
return -EINVAL;
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 4/6] drm/amdgpu: add limitation for dedicated vm number v4

2017-04-27 Thread Chunming Zhou
v2: move #define to amdgpu_vm.h
v3: move reserved vmid counter to id_manager,
and increase counter before allocating vmid
v4: rename to reserved_vmid_num

Change-Id: Ie5958cf6dbdc1c8278e61d9158483472d6f5c6e3
Signed-off-by: Chunming Zhou 
Reviewed-by: Junwei Zhang 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +++
 2 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index e37421e..e6fdfa4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -557,6 +557,7 @@ static void amdgpu_vm_free_reserved_vmid(struct 
amdgpu_device *adev,
list_add(&vm->reserved_vmid[vmhub]->list,
&id_mgr->ids_lru);
vm->reserved_vmid[vmhub] = NULL;
+   atomic_dec(&id_mgr->reserved_vmid_num);
}
mutex_unlock(&id_mgr->lock);
 }
@@ -573,6 +574,13 @@ static int amdgpu_vm_alloc_reserved_vmid(struct 
amdgpu_device *adev,
mutex_lock(&id_mgr->lock);
if (vm->reserved_vmid[vmhub])
goto unlock;
+   if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
+   AMDGPU_VM_MAX_RESERVED_VMID) {
+   DRM_ERROR("Over limitation of reserved vmid\n");
+   atomic_dec(&id_mgr->reserved_vmid_num);
+   r = -EINVAL;
+   goto unlock;
+   }
/* Select the first entry VMID */
idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list);
list_del_init(>list);
@@ -2376,6 +2384,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 
mutex_init(&id_mgr->lock);
INIT_LIST_HEAD(&id_mgr->ids_lru);
+   atomic_set(&id_mgr->reserved_vmid_num, 0);
 
/* skip over VMID 0, since it is the system VM */
for (j = 1; j < id_mgr->num_ids; ++j) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 8eedca1..9828fcd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -84,6 +84,8 @@
 
 /* hardcode that limit for now */
 #define AMDGPU_VA_RESERVED_SIZE(8 << 20)
+/* max vmids dedicated for process */
+#define AMDGPU_VM_MAX_RESERVED_VMID1
 
 struct amdgpu_vm_pt {
struct amdgpu_bo*bo;
@@ -157,6 +159,7 @@ struct amdgpu_vm_id_manager {
unsignednum_ids;
struct list_headids_lru;
struct amdgpu_vm_id ids[AMDGPU_NUM_VM];
+   atomic_treserved_vmid_num;
 };
 
 struct amdgpu_vm_manager {
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 0/6 v5] *** Dedicated vmid per process v5 ***

2017-04-27 Thread Chunming Zhou
The current kernel implementation, which grabs the idle VMID from pool when 
emitting the job may:

The back-to-back submission from one process could use different VMID.
The submission to different queues from single process could use different 
VMID

It works well in most case but cannot work for the SQ thread trace capture.

The VMID for the submission that set the {SQTT}_BASE, which refers to the 
address of the trace buffer, is stored in shader engine.

If the profiling application have to use different VMIDs to submit IBs in its 
life cycle:

Some trace is not captured since it actually uses different VMID to submit 
jobs.
Some part of captured trace may come from different application since they 
are accidentally uses the owner’s VMID to submit jobs.

V2:
1. address Christian's comments:
a. drop context flags for tag process, instead, add vm ioctl.
b. change order of patches.
c. sync waiting only when vm flush needs.

2. address Alex's comments;
bump module version

V3:
  address Jerry and Christian's comments.
  and only reserve gfxhub vmid

v4:
  address Jerry and Christian's comments.
  fix some race conditions.

v5:
  patch#5: peek_fence instead of get_fence.
   fix potential context starvation.

Chunming Zhou (6):
  drm/amdgpu: add vm ioctl
  drm/amdgpu: add reserved vmid field in vm struct v2
  drm/amdgpu: reserve/unreserve vmid by vm ioctl v4
  drm/amdgpu: add limitation for dedicated vm number v4
  drm/amdgpu: implement grab reserved vmid V4
  drm/amdgpu: bump module version for reserved vmid

 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 159 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   6 ++
 include/uapi/drm/amdgpu_drm.h   |  22 +
 5 files changed, 185 insertions(+), 6 deletions(-)

-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/6] drm/amdgpu: add vm ioctl

2017-04-27 Thread Chunming Zhou
It will be used for reserving vmid.

Change-Id: Ib7169ea999690c8e82d0dcbccdd2d97760c0270a
Signed-off-by: Chunming Zhou 
Reviewed-by: Junwei Zhang 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 18 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  1 +
 include/uapi/drm/amdgpu_drm.h   | 22 ++
 4 files changed, 42 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 052e6a5..8ea33c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1062,6 +1062,7 @@ int amdgpu_get_vblank_timestamp_kms(struct drm_device 
*dev, unsigned int pipe,
 const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
/* KMS */
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a8cd23b..8698177 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2379,3 +2379,21 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
}
}
 }
+
+int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+   union drm_amdgpu_vm *args = data;
+   struct amdgpu_device *adev = dev->dev_private;
+   struct amdgpu_fpriv *fpriv = filp->driver_priv;
+
+   switch (args->in.op) {
+   case AMDGPU_VM_OP_RESERVE_VMID:
+   case AMDGPU_VM_OP_UNRESERVE_VMID:
+   return -EINVAL;
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 8eba2d3..16e0aaa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -248,5 +248,6 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
  struct amdgpu_bo_va *bo_va);
 void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size);
+int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 
 #endif
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index b98b371..1c95775 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -51,6 +51,7 @@
 #define DRM_AMDGPU_GEM_OP  0x10
 #define DRM_AMDGPU_GEM_USERPTR 0x11
 #define DRM_AMDGPU_WAIT_FENCES 0x12
+#define DRM_AMDGPU_VM  0x13
 
 /* hybrid specific ioctls */
 #define DRM_AMDGPU_SEM 0x5b
@@ -71,6 +72,7 @@
 #define DRM_IOCTL_AMDGPU_GEM_OPDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
 #define DRM_IOCTL_AMDGPU_GEM_USERPTR   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
 #define DRM_IOCTL_AMDGPU_WAIT_FENCES   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
+#define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_VM, union drm_amdgpu_vm)
 
 /* hybrid specific ioctls */
 #define DRM_IOCTL_AMDGPU_GEM_DGMA  DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_DGMA, struct drm_amdgpu_gem_dgma)
@@ -212,6 +214,26 @@ struct drm_amdgpu_ctx_in {
union drm_amdgpu_ctx_out out;
 };
 
+/* vm ioctl */
+#define AMDGPU_VM_OP_RESERVE_VMID  1
+#define AMDGPU_VM_OP_UNRESERVE_VMID2
+
+struct drm_amdgpu_vm_in {
+   /** AMDGPU_VM_OP_* */
+   __u32   op;
+   __u32   flags;
+};
+
+struct drm_amdgpu_vm_out {
+   /** For future use, no flags defined so far */
+   __u64   flags;
+};
+
+union drm_amdgpu_vm {
+   struct drm_amdgpu_vm_in in;
+   struct drm_amdgpu_vm_out out;
+};
+
 /* sem related */
 #define AMDGPU_SEM_OP_CREATE_SEM1
 #define AMDGPU_SEM_OP_WAIT_SEM 2
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/6] drm/amdgpu: add reserved vmid field in vm struct v2

2017-04-27 Thread Chunming Zhou
v2: rename dedicated_vmid to reserved_vmid

Change-Id: Id728e20366c8a1ae90d4e901dc80e136e2a613bb
Signed-off-by: Chunming Zhou 
Reviewed-by: Junwei Zhang 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 ++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8698177..261aaea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2163,10 +2163,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
unsigned ring_instance;
struct amdgpu_ring *ring;
struct amd_sched_rq *rq;
-   int r;
+   int r, i;
 
vm->va = RB_ROOT;
vm->client_id = atomic64_inc_return(>vm_manager.client_counter);
+   for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
+   vm->reserved_vmid[i] = NULL;
spin_lock_init(>status_lock);
INIT_LIST_HEAD(>invalidated);
INIT_LIST_HEAD(>cleared);
@@ -2270,6 +2272,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
 {
struct amdgpu_bo_va_mapping *mapping, *tmp;
bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
+   int i;
 
if (vm->is_kfd_vm) {
struct amdgpu_vm_id_manager *id_mgr =
@@ -2313,6 +2316,18 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
 
amdgpu_vm_free_levels(>root);
fence_put(vm->last_dir_update);
+   for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
+   struct amdgpu_vm_id_manager *id_mgr =
+   >vm_manager.id_mgr[i];
+
+   mutex_lock(_mgr->lock);
+   if (vm->reserved_vmid[i]) {
+   list_add(>reserved_vmid[i]->list,
+_mgr->ids_lru);
+   vm->reserved_vmid[i] = NULL;
+   }
+   mutex_unlock(_mgr->lock);
+   }
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 16e0aaa..8eedca1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -123,6 +123,8 @@ struct amdgpu_vm {
 
/* client id */
u64 client_id;
+   /* dedicated to vm */
+   struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS];
/* each VM will map on CSA */
struct amdgpu_bo_va *csa_bo_va;
 
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 6/6] drm/amdgpu: bump module version for reserved vmid

2017-04-27 Thread Chunming Zhou
Change-Id: I1065e0430ed44f7ee6c29214b72e35a7343ea02b
Signed-off-by: Chunming Zhou 
Reviewed-by: Junwei Zhang 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 55322b4..6799829 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -64,9 +64,10 @@
  * - 3.12.0 - Add query for double offchip LDS buffers
  * - 3.13.0 - Add PRT support
  * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality
+ * - 3.15.0 - Add reserved vmid support
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   14
+#define KMS_DRIVER_MINOR   15
 #define KMS_DRIVER_PATCHLEVEL  0
 
 int amdgpu_vram_limit = 0;
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 5/6] drm/amdgpu: implement grab reserved vmid V4

2017-04-27 Thread Chunming Zhou
v2: move sync waiting only when flush needs
v3: fix racy
v4: peek fence instead of get fence, and fix potential context starvation.

Change-Id: I64da2701c9fdcf986afb90ba1492a78d5bef1b6c
Signed-off-by: Chunming Zhou 
Reviewed-by: Junwei Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 --
 1 file changed, 75 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index e6fdfa4..b35b853 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -397,6 +397,72 @@ static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device 
*adev,
atomic_read(>gpu_reset_counter);
 }
 
+static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub)
+{
+   return !!vm->reserved_vmid[vmhub];
+}
+
+/* idr_mgr->lock must be held */
+static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
+  struct amdgpu_ring *ring,
+  struct amdgpu_sync *sync,
+  struct fence *fence,
+  struct amdgpu_job *job)
+{
+   struct amdgpu_device *adev = ring->adev;
+   unsigned vmhub = ring->funcs->vmhub;
+   uint64_t fence_context = adev->fence_context + ring->idx;
+   struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub];
+   struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+   struct fence *updates = sync->last_vm_update;
+   int r = 0;
+   struct fence *flushed, *tmp;
+   bool needs_flush = false;
+
+   flushed  = id->flushed_updates;
+   if ((amdgpu_vm_had_gpu_reset(adev, id)) ||
+   (atomic64_read(&id->owner) != vm->client_id) ||
+   (job->vm_pd_addr != id->pd_gpu_addr) ||
+   (updates && (!flushed || updates->context != flushed->context ||
+   fence_is_later(updates, flushed))) ||
+   (!id->last_flush || (id->last_flush->context != fence_context &&
+!fence_is_signaled(id->last_flush {
+   needs_flush = true;
+   /* to prevent one context starved by another context */
+   id->pd_gpu_addr = 0;
+   tmp = amdgpu_sync_peek_fence(&id->active, ring);
+   if (tmp) {
+   r = amdgpu_sync_fence(adev, sync, tmp);
+   return r;
+   }
+   }
+
+   /* Good we can use this VMID. Remember this submission as
+   * user of the VMID.
+   */
+   r = amdgpu_sync_fence(ring->adev, &id->active, fence);
+   if (r)
+   goto out;
+
+   if (updates && (!flushed || updates->context != flushed->context ||
+   fence_is_later(updates, flushed))) {
+   fence_put(id->flushed_updates);
+   id->flushed_updates = fence_get(updates);
+   }
+   id->pd_gpu_addr = job->vm_pd_addr;
+   id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
+   atomic64_set(&id->owner, vm->client_id);
+   job->vm_needs_flush = needs_flush;
+   if (needs_flush) {
+   fence_put(id->last_flush);
+   id->last_flush = NULL;
+   }
+   job->vm_id = id - id_mgr->ids;
+   trace_amdgpu_vm_grab_id(vm, ring, job);
+out:
+   return r;
+}
+
 /**
  * amdgpu_vm_grab_id - allocate the next free VMID
  *
@@ -421,12 +487,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct 
amdgpu_ring *ring,
unsigned i;
int r = 0;
 
+   mutex_lock(&id_mgr->lock);
+   if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) {
+   r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, 
job);
+   mutex_unlock(&id_mgr->lock);
+   return r;
+   }
fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
-   if (!fences)
+   if (!fences) {
+   mutex_unlock(&id_mgr->lock);
return -ENOMEM;
-
-   mutex_lock(&id_mgr->lock);
-
+   }
/* Check if we have an idle VMID */
i = 0;
list_for_each_entry(idle, &id_mgr->ids_lru, list) {
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/radeon: only warn once in radeon_ttm_bo_destroy if va list not empty

2017-04-27 Thread Christian König

Am 27.04.2017 um 12:58 schrieb Julien Isorce:

But always print an error.

Encountered a dozen of exact same backtraces when mesa's
pb_cache_release_all_buffers is called after that a gpu reset failed.

An other approach would be to check rdev->vm_manager.enabled instead
of rdev->accel_working in the other function radeon_gem_object_close.
But it will only work if the vm_manager succeeded to be re-enabled
after a gpu reset.

bug: https://bugs.freedesktop.org/show_bug.cgi?id=96271

Signed-off-by: Julien Isorce 
---
  drivers/gpu/drm/radeon/radeon_object.c | 8 +++-
  1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index bec2ec0..76cc039 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -81,7 +81,13 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object 
*tbo)
list_del_init(&bo->list);
mutex_unlock(&bo->rdev->gem.mutex);
radeon_bo_clear_surface_reg(bo);
-   WARN_ON(!list_empty(&bo->va));
+
+   if (!list_empty(&bo->va)) {
+   DRM_ERROR("Virtual address list not empty, accel: %d\n",
+ bo->rdev->accel_working);
+   WARN_ON_ONCE(1);
+   }
+


The message is superfluous if we initially started up the GPU and found 
later that we can't get GFX working again after a GPU reset it is pretty 
much impossible to continue.


I would just change the "WARN_ON(!list_empty(&bo->va));" to a 
"WARN_ON_ONCE(!list_empty(&bo->va));".


Regards,
Christian.


drm_gem_object_release(>gem_base);
kfree(bo);
  }



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/6 v4] *** Dedicated vmid per process v4 ***

2017-04-27 Thread Christian König

Am 27.04.2017 um 12:26 schrieb zhoucm1:



On 2017年04月27日 17:51, Christian König wrote:
Patch #1, #2, #4 and #6 are Reviewed-by: Christian König 
.


Patch #3:

+/* Select the first entry VMID */
+idle = list_first_entry(_mgr->ids_lru, struct amdgpu_vm_id, 
list);

+list_del_init(>list);
+vm->reserved_vmid[vmhub] = idle;
+mutex_unlock(_mgr->lock);
I think we should wait for the VMID to be completely idle before we 
can use it, but that possible also go into the handling in patch #5.

Yes.


Patch #5:

+if ((amdgpu_vm_had_gpu_reset(adev, id)) ||
+(atomic64_read(>owner) != vm->client_id) ||
+(job->vm_pd_addr != id->pd_gpu_addr) ||
+(updates && (!flushed || updates->context != 
flushed->context ||

+fence_is_later(updates, flushed {

You also need this check here as well:
if (!id->last_flush ||
(id->last_flush->context != fence_context &&
 !fence_is_signaled(id->last_flush)))

added.



+tmp = amdgpu_sync_get_fence(>active);
+if (tmp) {
+r = amdgpu_sync_fence(adev, sync, tmp);
+fence_put(tmp);
+return r;
+}

That won't work correctly.

The first problem is that amdgpu_sync_get_fence() removes the fence 
from the active fences. So a another command submission from a 
different context won't wait for all the necessary fences. I think 
using amdgpu_sync_peek_fence() instead should work here.

good catch.



The second problem is that a context could be starved when it needs a 
VM flush while another context can still submit jobs without a flush. 
I think you could work around that by setting id->pd_gpu_addr to an 
invalid value when we hit this case. This way all other contexts 
would need to do a VM flush as well.
I don't catch your opinion, when you object concurrent flush, isn't it 
expected?


Imagine the following scenario:

Two contexts in use by the application.

Context 1 has a bunch of jobs to do, but needs a VM flush before 
starting them.


So each job will just pick the first fence to wait for go to sleep again.

Context 2 has a bunch of jobs to do, but does NOT need a VM flush.

This will add more and more fences to the collection of active jobs for 
this id.


The result is that Context 1 will never be able to submit any of it's 
jobs because context 2 keeps the ID busy all the time.


Setting the pd_gpu_addr to some invalid value (or maybe the ID owner?) 
should fix that. In this case context 2 needs to flush as well and so 
context 1 will sooner or later get a chance as well.


Regards,
Christian.







+id->pd_gpu_addr = job->vm_pd_addr;
+id->current_gpu_reset_count = 
atomic_read(>gpu_reset_counter);

+atomic64_set(>owner, vm->client_id);
+job->vm_needs_flush = needs_flush;
If we need a flush id->last_flush needs to be set to NULL here as 
well. E.g. do

fence_put(id->last_flush);
id->last_flush = NULL;

changed.

Regards,
David Zhou


Regards,
Christian.

Am 27.04.2017 um 07:00 schrieb Chunming Zhou:
The current kernel implementation, which grabs the idle VMID from 
pool when emitting the job may:


 The back-to-back submission from one process could use 
different VMID.
 The submission to different queues from single process could 
use different VMID


It works well in most case but cannot work for the SQ thread trace 
capture.


The VMID for the submission that set the {SQTT}_BASE, which refers 
to the address of the trace buffer, is stored in shader engine.


If the profiling application have to use different VMIDs to submit 
IBs in its life cycle:


 Some trace is not captured since it actually uses different 
VMID to submit jobs.
 Some part of captured trace may come from different application 
since they are accidentally uses the owner’s VMID to submit jobs.


V2:
1. address Christian's comments:
a. drop context flags for tag process, instead, add vm ioctl.
b. change order of patches.
c. sync waiting only when vm flush needs.

2. address Alex's comments;
bump module version

V3:
   address Jerry and Christian's comments.
   and only reserve gfxhub vmid

v4:
   address Jerry and Christian's comments.
   fix some race conditions.

Chunming Zhou (6):
   drm/amdgpu: add vm ioctl
   drm/amdgpu: add reserved vmid field in vm struct v2
   drm/amdgpu: reserve/unreserve vmid by vm ioctl v4
   drm/amdgpu: add limitation for dedicated vm number v4
   drm/amdgpu: implement grab reserved vmid V3
    drm/amdgpu: bump module version for reserved vmid

  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   3 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 152 
++--

  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   6 ++
  include/uapi/drm/amdgpu_drm.h   |  22 +
  5 files changed, 178 insertions(+), 6 deletions(-)








Re: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset

2017-04-27 Thread Christian König

Am 27.04.2017 um 12:23 schrieb zhoucm1:



On 2017年04月27日 18:11, Christian König wrote:

Am 27.04.2017 um 11:08 schrieb Zhang, Jerry (Junwei):

On 04/27/2017 04:25 PM, Chunming Zhou wrote:

the case could happen when gpu reset:
1. when gpu reset, cs can be continue until sw queue is full, then 
push job will wait with holding pd reservation.
2. gpu_reset routine will also need pd reservation to restore page 
table from their shadow.
3. cs is waiting for gpu_reset complete, but gpu reset is waiting 
for cs releases reservation.


Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202
Signed-off-by: Chunming Zhou 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index 9edb1a4..a6722a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1076,6 +1076,7 @@ static int amdgpu_cs_submit(struct 
amdgpu_cs_parser *p,

  amdgpu_job_free_resources(job);

  trace_amdgpu_cs_ioctl(job);
+amdgpu_cs_parser_fini(p, 0, true);


Please confirm:
It will not free/release more things that may be accessed in job 
working process, as cs_parse_fini free so many things related to 
parser.


Yeah, that indeed won't work. amdgpu_cs_parser_fini() does:

No, with my patch, parser->job already is NULL here, same as previous 
sequence, just put push_job action after amdgpu_cs_parser_fini()


Ah! You add that into amdgpu_cs_submit! That was the point I was missing.


trace_amdgpu_cs_ioctl(job);
+   amdgpu_cs_parser_fini(p, 0, true);
amd_sched_entity_push_job(&job->base);
Can we keep the trace_amdgpu_cs_ioctl(); directly before the 
amd_sched_entity_push_job()?


With that changed, the patch is Reviewed-by: Christian König 
.


Regards,
Christian.


if (parser->job)
amdgpu_job_free(parser->job);

And amdgpu_cs_submit() sets the fence, so it must be called before 
amdgpu_cs_parser_fini().
yes, fence is already generated, so amdgpu_cs_parser_fini before 
pushing job is completely safe here.


But apart from that this is a rather nifty idea. What was the problem 
with the initial patch?

Just as comments in patch, which is found by Monk.

Regards,
David Zhou




Or we can just release the pd reservation here?


We could run into problem with the reservation ticked with that. So I 
would want to avoid that.


Christian.



Jerry


amd_sched_entity_push_job(&job->base);

  return 0;
@@ -1130,6 +1131,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, 
void *data, struct drm_file *filp)


  r = amdgpu_cs_submit(, cs);

+return r;
  out:
  amdgpu_cs_parser_fini(, r, reserved_buffers);
  return r;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx





___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/radeon: reject bo creation from ioctl when the gpu is disabled

2017-04-27 Thread Christian König

Am 27.04.2017 um 12:57 schrieb Julien Isorce:

Like done in radeon_cs_ioctl.

In particular it avoids mesa to call map/unmap:
   radeon_create_bo
 ioctl(DRM_RADEON_GEM_CREATE) -> ok
 ioctl(DRM_RADEON_GEM_VA-MAP)
   radeon_destroy_bo
 ioctl(DRM_RADEON_GEM_VA-UNMAP)

Encountered also cases where the vm_manager succeeded to be enabled after
a gpu reset while the GFX ring was not responding so accel was disabled.

https://bugs.freedesktop.org/show_bug.cgi?id=96271


NAK, even if accel isn't working we want to display something and for 
that we need to be able to create BOs.


Christian.



Signed-off-by: Julien Isorce 
---
  drivers/gpu/drm/radeon/radeon_gem.c | 7 +++
  1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_gem.c 
b/drivers/gpu/drm/radeon/radeon_gem.c
index b97c92b..1030001 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -259,6 +259,13 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void 
*data,
int r;
  
	down_read(&rdev->exclusive_lock);

+
+   if (!rdev->accel_working &&
+   args->initial_domain != RADEON_GEM_DOMAIN_CPU) {
+   up_read(&rdev->exclusive_lock);
+   return -EBUSY;
+   }
+
/* create a gem object to contain this object in */
args->size = roundup(args->size, PAGE_SIZE);
r = radeon_gem_object_create(rdev, args->size, args->alignment,



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/radeon: only warn once in radeon_ttm_bo_destroy if va list not empty

2017-04-27 Thread Julien Isorce
But always print an error.

Encountered a dozen of exact same backtraces when mesa's
pb_cache_release_all_buffers is called after that a gpu reset failed.

An other approach would be to check rdev->vm_manager.enabled instead
of rdev->accel_working in the other function radeon_gem_object_close.
But it will only work if the vm_manager succeeded to be re-enabled
after a gpu reset.

bug: https://bugs.freedesktop.org/show_bug.cgi?id=96271

Signed-off-by: Julien Isorce 
---
 drivers/gpu/drm/radeon/radeon_object.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index bec2ec0..76cc039 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -81,7 +81,13 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object 
*tbo)
list_del_init(>list);
mutex_unlock(>rdev->gem.mutex);
radeon_bo_clear_surface_reg(bo);
-   WARN_ON(!list_empty(>va));
+
+   if (!list_empty(>va)) {
+   DRM_ERROR("Virtual address list not empty, accel: %d\n",
+ bo->rdev->accel_working);
+   WARN_ON_ONCE(1);
+   }
+
drm_gem_object_release(>gem_base);
kfree(bo);
 }
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/radeon: reject bo creation from ioctl when the gpu is disabled

2017-04-27 Thread Julien Isorce
Like done in radeon_cs_ioctl.

In particular it avoids mesa to call map/unmap:
  radeon_create_bo
ioctl(DRM_RADEON_GEM_CREATE) -> ok
ioctl(DRM_RADEON_GEM_VA-MAP)
  radeon_destroy_bo
ioctl(DRM_RADEON_GEM_VA-UNMAP)

Encountered also cases where the vm_manager succeeded to be enabled after
a gpu reset while the GFX ring was not responding so accel was disabled.

https://bugs.freedesktop.org/show_bug.cgi?id=96271

Signed-off-by: Julien Isorce 
---
 drivers/gpu/drm/radeon/radeon_gem.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_gem.c 
b/drivers/gpu/drm/radeon/radeon_gem.c
index b97c92b..1030001 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -259,6 +259,13 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void 
*data,
int r;
 
down_read(&rdev->exclusive_lock);
+
+   if (!rdev->accel_working &&
+   args->initial_domain != RADEON_GEM_DOMAIN_CPU) {
+   up_read(&rdev->exclusive_lock);
+   return -EBUSY;
+   }
+
/* create a gem object to contain this object in */
args->size = roundup(args->size, PAGE_SIZE);
r = radeon_gem_object_create(rdev, args->size, args->alignment,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/6 v4] *** Dedicated vmid per process v4 ***

2017-04-27 Thread zhoucm1



On 2017年04月27日 17:51, Christian König wrote:
Patch #1, #2, #4 and #6 are Reviewed-by: Christian König 
.


Patch #3:

+/* Select the first entry VMID */
+idle = list_first_entry(_mgr->ids_lru, struct amdgpu_vm_id, 
list);

+list_del_init(>list);
+vm->reserved_vmid[vmhub] = idle;
+mutex_unlock(_mgr->lock);
I think we should wait for the VMID to be completely idle before we 
can use it, but that possible also go into the handling in patch #5.

Yes.


Patch #5:

+if ((amdgpu_vm_had_gpu_reset(adev, id)) ||
+(atomic64_read(>owner) != vm->client_id) ||
+(job->vm_pd_addr != id->pd_gpu_addr) ||
+(updates && (!flushed || updates->context != 
flushed->context ||

+fence_is_later(updates, flushed {

You also need this check here as well:
if (!id->last_flush ||
(id->last_flush->context != fence_context &&
 !fence_is_signaled(id->last_flush)))

added.



+tmp = amdgpu_sync_get_fence(>active);
+if (tmp) {
+r = amdgpu_sync_fence(adev, sync, tmp);
+fence_put(tmp);
+return r;
+}

That won't work correctly.

The first problem is that amdgpu_sync_get_fence() removes the fence 
from the active fences. So a another command submission from a 
different context won't wait for all the necessary fences. I think 
using amdgpu_sync_peek_fence() instead should work here.

good catch.



The second problem is that a context could be starved when it needs a 
VM flush while another context can still submit jobs without a flush. 
I think you could work around that by setting id->pd_gpu_addr to an 
invalid value when we hit this case. This way all other contexts would 
need to do a VM flush as well.
I don't catch your opinion, when you object concurrent flush, isn't it 
expected?





+id->pd_gpu_addr = job->vm_pd_addr;
+id->current_gpu_reset_count = 
atomic_read(&adev->gpu_reset_counter);

+atomic64_set(&id->owner, vm->client_id);
+job->vm_needs_flush = needs_flush;
If we need a flush id->last_flush needs to be set to NULL here as 
well. E.g. do

fence_put(id->last_flush);
id->last_flush = NULL;

changed.

Regards,
David Zhou


Regards,
Christian.

Am 27.04.2017 um 07:00 schrieb Chunming Zhou:
The current kernel implementation, which grabs the idle VMID from 
pool when emitting the job may:


 The back-to-back submission from one process could use different 
VMID.
 The submission to different queues from single process could use 
different VMID


It works well in most case but cannot work for the SQ thread trace 
capture.


The VMID for the submission that set the {SQTT}_BASE, which refers to 
the address of the trace buffer, is stored in shader engine.


If the profiling application have to use different VMIDs to submit 
IBs in its life cycle:


 Some trace is not captured since it actually uses different VMID 
to submit jobs.
 Some part of captured trace may come from different application 
since they accidentally use the owner’s VMID to submit jobs.


V2:
1. address Christian's comments:
a. drop context flags for tag process, instead, add vm ioctl.
b. change order of patches.
c. sync waiting only when vm flush needs.

2. address Alex's comments;
bump module version

V3:
   address Jerry and Christian's comments.
   and only reserve gfxhub vmid

v4:
   address Jerry and Christian's comments.
   fix some race conditions.

Chunming Zhou (6):
   drm/amdgpu: add vm ioctl
   drm/amdgpu: add reserved vmid field in vm struct v2
   drm/amdgpu: reserve/unreserve vmid by vm ioctl v4
   drm/amdgpu: add limitation for dedicated vm number v4
   drm/amdgpu: implement grab reserved vmid V3
   drm/amdgpu: bump module verion for reserved vmid

  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   3 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 152 
++--

  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   6 ++
  include/uapi/drm/amdgpu_drm.h   |  22 +
  5 files changed, 178 insertions(+), 6 deletions(-)





___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset

2017-04-27 Thread zhoucm1



On 2017年04月27日 18:11, Christian König wrote:

Am 27.04.2017 um 11:08 schrieb Zhang, Jerry (Junwei):

On 04/27/2017 04:25 PM, Chunming Zhou wrote:

the case could happen when gpu reset:
1. when gpu reset, cs can be continue until sw queue is full, then 
push job will wait with holding pd reservation.
2. gpu_reset routine will also need pd reservation to restore page 
table from their shadow.
3. cs is waiting for gpu_reset complete, but gpu reset is waiting 
for cs releases reservation.


Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202
Signed-off-by: Chunming Zhou 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index 9edb1a4..a6722a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1076,6 +1076,7 @@ static int amdgpu_cs_submit(struct 
amdgpu_cs_parser *p,

  amdgpu_job_free_resources(job);

  trace_amdgpu_cs_ioctl(job);
+amdgpu_cs_parser_fini(p, 0, true);


Please confirm:
It will not free/release more things that may be accessed in job 
working process, as cs_parse_fini free so many things related to parser.


Yeah, that indeed won't work. amdgpu_cs_parser_fini() does:

No, with my patch, parser->job already is NULL here, same as previous 
sequence, just put push_job action after amdgpu_cs_parser_fini()

if (parser->job)
amdgpu_job_free(parser->job);

And amdgpu_cs_submit() sets the fence, so it must be called before 
amdgpu_cs_parser_fini().
yes, fence is already generated, so amdgpu_cs_parser_fini before pushing 
job is completely safe here.


But apart from that this is a rather nifty idea. What was the problem 
with the initial patch?

Just as comments in patch, which is found by Monk.

Regards,
David Zhou




Or we can just release the pd reservation here?


We could run into problem with the reservation ticked with that. So I 
would want to avoid that.


Christian.



Jerry


amd_sched_entity_push_job(>base);

  return 0;
@@ -1130,6 +1131,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, 
void *data, struct drm_file *filp)


  r = amdgpu_cs_submit(, cs);

+return r;
  out:
  amdgpu_cs_parser_fini(, r, reserved_buffers);
  return r;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx





___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset

2017-04-27 Thread Christian König

Am 27.04.2017 um 11:08 schrieb Zhang, Jerry (Junwei):

On 04/27/2017 04:25 PM, Chunming Zhou wrote:

the case could happen when gpu reset:
1. when gpu reset, cs can be continue until sw queue is full, then 
push job will wait with holding pd reservation.
2. gpu_reset routine will also need pd reservation to restore page 
table from their shadow.
3. cs is waiting for gpu_reset complete, but gpu reset is waiting for 
cs releases reservation.


Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202
Signed-off-by: Chunming Zhou 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index 9edb1a4..a6722a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1076,6 +1076,7 @@ static int amdgpu_cs_submit(struct 
amdgpu_cs_parser *p,

  amdgpu_job_free_resources(job);

  trace_amdgpu_cs_ioctl(job);
+amdgpu_cs_parser_fini(p, 0, true);


Please confirm:
It will not free/release more things that may be accessed in job 
working process, as cs_parse_fini free so many things related to parser.


Yeah, that indeed won't work. amdgpu_cs_parser_fini() does:

if (parser->job)
amdgpu_job_free(parser->job);

And amdgpu_cs_submit() sets the fence, so it must be called before 
amdgpu_cs_parser_fini().


But apart from that this is a rather nifty idea. What was the problem 
with the initial patch?




Or we can just release the pd reservation here?


We could run into problem with the reservation ticked with that. So I 
would want to avoid that.


Christian.



Jerry


amd_sched_entity_push_job(>base);

  return 0;
@@ -1130,6 +1131,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, 
void *data, struct drm_file *filp)


  r = amdgpu_cs_submit(, cs);

+return r;
  out:
  amdgpu_cs_parser_fini(, r, reserved_buffers);
  return r;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] Revert "drm/amdgpu: Refactor flip into prepare submit and submit. (v2)"

2017-04-27 Thread Christian König

Am 27.04.2017 um 10:18 schrieb Michel Dänzer:

From: Michel Dänzer 

This reverts commit cb341a319f7e66f879d69af929c3dadfc1a8f31e.

The purpose of the refactor was for amdgpu_crtc_prepare/submit_flip to
be used by the DC code, but that's no longer the case.

Signed-off-by: Michel Dänzer 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 147 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h|  17 
  2 files changed, 29 insertions(+), 135 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 7cf226dfb602..0415875656c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -138,56 +138,10 @@ static void amdgpu_unpin_work_func(struct work_struct 
*__work)
kfree(work);
  }
  
-

-static void amdgpu_flip_work_cleanup(struct amdgpu_flip_work *work)
-{
-   int i;
-
-   amdgpu_bo_unref(>old_abo);
-   fence_put(work->excl);
-   for (i = 0; i < work->shared_count; ++i)
-   fence_put(work->shared[i]);
-   kfree(work->shared);
-   kfree(work);
-}
-
-static void amdgpu_flip_cleanup_unreserve(
-   struct amdgpu_flip_work *work,
-   struct amdgpu_bo *new_abo)
-{
-   amdgpu_bo_unreserve(new_abo);
-   amdgpu_flip_work_cleanup(work);
-}
-
-static void amdgpu_flip_cleanup_unpin(
-   struct amdgpu_flip_work *work,
-   struct amdgpu_bo *new_abo)
-{
-   if (unlikely(amdgpu_bo_unpin(new_abo) != 0))
-   DRM_ERROR("failed to unpin new abo in error path\n");
-   amdgpu_flip_cleanup_unreserve(work, new_abo);
-}
-
-void amdgpu_crtc_cleanup_flip_ctx(
-   struct amdgpu_flip_work *work,
-   struct amdgpu_bo *new_abo)
-{
-   if (unlikely(amdgpu_bo_reserve(new_abo, false) != 0)) {
-   DRM_ERROR("failed to reserve new abo in error path\n");
-   amdgpu_flip_work_cleanup(work);
-   return;
-   }
-   amdgpu_flip_cleanup_unpin(work, new_abo);
-}
-
-int amdgpu_crtc_prepare_flip(
-   struct drm_crtc *crtc,
-   struct drm_framebuffer *fb,
-   struct drm_pending_vblank_event *event,
-   uint32_t page_flip_flags,
-   uint32_t target,
-   struct amdgpu_flip_work **work_p,
-   struct amdgpu_bo **new_abo_p)
+int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
+struct drm_framebuffer *fb,
+struct drm_pending_vblank_event *event,
+uint32_t page_flip_flags, uint32_t target)
  {
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
@@ -200,7 +154,7 @@ int amdgpu_crtc_prepare_flip(
unsigned long flags;
u64 tiling_flags;
u64 base;
-   int r;
+   int i, r;
  
  	work = kzalloc(sizeof *work, GFP_KERNEL);

if (work == NULL)
@@ -261,84 +215,41 @@ int amdgpu_crtc_prepare_flip(
spin_unlock_irqrestore(>dev->event_lock, flags);
r = -EBUSY;
goto pflip_cleanup;
-
}
-   spin_unlock_irqrestore(>dev->event_lock, flags);
-
-   *work_p = work;
-   *new_abo_p = new_abo;
  
-	return 0;

-
-pflip_cleanup:
-   amdgpu_crtc_cleanup_flip_ctx(work, new_abo);
-   return r;
-
-unpin:
-   amdgpu_flip_cleanup_unpin(work, new_abo);
-   return r;
-
-unreserve:
-   amdgpu_flip_cleanup_unreserve(work, new_abo);
-   return r;
-
-cleanup:
-   amdgpu_flip_work_cleanup(work);
-   return r;
-
-}
-
-void amdgpu_crtc_submit_flip(
-   struct drm_crtc *crtc,
-   struct drm_framebuffer *fb,
-   struct amdgpu_flip_work *work,
-   struct amdgpu_bo *new_abo)
-{
-   unsigned long flags;
-   struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
-   spin_lock_irqsave(>dev->event_lock, flags);
amdgpu_crtc->pflip_status = AMDGPU_FLIP_PENDING;
amdgpu_crtc->pflip_works = work;
  
+

+   DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: 
%p,\n",
+amdgpu_crtc->crtc_id, amdgpu_crtc, work);
/* update crtc fb */
crtc->primary->fb = fb;
spin_unlock_irqrestore(>dev->event_lock, flags);
-
-   DRM_DEBUG_DRIVER(
-   "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: 
%p,\n",
-   amdgpu_crtc->crtc_id, amdgpu_crtc, work);
-
amdgpu_flip_work_func(>flip_work.work);
-}
+   return 0;
  
-int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,

- struct drm_framebuffer *fb,
- struct drm_pending_vblank_event 

Re: [PATCH] drm/amdgpu: Enable chained IB MCBP support

2017-04-27 Thread Christian König

Am 27.04.2017 um 09:18 schrieb Trigger Huang:

Support for MCBP/Virtualization in combination with chained IBs is
formal released on firmware feature version #46. So enable it
according to firmware feature version, otherwise, world switch will
hang.

Signed-off-by: Trigger Huang 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 17 +++--
  1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 2ff5f19..9a8c042 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -941,12 +941,6 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device 
*adev)
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
  
-	/* chain ib ucode isn't formal released, just disable it by far

-* TODO: when ucod ready we should use ucode version to judge if
-* chain-ib support or not.
-*/
-   adev->virt.chained_ib_support = false;
-
adev->gfx.me_feature_version = 
le32_to_cpu(cp_hdr->ucode_feature_version);
  
  	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);

@@ -960,6 +954,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device 
*adev)
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = 
le32_to_cpu(cp_hdr->ucode_feature_version);
  
+	/*

+* Support for MCBP/Virtualization in combination with chained IBs is
+* formal released on feature version #46
+*/
+   if (adev->gfx.ce_feature_version >= 46 &&
+   adev->gfx.pfp_feature_version >= 46) {
+   adev->virt.chained_ib_support = true;
+   DRM_INFO("Chained IB support enabled!\n");
+   } else
+   adev->virt.chained_ib_support = false;
+
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
err = request_firmware(>gfx.rlc_fw, fw_name, adev->dev);
if (err)



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/6 v4] *** Dedicated vmid per process v4 ***

2017-04-27 Thread Christian König
Patch #1, #2, #4 and #6 are Reviewed-by: Christian König 
.


Patch #3:

+   /* Select the first entry VMID */
+   idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list);
+   list_del_init(>list);
+   vm->reserved_vmid[vmhub] = idle;
+   mutex_unlock(&id_mgr->lock);
I think we should wait for the VMID to be completely idle before we can 
use it, but that possible also go into the handling in patch #5.


Patch #5:

+   if ((amdgpu_vm_had_gpu_reset(adev, id)) ||
+   (atomic64_read(&id->owner) != vm->client_id) ||
+   (job->vm_pd_addr != id->pd_gpu_addr) ||
+   (updates && (!flushed || updates->context != flushed->context ||
+   fence_is_later(updates, flushed)))) {

You also need this check here as well:
if (!id->last_flush ||
(id->last_flush->context != fence_context &&
 !fence_is_signaled(id->last_flush)))


+   tmp = amdgpu_sync_get_fence(&id->active);
+   if (tmp) {
+   r = amdgpu_sync_fence(adev, sync, tmp);
+   fence_put(tmp);
+   return r;
+   }

That won't work correctly.

The first problem is that amdgpu_sync_get_fence() removes the fence from 
the active fences. So another command submission from a different 
context won't wait for all the necessary fences. I think using 
amdgpu_sync_peek_fence() instead should work here.


The second problem is that a context could be starved when it needs a VM 
flush while another context can still submit jobs without a flush. I 
think you could work around that by setting id->pd_gpu_addr to an 
invalid value when we hit this case. This way all other contexts would 
need to do a VM flush as well.



+   id->pd_gpu_addr = job->vm_pd_addr;
+   id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
+   atomic64_set(&id->owner, vm->client_id);
+   job->vm_needs_flush = needs_flush;
If we need a flush id->last_flush needs to be set to NULL here as well. 
E.g. do

fence_put(id->last_flush);
id->last_flush = NULL;

Regards,
Christian.

Am 27.04.2017 um 07:00 schrieb Chunming Zhou:

The current kernel implementation, which grabs the idle VMID from pool when 
emitting the job may:

 The back-to-back submission from one process could use different VMID.
 The submission to different queues from single process could use different 
VMID

It works well in most case but cannot work for the SQ thread trace capture.

The VMID for the submission that set the {SQTT}_BASE, which refers to the 
address of the trace buffer, is stored in shader engine.

If the profiling application have to use different VMIDs to submit IBs in its 
life cycle:

 Some trace is not captured since it actually uses different VMID to submit 
jobs.
 Some part of captured trace may come from different application since they 
accidentally use the owner’s VMID to submit jobs.

V2:
1. address Christian's comments:
a. drop context flags for tag process, instead, add vm ioctl.
b. change order of patches.
c. sync waiting only when vm flush needs.

2. address Alex's comments;
bump module version

V3:
   address Jerry and Christian's comments.
   and only reserve gfxhub vmid

v4:
   address Jerry and Christian's comments.
   fix some race conditions.

Chunming Zhou (6):
   drm/amdgpu: add vm ioctl
   drm/amdgpu: add reserved vmid field in vm struct v2
   drm/amdgpu: reserve/unreserve vmid by vm ioctl v4
   drm/amdgpu: add limitation for dedicated vm number v4
   drm/amdgpu: implement grab reserved vmid V3
   drm/amdgpu: bump module verion for reserved vmid

  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   3 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 152 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   6 ++
  include/uapi/drm/amdgpu_drm.h   |  22 +
  5 files changed, 178 insertions(+), 6 deletions(-)



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: Fix use of interruptible waiting

2017-04-27 Thread Michel Dänzer
On 27/04/17 04:39 PM, Michel Dänzer wrote:
> On 27/04/17 04:19 AM, Alex Xie wrote:
>> Hi,
>>
>> I knew this is part of ioctl. And I intended to fix this interruptible
>> waiting in an ioctl.
> 
> It's by design, nothing that needs fixing in the case of an ioctl.
> 
> 
>> 1. The wait is short. There is not much benefit by interruptible
>> waiting.  The BO is a frame buffer BO. Are there many competitors to
>> lock this BO? If not, the wait is very short. This is my main reason to
>> change.
> 
> Irrelevant. If amdgpu_bo_reserve returns -EINTR, so will other functions
> down the line, and amdgpu_crtc_page_flip_target will have to propagate
> that anyway. So doing an uninterruptible wait will just delay the
> inevitable and delivery of the signal to userspace.

To be clear, I meant -ERESTARTSYS instead of -EINTR.


>> 2. In this function and caller functions, the error handling for such
>> interrupt is complicated and risky.
> 
> If you're concerned about the cascade of cleanup functions, let's just
> revert commit 0169b52ce575 ("drm/amdgpu: Refactor flip into prepare
> submit and submit. (v2)"). amdgpu_crtc_prepare_flip was split out to be
> used by DC, but DC is now using a DRM core atomic helper for flips and
> no longer using amdgpu_crtc_prepare_flip.
> 
> 


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 5/6] drm/amdgpu: implement grab dedicated vmid V2

2017-04-27 Thread Christian König

Am 27.04.2017 um 06:42 schrieb zhoucm1:



On 2017年04月27日 10:52, Zhang, Jerry (Junwei) wrote:

On 04/26/2017 07:10 PM, Chunming Zhou wrote:

v2: move sync waiting only when flush needs

Change-Id: I64da2701c9fdcf986afb90ba1492a78d5bef1b6c
Signed-off-by: Chunming Zhou 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 61 
++

  1 file changed, 61 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 214ac50..bce7701 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -402,6 +402,63 @@ static bool 
amdgpu_vm_dedicated_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub)

  return !!vm->dedicated_vmid[vmhub];
  }

+static int amdgpu_vm_grab_dedicated_vmid(struct amdgpu_vm *vm,
+ struct amdgpu_ring *ring,
+ struct amdgpu_sync *sync,
+ struct fence *fence,
+ struct amdgpu_job *job)
+{
+struct amdgpu_device *adev = ring->adev;
+unsigned vmhub = ring->funcs->vmhub;
+struct amdgpu_vm_id *id = vm->dedicated_vmid[vmhub];
+struct amdgpu_vm_id_manager *id_mgr = 
>vm_manager.id_mgr[vmhub];

+struct fence *updates = sync->last_vm_update;
+int r = 0;
+struct fence *flushed, *tmp;
+bool needs_flush = false;
+
+mutex_lock(_mgr->lock);
+if (amdgpu_vm_had_gpu_reset(adev, id))
+needs_flush = true;
+
+flushed  = id->flushed_updates;
+if (updates && (!flushed || updates->context != 
flushed->context ||

+fence_is_later(updates, flushed)))
+needs_flush = true;


Just question:
Do we need to consider concurrent flush for Vega10 like grab id func?

Christian has pointed that old asic has hardware bug.


Which is fixed for Vega10, on that hardware concurrent flushing works fine.

It's just that Tonga/Fiji have a real problem with that and for 
CIK/Polaris it's more a subtle one which is hard to trigger.


Regards,
Christian.



Regards,
David Zhou


Jerry


+if (needs_flush) {
+tmp = amdgpu_sync_get_fence(>active);
+if (tmp) {
+r = amdgpu_sync_fence(adev, sync, tmp);
+fence_put(tmp);
+mutex_unlock(_mgr->lock);
+return r;
+}
+}
+
+/* Good we can use this VMID. Remember this submission as
+* user of the VMID.
+*/
+r = amdgpu_sync_fence(ring->adev, >active, fence);
+if (r)
+goto out;
+
+if (updates && (!flushed || updates->context != 
flushed->context ||

+fence_is_later(updates, flushed))) {
+fence_put(id->flushed_updates);
+id->flushed_updates = fence_get(updates);
+}
+id->pd_gpu_addr = job->vm_pd_addr;
+id->current_gpu_reset_count = 
atomic_read(>gpu_reset_counter);

+atomic64_set(>owner, vm->client_id);
+job->vm_needs_flush = needs_flush;
+
+job->vm_id = id - id_mgr->ids;
+trace_amdgpu_vm_grab_id(vm, ring, job);
+out:
+mutex_unlock(_mgr->lock);
+return r;
+}
+
  /**
   * amdgpu_vm_grab_id - allocate the next free VMID
   *
@@ -426,6 +483,10 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, 
struct amdgpu_ring *ring,

  unsigned i;
  int r = 0;

+if (amdgpu_vm_dedicated_vmid_ready(vm, vmhub))
+return amdgpu_vm_grab_dedicated_vmid(vm, ring, sync,
+ fence, job);
+
  fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, 
GFP_KERNEL);

  if (!fences)
  return -ENOMEM;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: Fix use of interruptible waiting

2017-04-27 Thread Michel Dänzer
On 27/04/17 06:05 PM, Christian König wrote:
> 
>> 2. In this function and caller functions, the error handling for such
>> interrupt is complicated and risky. When the waiting is interrupted by
>> a signal, the callers of this function need to handle this interrupt
>> error. I traced the calling stack all the way back to function
>> drm_mode_page_flip_ioctl. I could not find an issue. But I am not sure
>> about even upper layer. Is this IOCTL restartable? How about further
>> higher layer? Since I didn't see benefit in point 1. So I thought it
>> was a good idea to change.
> 
> The page flip IOCTL should be restartable. I think all drm IOCTLs with
> driver callbacks are restartable, but I'm not 100% sure, Michel probably
> knows that better.

Yes, I'm pretty sure it is.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 3/6] drm/amdgpu: reserve/unreserve vmid by vm ioctl v3

2017-04-27 Thread Christian König

Am 27.04.2017 um 04:24 schrieb Zhang, Jerry (Junwei):

On 04/27/2017 10:13 AM, zhoucm1 wrote:



On 2017年04月26日 20:26, Christian König wrote:

Am 26.04.2017 um 13:10 schrieb Chunming Zhou:

add reserve/unreserve vmid functions.
v3:
only reserve vmid from gfxhub

Change-Id: I5f80dc39dc9d44660a96a2b710b0dbb4d3b9039d
Signed-off-by: Chunming Zhou 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 70
+++---
  1 file changed, 57 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ec87d64..a783e8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -397,6 +397,11 @@ static bool amdgpu_vm_had_gpu_reset(struct
amdgpu_device *adev,
  atomic_read(>gpu_reset_counter);
  }
  +static bool amdgpu_vm_dedicated_vmid_ready(struct amdgpu_vm *vm, 
unsigned

vmhub)
+{
+return !!vm->dedicated_vmid[vmhub];
+}
+
  /**
   * amdgpu_vm_grab_id - allocate the next free VMID
   *
@@ -546,6 +551,47 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, 
struct

amdgpu_ring *ring,
  return r;
  }
  +static void amdgpu_vm_free_dedicated_vmid(struct amdgpu_device 
*adev,

+  struct amdgpu_vm *vm,
+  unsigned vmhub)
+{
+struct amdgpu_vm_id_manager *id_mgr = 
>vm_manager.id_mgr[vmhub];

+
+mutex_lock(_mgr->lock);
+if (vm->dedicated_vmid[vmhub]) {
+ list_add(>dedicated_vmid[vmhub]->list,
+_mgr->ids_lru);
+vm->dedicated_vmid[vmhub] = NULL;
+}
+mutex_unlock(_mgr->lock);
+}
+
+static int amdgpu_vm_alloc_dedicated_vmid(struct amdgpu_device *adev,
+  struct amdgpu_vm *vm,
+  unsigned vmhub)
+{
+struct amdgpu_vm_id_manager *id_mgr;
+struct amdgpu_vm_id *idle;
+int r;
+
+id_mgr = >vm_manager.id_mgr[vmhub];
+mutex_lock(_mgr->lock);
+/* Select the first entry VMID */
+idle = list_first_entry(_mgr->ids_lru, struct amdgpu_vm_id, 
list);

+list_del_init(>list);
+vm->dedicated_vmid[vmhub] = idle;


We need a check here if there isn't an ID already reserved for the VM.

Additional to that I would still rename the field to reserved_vmid, 
that

describes better what it is actually doing.
the vmid is global, reserve it from global lru, so it makes sense to 
name

'reserved_vmid' like in patch#4.
the reserved vmid is dedicated to one vm, so the field in vm is
'dedicated_vmid' like here, which is my reason to name it.


Just the same thing looking at different views.

IMHO, it's reserved vmid from global group, naming "reserved_vmid".
And in Patch#4, it constrains the number, possibly naming 
"reserved_vmid_seq" or "reserved_vmid_ref".


Any of them looks reasonable, select the one you like. :)


Yeah, agree. More like a gut feeling that reserved is more common 
wording, but I don't have a strong opinion on that either.


Christian.



Jerry



Anyway, the alternative is ok to me although I prefer mine, I hope we 
can

deliver this solution by today, many RGP issues depend on it.

Thanks,
David Zhou



+ mutex_unlock(_mgr->lock);
+
+r = amdgpu_sync_wait(>active);
+if (r)
+goto err;


This is racy. A command submission could happen while we wait for 
the id to

become idle.

The easiest way to solve this is to hold the lock while waiting for 
the ID to

become available.


+
+return 0;
+err:
+amdgpu_vm_free_dedicated_vmid(adev, vm, vmhub);
+return r;
+}
+
  static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring 
*ring)

  {
  struct amdgpu_device *adev = ring->adev;
@@ -2316,18 +2362,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
amdgpu_vm_free_levels(>root);
  fence_put(vm->last_dir_update);
-for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
-struct amdgpu_vm_id_manager *id_mgr =
->vm_manager.id_mgr[i];
-
-mutex_lock(_mgr->lock);
-if (vm->dedicated_vmid[i]) {
- list_add(>dedicated_vmid[i]->list,
- _mgr->ids_lru);
-vm->dedicated_vmid[i] = NULL;
-}
-mutex_unlock(_mgr->lock);
-}
+for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
+amdgpu_vm_free_dedicated_vmid(adev, vm, i);
  }
/**
@@ -2400,11 +2436,19 @@ int amdgpu_vm_ioctl(struct drm_device *dev, 
void

*data, struct drm_file *filp)
  union drm_amdgpu_vm *args = data;
  struct amdgpu_device *adev = dev->dev_private;
  struct amdgpu_fpriv *fpriv = filp->driver_priv;
+int r;
switch (args->in.op) {
  case AMDGPU_VM_OP_RESERVE_VMID:
+/* current, we only have requirement to reserve vmid from 
gfxhub */

+if (!amdgpu_vm_dedicated_vmid_ready(>vm, 0)) {
+r = amdgpu_vm_alloc_dedicated_vmid(adev, >vm, 0);


This is racy as well.

Two threads could try to reserve a VMID at the same time. You need to
integrate the check if it's already allocated into

Re: [PATCH] drm: Harmonize CIK ASIC support in radeon and amdgpu (v2)

2017-04-27 Thread Michel Dänzer
On 27/04/17 05:15 AM, Felix Kuehling wrote:
> 
> Hi Michel,
> 
> You said in an earlier email that it doesn't have to be convenient. With
> that in mind, I went back to a minimalistic solution that doesn't need
> any additional Kconfig options and only uses two module parameters (one
> in radeon, one in amdgpu).
> 
> I'm attaching my WIP patches for reference (currently based on
> amd-kfd-staging). For an official review I'd rebase them on
> amd-staging-4.9, remove the #if-0-ed hack that didn't work out, and add
> the same for SI.
> 
> Can everyone can agree that this is good enough?

Yes, something like this could be a good first step in the right
direction. Some comments:

The radeon options should be available regardless of
CONFIG_DRM_AMDGPU_CIK/SI, or they won't be useful for amdgpu-pro / other
out-of-tree amdgpu builds.

The default at this point should possibly still be for CIK GPUs to be
driven by radeon, even if CONFIG_DRM_AMDGPU_CIK is enabled; more
definitely so for SI. Then we can flip the default and remove
CONFIG_DRM_AMDGPU_CIK/SI at some point in the future, and (much) later
maybe remove the CIK/SI code from radeon.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: Fix use of interruptible waiting

2017-04-27 Thread Christian König
1. The wait is short. There is not much benefit by interruptible 
waiting.  The BO is a frame buffer BO. Are there many competitors to 
lock this BO? If not, the wait is very short. This is my main reason 
to change.
The problem is that all those waits can block the MM subsystem from 
reclaiming memory in an out of memory situation, e.g. when the process 
is terminated with a SIGKILL.


That's why the usual policy used in the drivers is to wait interruptible 
unless you absolutely need the wait uninterrupted (e.g. in a cleanup 
path for example).


2. In this function and caller functions, the error handling for such 
interrupt is complicated and risky. When the waiting is interrupted by 
a signal, the callers of this function need to handle this interrupt 
error. I traced the calling stack all the way back to function 
drm_mode_page_flip_ioctl. I could not find an issue. But I am not sure 
about even upper layer. Is this IOCTL restartable? How about further 
higher layer? Since I didn't see benefit in point 1. So I thought it 
was a good idea to change.


The page flip IOCTL should be restartable. I think all drm IOCTLs with 
driver callbacks are restartable, but I'm not 100% sure, Michel probably 
knows that better.


There is even the CONFIG_DEBUG_WW_MUTEX_SLOWPATH option to throw random 
backoff cases into reserving the BO for testing. But I think that only 
applies to when multiple BOs are reserved at the same time.


Might be a good idea to create something similar for all interruptible 
lock acquires to test if they are really restartable. That will most 
likely yield quite a bunch of cases where the handling isn't correct.


Regards,
Christian.

Am 26.04.2017 um 21:19 schrieb Alex Xie:

Hi,

I knew this is part of ioctl. And I intended to fix this interruptible 
waiting in an ioctl.


ioctl is one of driver interfaces, where interruptible waiting is good 
in some scenarios. For example, if ioctl performs IO operations in 
atomic way, we don't want ioctl to be interrupted by a signal.


Interruptible waiting is good when we need to wait for longer time, 
for example, waiting for an input data for indefinite time. We don't 
want the process not responding to signals. Interruptible waitings can 
be good in read/write/ioctl interfaces.


For this patch,

1. The wait is short. There is not much benefit by interruptible 
waiting.  The BO is a frame buffer BO. Are there many competitors to 
lock this BO? If not, the wait is very short. This is my main reason 
to change.
2. In this function and caller functions, the error handling for such 
interrupt is complicated and risky. When the waiting is interrupted by 
a signal, the callers of this function need to handle this interrupt 
error. I traced the calling stack all the way back to function 
drm_mode_page_flip_ioctl. I could not find an issue. But I am not sure 
about even upper layer. Is this IOCTL restartable? How about further 
higher layer? Since I didn't see benefit in point 1. So I thought it 
was a good idea to change.


Anyway, it is up to you. A change might bring other unseen risk. If 
you still have concern, I will drop this patch. This IOCTL is used by 
graphic operation. There may not be a lot of signals to a GUI process 
which uses this IOCTL.


Thanks,
Alex Bin


On 17-04-26 04:34 AM, Christian König wrote:

Am 26.04.2017 um 03:17 schrieb Michel Dänzer:

On 26/04/17 06:25 AM, Alex Xie wrote:

1. The wait is short. There is not much benefit by
interruptible waiting.
2. In this function and caller functions, the error
handling for such interrupt is complicated and risky.

Change-Id: I289674ecd3f5ef20c93fe63e33df6d668b3c2edc
Signed-off-by: Alex Xie 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c

index 43082bf..c006cc4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -227,7 +227,7 @@ int amdgpu_crtc_prepare_flip(
  new_abo = gem_to_amdgpu_bo(obj);
/* pin the new buffer */
-r = amdgpu_bo_reserve(new_abo, false);
+r = amdgpu_bo_reserve(new_abo, true);
  if (unlikely(r != 0)) {
  DRM_ERROR("failed to reserve new abo buffer before flip\n");
  goto cleanup;

I'm afraid we have no choice but to use interruptible here, because 
this

code runs as part of an ioctl (DRM_IOCTL_MODE_PAGE_FLIP).


Yes, agree. But the error message is incorrect here and should be 
removed.


Christian.




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm: Harmonize CIK ASIC support in radeon and amdgpu (v2)

2017-04-27 Thread Christian König

Am 26.04.2017 um 22:15 schrieb Felix Kuehling:

On 17-04-25 02:28 AM, Michel Dänzer wrote:

On 22/04/17 02:05 AM, Felix Kuehling wrote:

__setup doesn't work in modules.

Right. We could build something like
drivers/video/fbdev/core/fb_cmdline.c:video_setup() into the kernel to
handle this, but it's a bit ugly, which is one reason why I was leaning
towards:



s8250_options is only compiled if the driver is not a module.

That doesn't prevent us from using __module_param_call directly, does it?

Although, that still doesn't work as I envision if only one driver's
option is set e.g. in /etc/modprobe.d/*.conf .


So, I'm starting to think we need a shared module for this, which
provides one or multiple module parameters to choose which driver to use
for CIK/SI[0], and provides the result to the amdgpu/radeon drivers.
That might make things easier for amdgpu-pro / other standalone amdgpu
versions in the long run as well, as they could add files to
/etc/modprobe.d/ choosing themselves by default, without having to
blacklist radeon.

What do you guys think?

Hi Michel,

You said in an earlier email that it doesn't have to be convenient. With
that in mind, I went back to a minimalistic solution that doesn't need
any additional Kconfig options and only uses two module parameters (one
in radeon, one in amdgpu).

I'm attaching my WIP patches for reference (currently based on
amd-kfd-staging). For an official review I'd rebase them on
amd-staging-4.9, remove the #if-0-ed hack that didn't work out, and add
the same for SI.

Can everyone can agree that this is good enough?


It's certainly good enough for me.

With the disabled dummy code removed feel free to stick a Reviewed-by: 
Christian König  at those patches.


Regards,
Christian.



Regards,
   Felix



[0] or possibly even more fine-grained in the future




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset

2017-04-27 Thread Zhang, Jerry (Junwei)

On 04/27/2017 04:25 PM, Chunming Zhou wrote:

the case could happen when gpu reset:
1. when gpu reset, cs can be continue until sw queue is full, then push job 
will wait with holding pd reservation.
2. gpu_reset routine will also need pd reservation to restore page table from 
their shadow.
3. cs is waiting for gpu_reset complete, but gpu reset is waiting for cs 
releases reservation.

Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202
Signed-off-by: Chunming Zhou 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9edb1a4..a6722a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1076,6 +1076,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_job_free_resources(job);

trace_amdgpu_cs_ioctl(job);
+   amdgpu_cs_parser_fini(p, 0, true);


Please confirm:
It will not free/release more things that may be accessed in job working 
process, as cs_parse_fini free so many things related to parser.


Or we can just release the pd reservation here?

Jerry


amd_sched_entity_push_job(>base);

return 0;
@@ -1130,6 +1131,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)

r = amdgpu_cs_submit(, cs);

+   return r;
  out:
amdgpu_cs_parser_fini(, r, reserved_buffers);
return r;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: export more gpu info for gfx9

2017-04-27 Thread Junwei Zhang
v2: 64-bit aligned for gpu info

Signed-off-by: Ken Wang 
Signed-off-by: Junwei Zhang 
Reviewed-by: Alex Deucher 
Reviewed-by: Qiang Yu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 11 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   |  3 +++
 include/uapi/drm/amdgpu_drm.h   | 19 +++
 4 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4a16e3c..503010a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -969,6 +969,9 @@ struct amdgpu_gfx_config {
unsigned mc_arb_ramcfg;
unsigned gb_addr_config;
unsigned num_rbs;
+   unsigned gs_vgt_table_depth;
+   unsigned gs_prim_buffer_depth;
+   unsigned max_gs_waves_per_vgt;
 
uint32_t tile_mode_array[32];
uint32_t macrotile_mode_array[16];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 1006d7c..42e5993 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -546,10 +546,21 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
 
if (amdgpu_ngg) {
dev_info.prim_buf_gpu_addr = 
adev->gfx.ngg.buf[PRIM].gpu_addr;
+   dev_info.prim_buf_size = adev->gfx.ngg.buf[PRIM].size;
dev_info.pos_buf_gpu_addr = 
adev->gfx.ngg.buf[POS].gpu_addr;
+   dev_info.pos_buf_size = adev->gfx.ngg.buf[POS].size;
dev_info.cntl_sb_buf_gpu_addr = 
adev->gfx.ngg.buf[CNTL].gpu_addr;
+   dev_info.cntl_sb_buf_size = 
adev->gfx.ngg.buf[CNTL].size;
dev_info.param_buf_gpu_addr = 
adev->gfx.ngg.buf[PARAM].gpu_addr;
+   dev_info.param_buf_size = adev->gfx.ngg.buf[PARAM].size;
}
+   dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size;
+   dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs;
+   dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
+   dev_info.num_tcc_blocks = 
adev->gfx.config.max_texture_channel_caches;
+   dev_info.gs_vgt_table_depth = 
adev->gfx.config.gs_vgt_table_depth;
+   dev_info.gs_prim_buffer_depth = 
adev->gfx.config.gs_prim_buffer_depth;
+   dev_info.max_gs_waves_per_vgt = 
adev->gfx.config.max_gs_waves_per_vgt;
 
return copy_to_user(out, _info,
min((size_t)size, sizeof(dev_info))) ? 
-EFAULT : 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 13da795..5249bc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -785,6 +785,9 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device 
*adev)
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+   adev->gfx.config.gs_vgt_table_depth = 32;
+   adev->gfx.config.gs_prim_buffer_depth = 1792;
+   adev->gfx.config.max_gs_waves_per_vgt = 32;
gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
break;
default:
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index d76d525..086ab10 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -769,6 +769,25 @@ struct drm_amdgpu_info_device {
__u64 cntl_sb_buf_gpu_addr;
/* NGG Parameter Cache */
__u64 param_buf_gpu_addr;
+   __u32 prim_buf_size;
+   __u32 pos_buf_size;
+   __u32 cntl_sb_buf_size;
+   __u32 param_buf_size;
+   /* wavefront size*/
+   __u32 wave_front_size;
+   /* shader visible vgprs*/
+   __u32 num_shader_visible_vgprs;
+   /* CU per shader array*/
+   __u32 num_cu_per_sh;
+   /* number of tcc blocks*/
+   __u32 num_tcc_blocks;
+   /* gs vgt table depth*/
+   __u32 gs_vgt_table_depth;
+   /* gs primitive buffer depth*/
+   __u32 gs_prim_buffer_depth;
+   /* max gs wavefront per vgt*/
+   __u32 max_gs_waves_per_vgt;
+   __u32 _pad1;
 };
 
 struct drm_amdgpu_info_hw_ip {
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: bump version for exporting gpu info for gfx9

2017-04-27 Thread Junwei Zhang
Signed-off-by: Junwei Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ead00d7..857c3be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -64,9 +64,10 @@
  * - 3.12.0 - Add query for double offchip LDS buffers
  * - 3.13.0 - Add PRT support
  * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality
+ * - 3.15.0 - Export more gpu info for gfx9
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   14
+#define KMS_DRIVER_MINOR   15
 #define KMS_DRIVER_PATCHLEVEL  0
 
 int amdgpu_vram_limit = 0;
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] Revert "drm/amdgpu: Refactor flip into prepare submit and submit. (v2)"

2017-04-27 Thread Michel Dänzer
From: Michel Dänzer 

This reverts commit cb341a319f7e66f879d69af929c3dadfc1a8f31e.

The purpose of the refactor was for amdgpu_crtc_prepare/submit_flip to
be used by the DC code, but that's no longer the case.

Signed-off-by: Michel Dänzer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 147 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h|  17 
 2 files changed, 29 insertions(+), 135 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 7cf226dfb602..0415875656c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -138,56 +138,10 @@ static void amdgpu_unpin_work_func(struct work_struct 
*__work)
kfree(work);
 }
 
-
-static void amdgpu_flip_work_cleanup(struct amdgpu_flip_work *work)
-{
-   int i;
-
-   amdgpu_bo_unref(>old_abo);
-   fence_put(work->excl);
-   for (i = 0; i < work->shared_count; ++i)
-   fence_put(work->shared[i]);
-   kfree(work->shared);
-   kfree(work);
-}
-
-static void amdgpu_flip_cleanup_unreserve(
-   struct amdgpu_flip_work *work,
-   struct amdgpu_bo *new_abo)
-{
-   amdgpu_bo_unreserve(new_abo);
-   amdgpu_flip_work_cleanup(work);
-}
-
-static void amdgpu_flip_cleanup_unpin(
-   struct amdgpu_flip_work *work,
-   struct amdgpu_bo *new_abo)
-{
-   if (unlikely(amdgpu_bo_unpin(new_abo) != 0))
-   DRM_ERROR("failed to unpin new abo in error path\n");
-   amdgpu_flip_cleanup_unreserve(work, new_abo);
-}
-
-void amdgpu_crtc_cleanup_flip_ctx(
-   struct amdgpu_flip_work *work,
-   struct amdgpu_bo *new_abo)
-{
-   if (unlikely(amdgpu_bo_reserve(new_abo, false) != 0)) {
-   DRM_ERROR("failed to reserve new abo in error path\n");
-   amdgpu_flip_work_cleanup(work);
-   return;
-   }
-   amdgpu_flip_cleanup_unpin(work, new_abo);
-}
-
-int amdgpu_crtc_prepare_flip(
-   struct drm_crtc *crtc,
-   struct drm_framebuffer *fb,
-   struct drm_pending_vblank_event *event,
-   uint32_t page_flip_flags,
-   uint32_t target,
-   struct amdgpu_flip_work **work_p,
-   struct amdgpu_bo **new_abo_p)
+int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
+struct drm_framebuffer *fb,
+struct drm_pending_vblank_event *event,
+uint32_t page_flip_flags, uint32_t target)
 {
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
@@ -200,7 +154,7 @@ int amdgpu_crtc_prepare_flip(
unsigned long flags;
u64 tiling_flags;
u64 base;
-   int r;
+   int i, r;
 
work = kzalloc(sizeof *work, GFP_KERNEL);
if (work == NULL)
@@ -261,84 +215,41 @@ int amdgpu_crtc_prepare_flip(
spin_unlock_irqrestore(>dev->event_lock, flags);
r = -EBUSY;
goto pflip_cleanup;
-
}
-   spin_unlock_irqrestore(>dev->event_lock, flags);
-
-   *work_p = work;
-   *new_abo_p = new_abo;
 
-   return 0;
-
-pflip_cleanup:
-   amdgpu_crtc_cleanup_flip_ctx(work, new_abo);
-   return r;
-
-unpin:
-   amdgpu_flip_cleanup_unpin(work, new_abo);
-   return r;
-
-unreserve:
-   amdgpu_flip_cleanup_unreserve(work, new_abo);
-   return r;
-
-cleanup:
-   amdgpu_flip_work_cleanup(work);
-   return r;
-
-}
-
-void amdgpu_crtc_submit_flip(
-   struct drm_crtc *crtc,
-   struct drm_framebuffer *fb,
-   struct amdgpu_flip_work *work,
-   struct amdgpu_bo *new_abo)
-{
-   unsigned long flags;
-   struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
-   spin_lock_irqsave(>dev->event_lock, flags);
amdgpu_crtc->pflip_status = AMDGPU_FLIP_PENDING;
amdgpu_crtc->pflip_works = work;
 
+
+   DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: 
%p,\n",
+amdgpu_crtc->crtc_id, amdgpu_crtc, work);
/* update crtc fb */
crtc->primary->fb = fb;
spin_unlock_irqrestore(>dev->event_lock, flags);
-
-   DRM_DEBUG_DRIVER(
-   "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: 
%p,\n",
-   amdgpu_crtc->crtc_id, amdgpu_crtc, work);
-
amdgpu_flip_work_func(>flip_work.work);
-}
+   return 0;
 
-int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
- struct drm_framebuffer *fb,
- struct drm_pending_vblank_event *event,
- uint32_t 

Re: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset

2017-04-27 Thread zhoucm1

pls ignore this one, will send another new one.

On 2017年04月27日 15:20, Chunming Zhou wrote:

the case could happen when gpu reset:
1. when gpu reset, cs can be continue until sw queue is full, then push job 
will wait with holding pd reservation.
2. gpu_reset routine will also need pd reservation to restore page table from 
their shadow.
3. cs is waiting for gpu_reset complete, but gpu reset is waiting for cs 
releases reservation.

Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202
Signed-off-by: Chunming Zhou 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9edb1a4..8c6d036 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1076,7 +1076,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_job_free_resources(job);
  
  	trace_amdgpu_cs_ioctl(job);

-   amd_sched_entity_push_job(>base);
  
  	return 0;

  }
@@ -1132,6 +1131,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
  
  out:

amdgpu_cs_parser_fini(, r, reserved_buffers);
+   if (!r)
+   amd_sched_entity_push_job(>base);
return r;
  }
  


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/6 v4] *** Dedicated vmid per process v4 ***

2017-04-27 Thread Zhang, Jerry (Junwei)

It looks fine for me now.

Reviewed-by: Junwei Zhang 

Jerry

On 04/27/2017 01:00 PM, Chunming Zhou wrote:

The current kernel implementation, which grabs the idle VMID from pool when 
emitting the job may:

 The back-to-back submission from one process could use different VMID.
 The submission to different queues from single process could use different 
VMID

It works well in most case but cannot work for the SQ thread trace capture.

The VMID for the submission that set the {SQTT}_BASE, which refers to the 
address of the trace buffer, is stored in shader engine.

If the profiling application have to use different VMIDs to submit IBs in its 
life cycle:

 Some trace is not captured since it actually uses different VMIDs to submit 
jobs.
 Some part of the captured trace may come from a different application since it 
accidentally uses the owner’s VMID to submit jobs.

V2:
1. address Christian's comments:
a. drop context flags for tag process, instead, add vm ioctl.
b. change order of patches.
c. sync waiting only when vm flush needs.

2. address Alex's comments;
bump module version

V3:
   address Jerry and Christian's comments.
   and only reserve gfxhub vmid

v4:
   address Jerry and Christian's comments.
   fix some race conditions.

Chunming Zhou (6):
   drm/amdgpu: add vm ioctl
   drm/amdgpu: add reserved vmid field in vm struct v2
   drm/amdgpu: reserve/unreserve vmid by vm ioctl v4
   drm/amdgpu: add limitation for dedicated vm number v4
   drm/amdgpu: implement grab reserved vmid V3
   drm/amdgpu: bump module verion for reserved vmid

  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   3 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 152 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   6 ++
  include/uapi/drm/amdgpu_drm.h   |  22 +
  5 files changed, 178 insertions(+), 6 deletions(-)


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 5/6] drm/amdgpu: implement grab reserved vmid V3

2017-04-27 Thread Zhang, Jerry (Junwei)

On 04/27/2017 01:54 PM, Zhang, Jerry (Junwei) wrote:

On 04/27/2017 01:00 PM, Chunming Zhou wrote:

v2: move sync waiting only when flush needs
v3: fix racy

Change-Id: I64da2701c9fdcf986afb90ba1492a78d5bef1b6c
Signed-off-by: Chunming Zhou 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 72 --
  1 file changed, 68 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index e6fdfa4..7752279 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -397,6 +397,65 @@ static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device
*adev,
  atomic_read(>gpu_reset_counter);
  }

+static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub)
+{
+return !!vm->reserved_vmid[vmhub];
+}


It may be better to populate it into alloc reserved_vmid func as well, getting
easy maintenance in the future.



+
+/* idr_mgr->lock must be held */
+static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
+   struct amdgpu_ring *ring,
+   struct amdgpu_sync *sync,
+   struct fence *fence,
+   struct amdgpu_job *job)
+{
+struct amdgpu_device *adev = ring->adev;
+unsigned vmhub = ring->funcs->vmhub;
+struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub];
+struct amdgpu_vm_id_manager *id_mgr = >vm_manager.id_mgr[vmhub];
+struct fence *updates = sync->last_vm_update;
+int r = 0;
+struct fence *flushed, *tmp;
+bool needs_flush = false;
+
+flushed  = id->flushed_updates;
+if ((amdgpu_vm_had_gpu_reset(adev, id)) ||
+(atomic64_read(>owner) != vm->client_id) ||
+(job->vm_pd_addr != id->pd_gpu_addr) ||
+(updates && (!flushed || updates->context != flushed->context ||
+fence_is_later(updates, flushed {
+needs_flush = true;
+tmp = amdgpu_sync_get_fence(>active);
+if (tmp) {
+r = amdgpu_sync_fence(adev, sync, tmp);
+fence_put(tmp);
+return r;


Sorry, I didn't catch up this.
Could you give me some tips?
(in this case, we no need to update id info as below?)


Got that offline to wait for id idle.
That will always use the reserved id when it's idle.
Anyway, it should work.



Jerry


+}
+}
+
+/* Good we can use this VMID. Remember this submission as
+* user of the VMID.
+*/
+r = amdgpu_sync_fence(ring->adev, >active, fence);
+if (r)
+goto out;
+
+if (updates && (!flushed || updates->context != flushed->context ||
+fence_is_later(updates, flushed))) {
+fence_put(id->flushed_updates);
+id->flushed_updates = fence_get(updates);
+}
+id->pd_gpu_addr = job->vm_pd_addr;
+id->current_gpu_reset_count = atomic_read(>gpu_reset_counter);
+atomic64_set(>owner, vm->client_id);
+job->vm_needs_flush = needs_flush;
+
+job->vm_id = id - id_mgr->ids;
+trace_amdgpu_vm_grab_id(vm, ring, job);
+out:
+return r;
+}
+
  /**
   * amdgpu_vm_grab_id - allocate the next free VMID
   *
@@ -421,12 +480,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct
amdgpu_ring *ring,
  unsigned i;
  int r = 0;

+mutex_lock(_mgr->lock);
+if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) {
+r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job);
+mutex_unlock(_mgr->lock);
+return r;
+}
  fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
-if (!fences)
+if (!fences) {
+mutex_unlock(_mgr->lock);
  return -ENOMEM;
-
-mutex_lock(_mgr->lock);
-
+}
  /* Check if we have an idle VMID */
  i = 0;
  list_for_each_entry(idle, _mgr->ids_lru, list) {


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: Enable chained IB MCBP support

2017-04-27 Thread Yu, Xiangliang
Reviewed-by: Xiangliang Yu 


Thanks!
Xiangliang Yu

> -Original Message-
> From: Trigger Huang [mailto:trigger.hu...@amd.com]
> Sent: Thursday, April 27, 2017 3:18 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk ; Yu, Xiangliang
> ; Huang, Trigger 
> Subject: [PATCH] drm/amdgpu: Enable chained IB MCBP support
> 
> Support for MCBP/Virtualization in combination with chained IBs is formal
> released on firmware feature version #46. So enable it according to firmware
> feature version, otherwise, world switch will hang.
> 
> Signed-off-by: Trigger Huang 
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 17 +++--
>  1 file changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 2ff5f19..9a8c042 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -941,12 +941,6 @@ static int gfx_v8_0_init_microcode(struct
> amdgpu_device *adev)
>   cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev-
> >gfx.me_fw->data;
>   adev->gfx.me_fw_version = le32_to_cpu(cp_hdr-
> >header.ucode_version);
> 
> - /* chain ib ucode isn't formal released, just disable it by far
> -  * TODO: when ucod ready we should use ucode version to judge if
> -  * chain-ib support or not.
> -  */
> - adev->virt.chained_ib_support = false;
> -
>   adev->gfx.me_feature_version = le32_to_cpu(cp_hdr-
> >ucode_feature_version);
> 
>   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin",
> chip_name); @@ -960,6 +954,17 @@ static int
> gfx_v8_0_init_microcode(struct amdgpu_device *adev)
>   adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr-
> >header.ucode_version);
>   adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr-
> >ucode_feature_version);
> 
> + /*
> +  * Support for MCBP/Virtualization in combination with chained IBs is
> +  * formal released on feature version #46
> +  */
> + if (adev->gfx.ce_feature_version >= 46 &&
> + adev->gfx.pfp_feature_version >= 46) {
> + adev->virt.chained_ib_support = true;
> + DRM_INFO("Chained IB support enabled!\n");
> + } else
> + adev->virt.chained_ib_support = false;
> +
>   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin",
> chip_name);
>   err = request_firmware(>gfx.rlc_fw, fw_name, adev->dev);
>   if (err)
> --
> 2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: Fix use of interruptible waiting

2017-04-27 Thread Michel Dänzer
On 27/04/17 04:19 AM, Alex Xie wrote:
> Hi,
> 
> I knew this is part of ioctl. And I intended to fix this interruptible
> waiting in an ioctl.

It's by design, nothing that needs fixing in the case of an ioctl.


> 1. The wait is short. There is not much benefit by interruptible
> waiting.  The BO is a frame buffer BO. Are there many competitors to
> lock this BO? If not, the wait is very short. This is my main reason to
> change.

Irrelevant. If amdgpu_bo_reserve returns -EINTR, so will other functions
down the line, and amdgpu_crtc_page_flip_target will have to propagate
that anyway. So doing an uninterruptible wait will just delay the
inevitable and delivery of the signal to userspace.


> 2. In this function and caller functions, the error handling for such
> interrupt is complicated and risky.

If you're concerned about the cascade of cleanup functions, let's just
revert commit 0169b52ce575 ("drm/amdgpu: Refactor flip into prepare
submit and submit. (v2)"). amdgpu_crtc_prepare_flip was split out to be
used by DC, but DC is now using a DRM core atomic helper for flips and
no longer using amdgpu_crtc_prepare_flip.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Enable chained IB MCBP support

2017-04-27 Thread Trigger Huang
Support for MCBP/Virtualization in combination with chained IBs is
formal released on firmware feature version #46. So enable it
according to firmware feature version, otherwise, world switch will
hang.

Signed-off-by: Trigger Huang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 2ff5f19..9a8c042 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -941,12 +941,6 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device 
*adev)
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 
-   /* chain ib ucode isn't formal released, just disable it by far
-* TODO: when ucod ready we should use ucode version to judge if
-* chain-ib support or not.
-*/
-   adev->virt.chained_ib_support = false;
-
adev->gfx.me_feature_version = 
le32_to_cpu(cp_hdr->ucode_feature_version);
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
@@ -960,6 +954,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device 
*adev)
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = 
le32_to_cpu(cp_hdr->ucode_feature_version);
 
+   /*
+* Support for MCBP/Virtualization in combination with chained IBs is
+* formal released on feature version #46
+*/
+   if (adev->gfx.ce_feature_version >= 46 &&
+   adev->gfx.pfp_feature_version >= 46) {
+   adev->virt.chained_ib_support = true;
+   DRM_INFO("Chained IB support enabled!\n");
+   } else
+   adev->virt.chained_ib_support = false;
+
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
err = request_firmware(>gfx.rlc_fw, fw_name, adev->dev);
if (err)
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset

2017-04-27 Thread Chunming Zhou
the case could happen when gpu reset:
1. when gpu reset, cs can be continue until sw queue is full, then push job 
will wait with holding pd reservation.
2. gpu_reset routine will also need pd reservation to restore page table from 
their shadow.
3. cs is waiting for gpu_reset complete, but gpu reset is waiting for cs 
releases reservation.

Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9edb1a4..8c6d036 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1076,7 +1076,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_job_free_resources(job);
 
trace_amdgpu_cs_ioctl(job);
-   amd_sched_entity_push_job(>base);
 
return 0;
 }
@@ -1132,6 +1131,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
 
 out:
amdgpu_cs_parser_fini(, r, reserved_buffers);
+   if (!r)
+   amd_sched_entity_push_job(>base);
return r;
 }
 
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: Enable chained IB MCBP support

2017-04-27 Thread Liu, Monk
Reviewed-by: Monk Liu 

-Original Message-
From: Trigger Huang [mailto:trigger.hu...@amd.com] 
Sent: Thursday, April 27, 2017 3:18 PM
To: amd-gfx@lists.freedesktop.org
Cc: Liu, Monk ; Yu, Xiangliang ; 
Huang, Trigger 
Subject: [PATCH] drm/amdgpu: Enable chained IB MCBP support

Support for MCBP/Virtualization in combination with chained IBs is formal 
released on firmware feature version #46. So enable it according to firmware 
feature version, otherwise, world switch will hang.

Signed-off-by: Trigger Huang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 2ff5f19..9a8c042 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -941,12 +941,6 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device 
*adev)
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 
-   /* chain ib ucode isn't formal released, just disable it by far
-* TODO: when ucod ready we should use ucode version to judge if
-* chain-ib support or not.
-*/
-   adev->virt.chained_ib_support = false;
-
adev->gfx.me_feature_version = 
le32_to_cpu(cp_hdr->ucode_feature_version);
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); @@ 
-960,6 +954,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = 
le32_to_cpu(cp_hdr->ucode_feature_version);
 
+   /*
+* Support for MCBP/Virtualization in combination with chained IBs is
+* formal released on feature version #46
+*/
+   if (adev->gfx.ce_feature_version >= 46 &&
+   adev->gfx.pfp_feature_version >= 46) {
+   adev->virt.chained_ib_support = true;
+   DRM_INFO("Chained IB support enabled!\n");
+   } else
+   adev->virt.chained_ib_support = false;
+
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
err = request_firmware(>gfx.rlc_fw, fw_name, adev->dev);
if (err)
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 3/6] drm: fourcc byteorder: add bigendian support to drm_mode_legacy_fb_format

2017-04-27 Thread Michel Dänzer
On 27/04/17 03:45 PM, Gerd Hoffmann wrote:
>   Hi,
> 
>>> That is done using the RADEON_TILING_SWAP_{16,32}BIT flag mentioned in
>>> another thread?
>>
>> Right.
>>
>>
>>> What about dumb bos?  You've mentioned the swap flag isn't used for
>>> those.  Which implies they are in little endian byte order (both gpu and
>>> cpu view).
>>
>> Right, AFAICT from looking at the code.
> 
> Ok.
> 
> And I also don't see an easy way to make them big endian (cpu view)
> using swapping with the existing drm interfaces, given we apply a format
> when we put the bo into use as framebuffer, not when creating it.  So
> userspace can: (1) create dumb bo, (2) map bo, (3) write something bo,
> (4) create fb + attach to crtc.  And at (3) we don't know the format
> yet, so we can't configure swapping accordingly.
> 
> So just not using the swapping indeed looks like the only sensible
> option.  Which in turn implies there is no BGRA support for dumb
> bos.  Hmm, I can see the problem.  Userspace expectation appears to be
> that ADDFB configures a native endian framebuffer, which the driver
> simply can't do on bigendian.

... with pre-R600 GPUs.


> So, what can/should the driver do here?  Throw errors for ADDFB and
> force userspace to use ADDFB2?  From a design point of view the best
> option, but in the other hand I suspect that could break the xorg radeon
> driver ...

Yes, it would.

One thing we could do is provide a way for userspace to query the
effective format(s) as seen by the GPU and/or CPU.

It might also make sense for the radeon driver to set the
RADEON_TILING_SWAP_{16,32}BIT flags for dumb BOs. I wonder about the
status of apps using dumb BOs directly wrt this discussion.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 3/6] drm: fourcc byteorder: add bigendian support to drm_mode_legacy_fb_format

2017-04-27 Thread Gerd Hoffmann
  Hi,

> > That is done using the RADEON_TILING_SWAP_{16,32}BIT flag mentioned in
> > another thread?
> 
> Right.
> 
> 
> > What about dumb bos?  You've mentioned the swap flag isn't used for
> > those.  Which implies they are in little endian byte order (both gpu and
> > cpu view).
> 
> Right, AFAICT from looking at the code.

Ok.

And I also don't see an easy way to make them big endian (cpu view)
using swapping with the existing drm interfaces, given we apply a format
when we put the bo into use as framebuffer, not when creating it.  So
userspace can: (1) create dumb bo, (2) map bo, (3) write something bo,
(4) create fb + attach to crtc.  And at (3) we don't know the format
yet, so we can't configure swapping accordingly.

So just not using the swapping indeed looks like the only sensible
option.  Which in turn implies there is no BGRA support for dumb
bos.  Hmm, I can see the problem.  Userspace expectation appears to be
that ADDFB configures a native endian framebuffer, which the driver
simply can't do on bigendian.

So, what can/should the driver do here?  Throw errors for ADDFB and
force userspace to use ADDFB2?  From a design point of view the best
option, but in the other hand I suspect that could break the xorg radeon
driver ...

cheers,
  Gerd

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx