[PATCH 3/3] drm/amd/powerplay: implement stop dpm task for vega10.
Change-Id: I19202f1e54ce6a1b8b54aacbc0d42dbba7605662 Signed-off-by: Rex Zhu--- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 97 ++ .../gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c | 23 + .../gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h | 1 + .../gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c | 2 +- .../gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h | 1 + 5 files changed, 123 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 5e3e89b..68eae52 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -2420,6 +2420,26 @@ static int vega10_enable_thermal_protection(struct pp_hwmgr *hwmgr) return 0; } +static int vega10_disable_thermal_protection(struct pp_hwmgr *hwmgr) +{ + struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); + + if (data->smu_features[GNLD_THERMAL].supported) { + if (!data->smu_features[GNLD_THERMAL].enabled) + pr_info("THERMAL Feature Already disabled!"); + + PP_ASSERT_WITH_CODE( + !vega10_enable_smc_features(hwmgr->smumgr, + false, + data->smu_features[GNLD_THERMAL].smu_feature_bitmap), + "disable THERMAL Feature Failed!", + return -1); + data->smu_features[GNLD_THERMAL].enabled = false; + } + + return 0; +} + static int vega10_enable_vrhot_feature(struct pp_hwmgr *hwmgr) { struct vega10_hwmgr *data = @@ -2498,6 +2518,37 @@ static int vega10_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr) return 0; } +static int vega10_stop_dpm(struct pp_hwmgr *hwmgr, uint32_t bitmap) +{ + struct vega10_hwmgr *data = + (struct vega10_hwmgr *)(hwmgr->backend); + uint32_t i, feature_mask = 0; + + + if(data->smu_features[GNLD_LED_DISPLAY].supported == true){ + PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr, + true, data->smu_features[GNLD_LED_DISPLAY].smu_feature_bitmap), + "Attempt to Enable LED DPM feature Failed!", return -EINVAL); + 
data->smu_features[GNLD_LED_DISPLAY].enabled = true; + } + + for (i = 0; i < GNLD_DPM_MAX; i++) { + if (data->smu_features[i].smu_feature_bitmap & bitmap) { + if (data->smu_features[i].supported) { + if (data->smu_features[i].enabled) { + feature_mask |= data->smu_features[i]. + smu_feature_bitmap; + data->smu_features[i].enabled = false; + } + } + } + } + + vega10_enable_smc_features(hwmgr->smumgr, false, feature_mask); + + return 0; +} + /** * @brief Tell SMC to enabled the supported DPMs. * @@ -4356,11 +4407,55 @@ static int vega10_check_states_equal(struct pp_hwmgr *hwmgr, return is_update_required; } +static int vega10_disable_dpm_tasks(struct pp_hwmgr *hwmgr) +{ + int tmp_result, result = 0; + + tmp_result = (vega10_is_dpm_running(hwmgr)) ? 0 : -1; + PP_ASSERT_WITH_CODE(tmp_result == 0, + "DPM is not running right now, no need to disable DPM!", + return 0); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ThermalController)) + vega10_disable_thermal_protection(hwmgr); + + tmp_result = vega10_disable_power_containment(hwmgr); + PP_ASSERT_WITH_CODE((tmp_result == 0), + "Failed to disable power containment!", result = tmp_result); + + tmp_result = vega10_avfs_enable(hwmgr, false); + PP_ASSERT_WITH_CODE((tmp_result == 0), + "Failed to disable AVFS!", result = tmp_result); + + tmp_result = vega10_stop_dpm(hwmgr, SMC_DPM_FEATURES); + PP_ASSERT_WITH_CODE((tmp_result == 0), + "Failed to stop DPM!", result = tmp_result); + + return result; +} + +static int vega10_power_off_asic(struct pp_hwmgr *hwmgr) +{ + struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); + int result; + + result = vega10_disable_dpm_tasks(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), + "[disable_dpm_tasks] Failed to disable DPM!", + ); + data->water_marks_bitmap &= ~(WaterMarksLoaded); + + return result; +} + + static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .backend_init = vega10_hwmgr_backend_init,
[PATCH 1/3] drm/amd/powerplay: add disable_smc_ctf callback in hwmgr.
export disablesmcctf to eventmgr. need to disable temperature alert when s3/s4. otherwise, when resume back,enable temperature alert will fail. Change-Id: I6bbc85b0fc389e442a44ad4d0db3db3342ff5955 Signed-off-by: Rex Zhu--- drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c | 10 ++ drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 1 + drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h | 2 +- drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 1 + 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index 23bba2c..fcc722e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -501,3 +501,13 @@ int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_i return hwmgr->hwmgr_func->get_max_high_clocks(hwmgr, clocks); } + +int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr) +{ + PHM_FUNC_CHECK(hwmgr); + + if (hwmgr->hwmgr_func->disable_smc_firmware_ctf == NULL) + return -EINVAL; + + return hwmgr->hwmgr_func->disable_smc_firmware_ctf(hwmgr); +} diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 8f663ab..4ad8453 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -4695,6 +4695,7 @@ static int smu7_avfs_control(struct pp_hwmgr *hwmgr, bool enable) .release_firmware = smu7_release_firmware, .set_power_profile_state = smu7_set_power_profile_state, .avfs_control = smu7_avfs_control, + .disable_smc_firmware_ctf = smu7_thermal_disable_alert, }; uint8_t smu7_get_sleep_divider_id_from_clock(uint32_t clock, diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h index ee58e4c..9ea3d73 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h +++ 
b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h @@ -431,6 +431,6 @@ extern int phm_display_clock_voltage_request(struct pp_hwmgr *hwmgr, struct pp_display_clock_request *clock); extern int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks); - +extern int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr); #endif /* _HARDWARE_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 320225d..98d38c3 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -373,6 +373,7 @@ struct pp_hwmgr_func { int (*set_power_profile_state)(struct pp_hwmgr *hwmgr, struct amd_pp_profile *request); int (*avfs_control)(struct pp_hwmgr *hwmgr, bool enable); + int (*disable_smc_firmware_ctf)(struct pp_hwmgr *hwmgr); }; struct pp_table_func { -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/3] drm/amd/powerplay: complete disable_smc_firmware_ctf_tasks.
Change-Id: I7d7f760c5996ddb00a17b34d4b0a3070a965 Signed-off-by: Rex Zhu--- drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c | 2 +- drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c | 5 + drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c index 9ef2d90..b82c43a 100644 --- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c +++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c @@ -219,7 +219,7 @@ }; const pem_event_action disable_smc_firmware_ctf_tasks[] = { - /* PEM_Task_DisableSMCFirmwareCTF,*/ + pem_task_disable_smc_firmware_ctf, NULL }; diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c index e04216e..8c4ebaa 100644 --- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c +++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c @@ -173,6 +173,11 @@ int pem_task_stop_asic_block_usage(struct pp_eventmgr *eventmgr, struct pem_even return 0; } +int pem_task_disable_smc_firmware_ctf(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data) +{ + return phm_disable_smc_firmware_ctf(eventmgr->hwmgr); +} + int pem_task_setup_asic(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data) { return phm_setup_asic(eventmgr->hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h index 6c6297e..37e7ca5 100644 --- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h +++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h @@ -84,5 +84,6 @@ /*thermal */ int pem_task_initialize_thermal_controller(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data); int pem_task_uninitialize_thermal_controller(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data); +int pem_task_disable_smc_firmware_ctf(struct 
pp_eventmgr *eventmgr, struct pem_event_data *event_data); #endif /* _EVENT_TASKS_H_ */ -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: add cu info wave_front_size
Reviewed-by: Ken Wang

From: Junwei Zhang Sent: Friday, April 28, 2017 11:10:46 AM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander; Wang, Ken; Zhang, Jerry Subject: [PATCH] drm/amdgpu: add cu info wave_front_size missed that for gfx v9 info export Signed-off-by: Junwei Zhang --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 503010a..e330009 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -986,6 +986,7 @@ struct amdgpu_gfx_config { struct amdgpu_cu_info { uint32_t number; /* total active CU number */ uint32_t ao_cu_mask; + uint32_t wave_front_size; uint32_t bitmap[4][4]; }; -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] amdgpu: add vm ioctl support
Reviewed-by: Junwei ZhangRegards, Jerry (Junwei Zhang) Linux Base Graphics SRDC Software Development _ > -Original Message- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of > zhoucm1 > Sent: Friday, April 28, 2017 10:59 > To: amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH] amdgpu: add vm ioctl support > > This is drm patch for dedicated vmid. > Kernel patch has pushed to staging, anyone could have a quick review on this? > Jerry? Guess Christian is in dream at current time. > > Regards, > David Zhou > > On 2017年04月28日 10:38, Chunming Zhou wrote: > > Change-Id: I3c1ea377dad8f6d64a992b071a6ec36a1143e416 > > Signed-off-by: Chunming Zhou > > --- > > include/drm/amdgpu_drm.h | 22 ++ > > 1 file changed, 22 insertions(+) > > > > diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h index > > 26d02ba..28cda3a 100644 > > --- a/include/drm/amdgpu_drm.h > > +++ b/include/drm/amdgpu_drm.h > > @@ -47,6 +47,7 @@ > > #define DRM_AMDGPU_GEM_OP 0x10 > > #define DRM_AMDGPU_GEM_USERPTR0x11 > > #define DRM_AMDGPU_WAIT_FENCES0x12 > > +#define DRM_AMDGPU_VM 0x13 > > /* hybrid specific ioctls */ > > #define DRM_AMDGPU_SEM 0x5b > > #define DRM_AMDGPU_GEM_DGMA 0x5c > > @@ -66,6 +67,7 @@ > > #define DRM_IOCTL_AMDGPU_GEM_OP > DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct > drm_amdgpu_gem_op) > > #define DRM_IOCTL_AMDGPU_GEM_USERPTR > DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, > struct drm_amdgpu_gem_userptr) > > #define DRM_IOCTL_AMDGPU_WAIT_FENCES > DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, > union drm_amdgpu_wait_fences) > > +#define DRM_IOCTL_AMDGPU_VM > DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union > drm_amdgpu_vm) > > /* hybrid specific ioctls */ > > #define DRM_IOCTL_AMDGPU_GEM_DGMA > DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_DGMA, > struct drm_amdgpu_gem_dgma) > > #define DRM_IOCTL_AMDGPU_FREESYNC > DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FREESYNC, > struct drm_amdgpu_freesync) > > @@ -201,6 +203,26 @@ union 
drm_amdgpu_ctx { > > union drm_amdgpu_ctx_out out; > > }; > > > > +/* vm ioctl */ > > +#define AMDGPU_VM_OP_RESERVE_VMID 1 > > +#define AMDGPU_VM_OP_UNRESERVE_VMID2 > > + > > +struct drm_amdgpu_vm_in { > > + /** AMDGPU_VM_OP_* */ > > + __u32 op; > > + __u32 flags; > > +}; > > + > > +struct drm_amdgpu_vm_out { > > + /** For future use, no flags defined so far */ > > + __u64 flags; > > +}; > > + > > +union drm_amdgpu_vm { > > + struct drm_amdgpu_vm_in in; > > + struct drm_amdgpu_vm_out out; > > +}; > > + > > /* sync file related */ > > #define AMDGPU_SEM_OP_CREATE_SEM 1 > > #define AMDGPU_SEM_OP_WAIT_SEM 2 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] amdgpu: add vm ioctl support
This is drm patch for dedicated vmid. Kernel patch has pushed to staging, anyone could have a quick review on this? Jerry? Guess Christian is in dream at current time. Regards, David Zhou On 2017年04月28日 10:38, Chunming Zhou wrote: Change-Id: I3c1ea377dad8f6d64a992b071a6ec36a1143e416 Signed-off-by: Chunming Zhou--- include/drm/amdgpu_drm.h | 22 ++ 1 file changed, 22 insertions(+) diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h index 26d02ba..28cda3a 100644 --- a/include/drm/amdgpu_drm.h +++ b/include/drm/amdgpu_drm.h @@ -47,6 +47,7 @@ #define DRM_AMDGPU_GEM_OP 0x10 #define DRM_AMDGPU_GEM_USERPTR0x11 #define DRM_AMDGPU_WAIT_FENCES0x12 +#define DRM_AMDGPU_VM 0x13 /* hybrid specific ioctls */ #define DRM_AMDGPU_SEM 0x5b #define DRM_AMDGPU_GEM_DGMA 0x5c @@ -66,6 +67,7 @@ #define DRM_IOCTL_AMDGPU_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op) #define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) #define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) +#define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) /* hybrid specific ioctls */ #define DRM_IOCTL_AMDGPU_GEM_DGMA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_DGMA, struct drm_amdgpu_gem_dgma) #define DRM_IOCTL_AMDGPU_FREESYNC DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FREESYNC, struct drm_amdgpu_freesync) @@ -201,6 +203,26 @@ union drm_amdgpu_ctx { union drm_amdgpu_ctx_out out; }; +/* vm ioctl */ +#define AMDGPU_VM_OP_RESERVE_VMID 1 +#define AMDGPU_VM_OP_UNRESERVE_VMID2 + +struct drm_amdgpu_vm_in { + /** AMDGPU_VM_OP_* */ + __u32 op; + __u32 flags; +}; + +struct drm_amdgpu_vm_out { + /** For future use, no flags defined so far */ + __u64 flags; +}; + +union drm_amdgpu_vm { + struct drm_amdgpu_vm_in in; + struct drm_amdgpu_vm_out out; +}; + /* sync file related */ #define AMDGPU_SEM_OP_CREATE_SEM 1 #define 
AMDGPU_SEM_OP_WAIT_SEM 2 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset v2
Nice catch! Reviewed-by: Junwei ZhangRegards, Jerry (Junwei Zhang) Linux Base Graphics SRDC Software Development _ > -Original Message- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of > Chunming Zhou > Sent: Friday, April 28, 2017 10:46 > To: amd-gfx@lists.freedesktop.org > Cc: Zhou, David(ChunMing) > Subject: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu > reset v2 > > the case could happen when gpu reset: > 1. when gpu reset, cs can be continue until sw queue is full, then push job > will > wait with holding pd reservation. > 2. gpu_reset routine will also need pd reservation to restore page table from > their shadow. > 3. cs is waiting for gpu_reset complete, but gpu reset is waiting for cs > releases > reservation. > > v2: handle amdgpu_cs_submit error path. > > Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202 > Signed-off-by: Chunming Zhou > Reviewed-by: Christian König > Reviewed-by: Junwei Zhang > Reviewed-by: Monk Liu > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 > 1 file changed, 4 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index 26168df..699f5fe 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -1074,6 +1074,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser > *p, > cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); > job->uf_sequence = cs->out.handle; > amdgpu_job_free_resources(job); > + amdgpu_cs_parser_fini(p, 0, true); > > trace_amdgpu_cs_ioctl(job); > amd_sched_entity_push_job(>base); > @@ -1129,7 +1130,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void > *data, struct drm_file *filp) > goto out; > > r = amdgpu_cs_submit(, cs); > + if (r) > + goto out; > > + return 0; > out: > amdgpu_cs_parser_fini(, r, reserved_buffers); > return r; > -- > 1.9.1 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > 
https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset v2
the case could happen when gpu reset: 1. when gpu reset, cs can be continue until sw queue is full, then push job will wait with holding pd reservation. 2. gpu_reset routine will also need pd reservation to restore page table from their shadow. 3. cs is waiting for gpu_reset complete, but gpu reset is waiting for cs releases reservation. v2: handle amdgpu_cs_submit error path. Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202 Signed-off-by: Chunming Zhou
Reviewed-by: Christian König
Reviewed-by: Junwei Zhang
Reviewed-by: Monk Liu
--- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 26168df..699f5fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1074,6 +1074,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); job->uf_sequence = cs->out.handle; amdgpu_job_free_resources(job); + amdgpu_cs_parser_fini(p, 0, true); trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(&job->base); @@ -1129,7 +1130,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; r = amdgpu_cs_submit(&parser, cs); + if (r) + goto out; + return 0; out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); return r; -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] amdgpu: add vm ioctl support
Change-Id: I3c1ea377dad8f6d64a992b071a6ec36a1143e416 Signed-off-by: Chunming Zhou--- include/drm/amdgpu_drm.h | 22 ++ 1 file changed, 22 insertions(+) diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h index 26d02ba..28cda3a 100644 --- a/include/drm/amdgpu_drm.h +++ b/include/drm/amdgpu_drm.h @@ -47,6 +47,7 @@ #define DRM_AMDGPU_GEM_OP 0x10 #define DRM_AMDGPU_GEM_USERPTR 0x11 #define DRM_AMDGPU_WAIT_FENCES 0x12 +#define DRM_AMDGPU_VM 0x13 /* hybrid specific ioctls */ #define DRM_AMDGPU_SEM 0x5b #define DRM_AMDGPU_GEM_DGMA0x5c @@ -66,6 +67,7 @@ #define DRM_IOCTL_AMDGPU_GEM_OPDRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op) #define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) #define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) +#define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) /* hybrid specific ioctls */ #define DRM_IOCTL_AMDGPU_GEM_DGMA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_DGMA, struct drm_amdgpu_gem_dgma) #define DRM_IOCTL_AMDGPU_FREESYNC DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FREESYNC, struct drm_amdgpu_freesync) @@ -201,6 +203,26 @@ union drm_amdgpu_ctx { union drm_amdgpu_ctx_out out; }; +/* vm ioctl */ +#define AMDGPU_VM_OP_RESERVE_VMID 1 +#define AMDGPU_VM_OP_UNRESERVE_VMID2 + +struct drm_amdgpu_vm_in { + /** AMDGPU_VM_OP_* */ + __u32 op; + __u32 flags; +}; + +struct drm_amdgpu_vm_out { + /** For future use, no flags defined so far */ + __u64 flags; +}; + +union drm_amdgpu_vm { + struct drm_amdgpu_vm_in in; + struct drm_amdgpu_vm_out out; +}; + /* sync file related */ #define AMDGPU_SEM_OP_CREATE_SEM 1 #define AMDGPU_SEM_OP_WAIT_SEM 2 -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset
On 04/27/2017 08:04 PM, Christian König wrote: Am 27.04.2017 um 12:23 schrieb zhoucm1: On 2017年04月27日 18:11, Christian König wrote: Am 27.04.2017 um 11:08 schrieb Zhang, Jerry (Junwei): On 04/27/2017 04:25 PM, Chunming Zhou wrote: the case could happen when gpu reset: 1. when gpu reset, cs can be continue until sw queue is full, then push job will wait with holding pd reservation. 2. gpu_reset routine will also need pd reservation to restore page table from their shadow. 3. cs is waiting for gpu_reset complete, but gpu reset is waiting for cs releases reservation. Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202 Signed-off-by: Chunming Zhou--- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9edb1a4..a6722a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1076,6 +1076,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_job_free_resources(job); trace_amdgpu_cs_ioctl(job); +amdgpu_cs_parser_fini(p, 0, true); Please confirm: It will not free/release more things that may be accessed in job working process, as cs_parse_fini free so many things related to parser. Yeah, that indeed won't work. amdgpu_cs_parser_fini() does: No, with my patch, parser->job already is NULL here, same as previous sequence, just put push_job action after amdgpu_cs_parser_fini() Ah! You add that into amdgpu_cs_submit! That was the point I was missing. Thanks your all input. See it too. Feel free to add my RB, if need. Reviewed-by: Junwei Zhang Jerry trace_amdgpu_cs_ioctl(job); +amdgpu_cs_parser_fini(p, 0, true); amd_sched_entity_push_job(>base); Can we keep the trace_amdgpu_cs_ioctl(); directly before the amd_sched_entity_push_job()? With that changed, the patch is Reviewed-by: Christian König . Regards, Christian. 
if (parser->job) amdgpu_job_free(parser->job); And amdgpu_cs_submit() sets the fence, so it must be called before amdgpu_cs_parser_fini(). yes, fence is already generated, so amdgpu_cs_parser_fini before pushing job is completely safe here. But apart from that this is a rather nifty idea. What was the problem with the initial patch? Just as comments in patch, which is found by Monk. Regards, David Zhou Or we can just release the pd reservation here? We could run into problem with the reservation ticked with that. So I would want to avoid that. Christian. Jerry amd_sched_entity_push_job(>base); return 0; @@ -1130,6 +1131,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_submit(, cs); +return r; out: amdgpu_cs_parser_fini(, r, reserved_buffers); return r; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[RFC] drm/amd/amdgpu: get rid of else branch
This is super simple elimination of else branch and I should probably even use unlikely in if (ring->count_dw < count_dw) { However, amdgpu_ring_write() has similar if condition, but does not return after DRM_ERROR and it looks suspicious. On error, we still adding v to ring and keeping count_dw-- below zero. if (ring->count_dw <= 0) DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); ring->ring[ring->wptr++] = v; ring->wptr &= ring->ptr_mask; ring->count_dw--; I can obviously be totaly wrong. Hmm? 8<8<8<8<8<8<8<8<8< diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c1b913541739..c6f4f874ea68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1596,28 +1596,29 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr if (ring->count_dw < count_dw) { DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); - } else { - occupied = ring->wptr & ring->ptr_mask; - dst = (void *)>ring[occupied]; - chunk1 = ring->ptr_mask + 1 - occupied; - chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1; - chunk2 = count_dw - chunk1; - chunk1 <<= 2; - chunk2 <<= 2; - - if (chunk1) - memcpy(dst, src, chunk1); - - if (chunk2) { - src += chunk1; - dst = (void *)ring->ring; - memcpy(dst, src, chunk2); - } - - ring->wptr += count_dw; - ring->wptr &= ring->ptr_mask; - ring->count_dw -= count_dw; + return; } + + occupied = ring->wptr & ring->ptr_mask; + dst = (void *)>ring[occupied]; + chunk1 = ring->ptr_mask + 1 - occupied; + chunk1 = (chunk1 >= count_dw) ? 
count_dw: chunk1; + chunk2 = count_dw - chunk1; + chunk1 <<= 2; + chunk2 <<= 2; + + if (chunk1) + memcpy(dst, src, chunk1); + + if (chunk2) { + src += chunk1; + dst = (void *)ring->ring; + memcpy(dst, src, chunk2); + } + + ring->wptr += count_dw; + ring->wptr &= ring->ptr_mask; + ring->count_dw -= count_dw; } static inline struct amdgpu_sdma_instance * -- 2.12.2 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH umr] Avoid opening the DRM file by default
Opening the DRM file (/dev/dri/card%d) triggers all sorts of KMD work to happen which is not useful if the KMD is hung or not working. Since --top is the only user of the file currently we simply defer opening it until --top is invoked. Signed-off-by: Tom St Denis--- src/app/top.c | 7 +++ src/lib/discover.c | 3 +-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/app/top.c b/src/app/top.c index 364180eb70f6..a4d3aa8e699d 100644 --- a/src/app/top.c +++ b/src/app/top.c @@ -933,8 +933,15 @@ void umr_top(struct umr_asic *asic) time_t tt; uint64_t ts; char hostname[64] = { 0 }; + char fname[64]; pthread_t sensor_thread; + // open drm file if not already open + if (asic->fd.drm < 0) { + snprintf(fname, sizeof(fname)-1, "/dev/dri/card%d", asic->instance); + asic->fd.drm = open(fname, O_RDWR); + } + if (getenv("HOSTNAME")) strcpy(hostname, getenv("HOSTNAME")); // init stats diff --git a/src/lib/discover.c b/src/lib/discover.c index d561efafe4d4..c9c2f74a4818 100644 --- a/src/lib/discover.c +++ b/src/lib/discover.c @@ -127,8 +127,7 @@ struct umr_asic *umr_discover_asic(struct umr_options *options) asic->fd.vram = open(fname, O_RDWR); snprintf(fname, sizeof(fname)-1, "/sys/kernel/debug/dri/%d/amdgpu_gpr", asic->instance); asic->fd.gpr = open(fname, O_RDWR); - snprintf(fname, sizeof(fname)-1, "/dev/dri/card%d", asic->instance); - asic->fd.drm = open(fname, O_RDWR); + asic->fd.drm = -1; // default to closed // if appending to the fd list remember to update close_asic() and discover_by_did()... if (options->use_pci) { -- 2.12.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 10/11] drm/amdgpu: forward operation context to ttm_bo_mem_space
From: Christian KönigThis way we can finally use some more stats. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 30 -- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 061fd5f..5c90601 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -364,12 +364,10 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, return r; } -static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, +static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) { - struct ttm_operation_ctx ctx = { interruptible, no_wait_gpu }; struct amdgpu_device *adev; struct ttm_mem_reg *old_mem = >mem; struct ttm_mem_reg tmp_mem; @@ -387,7 +385,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, placements.fpfn = 0; placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT; placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; - r = ttm_bo_mem_space(bo, , _mem, ); + r = ttm_bo_mem_space(bo, , _mem, ctx); if (unlikely(r)) { return r; } @@ -401,22 +399,20 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, if (unlikely(r)) { goto out_cleanup; } - r = amdgpu_move_blit(bo, true, no_wait_gpu, _mem, old_mem); + r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, _mem, old_mem); if (unlikely(r)) { goto out_cleanup; } - r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem); + r = ttm_bo_move_ttm(bo, ctx->interruptible, ctx->no_wait_gpu, new_mem); out_cleanup: ttm_bo_mem_put(bo, _mem); return r; } -static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, +static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) { - struct 
ttm_operation_ctx ctx = { interruptible, no_wait_gpu }; struct amdgpu_device *adev; struct ttm_mem_reg *old_mem = >mem; struct ttm_mem_reg tmp_mem; @@ -434,15 +430,15 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, placements.fpfn = 0; placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT; placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; - r = ttm_bo_mem_space(bo, , _mem, ); + r = ttm_bo_mem_space(bo, , _mem, ctx); if (unlikely(r)) { return r; } - r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, _mem); + r = ttm_bo_move_ttm(bo, ctx->interruptible, ctx->no_wait_gpu, _mem); if (unlikely(r)) { goto out_cleanup; } - r = amdgpu_move_blit(bo, true, no_wait_gpu, new_mem, old_mem); + r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem); if (unlikely(r)) { goto out_cleanup; } @@ -488,12 +484,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (old_mem->mem_type == TTM_PL_VRAM && new_mem->mem_type == TTM_PL_SYSTEM) { - r = amdgpu_move_vram_ram(bo, evict, ctx->interruptible, - ctx->no_wait_gpu, new_mem); + r = amdgpu_move_vram_ram(bo, evict, ctx, new_mem); } else if (old_mem->mem_type == TTM_PL_SYSTEM && new_mem->mem_type == TTM_PL_VRAM) { - r = amdgpu_move_ram_vram(bo, evict, ctx->interruptible, - ctx->no_wait_gpu, new_mem); + r = amdgpu_move_ram_vram(bo, evict, ctx, new_mem); } else { r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, new_mem, old_mem); -- 2.5.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 06/11] drm/ttm: use an operation context for ttm_bo_mem_space
From: Christian KönigInstead of specifying interruptible and no_wait_gpu manually. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 drivers/gpu/drm/nouveau/nouveau_bo.c| 6 -- drivers/gpu/drm/radeon/radeon_ttm.c | 8 drivers/gpu/drm/ttm/ttm_bo.c| 22 +++--- include/drm/ttm/ttm_bo_driver.h | 3 +-- 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 58873cf..e386657 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -369,6 +369,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool no_wait_gpu, struct ttm_mem_reg *new_mem) { + struct ttm_operation_ctx ctx = { interruptible, no_wait_gpu }; struct amdgpu_device *adev; struct ttm_mem_reg *old_mem = >mem; struct ttm_mem_reg tmp_mem; @@ -386,8 +387,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, placements.fpfn = 0; placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT; placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; - r = ttm_bo_mem_space(bo, , _mem, -interruptible, no_wait_gpu); + r = ttm_bo_mem_space(bo, , _mem, ); if (unlikely(r)) { return r; } @@ -416,6 +416,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool no_wait_gpu, struct ttm_mem_reg *new_mem) { + struct ttm_operation_ctx ctx = { interruptible, no_wait_gpu }; struct amdgpu_device *adev; struct ttm_mem_reg *old_mem = >mem; struct ttm_mem_reg tmp_mem; @@ -433,8 +434,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, placements.fpfn = 0; placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT; placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; - r = ttm_bo_mem_space(bo, , _mem, -interruptible, no_wait_gpu); + r = ttm_bo_mem_space(bo, , _mem, ); if (unlikely(r)) { return r; } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index ad3778b..8486da6 100644 --- 
a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1126,6 +1126,7 @@ static int nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr, bool no_wait_gpu, struct ttm_mem_reg *new_mem) { + struct ttm_operation_ctx ctx = { intr, no_wait_gpu }; struct ttm_place placement_memtype = { .fpfn = 0, .lpfn = 0, @@ -1140,7 +1141,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr, tmp_mem = *new_mem; tmp_mem.mm_node = NULL; - ret = ttm_bo_mem_space(bo, , _mem, intr, no_wait_gpu); + ret = ttm_bo_mem_space(bo, , _mem, ); if (ret) return ret; @@ -1162,6 +1163,7 @@ static int nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr, bool no_wait_gpu, struct ttm_mem_reg *new_mem) { + struct ttm_operation_ctx ctx = { intr, no_wait_gpu }; struct ttm_place placement_memtype = { .fpfn = 0, .lpfn = 0, @@ -1176,7 +1178,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr, tmp_mem = *new_mem; tmp_mem.mm_node = NULL; - ret = ttm_bo_mem_space(bo, , _mem, intr, no_wait_gpu); + ret = ttm_bo_mem_space(bo, , _mem, ); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index d07ff84..b4bcff5 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -311,6 +311,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo, bool no_wait_gpu, struct ttm_mem_reg *new_mem) { + struct ttm_operation_ctx ctx = { interruptible, no_wait_gpu }; struct radeon_device *rdev; struct ttm_mem_reg *old_mem = >mem; struct ttm_mem_reg tmp_mem; @@ -328,8 +329,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo, placements.fpfn = 0; placements.lpfn = 0; placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; - r = ttm_bo_mem_space(bo, , _mem, -interruptible, no_wait_gpu); + r = ttm_bo_mem_space(bo, , _mem, ); if (unlikely(r)) { return r; } @@ -358,6 +358,7 @@ static int 
radeon_move_ram_vram(struct ttm_buffer_object *bo,
[PATCH 04/11] drm/ttm: add operation ctx to ttm_bo_validate
From: Christian KönigGive moving a BO into place an operation context to work with. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 9 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c| 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c| 6 -- drivers/gpu/drm/ast/ast_ttm.c | 9 ++--- drivers/gpu/drm/bochs/bochs_mm.c | 6 -- drivers/gpu/drm/cirrus/cirrus_ttm.c| 6 -- drivers/gpu/drm/mgag200/mgag200_ttm.c | 9 ++--- drivers/gpu/drm/nouveau/nouveau_bo.c | 4 ++-- drivers/gpu/drm/qxl/qxl_ioctl.c| 4 ++-- drivers/gpu/drm/qxl/qxl_object.c | 6 -- drivers/gpu/drm/qxl/qxl_release.c | 4 ++-- drivers/gpu/drm/radeon/radeon_gem.c| 3 ++- drivers/gpu/drm/radeon/radeon_mn.c | 3 ++- drivers/gpu/drm/radeon/radeon_object.c | 14 +- drivers/gpu/drm/radeon/radeon_vm.c | 3 ++- drivers/gpu/drm/ttm/ttm_bo.c | 16 +--- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 11 ++- drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c| 3 ++- drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c | 21 + drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c| 9 - drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 6 -- drivers/gpu/drm/vmwgfx/vmwgfx_shader.c | 3 ++- include/drm/ttm/ttm_bo_api.h | 20 25 files changed, 124 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 26168df..31613ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -373,6 +373,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct ttm_operation_ctx ctx = { true, false }; u64 initial_bytes_moved; uint32_t domain; int r; @@ -391,7 +392,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, retry: amdgpu_ttm_placement_from_domain(bo, domain); initial_bytes_moved = atomic64_read(>num_bytes_moved); - r = ttm_bo_validate(>tbo, >placement, true, 
false); + r = ttm_bo_validate(>tbo, >placement, ); p->bytes_moved += atomic64_read(>num_bytes_moved) - initial_bytes_moved; @@ -408,6 +409,7 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, struct amdgpu_bo *validated) { uint32_t domain = validated->allowed_domains; + struct ttm_operation_ctx ctx = { true, false }; int r; if (!p->evictable) @@ -440,7 +442,7 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, /* Good we can try to move this BO somewhere else */ amdgpu_ttm_placement_from_domain(bo, other); initial_bytes_moved = atomic64_read(>num_bytes_moved); - r = ttm_bo_validate(>tbo, >placement, true, false); + r = ttm_bo_validate(>tbo, >placement, ); p->bytes_moved += atomic64_read(>num_bytes_moved) - initial_bytes_moved; @@ -1419,6 +1421,7 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, */ int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser) { + struct ttm_operation_ctx ctx = { false, false }; unsigned i; int r; @@ -1437,7 +1440,7 @@ int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser) bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; amdgpu_ttm_placement_from_domain(bo, bo->allowed_domains); - r = ttm_bo_validate(>tbo, >placement, false, false); + r = ttm_bo_validate(>tbo, >placement, ); if (unlikely(r)) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 67be795..e5157b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -306,6 +306,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { + struct ttm_operation_ctx ctx = { true, false }; struct amdgpu_device *adev = dev->dev_private; struct drm_amdgpu_gem_userptr *args = data; struct drm_gem_object *gobj; @@ -362,7 +363,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, goto free_pages; 
amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); - r =
[PATCH 11/11] drm/amdgpu: use the new TTM bytes moved counter
From: Christian KönigInstead of the global statistics use the per context bytes moved counter. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 7 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 + 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 31613ab..7486827 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -372,9 +372,7 @@ void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes) static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo) { - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_operation_ctx ctx = { true, false }; - u64 initial_bytes_moved; uint32_t domain; int r; @@ -391,16 +389,13 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, retry: amdgpu_ttm_placement_from_domain(bo, domain); - initial_bytes_moved = atomic64_read(>num_bytes_moved); r = ttm_bo_validate(>tbo, >placement, ); - p->bytes_moved += atomic64_read(>num_bytes_moved) - - initial_bytes_moved; - if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { domain = bo->allowed_domains; goto retry; } + p->bytes_moved += ctx.bytes_moved; return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 364d894..8f49b85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -323,7 +323,6 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, struct amdgpu_bo *bo; enum ttm_bo_type type; unsigned long page_align; - u64 initial_bytes_moved; size_t acc_size; int r; @@ -395,12 +394,10 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, amdgpu_fill_placement_to_bo(bo, placement); /* Kernel allocation are uninterruptible */ - initial_bytes_moved = atomic64_read(>num_bytes_moved); r = ttm_bo_init_reserved(>mman.bdev, 
>tbo, size, type, >placement, page_align, , NULL, acc_size, sg, resv, _ttm_bo_destroy); - amdgpu_cs_report_moved_bytes(adev, - atomic64_read(>num_bytes_moved) - initial_bytes_moved); + amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved); if (unlikely(r != 0)) return r; -- 2.5.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 09/11] drm/ttm: add number of bytes moved to the operation context
From: Christian König — Add some statistics about how many bytes we have moved. Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo.c | 1 + include/drm/ttm/ttm_bo_api.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 0c3c06a..0c5bc9e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -378,6 +378,7 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, else bo->offset = 0; + ctx->bytes_moved += bo->num_pages << PAGE_SHIFT; return 0; out_err: diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 4b4e9e0..91b4afb 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -270,6 +270,7 @@ struct ttm_bo_kmap_obj { struct ttm_operation_ctx { bool interruptible; bool no_wait_gpu; + uint64_t bytes_moved; }; /** -- 2.5.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 07/11] drm/ttm: use the operation context inside TTM
From: Christian KönigInstead of passing down the parameters manually to every function. Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo.c | 69 +++- 1 file changed, 30 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index caddcf6..c288852 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -286,9 +286,8 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc) } static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, - struct ttm_mem_reg *mem, - bool evict, bool interruptible, - bool no_wait_gpu) + struct ttm_mem_reg *mem, bool evict, + struct ttm_operation_ctx *ctx) { struct ttm_bo_device *bdev = bo->bdev; bool old_is_pci = ttm_mem_reg_is_pci(bdev, >mem); @@ -342,12 +341,14 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) && !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) - ret = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, mem); + ret = ttm_bo_move_ttm(bo, ctx->interruptible, + ctx->no_wait_gpu, mem); else if (bdev->driver->move) - ret = bdev->driver->move(bo, evict, interruptible, -no_wait_gpu, mem); + ret = bdev->driver->move(bo, evict, ctx->interruptible, +ctx->no_wait_gpu, mem); else - ret = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, mem); + ret = ttm_bo_move_memcpy(bo, ctx->interruptible, +ctx->no_wait_gpu, mem); if (ret) { if (bdev->driver->move_notify) { @@ -662,10 +663,9 @@ void ttm_bo_unlock_delayed_workqueue(struct ttm_bo_device *bdev, int resched) } EXPORT_SYMBOL(ttm_bo_unlock_delayed_workqueue); -static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible, - bool no_wait_gpu) +static int ttm_bo_evict(struct ttm_buffer_object *bo, + struct ttm_operation_ctx *ctx) { - struct ttm_operation_ctx ctx = { interruptible, no_wait_gpu }; struct ttm_bo_device *bdev = bo->bdev; struct ttm_mem_reg evict_mem; struct ttm_placement placement; @@ -681,7 +681,7 @@ 
static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible, placement.num_placement = 0; placement.num_busy_placement = 0; bdev->driver->evict_flags(bo, ); - ret = ttm_bo_mem_space(bo, , _mem, ); + ret = ttm_bo_mem_space(bo, , _mem, ctx); if (ret) { if (ret != -ERESTARTSYS) { pr_err("Failed to find memory space for buffer 0x%p eviction\n", @@ -691,8 +691,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible, goto out; } - ret = ttm_bo_handle_move_mem(bo, _mem, true, -interruptible, no_wait_gpu); + ret = ttm_bo_handle_move_mem(bo, _mem, true, ctx); if (unlikely(ret)) { if (ret != -ERESTARTSYS) pr_err("Buffer eviction failed\n"); @@ -719,10 +718,9 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, EXPORT_SYMBOL(ttm_bo_eviction_valuable); static int ttm_mem_evict_first(struct ttm_bo_device *bdev, - uint32_t mem_type, - const struct ttm_place *place, - bool interruptible, - bool no_wait_gpu) + uint32_t mem_type, + const struct ttm_place *place, + struct ttm_operation_ctx *ctx) { struct ttm_bo_global *glob = bdev->glob; struct ttm_mem_type_manager *man = >man[mem_type]; @@ -759,8 +757,8 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev, kref_get(>list_kref); if (!list_empty(>ddestroy)) { - ret = ttm_bo_cleanup_refs_and_unlock(bo, interruptible, -no_wait_gpu); + ret = ttm_bo_cleanup_refs_and_unlock(bo, ctx->interruptible, +ctx->no_wait_gpu); kref_put(>list_kref, ttm_bo_release_list); return ret; } @@ -772,7 +770,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev, ttm_bo_list_ref_sub(bo, put_count, true); - ret = ttm_bo_evict(bo, interruptible, no_wait_gpu); + ret = ttm_bo_evict(bo, ctx);
[PATCH 02/11] drm/ttm: cleanup ttm_bo_driver.h
From: Christian KönigExtern is the default for function declerations anyway. Signed-off-by: Christian König --- include/drm/ttm/ttm_bo_driver.h | 113 +++- 1 file changed, 54 insertions(+), 59 deletions(-) diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index cb5072a..9ce2017 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -608,12 +608,12 @@ ttm_flag_masked(uint32_t *old, uint32_t new, uint32_t mask) * Returns: * NULL: Out of memory. */ -extern int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev, - unsigned long size, uint32_t page_flags, - struct page *dummy_read_page); -extern int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_bo_device *bdev, - unsigned long size, uint32_t page_flags, - struct page *dummy_read_page); +int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev, + unsigned long size, uint32_t page_flags, + struct page *dummy_read_page); +int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_bo_device *bdev, + unsigned long size, uint32_t page_flags, + struct page *dummy_read_page); /** * ttm_tt_fini @@ -622,8 +622,8 @@ extern int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_bo_device *bde * * Free memory of ttm_tt structure */ -extern void ttm_tt_fini(struct ttm_tt *ttm); -extern void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma); +void ttm_tt_fini(struct ttm_tt *ttm); +void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma); /** * ttm_ttm_bind: @@ -633,7 +633,7 @@ extern void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma); * * Bind the pages of @ttm to an aperture location identified by @bo_mem */ -extern int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem); +int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem); /** * ttm_ttm_destroy: @@ -642,7 +642,7 @@ extern int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem); * * Unbind, unpopulate and destroy common struct ttm_tt. 
*/ -extern void ttm_tt_destroy(struct ttm_tt *ttm); +void ttm_tt_destroy(struct ttm_tt *ttm); /** * ttm_ttm_unbind: @@ -651,7 +651,7 @@ extern void ttm_tt_destroy(struct ttm_tt *ttm); * * Unbind a struct ttm_tt. */ -extern void ttm_tt_unbind(struct ttm_tt *ttm); +void ttm_tt_unbind(struct ttm_tt *ttm); /** * ttm_tt_swapin: @@ -660,7 +660,7 @@ extern void ttm_tt_unbind(struct ttm_tt *ttm); * * Swap in a previously swap out ttm_tt. */ -extern int ttm_tt_swapin(struct ttm_tt *ttm); +int ttm_tt_swapin(struct ttm_tt *ttm); /** * ttm_tt_set_placement_caching: @@ -675,9 +675,8 @@ extern int ttm_tt_swapin(struct ttm_tt *ttm); * hit RAM. This function may be very costly as it involves global TLB * and cache flushes and potential page splitting / combining. */ -extern int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement); -extern int ttm_tt_swapout(struct ttm_tt *ttm, - struct file *persistent_swap_storage); +int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement); +int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistent_swap_storage); /** * ttm_tt_unpopulate - free pages from a ttm @@ -686,7 +685,7 @@ extern int ttm_tt_swapout(struct ttm_tt *ttm, * * Calls the driver method to free all pages from a ttm */ -extern void ttm_tt_unpopulate(struct ttm_tt *ttm); +void ttm_tt_unpopulate(struct ttm_tt *ttm); /* * ttm_bo.c @@ -701,8 +700,7 @@ extern void ttm_tt_unpopulate(struct ttm_tt *ttm); * Returns true if the memory described by @mem is PCI memory, * false otherwise. */ -extern bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev, - struct ttm_mem_reg *mem); +bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem); /** * ttm_bo_mem_space @@ -723,21 +721,20 @@ extern bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev, * fragmentation or concurrent allocators. * -ERESTARTSYS: An interruptible sleep was interrupted by a signal. 
*/ -extern int ttm_bo_mem_space(struct ttm_buffer_object *bo, - struct ttm_placement *placement, - struct ttm_mem_reg *mem, - bool interruptible, - bool no_wait_gpu); - -extern void ttm_bo_mem_put(struct ttm_buffer_object *bo, +int ttm_bo_mem_space(struct ttm_buffer_object *bo, +struct ttm_placement *placement, +struct ttm_mem_reg *mem, +bool interruptible, +bool no_wait_gpu); + +void ttm_bo_mem_put(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem); +void ttm_bo_mem_put_locked(struct ttm_buffer_object *bo,
[PATCH 05/11] drm/ttm: use an operation ctx for ttm_bo_init_reserved
From: Christian KönigInstead of specifying if sleeping should be interruptible. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 ++- drivers/gpu/drm/ttm/ttm_bo.c | 12 +--- include/drm/ttm/ttm_bo_api.h | 5 ++--- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 5ef9be6..364d894 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -319,6 +319,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, struct reservation_object *resv, struct amdgpu_bo **bo_ptr) { + struct ttm_operation_ctx ctx = { !kernel, false }; struct amdgpu_bo *bo; enum ttm_bo_type type; unsigned long page_align; @@ -396,7 +397,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, initial_bytes_moved = atomic64_read(>num_bytes_moved); r = ttm_bo_init_reserved(>mman.bdev, >tbo, size, type, ->placement, page_align, !kernel, NULL, +>placement, page_align, , NULL, acc_size, sg, resv, _ttm_bo_destroy); amdgpu_cs_report_moved_bytes(adev, atomic64_read(>num_bytes_moved) - initial_bytes_moved); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 10707923..f1e8833 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1130,7 +1130,7 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev, enum ttm_bo_type type, struct ttm_placement *placement, uint32_t page_alignment, -bool interruptible, +struct ttm_operation_ctx *ctx, struct file *persistent_swap_storage, size_t acc_size, struct sg_table *sg, @@ -1216,11 +1216,8 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev, WARN_ON(!locked); } - if (likely(!ret)) { - struct ttm_operation_ctx ctx = { interruptible, false }; - - ret = ttm_bo_validate(bo, placement, ); - } + if (likely(!ret)) + ret = ttm_bo_validate(bo, placement, ctx); if (unlikely(ret)) { if (!resv) @@ -1253,10 +1250,11 @@ int 
ttm_bo_init(struct ttm_bo_device *bdev, struct reservation_object *resv, void (*destroy) (struct ttm_buffer_object *)) { + struct ttm_operation_ctx ctx = { interruptible, false }; int ret; ret = ttm_bo_init_reserved(bdev, bo, size, type, placement, - page_alignment, interruptible, + page_alignment, , persistent_swap_storage, acc_size, sg, resv, destroy); if (ret) diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 25c1ca8..4b4e9e0 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -468,8 +468,7 @@ size_t ttm_bo_dma_acc_size(struct ttm_bo_device *bdev, * @type: Requested type of buffer object. * @flags: Initial placement flags. * @page_alignment: Data alignment in pages. - * @interruptible: If needing to sleep to wait for GPU resources, - * sleep interruptible. + * @ctx: TTM operation context for memory allocation. * @persistent_swap_storage: Usually the swap storage is deleted for buffers * pinned in physical memory. If this behaviour is not desired, this member * holds a pointer to a persistent shmem object. Typically, this would @@ -506,7 +505,7 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev, enum ttm_bo_type type, struct ttm_placement *placement, uint32_t page_alignment, -bool interrubtible, +struct ttm_operation_ctx *ctx, struct file *persistent_swap_storage, size_t acc_size, struct sg_table *sg, -- 2.5.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 01/11] drm/ttm: cleanup coding style in ttm_bo_api.h
From: Christian KönigExtern is the default for function declerations anyway and this solves a bunch of 80char per line issues. Signed-off-by: Christian König --- include/drm/ttm/ttm_bo_api.h | 135 ++- 1 file changed, 56 insertions(+), 79 deletions(-) diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 3b302a5..425f3b4 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -288,8 +288,7 @@ ttm_bo_reference(struct ttm_buffer_object *bo) * Returns -EBUSY if no_wait is true and the buffer is busy. * Returns -ERESTARTSYS if interrupted by a signal. */ -extern int ttm_bo_wait(struct ttm_buffer_object *bo, - bool interruptible, bool no_wait); +int ttm_bo_wait(struct ttm_buffer_object *bo, bool interruptible, bool no_wait); /** * ttm_bo_mem_compat - Check if proposed placement is compatible with a bo @@ -300,9 +299,8 @@ extern int ttm_bo_wait(struct ttm_buffer_object *bo, * * Returns true if the placement is compatible */ -extern bool ttm_bo_mem_compat(struct ttm_placement *placement, - struct ttm_mem_reg *mem, - uint32_t *new_flags); +bool ttm_bo_mem_compat(struct ttm_placement *placement, struct ttm_mem_reg *mem, + uint32_t *new_flags); /** * ttm_bo_validate @@ -320,10 +318,10 @@ extern bool ttm_bo_mem_compat(struct ttm_placement *placement, * -EBUSY if no_wait is true and buffer busy. * -ERESTARTSYS if interrupted by a signal. */ -extern int ttm_bo_validate(struct ttm_buffer_object *bo, - struct ttm_placement *placement, - bool interruptible, - bool no_wait_gpu); +int ttm_bo_validate(struct ttm_buffer_object *bo, + struct ttm_placement *placement, + bool interruptible, + bool no_wait_gpu); /** * ttm_bo_unref @@ -332,7 +330,7 @@ extern int ttm_bo_validate(struct ttm_buffer_object *bo, * * Unreference and clear a pointer to a buffer object. 
*/ -extern void ttm_bo_unref(struct ttm_buffer_object **bo); +void ttm_bo_unref(struct ttm_buffer_object **bo); /** @@ -344,8 +342,8 @@ extern void ttm_bo_unref(struct ttm_buffer_object **bo); * * Release @count lru list references to this buffer object. */ -extern void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count, - bool never_free); +void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count, +bool never_free); /** * ttm_bo_add_to_lru @@ -357,7 +355,7 @@ extern void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count, * This function must be called with struct ttm_bo_global::lru_lock held, and * is typically called immediately prior to unreserving a bo. */ -extern void ttm_bo_add_to_lru(struct ttm_buffer_object *bo); +void ttm_bo_add_to_lru(struct ttm_buffer_object *bo); /** * ttm_bo_del_from_lru @@ -369,7 +367,7 @@ extern void ttm_bo_add_to_lru(struct ttm_buffer_object *bo); * and is usually called just immediately after the bo has been reserved to * avoid recursive reservation from lru lists. */ -extern int ttm_bo_del_from_lru(struct ttm_buffer_object *bo); +int ttm_bo_del_from_lru(struct ttm_buffer_object *bo); /** * ttm_bo_move_to_lru_tail @@ -380,7 +378,7 @@ extern int ttm_bo_del_from_lru(struct ttm_buffer_object *bo); * object. This function must be called with struct ttm_bo_global::lru_lock * held, and is used to make a BO less likely to be considered for eviction. */ -extern void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo); +void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo); /** * ttm_bo_lock_delayed_workqueue @@ -389,15 +387,14 @@ extern void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo); * Returns * True if the workqueue was queued at the time */ -extern int ttm_bo_lock_delayed_workqueue(struct ttm_bo_device *bdev); +int ttm_bo_lock_delayed_workqueue(struct ttm_bo_device *bdev); /** * ttm_bo_unlock_delayed_workqueue * * Allows the delayed workqueue to run. 
*/ -extern void ttm_bo_unlock_delayed_workqueue(struct ttm_bo_device *bdev, - int resched); +void ttm_bo_unlock_delayed_workqueue(struct ttm_bo_device *bdev, int resched); /** * ttm_bo_eviction_valuable @@ -424,8 +421,7 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * -EBUSY if the buffer is busy and no_wait is true. * -ERESTARTSYS if interrupted by a signal. */ -extern int -ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait); +int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait); /** * ttm_bo_synccpu_write_release: @@ -434,7 +430,7 @@ ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool
[PATCH 08/11] drm/ttm: add context to driver move callback as well
From: Christian KönigInstead of passing the parameters manually. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 21 +++-- drivers/gpu/drm/nouveau/nouveau_bo.c| 27 --- drivers/gpu/drm/qxl/qxl_ttm.c | 9 - drivers/gpu/drm/radeon/radeon_ttm.c | 23 --- drivers/gpu/drm/ttm/ttm_bo.c| 3 +-- drivers/gpu/drm/virtio/virtgpu_ttm.c| 7 +++ include/drm/ttm/ttm_bo_driver.h | 6 ++ 7 files changed, 49 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e386657..061fd5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -451,10 +451,9 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, return r; } -static int amdgpu_bo_move(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, - struct ttm_mem_reg *new_mem) +static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_mem_reg *new_mem) { struct amdgpu_device *adev; struct amdgpu_bo *abo; @@ -489,19 +488,21 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, if (old_mem->mem_type == TTM_PL_VRAM && new_mem->mem_type == TTM_PL_SYSTEM) { - r = amdgpu_move_vram_ram(bo, evict, interruptible, - no_wait_gpu, new_mem); + r = amdgpu_move_vram_ram(bo, evict, ctx->interruptible, + ctx->no_wait_gpu, new_mem); } else if (old_mem->mem_type == TTM_PL_SYSTEM && new_mem->mem_type == TTM_PL_VRAM) { - r = amdgpu_move_ram_vram(bo, evict, interruptible, - no_wait_gpu, new_mem); + r = amdgpu_move_ram_vram(bo, evict, ctx->interruptible, + ctx->no_wait_gpu, new_mem); } else { - r = amdgpu_move_blit(bo, evict, no_wait_gpu, new_mem, old_mem); + r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, +new_mem, old_mem); } if (r) { memcpy: - r = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem); + r = ttm_bo_move_memcpy(bo, ctx->interruptible, + ctx->no_wait_gpu, new_mem); if (r) { return r; } diff 
--git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 8486da6..39e328e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1253,8 +1253,9 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo, } static int -nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr, - bool no_wait_gpu, struct ttm_mem_reg *new_mem) +nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_mem_reg *new_mem) { struct nouveau_drm *drm = nouveau_bdev(bo->bdev); struct nouveau_bo *nvbo = nouveau_bo(bo); @@ -1262,7 +1263,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr, struct nouveau_drm_tile *new_tile = NULL; int ret = 0; - ret = ttm_bo_wait(bo, intr, no_wait_gpu); + ret = ttm_bo_wait(bo, ctx->interruptible, ctx->no_wait_gpu); if (ret) return ret; @@ -1286,22 +1287,26 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr, /* Hardware assisted copy. */ if (drm->ttm.move) { if (new_mem->mem_type == TTM_PL_SYSTEM) - ret = nouveau_bo_move_flipd(bo, evict, intr, - no_wait_gpu, new_mem); + ret = nouveau_bo_move_flipd(bo, evict, + ctx->interruptible, + ctx->no_wait_gpu, new_mem); else if (old_mem->mem_type == TTM_PL_SYSTEM) - ret = nouveau_bo_move_flips(bo, evict, intr, - no_wait_gpu, new_mem); + ret = nouveau_bo_move_flips(bo, evict, + ctx->interruptible, + ctx->no_wait_gpu, new_mem); else - ret = nouveau_bo_move_m2mf(bo, evict, intr, - no_wait_gpu, new_mem); +
[PATCH 03/11] drm/ttm: remove cur_placement
From: Christian KönigNot used any more. Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo.c | 5 ++--- include/drm/ttm/ttm_bo_api.h | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 04cbecc..70335a3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -372,11 +372,10 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, bo->evicted = false; } - if (bo->mem.mm_node) { + if (bo->mem.mm_node) bo->offset = (bo->mem.start << PAGE_SHIFT) + bdev->man[bo->mem.mem_type].gpu_offset; - bo->cur_placement = bo->mem.placement; - } else + else bo->offset = 0; return 0; diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 425f3b4..1d99cc1 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -224,7 +224,6 @@ struct ttm_buffer_object { */ uint64_t offset; /* GPU address space is independent of CPU word size */ - uint32_t cur_placement; struct sg_table *sg; -- 2.5.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] drm/amdgpu: remove unused and mostly unimplemented CGS functions
KFD doesn't use CGS. Regards, Felix -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Deucher, Alexander Sent: Wednesday, April 26, 2017 12:36 PM To: 'Christian König'; amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] drm/amdgpu: remove unused and mostly unimplemented CGS functions > -Original Message- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf > Of Christian König > Sent: Wednesday, April 26, 2017 9:14 AM > To: amd-gfx@lists.freedesktop.org > Subject: [PATCH] drm/amdgpu: remove unused and mostly unimplemented > CGS functions > > From: Christian König> > Those functions are all unused and some not even implemented. > > Signed-off-by: Christian König I think some of this is used by the ACP driver (cgs_get_pci_resource for example). Make sure that is enabled in your build. You might also want to check with Felix for KFD. Alex > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 233 > drivers/gpu/drm/amd/include/cgs_common.h | > 303 - > -- > 2 files changed, 536 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c > index a1a2d44..93f5dcc 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c > @@ -42,82 +42,6 @@ struct amdgpu_cgs_device { > struct amdgpu_device *adev =\ > ((struct amdgpu_cgs_device *)cgs_device)->adev > > -static int amdgpu_cgs_gpu_mem_info(struct cgs_device *cgs_device, > enum cgs_gpu_mem_type type, > -uint64_t *mc_start, uint64_t *mc_size, > -uint64_t *mem_size) > -{ > - CGS_FUNC_ADEV; > - switch(type) { > - case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB: > - case CGS_GPU_MEM_TYPE__VISIBLE_FB: > - *mc_start = 0; > - *mc_size = adev->mc.visible_vram_size; > - *mem_size = adev->mc.visible_vram_size - adev- > >vram_pin_size; > - break; > - case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB: > - case CGS_GPU_MEM_TYPE__INVISIBLE_FB: > - *mc_start = adev->mc.visible_vram_size; > - *mc_size 
= adev->mc.real_vram_size - adev- > >mc.visible_vram_size; > - *mem_size = *mc_size; > - break; > - case CGS_GPU_MEM_TYPE__GART_CACHEABLE: > - case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE: > - *mc_start = adev->mc.gtt_start; > - *mc_size = adev->mc.gtt_size; > - *mem_size = adev->mc.gtt_size - adev->gart_pin_size; > - break; > - default: > - return -EINVAL; > - } > - > - return 0; > -} > - > -static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void > *kmem, > - uint64_t size, > - uint64_t min_offset, uint64_t max_offset, > - cgs_handle_t *kmem_handle, uint64_t > *mcaddr) > -{ > - CGS_FUNC_ADEV; > - int ret; > - struct amdgpu_bo *bo; > - struct page *kmem_page = vmalloc_to_page(kmem); > - int npages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT; > - > - struct sg_table *sg = drm_prime_pages_to_sg(_page, > npages); > - ret = amdgpu_bo_create(adev, size, PAGE_SIZE, false, > -AMDGPU_GEM_DOMAIN_GTT, 0, sg, NULL, > ); > - if (ret) > - return ret; > - ret = amdgpu_bo_reserve(bo, false); > - if (unlikely(ret != 0)) > - return ret; > - > - /* pin buffer into GTT */ > - ret = amdgpu_bo_pin_restricted(bo, > AMDGPU_GEM_DOMAIN_GTT, > -min_offset, max_offset, mcaddr); > - amdgpu_bo_unreserve(bo); > - > - *kmem_handle = (cgs_handle_t)bo; > - return ret; > -} > - > -static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device, > cgs_handle_t kmem_handle) -{ > - struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle; > - > - if (obj) { > - int r = amdgpu_bo_reserve(obj, false); > - if (likely(r == 0)) { > - amdgpu_bo_unpin(obj); > - amdgpu_bo_unreserve(obj); > - } > - amdgpu_bo_unref(); > - > - } > - return 0; > -} > - > static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, > enum cgs_gpu_mem_type type, > uint64_t size, uint64_t align, @@ -349,96 > +273,6 @@ static > void amdgpu_cgs_write_ind_register(struct > cgs_device *cgs_device, > WARN(1, "Invalid indirect register space"); } > > -static uint8_t amdgpu_cgs_read_pci_config_byte(struct cgs_device > *cgs_device, 
unsigned addr) -{ > - CGS_FUNC_ADEV; > - uint8_t val; > - int ret =
Re: [PATCH v2] drm/radeon: only warn once in radeon_ttm_bo_destroy if va list not empty
Am 27.04.2017 um 16:10 schrieb Julien Isorce: Encountered a dozen of exact same backtraces when mesa's pb_cache_release_all_buffers is called after that a gpu reset failed. v2: Remove superfluous error message added in v1. bug: https://bugs.freedesktop.org/show_bug.cgi?id=96271 Signed-off-by: Julien Isorce Reviewed-by: Christian König --- drivers/gpu/drm/radeon/radeon_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index bec2ec0..8b72229 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -81,7 +81,7 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) list_del_init(&bo->list); mutex_unlock(&bo->rdev->gem.mutex); radeon_bo_clear_surface_reg(bo); - WARN_ON(!list_empty(&bo->va)); + WARN_ON_ONCE(!list_empty(&bo->va)); drm_gem_object_release(&bo->gem_base); kfree(bo); } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH v2] drm/radeon: only warn once in radeon_ttm_bo_destroy if va list not empty
Encountered a dozen of exact same backtraces when mesa's pb_cache_release_all_buffers is called after that a gpu reset failed. v2: Remove superfluous error message added in v1. bug: https://bugs.freedesktop.org/show_bug.cgi?id=96271 Signed-off-by: Julien Isorce --- drivers/gpu/drm/radeon/radeon_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index bec2ec0..8b72229 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -81,7 +81,7 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) list_del_init(&bo->list); mutex_unlock(&bo->rdev->gem.mutex); radeon_bo_clear_surface_reg(bo); - WARN_ON(!list_empty(&bo->va)); + WARN_ON_ONCE(!list_empty(&bo->va)); drm_gem_object_release(&bo->gem_base); kfree(bo); } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 2/2] drm/amdgpu: bump version for exporting gpu info for gfx9
> -Original Message- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf > Of Junwei Zhang > Sent: Thursday, April 27, 2017 4:31 AM > To: amd-gfx@lists.freedesktop.org > Cc: Deucher, Alexander; Wang, Ken; Zhang, Jerry > Subject: [PATCH 2/2] drm/amdgpu: bump version for exporting gpu info for > gfx9 > > Signed-off-by: Junwei ZhangSeries is: Reviewed-by: Alex Deucher > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index ead00d7..857c3be 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -64,9 +64,10 @@ > * - 3.12.0 - Add query for double offchip LDS buffers > * - 3.13.0 - Add PRT support > * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality > + * - 3.15.0 - Export more gpu info for gfx9 > */ > #define KMS_DRIVER_MAJOR 3 > -#define KMS_DRIVER_MINOR 14 > +#define KMS_DRIVER_MINOR 15 > #define KMS_DRIVER_PATCHLEVEL0 > > int amdgpu_vram_limit = 0; > -- > 1.9.1 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset
A really good improvement Reviewed-by: Monk Liu-Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of zhoucm1 Sent: Thursday, April 27, 2017 6:24 PM To: Christian König ; Zhang, Jerry ; amd-gfx@lists.freedesktop.org Cc: Liu, Monk Subject: Re: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset On 2017年04月27日 18:11, Christian König wrote: > Am 27.04.2017 um 11:08 schrieb Zhang, Jerry (Junwei): >> On 04/27/2017 04:25 PM, Chunming Zhou wrote: >>> the case could happen when gpu reset: >>> 1. when gpu reset, cs can be continue until sw queue is full, then >>> push job will wait with holding pd reservation. >>> 2. gpu_reset routine will also need pd reservation to restore page >>> table from their shadow. >>> 3. cs is waiting for gpu_reset complete, but gpu reset is waiting >>> for cs releases reservation. >>> >>> Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202 >>> Signed-off-by: Chunming Zhou >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ >>> 1 file changed, 2 insertions(+) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>> index 9edb1a4..a6722a7 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>> @@ -1076,6 +1076,7 @@ static int amdgpu_cs_submit(struct >>> amdgpu_cs_parser *p, >>> amdgpu_job_free_resources(job); >>> >>> trace_amdgpu_cs_ioctl(job); >>> +amdgpu_cs_parser_fini(p, 0, true); >> >> Please confirm: >> It will not free/release more things that may be accessed in job >> working process, as cs_parse_fini free so many things related to parser. > > Yeah, that indeed won't work. 
amdgpu_cs_parser_fini() does: > No, with my patch, parser->job already is NULL here, same as previous sequence, just put push_job action after amdgpu_cs_parser_fini() > if (parser->job) > amdgpu_job_free(parser->job); > > And amdgpu_cs_submit() sets the fence, so it must be called before > amdgpu_cs_parser_fini(). yes, fence is already generated, so amdgpu_cs_parser_fini before pushing job is completely safe here. > > But apart from that this is a rather nifty idea. What was the problem > with the initial patch? Just as comments in patch, which is found by Monk. Regards, David Zhou > >> >> Or we can just release the pd reservation here? > > We could run into problem with the reservation ticked with that. So I > would want to avoid that. > > Christian. > >> >> Jerry >> >>> amd_sched_entity_push_job(>base); >>> >>> return 0; >>> @@ -1130,6 +1131,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, >>> void *data, struct drm_file *filp) >>> >>> r = amdgpu_cs_submit(, cs); >>> >>> +return r; >>> out: >>> amdgpu_cs_parser_fini(, r, reserved_buffers); >>> return r; >>> >> ___ >> amd-gfx mailing list >> amd-gfx@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx > > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: remove unused and mostly unimplemented CGS functions v2
On Thu, Apr 27, 2017 at 9:22 AM, Christian Königwrote: > From: Christian König > > Those functions are all unused and some not even implemented. > > v2: keep cgs_get_pci_resource it is used by the ACP driver. > > Signed-off-by: Christian König Reviewed-by: Alex Deucher > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 198 --- > drivers/gpu/drm/amd/include/cgs_common.h | 270 > --- > 2 files changed, 468 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c > index a1a2d44..9d22ebd 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c > @@ -42,82 +42,6 @@ struct amdgpu_cgs_device { > struct amdgpu_device *adev =\ > ((struct amdgpu_cgs_device *)cgs_device)->adev > > -static int amdgpu_cgs_gpu_mem_info(struct cgs_device *cgs_device, enum > cgs_gpu_mem_type type, > - uint64_t *mc_start, uint64_t *mc_size, > - uint64_t *mem_size) > -{ > - CGS_FUNC_ADEV; > - switch(type) { > - case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB: > - case CGS_GPU_MEM_TYPE__VISIBLE_FB: > - *mc_start = 0; > - *mc_size = adev->mc.visible_vram_size; > - *mem_size = adev->mc.visible_vram_size - adev->vram_pin_size; > - break; > - case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB: > - case CGS_GPU_MEM_TYPE__INVISIBLE_FB: > - *mc_start = adev->mc.visible_vram_size; > - *mc_size = adev->mc.real_vram_size - > adev->mc.visible_vram_size; > - *mem_size = *mc_size; > - break; > - case CGS_GPU_MEM_TYPE__GART_CACHEABLE: > - case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE: > - *mc_start = adev->mc.gtt_start; > - *mc_size = adev->mc.gtt_size; > - *mem_size = adev->mc.gtt_size - adev->gart_pin_size; > - break; > - default: > - return -EINVAL; > - } > - > - return 0; > -} > - > -static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void *kmem, > - uint64_t size, > - uint64_t min_offset, uint64_t max_offset, > - cgs_handle_t *kmem_handle, uint64_t *mcaddr) > -{ > - CGS_FUNC_ADEV; > - int ret; > - struct amdgpu_bo 
*bo; > - struct page *kmem_page = vmalloc_to_page(kmem); > - int npages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT; > - > - struct sg_table *sg = drm_prime_pages_to_sg(_page, npages); > - ret = amdgpu_bo_create(adev, size, PAGE_SIZE, false, > - AMDGPU_GEM_DOMAIN_GTT, 0, sg, NULL, ); > - if (ret) > - return ret; > - ret = amdgpu_bo_reserve(bo, false); > - if (unlikely(ret != 0)) > - return ret; > - > - /* pin buffer into GTT */ > - ret = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_GTT, > - min_offset, max_offset, mcaddr); > - amdgpu_bo_unreserve(bo); > - > - *kmem_handle = (cgs_handle_t)bo; > - return ret; > -} > - > -static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device, > cgs_handle_t kmem_handle) > -{ > - struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle; > - > - if (obj) { > - int r = amdgpu_bo_reserve(obj, false); > - if (likely(r == 0)) { > - amdgpu_bo_unpin(obj); > - amdgpu_bo_unreserve(obj); > - } > - amdgpu_bo_unref(); > - > - } > - return 0; > -} > - > static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, > enum cgs_gpu_mem_type type, > uint64_t size, uint64_t align, > @@ -349,62 +273,6 @@ static void amdgpu_cgs_write_ind_register(struct > cgs_device *cgs_device, > WARN(1, "Invalid indirect register space"); > } > > -static uint8_t amdgpu_cgs_read_pci_config_byte(struct cgs_device > *cgs_device, unsigned addr) > -{ > - CGS_FUNC_ADEV; > - uint8_t val; > - int ret = pci_read_config_byte(adev->pdev, addr, ); > - if (WARN(ret, "pci_read_config_byte error")) > - return 0; > - return val; > -} > - > -static uint16_t amdgpu_cgs_read_pci_config_word(struct cgs_device > *cgs_device, unsigned addr) > -{ > - CGS_FUNC_ADEV; > - uint16_t val; > - int ret = pci_read_config_word(adev->pdev, addr, ); > - if (WARN(ret, "pci_read_config_word error")) > - return 0; > - return val; > -} > - > -static uint32_t
Re: [PATCH] drm/amdgpu: remove unused and mostly unimplemented CGS functions
Am 26.04.2017 um 18:36 schrieb Deucher, Alexander: -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Christian König Sent: Wednesday, April 26, 2017 9:14 AM To: amd-gfx@lists.freedesktop.org Subject: [PATCH] drm/amdgpu: remove unused and mostly unimplemented CGS functions From: Christian KönigThose functions are all unused and some not even implemented. Signed-off-by: Christian König I think some of this is used by the ACP driver (cgs_get_pci_resource for example). Make sure that is enabled in your build. You might also want to check with Felix for KFD. Good point. I was only searching outside of the amdgpu subdirectory for users. Why is a file which is part of amdgpu even using this? v2 is on the list. Christian. Alex --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 233 drivers/gpu/drm/amd/include/cgs_common.h | 303 - -- 2 files changed, 536 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index a1a2d44..93f5dcc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -42,82 +42,6 @@ struct amdgpu_cgs_device { struct amdgpu_device *adev =\ ((struct amdgpu_cgs_device *)cgs_device)->adev -static int amdgpu_cgs_gpu_mem_info(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type, - uint64_t *mc_start, uint64_t *mc_size, - uint64_t *mem_size) -{ - CGS_FUNC_ADEV; - switch(type) { - case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB: - case CGS_GPU_MEM_TYPE__VISIBLE_FB: - *mc_start = 0; - *mc_size = adev->mc.visible_vram_size; - *mem_size = adev->mc.visible_vram_size - adev- vram_pin_size; - break; - case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB: - case CGS_GPU_MEM_TYPE__INVISIBLE_FB: - *mc_start = adev->mc.visible_vram_size; - *mc_size = adev->mc.real_vram_size - adev- mc.visible_vram_size; - *mem_size = *mc_size; - break; - case CGS_GPU_MEM_TYPE__GART_CACHEABLE: - case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE: - *mc_start = 
adev->mc.gtt_start; - *mc_size = adev->mc.gtt_size; - *mem_size = adev->mc.gtt_size - adev->gart_pin_size; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void *kmem, - uint64_t size, - uint64_t min_offset, uint64_t max_offset, - cgs_handle_t *kmem_handle, uint64_t *mcaddr) -{ - CGS_FUNC_ADEV; - int ret; - struct amdgpu_bo *bo; - struct page *kmem_page = vmalloc_to_page(kmem); - int npages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT; - - struct sg_table *sg = drm_prime_pages_to_sg(_page, npages); - ret = amdgpu_bo_create(adev, size, PAGE_SIZE, false, - AMDGPU_GEM_DOMAIN_GTT, 0, sg, NULL, ); - if (ret) - return ret; - ret = amdgpu_bo_reserve(bo, false); - if (unlikely(ret != 0)) - return ret; - - /* pin buffer into GTT */ - ret = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_GTT, - min_offset, max_offset, mcaddr); - amdgpu_bo_unreserve(bo); - - *kmem_handle = (cgs_handle_t)bo; - return ret; -} - -static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device, cgs_handle_t kmem_handle) -{ - struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle; - - if (obj) { - int r = amdgpu_bo_reserve(obj, false); - if (likely(r == 0)) { - amdgpu_bo_unpin(obj); - amdgpu_bo_unreserve(obj); - } - amdgpu_bo_unref(); - - } - return 0; -} - static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type, uint64_t size, uint64_t align, @@ -349,96 +273,6 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device, WARN(1, "Invalid indirect register space"); } -static uint8_t amdgpu_cgs_read_pci_config_byte(struct cgs_device *cgs_device, unsigned addr) -{ - CGS_FUNC_ADEV; - uint8_t val; - int ret = pci_read_config_byte(adev->pdev, addr, ); - if (WARN(ret, "pci_read_config_byte error")) - return 0; - return val; -} - -static uint16_t amdgpu_cgs_read_pci_config_word(struct cgs_device *cgs_device, unsigned addr) -{ - CGS_FUNC_ADEV; -
[PATCH] drm/amdgpu: remove unused and mostly unimplemented CGS functions v2
From: Christian KönigThose functions are all unused and some not even implemented. v2: keep cgs_get_pci_resource it is used by the ACP driver. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 198 --- drivers/gpu/drm/amd/include/cgs_common.h | 270 --- 2 files changed, 468 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index a1a2d44..9d22ebd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -42,82 +42,6 @@ struct amdgpu_cgs_device { struct amdgpu_device *adev =\ ((struct amdgpu_cgs_device *)cgs_device)->adev -static int amdgpu_cgs_gpu_mem_info(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type, - uint64_t *mc_start, uint64_t *mc_size, - uint64_t *mem_size) -{ - CGS_FUNC_ADEV; - switch(type) { - case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB: - case CGS_GPU_MEM_TYPE__VISIBLE_FB: - *mc_start = 0; - *mc_size = adev->mc.visible_vram_size; - *mem_size = adev->mc.visible_vram_size - adev->vram_pin_size; - break; - case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB: - case CGS_GPU_MEM_TYPE__INVISIBLE_FB: - *mc_start = adev->mc.visible_vram_size; - *mc_size = adev->mc.real_vram_size - adev->mc.visible_vram_size; - *mem_size = *mc_size; - break; - case CGS_GPU_MEM_TYPE__GART_CACHEABLE: - case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE: - *mc_start = adev->mc.gtt_start; - *mc_size = adev->mc.gtt_size; - *mem_size = adev->mc.gtt_size - adev->gart_pin_size; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void *kmem, - uint64_t size, - uint64_t min_offset, uint64_t max_offset, - cgs_handle_t *kmem_handle, uint64_t *mcaddr) -{ - CGS_FUNC_ADEV; - int ret; - struct amdgpu_bo *bo; - struct page *kmem_page = vmalloc_to_page(kmem); - int npages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT; - - struct sg_table *sg = drm_prime_pages_to_sg(_page, npages); - ret = amdgpu_bo_create(adev, size, 
PAGE_SIZE, false, - AMDGPU_GEM_DOMAIN_GTT, 0, sg, NULL, ); - if (ret) - return ret; - ret = amdgpu_bo_reserve(bo, false); - if (unlikely(ret != 0)) - return ret; - - /* pin buffer into GTT */ - ret = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_GTT, - min_offset, max_offset, mcaddr); - amdgpu_bo_unreserve(bo); - - *kmem_handle = (cgs_handle_t)bo; - return ret; -} - -static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device, cgs_handle_t kmem_handle) -{ - struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle; - - if (obj) { - int r = amdgpu_bo_reserve(obj, false); - if (likely(r == 0)) { - amdgpu_bo_unpin(obj); - amdgpu_bo_unreserve(obj); - } - amdgpu_bo_unref(); - - } - return 0; -} - static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type, uint64_t size, uint64_t align, @@ -349,62 +273,6 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device, WARN(1, "Invalid indirect register space"); } -static uint8_t amdgpu_cgs_read_pci_config_byte(struct cgs_device *cgs_device, unsigned addr) -{ - CGS_FUNC_ADEV; - uint8_t val; - int ret = pci_read_config_byte(adev->pdev, addr, ); - if (WARN(ret, "pci_read_config_byte error")) - return 0; - return val; -} - -static uint16_t amdgpu_cgs_read_pci_config_word(struct cgs_device *cgs_device, unsigned addr) -{ - CGS_FUNC_ADEV; - uint16_t val; - int ret = pci_read_config_word(adev->pdev, addr, ); - if (WARN(ret, "pci_read_config_word error")) - return 0; - return val; -} - -static uint32_t amdgpu_cgs_read_pci_config_dword(struct cgs_device *cgs_device, -unsigned addr) -{ - CGS_FUNC_ADEV; - uint32_t val; - int ret = pci_read_config_dword(adev->pdev, addr, ); - if (WARN(ret, "pci_read_config_dword error")) - return 0; - return val; -} - -static void amdgpu_cgs_write_pci_config_byte(struct cgs_device *cgs_device,
Re: [PATCH 0/6 v5] *** Dedicated vmid per process v5 ***
Reviewed-by: Christian Königfor patch #3 and #5. Am 27.04.2017 um 15:09 schrieb Chunming Zhou: The current kernel implementation, which grabs the idle VMID from pool when emitting the job may: The back-to-back submission from one process could use different VMID. The submission to different queues from single process could use different VMID It works well in most case but cannot work for the SQ thread trace capture. The VMID for the submission that set the {SQTT}_BASE, which refers to the address of the trace buffer, is stored in shader engine. If the profiling application have to use different VMIDs to submit IBs in its life cycle: Some trace is not captured since it actually uses different VMID to submit jobs. Some part of captured trace may come from different application since they are accidentally uses the owner’s VMID to submit jobs. V2: 1. address Christian's comments: a. drop context flags for tag process, instead, add vm ioctl. b. change order of patches. c. sync waiting only when vm flush needs. 2. address Alex's comments; bump module version V3: address Jerry and Christian's comments. and only reserve gfxhub vmid v4: address Jerry and Christian's comments. fix some race condistions. v5: patch#5: peek_fence instead of get_fence. fix potential context starved. Chunming Zhou (6): drm/amdgpu: add vm ioctl drm/amdgpu: add reserved vmid field in vm struct v2 drm/amdgpu: reserve/unreserve vmid by vm ioctl v4 drm/amdgpu: add limitation for dedicated vm number v4 drm/amdgpu: implement grab reserved vmid V4 drm/amdgpu: bump module verion for reserved vmid drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 159 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 6 ++ include/uapi/drm/amdgpu_drm.h | 22 + 5 files changed, 185 insertions(+), 6 deletions(-) ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/6] drm/amdgpu: reserve/unreserve vmid by vm ioctl v4
add reserve/unreserve vmid funtions. v3: only reserve vmid from gfxhub v4: fix racy condition Change-Id: I5f80dc39dc9d44660a96a2b710b0dbb4d3b9039d Signed-off-by: Chunming ZhouReviewed-by: Junwei Zhang --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 62 +++--- 1 file changed, 49 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 261aaea..e37421e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -546,6 +546,45 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, return r; } +static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub) +{ + struct amdgpu_vm_id_manager *id_mgr = >vm_manager.id_mgr[vmhub]; + + mutex_lock(_mgr->lock); + if (vm->reserved_vmid[vmhub]) { + list_add(>reserved_vmid[vmhub]->list, + _mgr->ids_lru); + vm->reserved_vmid[vmhub] = NULL; + } + mutex_unlock(_mgr->lock); +} + +static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev, +struct amdgpu_vm *vm, +unsigned vmhub) +{ + struct amdgpu_vm_id_manager *id_mgr; + struct amdgpu_vm_id *idle; + int r = 0; + + id_mgr = >vm_manager.id_mgr[vmhub]; + mutex_lock(_mgr->lock); + if (vm->reserved_vmid[vmhub]) + goto unlock; + /* Select the first entry VMID */ + idle = list_first_entry(_mgr->ids_lru, struct amdgpu_vm_id, list); + list_del_init(>list); + vm->reserved_vmid[vmhub] = idle; + mutex_unlock(_mgr->lock); + + return 0; +unlock: + mutex_unlock(_mgr->lock); + return r; +} + static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -2316,18 +2355,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_levels(>root); fence_put(vm->last_dir_update); - for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) { - struct amdgpu_vm_id_manager *id_mgr = - >vm_manager.id_mgr[i]; - - mutex_lock(_mgr->lock); - if (vm->reserved_vmid[i]) { 
- list_add(>reserved_vmid[i]->list, -_mgr->ids_lru); - vm->reserved_vmid[i] = NULL; - } - mutex_unlock(_mgr->lock); - } + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) + amdgpu_vm_free_reserved_vmid(adev, vm, i); } /** @@ -2400,11 +2429,18 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) union drm_amdgpu_vm *args = data; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_fpriv *fpriv = filp->driver_priv; + int r; switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: + /* current, we only have requirement to reserve vmid from gfxhub */ + r = amdgpu_vm_alloc_reserved_vmid(adev, >vm, + AMDGPU_GFXHUB); + if (r) + return r; + break; case AMDGPU_VM_OP_UNRESERVE_VMID: - return -EINVAL; + amdgpu_vm_free_reserved_vmid(adev, >vm, AMDGPU_GFXHUB); break; default: return -EINVAL; -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 4/6] drm/amdgpu: add limitation for dedicated vm number v4
v2: move #define to amdgpu_vm.h v3: move reserved vmid counter to id_manager, and increase counter before allocating vmid v4: rename to reserved_vmid_num Change-Id: Ie5958cf6dbdc1c8278e61d9158483472d6f5c6e3 Signed-off-by: Chunming ZhouReviewed-by: Junwei Zhang Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e37421e..e6fdfa4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -557,6 +557,7 @@ static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev, list_add(>reserved_vmid[vmhub]->list, _mgr->ids_lru); vm->reserved_vmid[vmhub] = NULL; + atomic_dec(_mgr->reserved_vmid_num); } mutex_unlock(_mgr->lock); } @@ -573,6 +574,13 @@ static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev, mutex_lock(_mgr->lock); if (vm->reserved_vmid[vmhub]) goto unlock; + if (atomic_inc_return(_mgr->reserved_vmid_num) > + AMDGPU_VM_MAX_RESERVED_VMID) { + DRM_ERROR("Over limitation of reserved vmid\n"); + atomic_dec(_mgr->reserved_vmid_num); + r = -EINVAL; + goto unlock; + } /* Select the first entry VMID */ idle = list_first_entry(_mgr->ids_lru, struct amdgpu_vm_id, list); list_del_init(>list); @@ -2376,6 +2384,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) mutex_init(_mgr->lock); INIT_LIST_HEAD(_mgr->ids_lru); + atomic_set(_mgr->reserved_vmid_num, 0); /* skip over VMID 0, since it is the system VM */ for (j = 1; j < id_mgr->num_ids; ++j) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 8eedca1..9828fcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -84,6 +84,8 @@ /* hardcode that limit for now */ #define AMDGPU_VA_RESERVED_SIZE(8 << 20) +/* max vmids dedicated for process */ +#define 
AMDGPU_VM_MAX_RESERVED_VMID1 struct amdgpu_vm_pt { struct amdgpu_bo*bo; @@ -157,6 +159,7 @@ struct amdgpu_vm_id_manager { unsignednum_ids; struct list_headids_lru; struct amdgpu_vm_id ids[AMDGPU_NUM_VM]; + atomic_treserved_vmid_num; }; struct amdgpu_vm_manager { -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 0/6 v5] *** Dedicated vmid per process v5 ***
The current kernel implementation, which grabs the idle VMID from pool when emitting the job may: The back-to-back submission from one process could use different VMID. The submission to different queues from single process could use different VMID It works well in most case but cannot work for the SQ thread trace capture. The VMID for the submission that set the {SQTT}_BASE, which refers to the address of the trace buffer, is stored in shader engine. If the profiling application have to use different VMIDs to submit IBs in its life cycle: Some trace is not captured since it actually uses different VMID to submit jobs. Some part of captured trace may come from different application since they are accidentally uses the owner’s VMID to submit jobs. V2: 1. address Christian's comments: a. drop context flags for tag process, instead, add vm ioctl. b. change order of patches. c. sync waiting only when vm flush needs. 2. address Alex's comments; bump module version V3: address Jerry and Christian's comments. and only reserve gfxhub vmid v4: address Jerry and Christian's comments. fix some race condistions. v5: patch#5: peek_fence instead of get_fence. fix potential context starved. Chunming Zhou (6): drm/amdgpu: add vm ioctl drm/amdgpu: add reserved vmid field in vm struct v2 drm/amdgpu: reserve/unreserve vmid by vm ioctl v4 drm/amdgpu: add limitation for dedicated vm number v4 drm/amdgpu: implement grab reserved vmid V4 drm/amdgpu: bump module verion for reserved vmid drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 159 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 6 ++ include/uapi/drm/amdgpu_drm.h | 22 + 5 files changed, 185 insertions(+), 6 deletions(-) -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/6] drm/amdgpu: add vm ioctl
It will be used for reserving vmid. Change-Id: Ib7169ea999690c8e82d0dcbccdd2d97760c0270a Signed-off-by: Chunming ZhouReviewed-by: Junwei Zhang Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 + include/uapi/drm/amdgpu_drm.h | 22 ++ 4 files changed, 42 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 052e6a5..8ea33c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1062,6 +1062,7 @@ int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe, const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), /* KMS */ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a8cd23b..8698177 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2379,3 +2379,21 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) } } } + +int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + union drm_amdgpu_vm *args = data; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + + switch (args->in.op) { + case AMDGPU_VM_OP_RESERVE_VMID: + case AMDGPU_VM_OP_UNRESERVE_VMID: + return -EINVAL; + break; + default: + return -EINVAL; + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 8eba2d3..16e0aaa 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -248,5 +248,6 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size); +int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); #endif diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index b98b371..1c95775 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -51,6 +51,7 @@ #define DRM_AMDGPU_GEM_OP 0x10 #define DRM_AMDGPU_GEM_USERPTR 0x11 #define DRM_AMDGPU_WAIT_FENCES 0x12 +#define DRM_AMDGPU_VM 0x13 /* hybrid specific ioctls */ #define DRM_AMDGPU_SEM 0x5b @@ -71,6 +72,7 @@ #define DRM_IOCTL_AMDGPU_GEM_OPDRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op) #define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) #define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) +#define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) /* hybrid specific ioctls */ #define DRM_IOCTL_AMDGPU_GEM_DGMA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_DGMA, struct drm_amdgpu_gem_dgma) @@ -212,6 +214,26 @@ struct drm_amdgpu_ctx_in { union drm_amdgpu_ctx_out out; }; +/* vm ioctl */ +#define AMDGPU_VM_OP_RESERVE_VMID 1 +#define AMDGPU_VM_OP_UNRESERVE_VMID2 + +struct drm_amdgpu_vm_in { + /** AMDGPU_VM_OP_* */ + __u32 op; + __u32 flags; +}; + +struct drm_amdgpu_vm_out { + /** For future use, no flags defined so far */ + __u64 flags; +}; + +union drm_amdgpu_vm { + struct drm_amdgpu_vm_in in; + struct drm_amdgpu_vm_out out; +}; + /* sem related */ #define AMDGPU_SEM_OP_CREATE_SEM1 #define AMDGPU_SEM_OP_WAIT_SEM 2 -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/6] drm/amdgpu: add reserved vmid field in vm struct v2
v2: rename dedicated_vmid to reserved_vmid Change-Id: Id728e20366c8a1ae90d4e901dc80e136e2a613bb Signed-off-by: Chunming ZhouReviewed-by: Junwei Zhang Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 - drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 ++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8698177..261aaea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2163,10 +2163,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned ring_instance; struct amdgpu_ring *ring; struct amd_sched_rq *rq; - int r; + int r, i; vm->va = RB_ROOT; vm->client_id = atomic64_inc_return(>vm_manager.client_counter); + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) + vm->reserved_vmid[i] = NULL; spin_lock_init(>status_lock); INIT_LIST_HEAD(>invalidated); INIT_LIST_HEAD(>cleared); @@ -2270,6 +2272,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; + int i; if (vm->is_kfd_vm) { struct amdgpu_vm_id_manager *id_mgr = @@ -2313,6 +2316,18 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_levels(>root); fence_put(vm->last_dir_update); + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) { + struct amdgpu_vm_id_manager *id_mgr = + >vm_manager.id_mgr[i]; + + mutex_lock(_mgr->lock); + if (vm->reserved_vmid[i]) { + list_add(>reserved_vmid[i]->list, +_mgr->ids_lru); + vm->reserved_vmid[i] = NULL; + } + mutex_unlock(_mgr->lock); + } } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 16e0aaa..8eedca1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -123,6 +123,8 @@ struct amdgpu_vm { /* client id */ u64 client_id; + /* dedicated to vm */ + struct 
amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS]; /* each VM will map on CSA */ struct amdgpu_bo_va *csa_bo_va; -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 6/6] drm/amdgpu: bump module version for reserved vmid
Change-Id: I1065e0430ed44f7ee6c29214b72e35a7343ea02b Signed-off-by: Chunming ZhouReviewed-by: Junwei Zhang Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 55322b4..6799829 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -64,9 +64,10 @@ * - 3.12.0 - Add query for double offchip LDS buffers * - 3.13.0 - Add PRT support * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality + * - 3.15.0 - Add reserved vmid support */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 14 +#define KMS_DRIVER_MINOR 15 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 5/6] drm/amdgpu: implement grab reserved vmid V4
v2: move sync waiting only when flush needs v3: fix racy v4: peek fence instead of get fence, and fix potential context starved. Change-Id: I64da2701c9fdcf986afb90ba1492a78d5bef1b6c Signed-off-by: Chunming ZhouReviewed-by: Junwei Zhang --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 -- 1 file changed, 75 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e6fdfa4..b35b853 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -397,6 +397,72 @@ static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev, atomic_read(>gpu_reset_counter); } +static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub) +{ + return !!vm->reserved_vmid[vmhub]; +} + +/* idr_mgr->lock must be held */ +static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm, + struct amdgpu_ring *ring, + struct amdgpu_sync *sync, + struct fence *fence, + struct amdgpu_job *job) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + uint64_t fence_context = adev->fence_context + ring->idx; + struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub]; + struct amdgpu_vm_id_manager *id_mgr = >vm_manager.id_mgr[vmhub]; + struct fence *updates = sync->last_vm_update; + int r = 0; + struct fence *flushed, *tmp; + bool needs_flush = false; + + flushed = id->flushed_updates; + if ((amdgpu_vm_had_gpu_reset(adev, id)) || + (atomic64_read(>owner) != vm->client_id) || + (job->vm_pd_addr != id->pd_gpu_addr) || + (updates && (!flushed || updates->context != flushed->context || + fence_is_later(updates, flushed))) || + (!id->last_flush || (id->last_flush->context != fence_context && +!fence_is_signaled(id->last_flush { + needs_flush = true; + /* to prevent one context starved by another context */ + id->pd_gpu_addr = 0; + tmp = amdgpu_sync_peek_fence(>active, ring); + if (tmp) { + r = amdgpu_sync_fence(adev, sync, tmp); + return r; + } + } + + 
/* Good we can use this VMID. Remember this submission as + * user of the VMID. + */ + r = amdgpu_sync_fence(ring->adev, >active, fence); + if (r) + goto out; + + if (updates && (!flushed || updates->context != flushed->context || + fence_is_later(updates, flushed))) { + fence_put(id->flushed_updates); + id->flushed_updates = fence_get(updates); + } + id->pd_gpu_addr = job->vm_pd_addr; + id->current_gpu_reset_count = atomic_read(>gpu_reset_counter); + atomic64_set(>owner, vm->client_id); + job->vm_needs_flush = needs_flush; + if (needs_flush) { + fence_put(id->last_flush); + id->last_flush = NULL; + } + job->vm_id = id - id_mgr->ids; + trace_amdgpu_vm_grab_id(vm, ring, job); +out: + return r; +} + /** * amdgpu_vm_grab_id - allocate the next free VMID * @@ -421,12 +487,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, unsigned i; int r = 0; + mutex_lock(_mgr->lock); + if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) { + r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job); + mutex_unlock(_mgr->lock); + return r; + } fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); - if (!fences) + if (!fences) { + mutex_unlock(_mgr->lock); return -ENOMEM; - - mutex_lock(_mgr->lock); - + } /* Check if we have an idle VMID */ i = 0; list_for_each_entry(idle, _mgr->ids_lru, list) { -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/radeon: only warn once in radeon_ttm_bo_destroy if va list not empty
Am 27.04.2017 um 12:58 schrieb Julien Isorce: But always print an error. Encountered a dozen of exact same backtraces when mesa's pb_cache_release_all_buffers is called after that a gpu reset failed. An other approach would be to check rdev->vm_manager.enabled instead of rdev->accel_working in the other function radeon_gem_object_close. But it will only work if the vm_manager succeeded to be re-enabled after a gpu reset. bug: https://bugs.freedesktop.org/show_bug.cgi?id=96271 Signed-off-by: Julien Isorce--- drivers/gpu/drm/radeon/radeon_object.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index bec2ec0..76cc039 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -81,7 +81,13 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) list_del_init(>list); mutex_unlock(>rdev->gem.mutex); radeon_bo_clear_surface_reg(bo); - WARN_ON(!list_empty(>va)); + + if (!list_empty(>va)) { + DRM_ERROR("Virtual address list not empty, accel: %d\n", + bo->rdev->accel_working); + WARN_ON_ONCE(1); + } + The message is superfluous if we initially started up the GPU and found later that we can't get GFX working again after a GPU reset it is pretty much impossible to continue. I would just change the "WARN_ON(!list_empty(>va));" to a "WARN_ON_ONCE(!list_empty(>va));". Regards, Christian. drm_gem_object_release(>gem_base); kfree(bo); } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 0/6 v4] *** Dedicated vmid per process v4 ***
Am 27.04.2017 um 12:26 schrieb zhoucm1: On 2017年04月27日 17:51, Christian König wrote: Patch #1, #2, #4 and #6 are Reviewed-by: Christian König. Patch #3: +/* Select the first entry VMID */ +idle = list_first_entry(_mgr->ids_lru, struct amdgpu_vm_id, list); +list_del_init(>list); +vm->reserved_vmid[vmhub] = idle; +mutex_unlock(_mgr->lock); I think we should wait for the VMID to be completely idle before we can use it, but that possible also go into the handling in patch #5. Yes. Patch #5: +if ((amdgpu_vm_had_gpu_reset(adev, id)) || +(atomic64_read(>owner) != vm->client_id) || +(job->vm_pd_addr != id->pd_gpu_addr) || +(updates && (!flushed || updates->context != flushed->context || +fence_is_later(updates, flushed { You also need this check here as well: if (!id->last_flush || (id->last_flush->context != fence_context && !fence_is_signaled(id->last_flush))) added. +tmp = amdgpu_sync_get_fence(>active); +if (tmp) { +r = amdgpu_sync_fence(adev, sync, tmp); +fence_put(tmp); +return r; +} That won't work correctly. The first problem is that amdgpu_sync_get_fence() removes the fence from the active fences. So a another command submission from a different context won't wait for all the necessary fences. I think using amdgpu_sync_peek_fence() instead should work here. good catch. The second problem is that a context could be starved when it needs a VM flush while another context can still submit jobs without a flush. I think you could work around that by setting id->pd_gpu_addr to an invalid value when we hit this case. This way all other contexts would need to do a VM flush as well. I don't catch your opinion, when you object concurrent flush, isn't it expected? Imagine the following scenario: Two contexts in use by the application. Context 1 has a bunch of jobs to do, but needs a VM flush before starting them. So each job will just pick the first fence to wait for go to sleep again. Context 2 has a bunch of jobs to do, but does NOT need a VM flush. 
This will add more and more fences to the collection of active jobs for this id. The result is that Context 1 will never be able to submit any of it's jobs because context 2 keeps the ID busy all the time. Setting the pd_gpu_addr to some invalid value (or maybe the ID owner?) should fix that. In this case context 2 needs to flush as well and so context 1 will sooner or later get a chance as well. Regards, Christian. +id->pd_gpu_addr = job->vm_pd_addr; +id->current_gpu_reset_count = atomic_read(>gpu_reset_counter); +atomic64_set(>owner, vm->client_id); +job->vm_needs_flush = needs_flush; If we need a flush id->last_flush needs to be set to NULL here as well. E.g. do fence_put(id->last_flush); id->last_flush = NULL; changed. Regards, David Zhou Regards, Christian. Am 27.04.2017 um 07:00 schrieb Chunming Zhou: The current kernel implementation, which grabs the idle VMID from pool when emitting the job may: The back-to-back submission from one process could use different VMID. The submission to different queues from single process could use different VMID It works well in most case but cannot work for the SQ thread trace capture. The VMID for the submission that set the {SQTT}_BASE, which refers to the address of the trace buffer, is stored in shader engine. If the profiling application have to use different VMIDs to submit IBs in its life cycle: Some trace is not captured since it actually uses different VMID to submit jobs. Some part of captured trace may come from different application since they are accidentally uses the owner’s VMID to submit jobs. V2: 1. address Christian's comments: a. drop context flags for tag process, instead, add vm ioctl. b. change order of patches. c. sync waiting only when vm flush needs. 2. address Alex's comments; bump module version V3: address Jerry and Christian's comments. and only reserve gfxhub vmid v4: address Jerry and Christian's comments. fix some race condistions. 
Chunming Zhou (6): drm/amdgpu: add vm ioctl drm/amdgpu: add reserved vmid field in vm struct v2 drm/amdgpu: reserve/unreserve vmid by vm ioctl v4 drm/amdgpu: add limitation for dedicated vm number v4 drm/amdgpu: implement grab reserved vmid V3 drm/amdgpu: bump module verion for reserved vmid drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 152 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 6 ++ include/uapi/drm/amdgpu_drm.h | 22 + 5 files changed, 178 insertions(+), 6 deletions(-)
Re: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset
Am 27.04.2017 um 12:23 schrieb zhoucm1: On 2017年04月27日 18:11, Christian König wrote: Am 27.04.2017 um 11:08 schrieb Zhang, Jerry (Junwei): On 04/27/2017 04:25 PM, Chunming Zhou wrote: the case could happen when gpu reset: 1. when gpu reset, cs can be continue until sw queue is full, then push job will wait with holding pd reservation. 2. gpu_reset routine will also need pd reservation to restore page table from their shadow. 3. cs is waiting for gpu_reset complete, but gpu reset is waiting for cs releases reservation. Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202 Signed-off-by: Chunming Zhou--- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9edb1a4..a6722a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1076,6 +1076,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_job_free_resources(job); trace_amdgpu_cs_ioctl(job); +amdgpu_cs_parser_fini(p, 0, true); Please confirm: It will not free/release more things that may be accessed in job working process, as cs_parse_fini free so many things related to parser. Yeah, that indeed won't work. amdgpu_cs_parser_fini() does: No, with my patch, parser->job already is NULL here, same as previous sequence, just put push_job action after amdgpu_cs_parser_fini() Ah! You add that into amdgpu_cs_submit! That was the point I was missing. trace_amdgpu_cs_ioctl(job); + amdgpu_cs_parser_fini(p, 0, true); amd_sched_entity_push_job(>base); Can we keep the trace_amdgpu_cs_ioctl(); directly before the amd_sched_entity_push_job()? With that changed, the patch is Reviewed-by: Christian König . Regards, Christian. if (parser->job) amdgpu_job_free(parser->job); And amdgpu_cs_submit() sets the fence, so it must be called before amdgpu_cs_parser_fini(). 
yes, fence is already generated, so amdgpu_cs_parser_fini before pushing job is completely safe here. But apart from that this is a rather nifty idea. What was the problem with the initial patch? Just as comments in patch, which is found by Monk. Regards, David Zhou Or we can just release the pd reservation here? We could run into problem with the reservation ticked with that. So I would want to avoid that. Christian. Jerry amd_sched_entity_push_job(>base); return 0; @@ -1130,6 +1131,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_submit(, cs); +return r; out: amdgpu_cs_parser_fini(, r, reserved_buffers); return r; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/radeon: reject bo creation from ioctl when the gpu is disabled
Am 27.04.2017 um 12:57 schrieb Julien Isorce: Like done in radeon_cs_ioctl. In particular it avoids mesa to call map/unmap: radeon_create_bo ioctl(DRM_RADEON_GEM_CREATE) -> ok ioctl(DRM_RADEON_GEM_VA-MAP) radeon_destroy_bo ioctl(DRM_RADEON_GEM_VA-UNMAP) Encountered also cases where the vm_manager succeeded to be enabled after a gpu reset while the GFX ring was not responding so accel was disabled. https://bugs.freedesktop.org/show_bug.cgi?id=96271 NAK, even if accel isn't working we want to display something and for that we need to be able to create BOs. Christian. Signed-off-by: Julien Isorce--- drivers/gpu/drm/radeon/radeon_gem.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index b97c92b..1030001 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -259,6 +259,13 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data, int r; down_read(>exclusive_lock); + + if (!rdev->accel_working && + args->initial_domain != RADEON_GEM_DOMAIN_CPU) { + up_read(>exclusive_lock); + return -EBUSY; + } + /* create a gem object to contain this object in */ args->size = roundup(args->size, PAGE_SIZE); r = radeon_gem_object_create(rdev, args->size, args->alignment, ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/radeon: only warn once in radeon_ttm_bo_destroy if va list not empty
But always print an error. Encountered a dozen of exact same backtraces when mesa's pb_cache_release_all_buffers is called after that a gpu reset failed. An other approach would be to check rdev->vm_manager.enabled instead of rdev->accel_working in the other function radeon_gem_object_close. But it will only work if the vm_manager succeeded to be re-enabled after a gpu reset. bug: https://bugs.freedesktop.org/show_bug.cgi?id=96271 Signed-off-by: Julien Isorce--- drivers/gpu/drm/radeon/radeon_object.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index bec2ec0..76cc039 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -81,7 +81,13 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) list_del_init(>list); mutex_unlock(>rdev->gem.mutex); radeon_bo_clear_surface_reg(bo); - WARN_ON(!list_empty(>va)); + + if (!list_empty(>va)) { + DRM_ERROR("Virtual address list not empty, accel: %d\n", + bo->rdev->accel_working); + WARN_ON_ONCE(1); + } + drm_gem_object_release(>gem_base); kfree(bo); } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/radeon: reject bo creation from ioctl when the gpu is disabled
Like done in radeon_cs_ioctl. In particular it avoids mesa to call map/unmap: radeon_create_bo ioctl(DRM_RADEON_GEM_CREATE) -> ok ioctl(DRM_RADEON_GEM_VA-MAP) radeon_destroy_bo ioctl(DRM_RADEON_GEM_VA-UNMAP) Encountered also cases where the vm_manager succeeded to be enabled after a gpu reset while the GFX ring was not responding so accel was disabled. https://bugs.freedesktop.org/show_bug.cgi?id=96271 Signed-off-by: Julien Isorce--- drivers/gpu/drm/radeon/radeon_gem.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index b97c92b..1030001 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -259,6 +259,13 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data, int r; down_read(>exclusive_lock); + + if (!rdev->accel_working && + args->initial_domain != RADEON_GEM_DOMAIN_CPU) { + up_read(>exclusive_lock); + return -EBUSY; + } + /* create a gem object to contain this object in */ args->size = roundup(args->size, PAGE_SIZE); r = radeon_gem_object_create(rdev, args->size, args->alignment, -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 0/6 v4] *** Dedicated vmid per process v4 ***
On 2017年04月27日 17:51, Christian König wrote: Patch #1, #2, #4 and #6 are Reviewed-by: Christian König. Patch #3: +/* Select the first entry VMID */ +idle = list_first_entry(_mgr->ids_lru, struct amdgpu_vm_id, list); +list_del_init(>list); +vm->reserved_vmid[vmhub] = idle; +mutex_unlock(_mgr->lock); I think we should wait for the VMID to be completely idle before we can use it, but that possible also go into the handling in patch #5. Yes. Patch #5: +if ((amdgpu_vm_had_gpu_reset(adev, id)) || +(atomic64_read(>owner) != vm->client_id) || +(job->vm_pd_addr != id->pd_gpu_addr) || +(updates && (!flushed || updates->context != flushed->context || +fence_is_later(updates, flushed { You also need this check here as well: if (!id->last_flush || (id->last_flush->context != fence_context && !fence_is_signaled(id->last_flush))) added. +tmp = amdgpu_sync_get_fence(>active); +if (tmp) { +r = amdgpu_sync_fence(adev, sync, tmp); +fence_put(tmp); +return r; +} That won't work correctly. The first problem is that amdgpu_sync_get_fence() removes the fence from the active fences. So a another command submission from a different context won't wait for all the necessary fences. I think using amdgpu_sync_peek_fence() instead should work here. good catch. The second problem is that a context could be starved when it needs a VM flush while another context can still submit jobs without a flush. I think you could work around that by setting id->pd_gpu_addr to an invalid value when we hit this case. This way all other contexts would need to do a VM flush as well. I don't catch your opinion, when you object concurrent flush, isn't it expected? +id->pd_gpu_addr = job->vm_pd_addr; +id->current_gpu_reset_count = atomic_read(>gpu_reset_counter); +atomic64_set(>owner, vm->client_id); +job->vm_needs_flush = needs_flush; If we need a flush id->last_flush needs to be set to NULL here as well. E.g. do fence_put(id->last_flush); id->last_flush = NULL; changed. Regards, David Zhou Regards, Christian. 
Am 27.04.2017 um 07:00 schrieb Chunming Zhou: The current kernel implementation, which grabs the idle VMID from pool when emitting the job may: The back-to-back submission from one process could use different VMID. The submission to different queues from single process could use different VMID It works well in most case but cannot work for the SQ thread trace capture. The VMID for the submission that set the {SQTT}_BASE, which refers to the address of the trace buffer, is stored in shader engine. If the profiling application have to use different VMIDs to submit IBs in its life cycle: Some trace is not captured since it actually uses different VMID to submit jobs. Some part of captured trace may come from different application since they are accidentally uses the owner’s VMID to submit jobs. V2: 1. address Christian's comments: a. drop context flags for tag process, instead, add vm ioctl. b. change order of patches. c. sync waiting only when vm flush needs. 2. address Alex's comments; bump module version V3: address Jerry and Christian's comments. and only reserve gfxhub vmid v4: address Jerry and Christian's comments. fix some race condistions. Chunming Zhou (6): drm/amdgpu: add vm ioctl drm/amdgpu: add reserved vmid field in vm struct v2 drm/amdgpu: reserve/unreserve vmid by vm ioctl v4 drm/amdgpu: add limitation for dedicated vm number v4 drm/amdgpu: implement grab reserved vmid V3 drm/amdgpu: bump module verion for reserved vmid drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 152 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 6 ++ include/uapi/drm/amdgpu_drm.h | 22 + 5 files changed, 178 insertions(+), 6 deletions(-) ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset
On 2017年04月27日 18:11, Christian König wrote: Am 27.04.2017 um 11:08 schrieb Zhang, Jerry (Junwei): On 04/27/2017 04:25 PM, Chunming Zhou wrote: the case could happen when gpu reset: 1. when gpu reset, cs can be continue until sw queue is full, then push job will wait with holding pd reservation. 2. gpu_reset routine will also need pd reservation to restore page table from their shadow. 3. cs is waiting for gpu_reset complete, but gpu reset is waiting for cs releases reservation. Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202 Signed-off-by: Chunming Zhou--- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9edb1a4..a6722a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1076,6 +1076,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_job_free_resources(job); trace_amdgpu_cs_ioctl(job); +amdgpu_cs_parser_fini(p, 0, true); Please confirm: It will not free/release more things that may be accessed in job working process, as cs_parse_fini free so many things related to parser. Yeah, that indeed won't work. amdgpu_cs_parser_fini() does: No, with my patch, parser->job already is NULL here, same as previous sequence, just put push_job action after amdgpu_cs_parser_fini() if (parser->job) amdgpu_job_free(parser->job); And amdgpu_cs_submit() sets the fence, so it must be called before amdgpu_cs_parser_fini(). yes, fence is already generated, so amdgpu_cs_parser_fini before pushing job is completely safe here. But apart from that this is a rather nifty idea. What was the problem with the initial patch? Just as comments in patch, which is found by Monk. Regards, David Zhou Or we can just release the pd reservation here? We could run into problem with the reservation ticked with that. So I would want to avoid that. Christian. 
Jerry amd_sched_entity_push_job(>base); return 0; @@ -1130,6 +1131,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_submit(, cs); +return r; out: amdgpu_cs_parser_fini(, r, reserved_buffers); return r; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset
Am 27.04.2017 um 11:08 schrieb Zhang, Jerry (Junwei): On 04/27/2017 04:25 PM, Chunming Zhou wrote: the case could happen when gpu reset: 1. when gpu reset, cs can be continue until sw queue is full, then push job will wait with holding pd reservation. 2. gpu_reset routine will also need pd reservation to restore page table from their shadow. 3. cs is waiting for gpu_reset complete, but gpu reset is waiting for cs releases reservation. Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202 Signed-off-by: Chunming Zhou--- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9edb1a4..a6722a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1076,6 +1076,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_job_free_resources(job); trace_amdgpu_cs_ioctl(job); +amdgpu_cs_parser_fini(p, 0, true); Please confirm: It will not free/release more things that may be accessed in job working process, as cs_parse_fini free so many things related to parser. Yeah, that indeed won't work. amdgpu_cs_parser_fini() does: if (parser->job) amdgpu_job_free(parser->job); And amdgpu_cs_submit() sets the fence, so it must be called before amdgpu_cs_parser_fini(). But apart from that this is a rather nifty idea. What was the problem with the initial patch? Or we can just release the pd reservation here? We could run into problem with the reservation ticked with that. So I would want to avoid that. Christian. 
Jerry amd_sched_entity_push_job(>base); return 0; @@ -1130,6 +1131,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_submit(, cs); +return r; out: amdgpu_cs_parser_fini(, r, reserved_buffers); return r; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] Revert "drm/amdgpu: Refactor flip into prepare submit and submit. (v2)"
Am 27.04.2017 um 10:18 schrieb Michel Dänzer: From: Michel DänzerThis reverts commit cb341a319f7e66f879d69af929c3dadfc1a8f31e. The purpose of the refactor was for amdgpu_crtc_prepare/submit_flip to be used by the DC code, but that's no longer the case. Signed-off-by: Michel Dänzer --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 147 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h| 17 2 files changed, 29 insertions(+), 135 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 7cf226dfb602..0415875656c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -138,56 +138,10 @@ static void amdgpu_unpin_work_func(struct work_struct *__work) kfree(work); } - -static void amdgpu_flip_work_cleanup(struct amdgpu_flip_work *work) -{ - int i; - - amdgpu_bo_unref(>old_abo); - fence_put(work->excl); - for (i = 0; i < work->shared_count; ++i) - fence_put(work->shared[i]); - kfree(work->shared); - kfree(work); -} - -static void amdgpu_flip_cleanup_unreserve( - struct amdgpu_flip_work *work, - struct amdgpu_bo *new_abo) -{ - amdgpu_bo_unreserve(new_abo); - amdgpu_flip_work_cleanup(work); -} - -static void amdgpu_flip_cleanup_unpin( - struct amdgpu_flip_work *work, - struct amdgpu_bo *new_abo) -{ - if (unlikely(amdgpu_bo_unpin(new_abo) != 0)) - DRM_ERROR("failed to unpin new abo in error path\n"); - amdgpu_flip_cleanup_unreserve(work, new_abo); -} - -void amdgpu_crtc_cleanup_flip_ctx( - struct amdgpu_flip_work *work, - struct amdgpu_bo *new_abo) -{ - if (unlikely(amdgpu_bo_reserve(new_abo, false) != 0)) { - DRM_ERROR("failed to reserve new abo in error path\n"); - amdgpu_flip_work_cleanup(work); - return; - } - amdgpu_flip_cleanup_unpin(work, new_abo); -} - -int amdgpu_crtc_prepare_flip( - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t page_flip_flags, - uint32_t target, - struct amdgpu_flip_work 
**work_p, - struct amdgpu_bo **new_abo_p) +int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, +struct drm_framebuffer *fb, +struct drm_pending_vblank_event *event, +uint32_t page_flip_flags, uint32_t target) { struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; @@ -200,7 +154,7 @@ int amdgpu_crtc_prepare_flip( unsigned long flags; u64 tiling_flags; u64 base; - int r; + int i, r; work = kzalloc(sizeof *work, GFP_KERNEL); if (work == NULL) @@ -261,84 +215,41 @@ int amdgpu_crtc_prepare_flip( spin_unlock_irqrestore(>dev->event_lock, flags); r = -EBUSY; goto pflip_cleanup; - } - spin_unlock_irqrestore(>dev->event_lock, flags); - - *work_p = work; - *new_abo_p = new_abo; - return 0; - -pflip_cleanup: - amdgpu_crtc_cleanup_flip_ctx(work, new_abo); - return r; - -unpin: - amdgpu_flip_cleanup_unpin(work, new_abo); - return r; - -unreserve: - amdgpu_flip_cleanup_unreserve(work, new_abo); - return r; - -cleanup: - amdgpu_flip_work_cleanup(work); - return r; - -} - -void amdgpu_crtc_submit_flip( - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct amdgpu_flip_work *work, - struct amdgpu_bo *new_abo) -{ - unsigned long flags; - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - - spin_lock_irqsave(>dev->event_lock, flags); amdgpu_crtc->pflip_status = AMDGPU_FLIP_PENDING; amdgpu_crtc->pflip_works = work; + + DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: %p,\n", +amdgpu_crtc->crtc_id, amdgpu_crtc, work); /* update crtc fb */ crtc->primary->fb = fb; spin_unlock_irqrestore(>dev->event_lock, flags); - - DRM_DEBUG_DRIVER( - "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: %p,\n", - amdgpu_crtc->crtc_id, amdgpu_crtc, work); - amdgpu_flip_work_func(>flip_work.work); -} + return 0; -int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event
Re: [PATCH] drm/amdgpu: Enable chained IB MCBP support
Am 27.04.2017 um 09:18 schrieb Trigger Huang: Support for MCBP/Virtualization in combination with chained IBs is formal released on firmware feature version #46. So enable it according to firmware feature version, otherwise, world switch will hang. Signed-off-by: Trigger HuangReviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 2ff5f19..9a8c042 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -941,12 +941,6 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - /* chain ib ucode isn't formal released, just disable it by far -* TODO: when ucod ready we should use ucode version to judge if -* chain-ib support or not. 
-*/ - adev->virt.chained_ib_support = false; - adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); @@ -960,6 +954,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + /* +* Support for MCBP/Virtualization in combination with chained IBs is +* formal released on feature version #46 +*/ + if (adev->gfx.ce_feature_version >= 46 && + adev->gfx.pfp_feature_version >= 46) { + adev->virt.chained_ib_support = true; + DRM_INFO("Chained IB support enabled!\n"); + } else + adev->virt.chained_ib_support = false; + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(>gfx.rlc_fw, fw_name, adev->dev); if (err) ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 0/6 v4] *** Dedicated vmid per process v4 ***
Patch #1, #2, #4 and #6 are Reviewed-by: Christian König. Patch #3: + /* Select the first entry VMID */ + idle = list_first_entry(_mgr->ids_lru, struct amdgpu_vm_id, list); + list_del_init(>list); + vm->reserved_vmid[vmhub] = idle; + mutex_unlock(_mgr->lock); I think we should wait for the VMID to be completely idle before we can use it, but that possible also go into the handling in patch #5. Patch #5: + if ((amdgpu_vm_had_gpu_reset(adev, id)) || + (atomic64_read(>owner) != vm->client_id) || + (job->vm_pd_addr != id->pd_gpu_addr) || + (updates && (!flushed || updates->context != flushed->context || + fence_is_later(updates, flushed { You also need this check here as well: if (!id->last_flush || (id->last_flush->context != fence_context && !fence_is_signaled(id->last_flush))) + tmp = amdgpu_sync_get_fence(>active); + if (tmp) { + r = amdgpu_sync_fence(adev, sync, tmp); + fence_put(tmp); + return r; + } That won't work correctly. The first problem is that amdgpu_sync_get_fence() removes the fence from the active fences. So a another command submission from a different context won't wait for all the necessary fences. I think using amdgpu_sync_peek_fence() instead should work here. The second problem is that a context could be starved when it needs a VM flush while another context can still submit jobs without a flush. I think you could work around that by setting id->pd_gpu_addr to an invalid value when we hit this case. This way all other contexts would need to do a VM flush as well. + id->pd_gpu_addr = job->vm_pd_addr; + id->current_gpu_reset_count = atomic_read(>gpu_reset_counter); + atomic64_set(>owner, vm->client_id); + job->vm_needs_flush = needs_flush; If we need a flush id->last_flush needs to be set to NULL here as well. E.g. do fence_put(id->last_flush); id->last_flush = NULL; Regards, Christian. 
Am 27.04.2017 um 07:00 schrieb Chunming Zhou: The current kernel implementation, which grabs the idle VMID from pool when emitting the job may: The back-to-back submission from one process could use different VMID. The submission to different queues from single process could use different VMID It works well in most case but cannot work for the SQ thread trace capture. The VMID for the submission that set the {SQTT}_BASE, which refers to the address of the trace buffer, is stored in shader engine. If the profiling application have to use different VMIDs to submit IBs in its life cycle: Some trace is not captured since it actually uses different VMID to submit jobs. Some part of captured trace may come from different application since they are accidentally uses the owner’s VMID to submit jobs. V2: 1. address Christian's comments: a. drop context flags for tag process, instead, add vm ioctl. b. change order of patches. c. sync waiting only when vm flush needs. 2. address Alex's comments; bump module version V3: address Jerry and Christian's comments. and only reserve gfxhub vmid v4: address Jerry and Christian's comments. fix some race condistions. Chunming Zhou (6): drm/amdgpu: add vm ioctl drm/amdgpu: add reserved vmid field in vm struct v2 drm/amdgpu: reserve/unreserve vmid by vm ioctl v4 drm/amdgpu: add limitation for dedicated vm number v4 drm/amdgpu: implement grab reserved vmid V3 drm/amdgpu: bump module verion for reserved vmid drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 152 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 6 ++ include/uapi/drm/amdgpu_drm.h | 22 + 5 files changed, 178 insertions(+), 6 deletions(-) ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdgpu: Fix use of interruptible waiting
On 27/04/17 04:39 PM, Michel Dänzer wrote: > On 27/04/17 04:19 AM, Alex Xie wrote: >> Hi, >> >> I knew this is part of ioctl. And I intended to fix this interruptible >> waiting in an ioctl. > > It's by design, nothing that needs fixing in the case of an ioctl. > > >> 1. The wait is short. There is not much benefit by interruptible >> waiting. The BO is a frame buffer BO. Are there many competitors to >> lock this BO? If not, the wait is very short. This is my main reason to >> change. > > Irrelevant. If amdgpu_bo_reserve returns -EINTR, so will other functions > down the line, and amdgpu_crtc_page_flip_target will have to propagate > that anyway. So doing an uninterruptible wait will just delay the > inevitable and delivery of the signal to userspace. To be clear, I meant -ERESTARTSYS instead of -EINTR. >> 2. In this function and caller functions, the error handling for such >> interrupt is complicated and risky. > > If you're concerned about the cascade of cleanup functions, let's just > revert commit 0169b52ce575 ("drm/amdgpu: Refactor flip into prepare > submit and submit. (v2)"). amdgpu_crtc_prepare_flip was split out to be > used by DC, but DC is now using a DRM core atomic helper for flips and > no longer using amdgpu_crtc_prepare_flip. > > -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 5/6] drm/amdgpu: implement grab dedicated vmid V2
Am 27.04.2017 um 06:42 schrieb zhoucm1: On 2017年04月27日 10:52, Zhang, Jerry (Junwei) wrote: On 04/26/2017 07:10 PM, Chunming Zhou wrote: v2: move sync waiting only when flush needs Change-Id: I64da2701c9fdcf986afb90ba1492a78d5bef1b6c Signed-off-by: Chunming Zhou--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 61 ++ 1 file changed, 61 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 214ac50..bce7701 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -402,6 +402,63 @@ static bool amdgpu_vm_dedicated_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub) return !!vm->dedicated_vmid[vmhub]; } +static int amdgpu_vm_grab_dedicated_vmid(struct amdgpu_vm *vm, + struct amdgpu_ring *ring, + struct amdgpu_sync *sync, + struct fence *fence, + struct amdgpu_job *job) +{ +struct amdgpu_device *adev = ring->adev; +unsigned vmhub = ring->funcs->vmhub; +struct amdgpu_vm_id *id = vm->dedicated_vmid[vmhub]; +struct amdgpu_vm_id_manager *id_mgr = >vm_manager.id_mgr[vmhub]; +struct fence *updates = sync->last_vm_update; +int r = 0; +struct fence *flushed, *tmp; +bool needs_flush = false; + +mutex_lock(_mgr->lock); +if (amdgpu_vm_had_gpu_reset(adev, id)) +needs_flush = true; + +flushed = id->flushed_updates; +if (updates && (!flushed || updates->context != flushed->context || +fence_is_later(updates, flushed))) +needs_flush = true; Just question: Do we need to consider concurrent flush for Vega10 like grab id func? Christian has pointed that old asic has hardware bug. Which is fixed for Vega10, on that hardware concurrent flushing works fine. It's just that Tonga/Fiji have a real problem with that and for CIK/Polaris it's more a subtle one which is hard to trigger. Regards, Christian. 
Regards, David Zhou Jerry +if (needs_flush) { +tmp = amdgpu_sync_get_fence(>active); +if (tmp) { +r = amdgpu_sync_fence(adev, sync, tmp); +fence_put(tmp); +mutex_unlock(_mgr->lock); +return r; +} +} + +/* Good we can use this VMID. Remember this submission as +* user of the VMID. +*/ +r = amdgpu_sync_fence(ring->adev, >active, fence); +if (r) +goto out; + +if (updates && (!flushed || updates->context != flushed->context || +fence_is_later(updates, flushed))) { +fence_put(id->flushed_updates); +id->flushed_updates = fence_get(updates); +} +id->pd_gpu_addr = job->vm_pd_addr; +id->current_gpu_reset_count = atomic_read(>gpu_reset_counter); +atomic64_set(>owner, vm->client_id); +job->vm_needs_flush = needs_flush; + +job->vm_id = id - id_mgr->ids; +trace_amdgpu_vm_grab_id(vm, ring, job); +out: +mutex_unlock(_mgr->lock); +return r; +} + /** * amdgpu_vm_grab_id - allocate the next free VMID * @@ -426,6 +483,10 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, unsigned i; int r = 0; +if (amdgpu_vm_dedicated_vmid_ready(vm, vmhub)) +return amdgpu_vm_grab_dedicated_vmid(vm, ring, sync, + fence, job); + fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); if (!fences) return -ENOMEM; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdgpu: Fix use of interruptible waiting
On 27/04/17 06:05 PM, Christian König wrote: > >> 2. In this function and caller functions, the error handling for such >> interrupt is complicated and risky. When the waiting is interrupted by >> a signal, the callers of this function need to handle this interrupt >> error. I traced the calling stack all the way back to function >> drm_mode_page_flip_ioctl. I could not find an issue. But I am not sure >> about even upper layer. Is this IOCTL restartable? How about further >> higher layer? Since I didn't see benefit in point 1. So I thought it >> was a good idea to change. > > The page flip IOCTL should be restartable. I think all drm IOCTLs with > driver callbacks are restartable, but I'm not 100% sure, Michel probably > knows that better. Yes, I'm pretty sure it is. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 3/6] drm/amdgpu: reserve/unreserve vmid by vm ioctl v3
Am 27.04.2017 um 04:24 schrieb Zhang, Jerry (Junwei): On 04/27/2017 10:13 AM, zhoucm1 wrote: On 2017年04月26日 20:26, Christian König wrote: Am 26.04.2017 um 13:10 schrieb Chunming Zhou: add reserve/unreserve vmid funtions. v3: only reserve vmid from gfxhub Change-Id: I5f80dc39dc9d44660a96a2b710b0dbb4d3b9039d Signed-off-by: Chunming Zhou--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 70 +++--- 1 file changed, 57 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ec87d64..a783e8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -397,6 +397,11 @@ static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev, atomic_read(>gpu_reset_counter); } +static bool amdgpu_vm_dedicated_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub) +{ +return !!vm->dedicated_vmid[vmhub]; +} + /** * amdgpu_vm_grab_id - allocate the next free VMID * @@ -546,6 +551,47 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, return r; } +static void amdgpu_vm_free_dedicated_vmid(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub) +{ +struct amdgpu_vm_id_manager *id_mgr = >vm_manager.id_mgr[vmhub]; + +mutex_lock(_mgr->lock); +if (vm->dedicated_vmid[vmhub]) { + list_add(>dedicated_vmid[vmhub]->list, +_mgr->ids_lru); +vm->dedicated_vmid[vmhub] = NULL; +} +mutex_unlock(_mgr->lock); +} + +static int amdgpu_vm_alloc_dedicated_vmid(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub) +{ +struct amdgpu_vm_id_manager *id_mgr; +struct amdgpu_vm_id *idle; +int r; + +id_mgr = >vm_manager.id_mgr[vmhub]; +mutex_lock(_mgr->lock); +/* Select the first entry VMID */ +idle = list_first_entry(_mgr->ids_lru, struct amdgpu_vm_id, list); +list_del_init(>list); +vm->dedicated_vmid[vmhub] = idle; We need a check here if there isn't an ID already reserved for the VM. 
Additional to that I would still rename the field to reserved_vmid, that describes better what it is actually doing. the vmid is global, reserve it from global lru, so it makes sense to name 'reserved_vmid' like in patch#4. the reserved vmid is dedicated to one vm, so the field in vm is 'dedicated_vmid' like here, which is my reason to name it. Just the same thing looking at different views. IMHO, it's reserved vmid from global group, naming "reserved_vmid". And in Patch#4, it constrains the number, possibly naming "reserved_vmid_seq" or "reserved_vmid_ref". Any of them looks reasonable, select the one you like. :) Yeah, agree. More like a gut feeling that reserved is more common wording, but I don't have a strong opinion on that either. Christian. Jerry Anyway, the alternative is ok to me although I prefer mine, I hope we can deliver this solution by today, many RGP issues depend on it. Thanks, David Zhou + mutex_unlock(_mgr->lock); + +r = amdgpu_sync_wait(>active); +if (r) +goto err; This is racy. A command submission could happen while we wait for the id to become idle. The easiest way to solve this is to hold the lock while waiting for the ID to become available. 
+ +return 0; +err: +amdgpu_vm_free_dedicated_vmid(adev, vm, vmhub); +return r; +} + static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -2316,18 +2362,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_levels(>root); fence_put(vm->last_dir_update); -for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) { -struct amdgpu_vm_id_manager *id_mgr = ->vm_manager.id_mgr[i]; - -mutex_lock(_mgr->lock); -if (vm->dedicated_vmid[i]) { - list_add(>dedicated_vmid[i]->list, - _mgr->ids_lru); -vm->dedicated_vmid[i] = NULL; -} -mutex_unlock(_mgr->lock); -} +for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) +amdgpu_vm_free_dedicated_vmid(adev, vm, i); } /** @@ -2400,11 +2436,19 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) union drm_amdgpu_vm *args = data; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_fpriv *fpriv = filp->driver_priv; +int r; switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: +/* current, we only have requirement to reserve vmid from gfxhub */ +if (!amdgpu_vm_dedicated_vmid_ready(>vm, 0)) { +r = amdgpu_vm_alloc_dedicated_vmid(adev, >vm, 0); This is racy as well. Two threads could try to reserve a VMID at the same time. You need to integrate the check if it's already allocated into
Re: [PATCH] drm: Harmonize CIK ASIC support in radeon and amdgpu (v2)
On 27/04/17 05:15 AM, Felix Kuehling wrote: > > Hi Michel, > > You said in an earlier email that it doesn't have to be convenient. With > that in mind, I went back to a minimalistic solution that doesn't need > any additional Kconfig options and only uses two module parameters (one > in radeon, one in amdgpu). > > I'm attaching my WIP patches for reference (currently based on > amd-kfd-staging). For an official review I'd rebase them on > amd-staging-4.9, remove the #if-0-ed hack that didn't work out, and add > the same for SI. > > Can everyone can agree that this is good enough? Yes, something like this could be a good first step in the right direction. Some comments: The radeon options should be available regardless of CONFIG_DRM_AMDGPU_CIK/SI, or they won't be useful for amdgpu-pro / other out-of-tree amdgpu builds. The default at this point should possibly still be for CIK GPUs to be driven by radeon, even if CONFIG_DRM_AMDGPU_CIK is enabled; more definitely so for SI. Then we can flip the default and remove CONFIG_DRM_AMDGPU_CIK/SI at some point in the future, and (much) later maybe remove the CIK/SI code from radeon. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdgpu: Fix use of interruptible waiting
1. The wait is short. There is not much benefit by interruptible waiting. The BO is a frame buffer BO. Are there many competitors to lock this BO? If not, the wait is very short. This is my main reason to change. The problem is that all those waits can block the MM subsystem from reclaiming memory in an out of memory situation, e.g. when the process is terminated with a SIGKILL. That's why the usual policy used in the drivers is to wait interruptible unless you absolutely need the wait uninterrupted (e.g. in a cleanup path for example). 2. In this function and caller functions, the error handling for such interrupt is complicated and risky. When the waiting is interrupted by a signal, the callers of this function need to handle this interrupt error. I traced the calling stack all the way back to function drm_mode_page_flip_ioctl. I could not find an issue. But I am not sure about even upper layer. Is this IOCTL restartable? How about further higher layer? Since I didn't see benefit in point 1. So I thought it was a good idea to change. The page flip IOCTL should be restartable. I think all drm IOCTLs with driver callbacks are restartable, but I'm not 100% sure, Michel probably knows that better. There is even the CONFIG_DEBUG_WW_MUTEX_SLOWPATH option to throw random backoff cases into reserving the BO for testing. But I think that only applies to when multiple BOs are reserved at the same time. Might be a good idea to create something similar for all interruptible lock acquires to test if they are really restartable. That will most likely yield quite a bunch of cases where the handling isn't correct. Regards, Christian. Am 26.04.2017 um 21:19 schrieb Alex Xie: Hi, I knew this is part of ioctl. And I intended to fix this interruptible waiting in an ioctl. ioctl is one of driver interfaces, where interruptible waiting is good in some scenarios. For example, if ioctl performs IO operations in atomic way, we don't want ioctl to be interrupted by a signal. 
Interruptible waiting is good when we need to wait for longer time, for example, waiting for an input data for indefinite time. We don't want the process not responding to signals. Interruptible waitings can be good in read/write/ioctl interfaces. For this patch, 1. The wait is short. There is not much benefit by interruptible waiting. The BO is a frame buffer BO. Are there many competitors to lock this BO? If not, the wait is very short. This is my main reason to change. 2. In this function and caller functions, the error handling for such interrupt is complicated and risky. When the waiting is interrupted by a signal, the callers of this function need to handle this interrupt error. I traced the calling stack all the way back to function drm_mode_page_flip_ioctl. I could not find an issue. But I am not sure about even upper layer. Is this IOCTL restartable? How about further higher layer? Since I didn't see benefit in point 1. So I thought it was a good idea to change. Anyway, it is up to you. A change might bring other unseen risk. If you still have concern, I will drop this patch. This IOCTL is used by graphic operation. There may not be a lot of signals to a GUI process which uses this IOCTL. Thanks, Alex Bin On 17-04-26 04:34 AM, Christian König wrote: Am 26.04.2017 um 03:17 schrieb Michel Dänzer: On 26/04/17 06:25 AM, Alex Xie wrote: 1. The wait is short. There is not much benefit by interruptible waiting. 2. In this function and caller functions, the error handling for such interrupt is complicated and risky. 
Change-Id: I289674ecd3f5ef20c93fe63e33df6d668b3c2edc Signed-off-by: Alex Xie--- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 43082bf..c006cc4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -227,7 +227,7 @@ int amdgpu_crtc_prepare_flip( new_abo = gem_to_amdgpu_bo(obj); /* pin the new buffer */ -r = amdgpu_bo_reserve(new_abo, false); +r = amdgpu_bo_reserve(new_abo, true); if (unlikely(r != 0)) { DRM_ERROR("failed to reserve new abo buffer before flip\n"); goto cleanup; I'm afraid we have no choice but to use interruptible here, because this code runs as part of an ioctl (DRM_IOCTL_MODE_PAGE_FLIP). Yes, agree. But the error message is incorrect here and should be removed. Christian. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm: Harmonize CIK ASIC support in radeon and amdgpu (v2)
Am 26.04.2017 um 22:15 schrieb Felix Kuehling: On 17-04-25 02:28 AM, Michel Dänzer wrote: On 22/04/17 02:05 AM, Felix Kuehling wrote: __setup doesn't work in modules. Right. We could build something like drivers/video/fbdev/core/fb_cmdline.c:video_setup() into the kernel to handle this, but it's a bit ugly, which is one reason why I was leaning towards: s8250_options is only compiled if the driver is not a module. That doesn't prevent us from using __module_param_call directly, does it? Although, that still doesn't work as I envision if only one driver's option is set e.g. in /etc/modprobe.d/*.conf . So, I'm starting to think we need a shared module for this, which provides one or multiple module parameters to choose which driver to use for CIK/SI[0], and provides the result to the amdgpu/radeon drivers. That might make things easier for amdgpu-pro / other standalone amdgpu versions in the long run as well, as they could add files to /etc/modprobe.d/ choosing themselves by default, without having to blacklist radeon. What do you guys think? Hi Michel, You said in an earlier email that it doesn't have to be convenient. With that in mind, I went back to a minimalistic solution that doesn't need any additional Kconfig options and only uses two module parameters (one in radeon, one in amdgpu). I'm attaching my WIP patches for reference (currently based on amd-kfd-staging). For an official review I'd rebase them on amd-staging-4.9, remove the #if-0-ed hack that didn't work out, and add the same for SI. Can everyone can agree that this is good enough? It's certainly good enough for me. With the disabled dummy code removed feel free to stick a Reviewed-by: Christian Königat those patches. Regards, Christian. 
Regards, Felix [0] or possibly even more fine-grained in the future ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset
On 04/27/2017 04:25 PM, Chunming Zhou wrote: the case could happen when gpu reset: 1. when gpu reset, cs can be continue until sw queue is full, then push job will wait with holding pd reservation. 2. gpu_reset routine will also need pd reservation to restore page table from their shadow. 3. cs is waiting for gpu_reset complete, but gpu reset is waiting for cs releases reservation. Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202 Signed-off-by: Chunming Zhou--- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9edb1a4..a6722a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1076,6 +1076,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_job_free_resources(job); trace_amdgpu_cs_ioctl(job); + amdgpu_cs_parser_fini(p, 0, true); Please confirm: It will not free/release more things that may be accessed in job working process, as cs_parse_fini free so many things related to parser. Or we can just release the pd reservation here? Jerry amd_sched_entity_push_job(>base); return 0; @@ -1130,6 +1131,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_submit(, cs); + return r; out: amdgpu_cs_parser_fini(, r, reserved_buffers); return r; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] drm/amdgpu: export more gpu info for gfx9
v2: 64-bit aligned for gpu info Signed-off-by: Ken WangSigned-off-by: Junwei Zhang Reviewed-by: Alex Deucher Reviewed-by: Qiang Yu --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 11 +++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +++ include/uapi/drm/amdgpu_drm.h | 19 +++ 4 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4a16e3c..503010a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -969,6 +969,9 @@ struct amdgpu_gfx_config { unsigned mc_arb_ramcfg; unsigned gb_addr_config; unsigned num_rbs; + unsigned gs_vgt_table_depth; + unsigned gs_prim_buffer_depth; + unsigned max_gs_waves_per_vgt; uint32_t tile_mode_array[32]; uint32_t macrotile_mode_array[16]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 1006d7c..42e5993 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -546,10 +546,21 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (amdgpu_ngg) { dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[PRIM].gpu_addr; + dev_info.prim_buf_size = adev->gfx.ngg.buf[PRIM].size; dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[POS].gpu_addr; + dev_info.pos_buf_size = adev->gfx.ngg.buf[POS].size; dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[CNTL].gpu_addr; + dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[CNTL].size; dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[PARAM].gpu_addr; + dev_info.param_buf_size = adev->gfx.ngg.buf[PARAM].size; } + dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size; + dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs; + dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh; + dev_info.num_tcc_blocks = adev->gfx.config.max_texture_channel_caches; + dev_info.gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth; + 
dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth; + dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_waves_per_vgt; return copy_to_user(out, _info, min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 13da795..5249bc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -785,6 +785,9 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.sc_prim_fifo_size_backend = 0x100; adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + adev->gfx.config.gs_vgt_table_depth = 32; + adev->gfx.config.gs_prim_buffer_depth = 1792; + adev->gfx.config.max_gs_waves_per_vgt = 32; gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; break; default: diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index d76d525..086ab10 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -769,6 +769,25 @@ struct drm_amdgpu_info_device { __u64 cntl_sb_buf_gpu_addr; /* NGG Parameter Cache */ __u64 param_buf_gpu_addr; + __u32 prim_buf_size; + __u32 pos_buf_size; + __u32 cntl_sb_buf_size; + __u32 param_buf_size; + /* wavefront size*/ + __u32 wave_front_size; + /* shader visible vgprs*/ + __u32 num_shader_visible_vgprs; + /* CU per shader array*/ + __u32 num_cu_per_sh; + /* number of tcc blocks*/ + __u32 num_tcc_blocks; + /* gs vgt table depth*/ + __u32 gs_vgt_table_depth; + /* gs primitive buffer depth*/ + __u32 gs_prim_buffer_depth; + /* max gs wavefront per vgt*/ + __u32 max_gs_waves_per_vgt; + __u32 _pad1; }; struct drm_amdgpu_info_hw_ip { -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] drm/amdgpu: bump version for exporting gpu info for gfx9
Signed-off-by: Junwei Zhang--- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ead00d7..857c3be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -64,9 +64,10 @@ * - 3.12.0 - Add query for double offchip LDS buffers * - 3.13.0 - Add PRT support * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality + * - 3.15.0 - Export more gpu info for gfx9 */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 14 +#define KMS_DRIVER_MINOR 15 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] Revert "drm/amdgpu: Refactor flip into prepare submit and submit. (v2)"
From: Michel DänzerThis reverts commit cb341a319f7e66f879d69af929c3dadfc1a8f31e. The purpose of the refactor was for amdgpu_crtc_prepare/submit_flip to be used by the DC code, but that's no longer the case. Signed-off-by: Michel Dänzer --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 147 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h| 17 2 files changed, 29 insertions(+), 135 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 7cf226dfb602..0415875656c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -138,56 +138,10 @@ static void amdgpu_unpin_work_func(struct work_struct *__work) kfree(work); } - -static void amdgpu_flip_work_cleanup(struct amdgpu_flip_work *work) -{ - int i; - - amdgpu_bo_unref(>old_abo); - fence_put(work->excl); - for (i = 0; i < work->shared_count; ++i) - fence_put(work->shared[i]); - kfree(work->shared); - kfree(work); -} - -static void amdgpu_flip_cleanup_unreserve( - struct amdgpu_flip_work *work, - struct amdgpu_bo *new_abo) -{ - amdgpu_bo_unreserve(new_abo); - amdgpu_flip_work_cleanup(work); -} - -static void amdgpu_flip_cleanup_unpin( - struct amdgpu_flip_work *work, - struct amdgpu_bo *new_abo) -{ - if (unlikely(amdgpu_bo_unpin(new_abo) != 0)) - DRM_ERROR("failed to unpin new abo in error path\n"); - amdgpu_flip_cleanup_unreserve(work, new_abo); -} - -void amdgpu_crtc_cleanup_flip_ctx( - struct amdgpu_flip_work *work, - struct amdgpu_bo *new_abo) -{ - if (unlikely(amdgpu_bo_reserve(new_abo, false) != 0)) { - DRM_ERROR("failed to reserve new abo in error path\n"); - amdgpu_flip_work_cleanup(work); - return; - } - amdgpu_flip_cleanup_unpin(work, new_abo); -} - -int amdgpu_crtc_prepare_flip( - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t page_flip_flags, - uint32_t target, - struct amdgpu_flip_work **work_p, - struct amdgpu_bo **new_abo_p) +int 
amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, +struct drm_framebuffer *fb, +struct drm_pending_vblank_event *event, +uint32_t page_flip_flags, uint32_t target) { struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; @@ -200,7 +154,7 @@ int amdgpu_crtc_prepare_flip( unsigned long flags; u64 tiling_flags; u64 base; - int r; + int i, r; work = kzalloc(sizeof *work, GFP_KERNEL); if (work == NULL) @@ -261,84 +215,41 @@ int amdgpu_crtc_prepare_flip( spin_unlock_irqrestore(>dev->event_lock, flags); r = -EBUSY; goto pflip_cleanup; - } - spin_unlock_irqrestore(>dev->event_lock, flags); - - *work_p = work; - *new_abo_p = new_abo; - return 0; - -pflip_cleanup: - amdgpu_crtc_cleanup_flip_ctx(work, new_abo); - return r; - -unpin: - amdgpu_flip_cleanup_unpin(work, new_abo); - return r; - -unreserve: - amdgpu_flip_cleanup_unreserve(work, new_abo); - return r; - -cleanup: - amdgpu_flip_work_cleanup(work); - return r; - -} - -void amdgpu_crtc_submit_flip( - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct amdgpu_flip_work *work, - struct amdgpu_bo *new_abo) -{ - unsigned long flags; - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - - spin_lock_irqsave(>dev->event_lock, flags); amdgpu_crtc->pflip_status = AMDGPU_FLIP_PENDING; amdgpu_crtc->pflip_works = work; + + DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: %p,\n", +amdgpu_crtc->crtc_id, amdgpu_crtc, work); /* update crtc fb */ crtc->primary->fb = fb; spin_unlock_irqrestore(>dev->event_lock, flags); - - DRM_DEBUG_DRIVER( - "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: %p,\n", - amdgpu_crtc->crtc_id, amdgpu_crtc, work); - amdgpu_flip_work_func(>flip_work.work); -} + return 0; -int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t
Re: [PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset
pls ignore this one, will send another new one. On 2017年04月27日 15:20, Chunming Zhou wrote: the case could happen when gpu reset: 1. when gpu reset, cs can be continue until sw queue is full, then push job will wait with holding pd reservation. 2. gpu_reset routine will also need pd reservation to restore page table from their shadow. 3. cs is waiting for gpu_reset complete, but gpu reset is waiting for cs releases reservation. Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202 Signed-off-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9edb1a4..8c6d036 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1076,7 +1076,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_job_free_resources(job); trace_amdgpu_cs_ioctl(job); - amd_sched_entity_push_job(&job->base); return 0; } @@ -1132,6 +1131,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); + if (!r) + amd_sched_entity_push_job(&parser.job->base); return r; } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 0/6 v4] *** Dedicated vmid per process v4 ***
It looks fine for me now. Reviewed-by: Junwei ZhangJerry On 04/27/2017 01:00 PM, Chunming Zhou wrote: The current kernel implementation, which grabs the idle VMID from pool when emitting the job may: The back-to-back submission from one process could use different VMID. The submission to different queues from single process could use different VMID It works well in most case but cannot work for the SQ thread trace capture. The VMID for the submission that set the {SQTT}_BASE, which refers to the address of the trace buffer, is stored in shader engine. If the profiling application have to use different VMIDs to submit IBs in its life cycle: Some trace is not captured since it actually uses different VMID to submit jobs. Some part of captured trace may come from different application since they are accidentally uses the owner’s VMID to submit jobs. V2: 1. address Christian's comments: a. drop context flags for tag process, instead, add vm ioctl. b. change order of patches. c. sync waiting only when vm flush needs. 2. address Alex's comments; bump module version V3: address Jerry and Christian's comments. and only reserve gfxhub vmid v4: address Jerry and Christian's comments. fix some race condistions. Chunming Zhou (6): drm/amdgpu: add vm ioctl drm/amdgpu: add reserved vmid field in vm struct v2 drm/amdgpu: reserve/unreserve vmid by vm ioctl v4 drm/amdgpu: add limitation for dedicated vm number v4 drm/amdgpu: implement grab reserved vmid V3 drm/amdgpu: bump module verion for reserved vmid drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 152 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 6 ++ include/uapi/drm/amdgpu_drm.h | 22 + 5 files changed, 178 insertions(+), 6 deletions(-) ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 5/6] drm/amdgpu: implement grab reserved vmid V3
On 04/27/2017 01:54 PM, Zhang, Jerry (Junwei) wrote: On 04/27/2017 01:00 PM, Chunming Zhou wrote: v2: move sync waiting only when flush needs v3: fix racy Change-Id: I64da2701c9fdcf986afb90ba1492a78d5bef1b6c Signed-off-by: Chunming Zhou--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 72 -- 1 file changed, 68 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e6fdfa4..7752279 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -397,6 +397,65 @@ static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev, atomic_read(>gpu_reset_counter); } +static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub) +{ +return !!vm->reserved_vmid[vmhub]; +} It may be better to populate it into alloc reserved_vmid func as well, getting easy maintenance in the future. + +/* idr_mgr->lock must be held */ +static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm, + struct amdgpu_ring *ring, + struct amdgpu_sync *sync, + struct fence *fence, + struct amdgpu_job *job) +{ +struct amdgpu_device *adev = ring->adev; +unsigned vmhub = ring->funcs->vmhub; +struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub]; +struct amdgpu_vm_id_manager *id_mgr = >vm_manager.id_mgr[vmhub]; +struct fence *updates = sync->last_vm_update; +int r = 0; +struct fence *flushed, *tmp; +bool needs_flush = false; + +flushed = id->flushed_updates; +if ((amdgpu_vm_had_gpu_reset(adev, id)) || +(atomic64_read(>owner) != vm->client_id) || +(job->vm_pd_addr != id->pd_gpu_addr) || +(updates && (!flushed || updates->context != flushed->context || +fence_is_later(updates, flushed { +needs_flush = true; +tmp = amdgpu_sync_get_fence(>active); +if (tmp) { +r = amdgpu_sync_fence(adev, sync, tmp); +fence_put(tmp); +return r; Sorry, I didn't catch up this. Could you give me some tips? (in this case, we no need to update id info as below?) Got that offline to wait for id idle. 
That will always use the reserved id when it's idle. Anyway, it should work. Jerry +} +} + +/* Good we can use this VMID. Remember this submission as +* user of the VMID. +*/ +r = amdgpu_sync_fence(ring->adev, >active, fence); +if (r) +goto out; + +if (updates && (!flushed || updates->context != flushed->context || +fence_is_later(updates, flushed))) { +fence_put(id->flushed_updates); +id->flushed_updates = fence_get(updates); +} +id->pd_gpu_addr = job->vm_pd_addr; +id->current_gpu_reset_count = atomic_read(>gpu_reset_counter); +atomic64_set(>owner, vm->client_id); +job->vm_needs_flush = needs_flush; + +job->vm_id = id - id_mgr->ids; +trace_amdgpu_vm_grab_id(vm, ring, job); +out: +return r; +} + /** * amdgpu_vm_grab_id - allocate the next free VMID * @@ -421,12 +480,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, unsigned i; int r = 0; +mutex_lock(_mgr->lock); +if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) { +r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job); +mutex_unlock(_mgr->lock); +return r; +} fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); -if (!fences) +if (!fences) { +mutex_unlock(_mgr->lock); return -ENOMEM; - -mutex_lock(_mgr->lock); - +} /* Check if we have an idle VMID */ i = 0; list_for_each_entry(idle, _mgr->ids_lru, list) { ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] drm/amdgpu: Enable chained IB MCBP support
Reviewed-by: Xiangliang YuThanks! Xiangliang Yu > -Original Message- > From: Trigger Huang [mailto:trigger.hu...@amd.com] > Sent: Thursday, April 27, 2017 3:18 PM > To: amd-gfx@lists.freedesktop.org > Cc: Liu, Monk ; Yu, Xiangliang > ; Huang, Trigger > Subject: [PATCH] drm/amdgpu: Enable chained IB MCBP support > > Support for MCBP/Virtualization in combination with chained IBs is formal > released on firmware feature version #46. So enable it according to firmware > feature version, otherwise, world switch will hang. > > Signed-off-by: Trigger Huang > --- > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 17 +++-- > 1 file changed, 11 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index 2ff5f19..9a8c042 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -941,12 +941,6 @@ static int gfx_v8_0_init_microcode(struct > amdgpu_device *adev) > cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev- > >gfx.me_fw->data; > adev->gfx.me_fw_version = le32_to_cpu(cp_hdr- > >header.ucode_version); > > - /* chain ib ucode isn't formal released, just disable it by far > - * TODO: when ucod ready we should use ucode version to judge if > - * chain-ib support or not. 
> - */ > - adev->virt.chained_ib_support = false; > - > adev->gfx.me_feature_version = le32_to_cpu(cp_hdr- > >ucode_feature_version); > > snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", > chip_name); @@ -960,6 +954,17 @@ static int > gfx_v8_0_init_microcode(struct amdgpu_device *adev) > adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr- > >header.ucode_version); > adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr- > >ucode_feature_version); > > + /* > + * Support for MCBP/Virtualization in combination with chained IBs is > + * formal released on feature version #46 > + */ > + if (adev->gfx.ce_feature_version >= 46 && > + adev->gfx.pfp_feature_version >= 46) { > + adev->virt.chained_ib_support = true; > + DRM_INFO("Chained IB support enabled!\n"); > + } else > + adev->virt.chained_ib_support = false; > + > snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", > chip_name); > err = request_firmware(>gfx.rlc_fw, fw_name, adev->dev); > if (err) > -- > 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdgpu: Fix use of interruptible waiting
On 27/04/17 04:19 AM, Alex Xie wrote: > Hi, > > I knew this is part of ioctl. And I intended to fix this interruptible > waiting in an ioctl. It's by design, nothing that needs fixing in the case of an ioctl. > 1. The wait is short. There is not much benefit by interruptible > waiting. The BO is a frame buffer BO. Are there many competitors to > lock this BO? If not, the wait is very short. This is my main reason to > change. Irrelevant. If amdgpu_bo_reserve returns -EINTR, so will other functions down the line, and amdgpu_crtc_page_flip_target will have to propagate that anyway. So doing an uninterruptible wait will just delay the inevitable and delivery of the signal to userspace. > 2. In this function and caller functions, the error handling for such > interrupt is complicated and risky. If you're concerned about the cascade of cleanup functions, let's just revert commit 0169b52ce575 ("drm/amdgpu: Refactor flip into prepare submit and submit. (v2)"). amdgpu_crtc_prepare_flip was split out to be used by DC, but DC is now using a DRM core atomic helper for flips and no longer using amdgpu_crtc_prepare_flip. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: Enable chained IB MCBP support
Support for MCBP/Virtualization in combination with chained IBs is formal released on firmware feature version #46. So enable it according to firmware feature version, otherwise, world switch will hang. Signed-off-by: Trigger Huang --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 2ff5f19..9a8c042 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -941,12 +941,6 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - /* chain ib ucode isn't formal released, just disable it by far -* TODO: when ucod ready we should use ucode version to judge if -* chain-ib support or not. -*/ - adev->virt.chained_ib_support = false; - adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); @@ -960,6 +954,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + /* +* Support for MCBP/Virtualization in combination with chained IBs is +* formal released on feature version #46 +*/ + if (adev->gfx.ce_feature_version >= 46 && + adev->gfx.pfp_feature_version >= 46) { + adev->virt.chained_ib_support = true; + DRM_INFO("Chained IB support enabled!\n"); + } else + adev->virt.chained_ib_support = false; + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: fix deadlock of reservation between cs and gpu reset
the case could happen when gpu reset: 1. when gpu reset, cs can be continue until sw queue is full, then push job will wait with holding pd reservation. 2. gpu_reset routine will also need pd reservation to restore page table from their shadow. 3. cs is waiting for gpu_reset complete, but gpu reset is waiting for cs releases reservation. Change-Id: I0f66d04b2bef3433035109623c8a5c5992c84202 Signed-off-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9edb1a4..8c6d036 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1076,7 +1076,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_job_free_resources(job); trace_amdgpu_cs_ioctl(job); - amd_sched_entity_push_job(&job->base); return 0; } @@ -1132,6 +1131,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); + if (!r) + amd_sched_entity_push_job(&parser.job->base); return r; } -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] drm/amdgpu: Enable chained IB MCBP support
Reviewed-by: Monk Liu-Original Message- From: Trigger Huang [mailto:trigger.hu...@amd.com] Sent: Thursday, April 27, 2017 3:18 PM To: amd-gfx@lists.freedesktop.org Cc: Liu, Monk ; Yu, Xiangliang ; Huang, Trigger Subject: [PATCH] drm/amdgpu: Enable chained IB MCBP support Support for MCBP/Virtualization in combination with chained IBs is formal released on firmware feature version #46. So enable it according to firmware feature version, otherwise, world switch will hang. Signed-off-by: Trigger Huang --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 2ff5f19..9a8c042 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -941,12 +941,6 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - /* chain ib ucode isn't formal released, just disable it by far -* TODO: when ucod ready we should use ucode version to judge if -* chain-ib support or not. 
-*/ - adev->virt.chained_ib_support = false; - adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); @@ -960,6 +954,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + /* +* Support for MCBP/Virtualization in combination with chained IBs is +* formal released on feature version #46 +*/ + if (adev->gfx.ce_feature_version >= 46 && + adev->gfx.pfp_feature_version >= 46) { + adev->virt.chained_ib_support = true; + DRM_INFO("Chained IB support enabled!\n"); + } else + adev->virt.chained_ib_support = false; + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(>gfx.rlc_fw, fw_name, adev->dev); if (err) -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 3/6] drm: fourcc byteorder: add bigendian support to drm_mode_legacy_fb_format
On 27/04/17 03:45 PM, Gerd Hoffmann wrote: > Hi, > >>> That is done using the RADEON_TILING_SWAP_{16,32}BIT flag mentioned in >>> another thread? >> >> Right. >> >> >>> What about dumb bos? You've mentioned the swap flag isn't used for >>> those. Which implies they are in little endian byte order (both gpu and >>> cpu view). >> >> Right, AFAICT from looking at the code. > > Ok. > > And I also don't see an easy way to make them big endian (cpu view) > using swapping with the existing drm interfaces, given we apply a format > when we put the bo into use as framebuffer, not when creating it. So > userspace can: (1) create dumb bo, (2) map bo, (3) write something bo, > (4) create fb + attach to crtc. And at (3) we don't know the format > yet, so we can't configure swapping accordingly. > > So just not using the swapping indeed looks like the only sensible > option. Which in turn implies there is no BGRA support for dumb > bos. Hmm, I can see the problem. Userspace expectation appears to be > that ADDFB configures a native endian framebuffer, which the driver > simply can't do on bigendian. ... with pre-R600 GPUs. > So, what can/should the driver do here? Throw errors for ADDFB and > force userspace to use ADDFB2? From a design point of view the best > option, but in the other hand I suspect that could break the xorg radeon > driver ... Yes, it would. One thing we could do is provide a way for userspace to query the effective format(s) as seen by the GPU and/or CPU. It might also make sense for the radeon driver to set the RADEON_TILING_SWAP_{16,32}BIT flags for dumb BOs. I wonder about the status of apps using dumb BOs directly wrt this discussion. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 3/6] drm: fourcc byteorder: add bigendian support to drm_mode_legacy_fb_format
Hi, > > That is done using the RADEON_TILING_SWAP_{16,32}BIT flag mentioned in > > another thread? > > Right. > > > > What about dumb bos? You've mentioned the swap flag isn't used for > > those. Which implies they are in little endian byte order (both gpu and > > cpu view). > > Right, AFAICT from looking at the code. Ok. And I also don't see an easy way to make them big endian (cpu view) using swapping with the existing drm interfaces, given we apply a format when we put the bo into use as framebuffer, not when creating it. So userspace can: (1) create dumb bo, (2) map bo, (3) write something bo, (4) create fb + attach to crtc. And at (3) we don't know the format yet, so we can't configure swapping accordingly. So just not using the swapping indeed looks like the only sensible option. Which in turn implies there is no BGRA support for dumb bos. Hmm, I can see the problem. Userspace expectation appears to be that ADDFB configures a native endian framebuffer, which the driver simply can't do on bigendian. So, what can/should the driver do here? Throw errors for ADDFB and force userspace to use ADDFB2? From a design point of view the best option, but in the other hand I suspect that could break the xorg radeon driver ... cheers, Gerd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx