Re: [PATCH] drm/amdgpu: fix the null pointer to get timeline by scheduler fence
Any updates on this issue? Regards, Andres On 2018-08-08 03:10 AM, Christian König wrote: Yeah that is a known issue, but this solution is not correct either. See the scheduler where the job is execute on is simply not determined yet when we want to trace it. So using the scheduler name from the entity is wrong as well. We should probably move the reschedule from drm_sched_entity_push_job() to drm_sched_job_init() to fix that. I will prepare a patch for that today, Christian. Am 08.08.2018 um 09:05 schrieb Huang Rui: We won't initialize fence scheduler in drm_sched_fence_create() anymore, so it will refer null fence scheduler if open trace event to get the timeline name. Actually, it is the scheduler name from the entity, so add a macro to replace legacy getting timeline name by job. [ 212.844281] BUG: unable to handle kernel NULL pointer dereference at 0018 [ 212.852401] PGD 800427c13067 P4D 800427c13067 PUD 4235fc067 PMD 0 [ 212.859419] Oops: [#1] SMP PTI [ 212.862981] CPU: 4 PID: 1520 Comm: amdgpu_test Tainted: G OE 4.18.0-rc1-custom #1 [ 212.872194] Hardware name: Gigabyte Technology Co., Ltd. 
Z170XP-SLI/Z170XP-SLI-CF, BIOS F20 11/04/2016 [ 212.881704] RIP: 0010:drm_sched_fence_get_timeline_name+0x2b/0x30 [gpu_sched] [ 212.888948] Code: 1f 44 00 00 48 8b 47 08 48 3d c0 b1 4f c0 74 13 48 83 ef 60 48 3d 60 b1 4f c0 b8 00 00 00 00 48 0f 45 f8 48 8b 87 e0 00 00 00 <48> 8b 40 18 c3 0f 1f 44 00 00 b8 01 00 00 00 c3 0f 1f 44 00 00 0f [ 212.908162] RSP: 0018:a3ed81f27af0 EFLAGS: 00010246 [ 212.913483] RAX: RBX: 00070034 RCX: a3ed81f27da8 [ 212.920735] RDX: 8f24ebfb5460 RSI: 8f24e40d3c00 RDI: 8f24ebfb5400 [ 212.928008] RBP: 8f24e40d3c00 R08: R09: ae4deafc [ 212.935263] R10: ada000ed R11: 0001 R12: 8f24e891f898 [ 212.942558] R13: R14: 8f24ebc46000 R15: 8f24e3de97a8 [ 212.949796] FS: 77fd2700() GS:8f24fed0() knlGS: [ 212.958047] CS: 0010 DS: ES: CR0: 80050033 [ 212.963921] CR2: 0018 CR3: 000423422003 CR4: 003606e0 [ 212.971201] DR0: DR1: DR2: [ 212.978482] DR3: DR6: fffe0ff0 DR7: 0400 [ 212.985720] Call Trace: [ 212.988236] trace_event_raw_event_amdgpu_cs_ioctl+0x4c/0x170 [amdgpu] [ 212.994904] ? amdgpu_ctx_add_fence+0xa9/0x110 [amdgpu] [ 213.000246] ? amdgpu_job_free_resources+0x4b/0x70 [amdgpu] [ 213.005944] amdgpu_cs_ioctl+0x16d1/0x1b50 [amdgpu] [ 213.010920] ? amdgpu_cs_find_mapping+0xf0/0xf0 [amdgpu] [ 213.016354] drm_ioctl_kernel+0x8a/0xd0 [drm] [ 213.020794] ? recalc_sigpending+0x17/0x50 [ 213.024965] drm_ioctl+0x2d7/0x390 [drm] [ 213.028979] ? amdgpu_cs_find_mapping+0xf0/0xf0 [amdgpu] [ 213.034366] ? do_signal+0x36/0x700 [ 213.037928] ? 
signal_wake_up_state+0x15/0x30 [ 213.042375] amdgpu_drm_ioctl+0x46/0x80 [amdgpu] Signed-off-by: Huang Rui --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 10 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e12871d..be01e1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1247,7 +1247,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_job_free_resources(job); - trace_amdgpu_cs_ioctl(job); + trace_amdgpu_cs_ioctl(job, entity); amdgpu_vm_bo_trace_cs(>vm, >ticket); priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 8c2dab2..25cdcb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -36,6 +36,8 @@ #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \ job->base.s_fence->finished.ops->get_timeline_name(>base.s_fence->finished) +#define AMDGPU_GET_SCHED_NAME(entity) \ + (entity->rq->sched->name) TRACE_EVENT(amdgpu_mm_rreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), @@ -161,11 +163,11 @@ TRACE_EVENT(amdgpu_cs, ); TRACE_EVENT(amdgpu_cs_ioctl, - TP_PROTO(struct amdgpu_job *job), - TP_ARGS(job), + TP_PROTO(struct amdgpu_job *job, struct drm_sched_entity *entity), + TP_ARGS(job, entity), TP_STRUCT__entry( __field(uint64_t, sched_job_id) - __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) + __string(timeline, AMDGPU_GET_SCHED_NAME(entity)) __field(unsigned int, context) __field(unsigned int, seqno) __field(struct dma_fence *, fence) @@ -175,7
Re: [PATCH] drm/amdgpu: fix job priority handling
On 2018-07-18 02:14 PM, Christian König wrote: The job might already be released at this point. Signed-off-by: Christian König Reviewed-by: Andres Rodriguez --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 911c4a12a163..7c5cc33d0cda 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1209,6 +1209,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, { struct amdgpu_ring *ring = p->ring; struct drm_sched_entity *entity = >ctx->rings[ring->idx].entity; + enum drm_sched_priority priority; struct amdgpu_job *job; unsigned i; uint64_t seq; @@ -1258,10 +1259,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_job_free_resources(job); trace_amdgpu_cs_ioctl(job); + priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); ring = to_amdgpu_ring(entity->sched); - amdgpu_ring_priority_get(ring, job->base.s_priority); + amdgpu_ring_priority_get(ring, priority); ttm_eu_fence_buffer_objects(>ticket, >validated, p->fence); amdgpu_mn_unlock(p->mn); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 8b679c85d213..5a2c26a85984 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -126,6 +126,7 @@ void amdgpu_job_free(struct amdgpu_job *job) int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, void *owner, struct dma_fence **f) { + enum drm_sched_priority priority; struct amdgpu_ring *ring; int r; @@ -139,10 +140,11 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, job->owner = owner; *f = dma_fence_get(>base.s_fence->finished); amdgpu_job_free_resources(job); + priority = job->base.s_priority; drm_sched_entity_push_job(>base, entity); ring = 
to_amdgpu_ring(entity->sched); - amdgpu_ring_priority_get(ring, job->base.s_priority); + amdgpu_ring_priority_get(ring, priority); return 0; } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: cleanup firmware requests v2
Ping. On 2018-04-17 06:12 PM, Andres Rodriguez wrote: Add a new function amdgpu_ucode_request_firmware() that encapsulates a lot of the common behaviour we have around firmware requests. This is the first step in my quest to get rid of the following annoying messages when my polaris10 boots up: [0.558537] amdgpu :01:00.0: Direct firmware load for amdgpu/polaris10_pfp_2.bin failed with error -2 [0.558551] amdgpu :01:00.0: Direct firmware load for amdgpu/polaris10_me_2.bin failed with error -2 [0.558562] amdgpu :01:00.0: Direct firmware load for amdgpu/polaris10_ce_2.bin failed with error -2 [0.558580] amdgpu :01:00.0: Direct firmware load for amdgpu/polaris10_mec_2.bin failed with error -2 [0.558619] amdgpu :01:00.0: Direct firmware load for amdgpu/polaris10_mec2_2.bin failed with error -2 v2: make amdgpu_ucode_validate file scope only add kernel-doc for amdgpu_ucode_request_firmware() Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- Sorry for the quick V2, noticed some docs might help and that _validate() could be reduced in scope. Pasting my old message again just in case. Hey Christian, Wanted to go through a cleanup of the ucode loading in amdgpu to facilitate some of our heated discussions :) For now, functionality should remain the same. Once _nowarn() lands we can change amdgpu_ucode_request_firmware() with either: Alternative A: - err = request_firmware(_fw, name, adev->dev); + err = request_firmware_nowarn(_fw, name, adev->dev); Alternative B: - err = request_firmware(_fw, name, adev->dev); + if (optional) + err = request_firmware_nowarn(_fw, name, adev->dev); + else + err = request_firmware(_fw, name, adev->dev); I prefer A, but I'm not opposed to B. I'll leave it up to you. 
Regards, Andres drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c| 14 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 + drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 74 --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c| 16 + drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c| 16 + drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c| 16 + drivers/gpu/drm/amd/amdgpu/ci_dpm.c| 15 +--- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 5 +- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 19 ++--- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 30 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 112 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 39 +++--- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 17 + drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 14 +--- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 14 +--- drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 18 + drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 15 +--- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 6 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 7 +- drivers/gpu/drm/amd/amdgpu/si_dpm.c| 16 + 22 files changed, 164 insertions(+), 325 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index a8a942c60ea2..347ab1710523 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -402,19 +402,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, return -EINVAL; } - err = request_firmware(>pm.fw, fw_name, adev->dev); - if (err) { - DRM_ERROR("Failed to request firmware\n"); + err = amdgpu_ucode_request_firmware(adev, >pm.fw, fw_name, false); + if (err) return err; - } - - err = amdgpu_ucode_validate(adev->pm.fw); - if (err) { - DRM_ERROR("Failed to load firmware \"%s\"", fw_name); - release_firmware(adev->pm.fw); - adev->pm.fw = NULL; - return err; - } if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { ucode = >firmware.ucode[AMDGPU_UCODE_ID_SMC]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index abc33464959e..967e14f14abc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1355,20 +1355,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); -
[PATCH] drm/amdgpu: cleanup firmware requests v2
Add a new function amdgpu_ucode_request_firmware() that encapsulates a lot of the common behaviour we have around firmware requests. This is the first step in my quest to get rid of the following annoying messages when my polaris10 boots up: [0.558537] amdgpu :01:00.0: Direct firmware load for amdgpu/polaris10_pfp_2.bin failed with error -2 [0.558551] amdgpu :01:00.0: Direct firmware load for amdgpu/polaris10_me_2.bin failed with error -2 [0.558562] amdgpu :01:00.0: Direct firmware load for amdgpu/polaris10_ce_2.bin failed with error -2 [0.558580] amdgpu :01:00.0: Direct firmware load for amdgpu/polaris10_mec_2.bin failed with error -2 [0.558619] amdgpu :01:00.0: Direct firmware load for amdgpu/polaris10_mec2_2.bin failed with error -2 v2: make amdgpu_ucode_validate file scope only add kernel-doc for amdgpu_ucode_request_firmware() Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- Sorry for the quick V2, noticed some docs might help and that _validate() could be reduced in scope. Pasting my old message again just in case. Hey Christian, Wanted to go through a cleanup of the ucode loading in amdgpu to facilitate some of our heated discussions :) For now, functionality should remain the same. Once _nowarn() lands we can change amdgpu_ucode_request_firmware() with either: Alternative A: - err = request_firmware(_fw, name, adev->dev); + err = request_firmware_nowarn(_fw, name, adev->dev); Alternative B: - err = request_firmware(_fw, name, adev->dev); + if (optional) + err = request_firmware_nowarn(_fw, name, adev->dev); + else + err = request_firmware(_fw, name, adev->dev); I prefer A, but I'm not opposed to B. I'll leave it up to you. 
Regards, Andres drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c| 14 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 + drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 74 --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c| 16 + drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c| 16 + drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c| 16 + drivers/gpu/drm/amd/amdgpu/ci_dpm.c| 15 +--- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 5 +- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 19 ++--- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 30 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 112 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 39 +++--- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 17 + drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 14 +--- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 14 +--- drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 18 + drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 15 +--- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 6 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 7 +- drivers/gpu/drm/amd/amdgpu/si_dpm.c| 16 + 22 files changed, 164 insertions(+), 325 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index a8a942c60ea2..347ab1710523 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -402,19 +402,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, return -EINVAL; } - err = request_firmware(>pm.fw, fw_name, adev->dev); - if (err) { - DRM_ERROR("Failed to request firmware\n"); + err = amdgpu_ucode_request_firmware(adev, >pm.fw, fw_name, false); + if (err) return err; - } - - err = amdgpu_ucode_validate(adev->pm.fw); - if (err) { - DRM_ERROR("Failed to load firmware \"%s\"", fw_name); - release_firmware(adev->pm.fw); - adev->pm.fw = NULL; - return err; - } if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { ucode = >firmware.ucode[AMDGPU_UCODE_ID_SMC]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index abc33464959e..967e14f14abc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1355,20 +1355,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); - err = request_firmware(>firmwar
[PATCH] drm/amdgpu: cleanup firmware requests
Add a new function amdgpu_ucode_request_firmware() that encapsulates a lot of the common behaviour we have around firmware requests. This is the first step in my quest to get rid of the following annoying messages when my polaris10 boots up: [0.558537] amdgpu :01:00.0: Direct firmware load for amdgpu/polaris10_pfp_2.bin failed with error -2 [0.558551] amdgpu :01:00.0: Direct firmware load for amdgpu/polaris10_me_2.bin failed with error -2 [0.558562] amdgpu :01:00.0: Direct firmware load for amdgpu/polaris10_ce_2.bin failed with error -2 [0.558580] amdgpu :01:00.0: Direct firmware load for amdgpu/polaris10_mec_2.bin failed with error -2 [0.558619] amdgpu :01:00.0: Direct firmware load for amdgpu/polaris10_mec2_2.bin failed with error -2 Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- Hey Christian, Wanted to go through a cleanup of the ucode loading in amdgpu to facilitate some of our heated discussions :) For now, functionality should remain the same. Once _nowarn() lands we can change amdgpu_ucode_request_firmware() with either: Alternative A: - err = request_firmware(_fw, name, adev->dev); + err = request_firmware_nowarn(_fw, name, adev->dev); Alternative B: - err = request_firmware(_fw, name, adev->dev); + if (optional) + err = request_firmware_nowarn(_fw, name, adev->dev); + else + err = request_firmware(_fw, name, adev->dev); I prefer A, but I'm not opposed to B. I'll leave it up to you. 
Regards, Andres drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c| 14 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 + drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 39 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c| 16 + drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c| 16 + drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c| 16 + drivers/gpu/drm/amd/amdgpu/ci_dpm.c| 15 +--- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 5 +- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 19 ++--- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 30 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 112 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 39 +++--- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 17 + drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 14 +--- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 14 +--- drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 18 + drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 15 +--- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 6 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 7 +- drivers/gpu/drm/amd/amdgpu/si_dpm.c| 16 + 22 files changed, 139 insertions(+), 314 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index a8a942c60ea2..347ab1710523 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -402,19 +402,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, return -EINVAL; } - err = request_firmware(>pm.fw, fw_name, adev->dev); - if (err) { - DRM_ERROR("Failed to request firmware\n"); + err = amdgpu_ucode_request_firmware(adev, >pm.fw, fw_name, false); + if (err) return err; - } - - err = amdgpu_ucode_validate(adev->pm.fw); - if (err) { - DRM_ERROR("Failed to load firmware \"%s\"", fw_name); - release_firmware(adev->pm.fw); - adev->pm.fw = NULL; - return err; - } if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { ucode = >firmware.ucode[AMDGPU_UCODE_ID_SMC]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index abc33464959e..967e14f14abc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1355,20 +1355,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); - err = request_firmware(>firmware.gpu_info_fw, fw_name, adev->dev); - if (err) { - dev_err(adev->dev, - "Failed to load gpu_info firmware \"%s\"\n", - fw_name); + err = amdgpu_ucode_request_
Re: [PATCH] drm/amdgpu: use queue 0 for kiq ring
On 2018-01-22 08:45 AM, Huang Rui wrote: It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN only can be issued on queue 0. Signed-off-by: Huang RuiAcked-by: Hawking Zhang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index ef04336..0cfb939 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -179,8 +179,12 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, amdgpu_gfx_bit_to_queue(adev, queue_bit, , , ); - /* Using pipes 2/3 from MEC 2 seems cause problems */ - if (mec == 1 && pipe > 1) + /* +* 1. Using pipes 2/3 from MEC 2 seems cause problems.' Could this have been related to #2? Should we just simplify this by guaranteeing KIQ MEC[0] PIPE[0] QUEUE[0]? Regards, Andres +* 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN +* only can be issued on queue 0. +*/ + if ((mec == 1 && pipe > 1) || queue != 0) continue; ring->me = mec + 1; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: bump version for gfx9 high priority compute
Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 50afcf65181a..d96f9ac9e5fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -73,9 +73,10 @@ * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl * - 3.23.0 - Add query for VRAM lost counter + * - 3.24.0 - Add high priority compute support for gfx9 */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 23 +#define KMS_DRIVER_MINOR 24 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; -- 2.14.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: add high priority compute support for gfx9
On 2018-01-03 06:29 PM, Alex Deucher wrote: On Tue, Jan 2, 2018 at 3:49 PM, Andres Rodriguez <andre...@gmail.com> wrote: We follow the same approach as gfx8. The only changes are register access macros. Tested on vega10. The execution latency results fall within the expected ranges from the polaris10 data. Signed-off-by: Andres Rodriguez <andre...@gmail.com> Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> Thanks, and a happy new year to you :) Do you want to send a patch to bump the driver version so you know when this is available? That would be perfect. I'll send it out in the morning. Regards, Andres Alex --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 100 ++ 1 file changed, 100 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 9f7be230734c..80af928b153e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3734,6 +3734,105 @@ static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) return wptr; } +static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, + bool acquire) +{ + struct amdgpu_device *adev = ring->adev; + int pipe_num, tmp, reg; + int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; + + pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; + + /* first me only has 2 entries, GFX and HP3D */ + if (ring->me > 0) + pipe_num -= 2; + + reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; + tmp = RREG32(reg); + tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); + WREG32(reg, tmp); +} + +static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + bool acquire) +{ + int i, pipe; + bool reserve; + struct amdgpu_ring *iring; + + mutex_lock(>gfx.pipe_reserve_mutex); + pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); + if (acquire) + set_bit(pipe, adev->gfx.pipe_reserve_bitmap); + else + clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); + + if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { + /* Clear all reservations - everyone reacquires all resources */ + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) + gfx_v9_0_ring_set_pipe_percent(>gfx.gfx_ring[i], + true); + + for (i = 0; i < adev->gfx.num_compute_rings; ++i) + gfx_v9_0_ring_set_pipe_percent(>gfx.compute_ring[i], + true); + } else { + /* Lower all pipes without a current reservation */ + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { + iring = >gfx.gfx_ring[i]; + pipe = amdgpu_gfx_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); + reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); + gfx_v9_0_ring_set_pipe_percent(iring, reserve); + } + + for (i = 0; i < adev->gfx.num_compute_rings; ++i) { + iring = >gfx.compute_ring[i]; + pipe = amdgpu_gfx_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); + reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); + gfx_v9_0_ring_set_pipe_percent(iring, reserve); + } + } + + mutex_unlock(>gfx.pipe_reserve_mutex); +} + +static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + bool acquire) +{ + uint32_t pipe_priority = acquire ? 
0x2 : 0x0; + uint32_t queue_priority = acquire ? 0xf : 0x0; + + mutex_lock(>srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); + WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); + + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(>srbm_mutex); +} + +static void gfx_v9_0_ring_set_priority_compute(struct amdgp
Re: [PATCH] drm/amdgpu: fix amdgpu_sync_resv v2
On 2017-11-29 08:10 AM, Christian König wrote: Hi Andres, just a gentle ping to see if you have noticed this. Thanks, Christian. Am 24.11.2017 um 13:49 schrieb Christian König: Fixes a bug introduced by AMDGPU_GEM_CREATE_EXPLICIT_SYNC. We still need to wait for pipelined moves in the shared fences list. v2: fix typo Signed-off-by: Christian König <christian.koe...@amd.com> Hi Christian, Sorry, last few weeks have been a little hectic. This patch looks good to me. You can add: Reviewed-by: Andres Rodriguez <andre...@gmail.com> The steamvr explicit sync use cases are untouched by this patch, so we should be good on that front as well. Kind Regards, Andres --- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 7 ++- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index a4bf21f8f1c1..bbbc40d630a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -191,9 +191,6 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, f = reservation_object_get_excl(resv); r = amdgpu_sync_fence(adev, sync, f); - if (explicit_sync) - return r; - flist = reservation_object_get_list(resv); if (!flist || r) return r; @@ -212,11 +209,11 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, (fence_owner == AMDGPU_FENCE_OWNER_VM))) continue; - /* Ignore fence from the same owner as + /* Ignore fence from the same owner and explicit one as * long as it isn't undefined. */ if (owner != AMDGPU_FENCE_OWNER_UNDEFINED && - fence_owner == owner) + (fence_owner == owner || explicit_sync)) continue; } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] drm/amdgpu: use multipipe compute policy on non PL11 asics
Do you have any work actually going into multiple pipes? My understanding is that opencl will only use one queue at a time (but I'm not really certain about that). What you can also check is if the app works correctly when it executed on pipe0, and if it hangs on pipe 1+. I removed all the locations where pipe0 was hardcoded in the open driver, but it is possible it is still hardcoded somewhere on the closed stack. Regards, Andres On Nov 6, 2017 10:19 PM, "Zhou, David(ChunMing)" <david1.z...@amd.com> wrote: > Then snychronization should have no problem, it maybe relate to multipipe > hw setting issue. > > > Regards, > > David Zhou > ------ > *From:* Andres Rodriguez <andre...@gmail.com> > *Sent:* Tuesday, November 7, 2017 2:00:57 AM > *To:* Zhou, David(ChunMing); amd-gfx list > *Cc:* Deucher, Alexander > *Subject:* Re: [PATCH 1/2] drm/amdgpu: use multipipe compute policy on > non PL11 asics > > Sorry my mail client seems to have blown up. My reply got cut off, > here is the full version: > > > > On 2017-11-06 01:49 AM, Chunming Zhou wrote: > > Hi Andres, > > > Hi David, > > > With your this patch, OCLperf hung. > Is this on all ASICs or just a specific one? > > > > > Could you explain more? > > > > If I am correctly, the difference of with and without this patch is > > setting first two queue or setting all queues of pipe0 to queue_bitmap. > It is slightly different. With this patch we will also use the first > two queues of all pipes, not just pipe 0; > > Pre-patch: > > |-Pipe 0-||-Pipe 1-||-Pipe 2-||-Pipe 3-| > > > Post-patch: > > |-Pipe 0-||-Pipe 1-||-Pipe 2-||-Pipe 3-| > 1100 1100 1100 1100 > > What this means is that we are allowing real multithreading for > compute. Jobs on different pipes allow for parallel execution of work. > Jobs on the same pipe (but different queues) use timeslicing to share > the hardware. > > > > > > Then UMD can use different number queue to submit command for compute > > selected by amdgpu_queue_mgr_map. 
> > > > I checked amdgpu_queue_mgr_map implementation, CS_IOCTL can map user > > ring to different hw ring depending on busy or idle, right? > Yes, when a queue is first used, amdgpu_queue_mgr_map will decide what > the mapping is for a usermode ring to a kernel ring id. > > > If yes, I see a bug in it, which will result in our sched_fence not > > work. Our sched fence assumes the job will be executed in order, your > > mapping queue breaks this. > > I think here you mean that work will execute out of order because it > will go to different rings? > > That should not happen, since the id mapping is permanent on a > per-context basis. Once a mapping is decided, it will be cached for > this context so that we keep execution order guarantees. See the > id-caching code in amdgpu_queue_mgr.c for reference. > > As long as the usermode keeps submitting work to the same ring, it > will all be executed in order (all in the same ring). There is no > change in this guarantee compared to pre-patch. Note that even before > this patch amdgpu_queue_mgr_map has been using an LRU policy for a > long time now. > > Regards, > Andres > > On Mon, Nov 6, 2017 at 12:44 PM, Andres Rodriguez <andre...@gmail.com> > wrote: > > > > > > On 2017-11-06 01:49 AM, Chunming Zhou wrote: > >> > >> Hi Andres, > >> > > > > Hi David, > > > >> With your this patch, OCLperf hung. > > > > > > Is this on all ASICs or just a specific one? > > > >> > >> Could you explain more? > >> > >> If I am correctly, the difference of with and without this patch is > >> setting first two queue or setting all queues of pipe0 to queue_bitmap. > > > > > > It is slightly different. With this patch we will also use the first two > > queues of all pipes, not just pipe 0; > > > > Pre-patch: > > > > |-Pipe 0-||-Pipe 1-||-Pipe 2-||-Pipe 3-| > > > > > > Post-patch: > > > > |-Pipe 0-||-Pipe 1-||-Pipe 2-||-Pipe 3-| > > 1100 1100 1100 1100 > > > > What this means is that we are allowing real multithreading for compute. 
> > Jobs on different pipes allow for parallel execution of work. Jobs on the > > same pipe (but different queues) use timeslicing to share the hardware. > > > > > > > >> > >> Then UMD can use different number queue to submit command for compute > >> select
Re: [PATCH 1/2] drm/amdgpu: use multipipe compute policy on non PL11 asics
Sorry my mail client seems to have blown up. My reply got cut off, here is the full version: On 2017-11-06 01:49 AM, Chunming Zhou wrote: > Hi Andres, > Hi David, > With your this patch, OCLperf hung. Is this on all ASICs or just a specific one? > > Could you explain more? > > If I am correctly, the difference of with and without this patch is > setting first two queue or setting all queues of pipe0 to queue_bitmap. It is slightly different. With this patch we will also use the first two queues of all pipes, not just pipe 0; Pre-patch: |-Pipe 0-||-Pipe 1-||-Pipe 2-||-Pipe 3-| Post-patch: |-Pipe 0-||-Pipe 1-||-Pipe 2-||-Pipe 3-| 1100 1100 1100 1100 What this means is that we are allowing real multithreading for compute. Jobs on different pipes allow for parallel execution of work. Jobs on the same pipe (but different queues) use timeslicing to share the hardware. > > Then UMD can use different number queue to submit command for compute > selected by amdgpu_queue_mgr_map. > > I checked amdgpu_queue_mgr_map implementation, CS_IOCTL can map user > ring to different hw ring depending on busy or idle, right? Yes, when a queue is first used, amdgpu_queue_mgr_map will decide what the mapping is for a usermode ring to a kernel ring id. > If yes, I see a bug in it, which will result in our sched_fence not > work. Our sched fence assumes the job will be executed in order, your > mapping queue breaks this. I think here you mean that work will execute out of order because it will go to different rings? That should not happen, since the id mapping is permanent on a per-context basis. Once a mapping is decided, it will be cached for this context so that we keep execution order guarantees. See the id-caching code in amdgpu_queue_mgr.c for reference. As long as the usermode keeps submitting work to the same ring, it will all be executed in order (all in the same ring). There is no change in this guarantee compared to pre-patch. 
Note that even before this patch amdgpu_queue_mgr_map has been using an LRU policy for a long time now. Regards, Andres On Mon, Nov 6, 2017 at 12:44 PM, Andres Rodriguez <andre...@gmail.com> wrote: > > > On 2017-11-06 01:49 AM, Chunming Zhou wrote: >> >> Hi Andres, >> > > Hi David, > >> With your this patch, OCLperf hung. > > > Is this on all ASICs or just a specific one? > >> >> Could you explain more? >> >> If I am correctly, the difference of with and without this patch is >> setting first two queue or setting all queues of pipe0 to queue_bitmap. > > > It is slightly different. With this patch we will also use the first two > queues of all pipes, not just pipe 0; > > Pre-patch: > > |-Pipe 0-||-Pipe 1-||-Pipe 2-||-Pipe 3-| > > > Post-patch: > > |-Pipe 0-||-Pipe 1-||-Pipe 2-||-Pipe 3-| > 1100 1100 1100 1100 > > What this means is that we are allowing real multithreading for compute. > Jobs on different pipes allow for parallel execution of work. Jobs on the > same pipe (but different queues) use timeslicing to share the hardware. > > > >> >> Then UMD can use different number queue to submit command for compute >> selected by amdgpu_queue_mgr_map. >> >> I checked amdgpu_queue_mgr_map implementation, CS_IOCTL can map user ring >> to different hw ring depending on busy or idle, right? >> >> If yes, I see a bug in it, which will result in our sched_fence not work. >> Our sched fence assumes the job will be executed in order, your mapping >> queue breaks this. >> >> >> Regards, >> >> David Zhou >> >> >> On 2017年09月27日 00:22, Andres Rodriguez wrote: >>> >>> A performance regression for OpenCL tests on Polaris11 had this feature >>> disabled for all asics. >>> >>> Instead, disable it selectively on the affected asics. 
>>> >>> Signed-off-by: Andres Rodriguez <andre...@gmail.com> >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 14 -- >>> 1 file changed, 12 insertions(+), 2 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >>> index 4f6c68f..3d76e76 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >>> @@ -109,9 +109,20 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, >>> unsigned max_se, unsigned max_s >>> } >>> } >>> +static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev) >>> +{ >>&
Re: [PATCH 7/7] drm/amdgpu: retry init if it fails due to exclusive mode timeout
On 2017-10-23 06:03 AM, Pixel Ding wrote: From: pdingThe exclusive mode has real-time limitation in reality, such like being done in 300ms. It's easy observed if running many VF/VMs in single host with heavy CPU workload. If we find the init fails due to exclusive mode timeout, try it again. Signed-off-by: pding --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 15 +-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3458d46..1935f5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2306,6 +2306,15 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_init(adev); if (r) { + /* failed in exclusive mode due to timeout */ + if (amdgpu_sriov_vf(adev) && + !amdgpu_sriov_runtime(adev) && + amdgpu_virt_mmio_blocked(adev) && + !amdgpu_virt_wait_reset(adev)) { + dev_err(adev->dev, "VF exclusive mode timeout\n"); + r = -EAGAIN; + goto failed; + } dev_err(adev->dev, "amdgpu_init failed\n"); amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); amdgpu_fini(adev); @@ -2393,6 +2402,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_vf_error_trans_all(adev); if (runtime) vga_switcheroo_fini_domain_pm_ops(adev->dev); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index f2eb7ac..fdc240a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -86,7 +86,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) { struct amdgpu_device *adev; - int r, acpi_status; + int r, acpi_status, retry = 0; #ifdef CONFIG_DRM_AMDGPU_SI if (!amdgpu_si_support) { @@ -122,6 +122,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) } } #endif +retry_init: 
adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL); if (adev == NULL) { @@ -144,7 +145,17 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) * VRAM allocation */ r = amdgpu_device_init(adev, dev, dev->pdev, flags); - if (r) { + if (++retry != 3 && r == -EAGAIN) { Minor nitpick here. Might want to rewrite the condition so that it evaluates to false for most values of retry (currently it evaluates to false only for one value of retry). E.g. if (++retry >= 3 ...) Or int retry = 3; ... if (--retry >= 0 ...) + adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; + adev->virt.ops = NULL; + amdgpu_device_fini(adev); + kfree(adev); + dev->dev_private = NULL; + msleep(5000); + dev_err(>pdev->dev, "retry init %d\n", retry); + amdgpu_init_log = 0; + goto retry_init; + } else if (r) { dev_err(>pdev->dev, "Fatal error during GPU init\n"); goto out; } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/3] drm/amdgpu: Avoid accessing job->entity after the job is scheduled.
On 2017-10-20 02:24 PM, Christian König wrote: Am 20.10.2017 um 18:51 schrieb Andrey Grodzovsky: On 2017-10-20 12:26 PM, Andres Rodriguez wrote: On 2017-10-20 12:19 PM, Andrey Grodzovsky wrote: On 2017-10-20 11:59 AM, Andres Rodriguez wrote: On 2017-10-20 09:32 AM, Andrey Grodzovsky wrote: Bug: amdgpu_job_free_cb was accessing s_job->s_entity when the allocated amdgpu_ctx (and the entity inside it) were already deallocated from amdgpu_cs_parser_fini. Fix: Save job's priority on its creation instead of accessing it from s_entity later on. I'm not sure if this is the correct approach for a fix. Keeping s_entity as a dangling pointer could result in similar bugs being reintroduced. For example, there would still be a race condition between amdgpu_cs_parser_fini() and amdgpu_job_dependency(). .dependency hook is called in only one place, amd_sched_entity_pop_job; amdgpu_cs_parser_fini will wait (from amd_sched_entity_fini) for wake_up(&sched->job_scheduled) from amd_sched_main so I don't see a race here. Instead, it might be better for the job to grab a reference to the context during job_init(), and put that reference on job free. Originally it was my thinking too, but I consulted Christian and he advised that quote - "it's not the best idea since the problem is that when we terminate a process we need to make sure that all resources are released or at least not hold for much longer. When we keep the ctx alive with the job we need to also keep the VM alive and that means we need to keep all the VM page tables alive". That makes sense. Since s_entity is tied to the context reference held by the parser, can you set it to NULL when you drop the context reference there? I am not sure I understand - you want to set s_job->s_entity to NULL in amd_sched_entity_fini for each remaining job in the queue ? But all the jobs remaining in the queue are destroyed there anyway. I think what Andres means here is exactly what we planned to do anyway. 
Set job->s_entity to NULL as soon as we know that the entity is not used any more and might be released. Yeah this is what I would like to see. If you already have discussed it and have a plan to address it, then this patch looks good to me for static and dynamic priorities. Feel free to add: Reviewed-by: Andres Rodriguez <andre...@gmail.com> In the long term we should target towards making s_job->s_entity as well as job->vm superfluous. This way we could even push remaining jobs on a graveyard entity when we destroy one and timeout. Alternatively we could look into why wait_event_killable is sometimes not killable as the name says :) Maybe we can get to a point where we can finally reboot the system cleanly even when the GPU is stuck. Regards, Christian. Thanks, Andrey At least that way we can easily detect misuse of s_entity after it enters a "possibly deleted" state. Regards, Andres Thanks, Andrey Regards, Andres Signed-off-by: Andrey Grodzovsky <andrey.grodzov...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 ++--- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 1 + drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 32 --- 4 files changed, 18 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f7fceb6..a760b6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1192,8 +1192,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->uf_sequence = seq; amdgpu_job_free_resources(job); - amdgpu_ring_priority_get(job->ring, - amd_sched_get_job_priority(>base)); + amdgpu_ring_priority_get(job->ring, job->base.s_priority); trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(>base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 0cfc68d..a58e3c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -104,7 +104,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); - amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job)); + amdgpu_ring_priority_put(job->ring, s_job->s_priority); dma_fence_put(job->fence); amdgpu_sync_free(>sync); amdgpu_sync_free(>dep_sync); @@ -141,8 +141,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, job->fence_ctx = entity->fence_context; *f = dma_fence_get(>base.s_fence->finished); amdgpu_job_free_resources(job); - amdgpu_ring_priority_get(job->ring, - amd_sched_get_job_priority(>base)); + amdgpu_ring
Re: [PATCH 1/3] drm/amdgpu: Avoid accessing job->entity after the job is scheduled.
On 2017-10-20 12:19 PM, Andrey Grodzovsky wrote: On 2017-10-20 11:59 AM, Andres Rodriguez wrote: On 2017-10-20 09:32 AM, Andrey Grodzovsky wrote: Bug: amdgpu_job_free_cb was accessing s_job->s_entity when the allocated amdgpu_ctx (and the entity inside it) were already deallocated from amdgpu_cs_parser_fini. Fix: Save job's priority on its creation instead of accessing it from s_entity later on. I'm not sure if this is the correct approach for a fix. Keeping s_entity as a dangling pointer could result in similar bugs being reintroduced. For example, there would still be a race condition between amdgpu_cs_parser_fini() and amdgpu_job_dependency(). .dependency hook is called in only one place, amd_sched_entity_pop_job; amdgpu_cs_parser_fini will wait (from amd_sched_entity_fini) for wake_up(&sched->job_scheduled) from amd_sched_main so I don't see a race here. Instead, it might be better for the job to grab a reference to the context during job_init(), and put that reference on job free. Originally it was my thinking too, but I consulted Christian and he advised that quote - "it's not the best idea since the problem is that when we terminate a process we need to make sure that all resources are released or at least not hold for much longer. When we keep the ctx alive with the job we need to also keep the VM alive and that means we need to keep all the VM page tables alive". That makes sense. Since s_entity is tied to the context reference held by the parser, can you set it to NULL when you drop the context reference there? At least that way we can easily detect misuse of s_entity after it enters a "possibly deleted" state. 
Regards, Andres Thanks, Andrey Regards, Andres Signed-off-by: Andrey Grodzovsky <andrey.grodzov...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 ++--- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 1 + drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 32 --- 4 files changed, 18 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f7fceb6..a760b6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1192,8 +1192,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->uf_sequence = seq; amdgpu_job_free_resources(job); - amdgpu_ring_priority_get(job->ring, - amd_sched_get_job_priority(>base)); + amdgpu_ring_priority_get(job->ring, job->base.s_priority); trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(>base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 0cfc68d..a58e3c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -104,7 +104,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); - amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job)); + amdgpu_ring_priority_put(job->ring, s_job->s_priority); dma_fence_put(job->fence); amdgpu_sync_free(>sync); amdgpu_sync_free(>dep_sync); @@ -141,8 +141,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, job->fence_ctx = entity->fence_context; *f = dma_fence_get(>base.s_fence->finished); amdgpu_job_free_resources(job); - amdgpu_ring_priority_get(job->ring, - amd_sched_get_job_priority(>base)); + amdgpu_ring_priority_get(job->ring, job->base.s_priority); amd_sched_entity_push_job(>base); return 0; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 
e4d3b4e..1bbbce2 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -529,6 +529,7 @@ int amd_sched_job_init(struct amd_sched_job *job, { job->sched = sched; job->s_entity = entity; + job->s_priority = entity->rq - sched->sched_rq; job->s_fence = amd_sched_fence_create(entity, owner); if (!job->s_fence) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 52c8e54..3f75b45 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -30,6 +30,19 @@ struct amd_gpu_scheduler; struct amd_sched_rq; +enum amd_sched_priority { + AMD_SCHED_PRIORITY_MIN, + AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN, + AMD_SCHED_PRIORITY_NORMAL, + AMD_SCHED_PRIORITY_HIGH_SW, + AMD_SCHED_PRIORITY_HIGH_HW, + AMD_SCHED_PRIORITY_KERNEL, + AMD_SCHED_PRIORITY
Re: [PATCH 1/3] drm/amdgpu: Avoid accessing job->entity after the job is scheduled.
On 2017-10-20 09:32 AM, Andrey Grodzovsky wrote: Bug: amdgpu_job_free_cb was accessing s_job->s_entity when the allocated amdgpu_ctx (and the entity inside it) were already deallocated from amdgpu_cs_parser_fini. Fix: Save job's priority on it's creation instead of accessing it from s_entity later on. I'm not sure if this is the correct approach for a fix. Keeping s_entity as a dangling pointer could result in a similar bugs being reintroduced. For example, there would still be a race condition between amdgpu_cs_parser_fini() and amdgpu_job_dependency(). Instead, it might be better for the job to grab a reference to the context during job_init(), and put that reference on job free. Regards, Andres Signed-off-by: Andrey Grodzovsky--- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 ++--- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 1 + drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 32 --- 4 files changed, 18 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f7fceb6..a760b6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1192,8 +1192,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->uf_sequence = seq; amdgpu_job_free_resources(job); - amdgpu_ring_priority_get(job->ring, -amd_sched_get_job_priority(>base)); + amdgpu_ring_priority_get(job->ring, job->base.s_priority); trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(>base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 0cfc68d..a58e3c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -104,7 +104,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); - amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job)); + 
amdgpu_ring_priority_put(job->ring, s_job->s_priority); dma_fence_put(job->fence); amdgpu_sync_free(>sync); amdgpu_sync_free(>dep_sync); @@ -141,8 +141,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, job->fence_ctx = entity->fence_context; *f = dma_fence_get(>base.s_fence->finished); amdgpu_job_free_resources(job); - amdgpu_ring_priority_get(job->ring, -amd_sched_get_job_priority(>base)); + amdgpu_ring_priority_get(job->ring, job->base.s_priority); amd_sched_entity_push_job(>base); return 0; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index e4d3b4e..1bbbce2 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -529,6 +529,7 @@ int amd_sched_job_init(struct amd_sched_job *job, { job->sched = sched; job->s_entity = entity; + job->s_priority = entity->rq - sched->sched_rq; job->s_fence = amd_sched_fence_create(entity, owner); if (!job->s_fence) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 52c8e54..3f75b45 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -30,6 +30,19 @@ struct amd_gpu_scheduler; struct amd_sched_rq; +enum amd_sched_priority { + AMD_SCHED_PRIORITY_MIN, + AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN, + AMD_SCHED_PRIORITY_NORMAL, + AMD_SCHED_PRIORITY_HIGH_SW, + AMD_SCHED_PRIORITY_HIGH_HW, + AMD_SCHED_PRIORITY_KERNEL, + AMD_SCHED_PRIORITY_MAX, + AMD_SCHED_PRIORITY_INVALID = -1, + AMD_SCHED_PRIORITY_UNSET = -2 +}; + + /** * A scheduler entity is a wrapper around a job queue or a group * of other entities. 
Entities take turns emitting jobs from their @@ -83,6 +96,7 @@ struct amd_sched_job { struct delayed_work work_tdr; uint64_tid; atomic_t karma; + enum amd_sched_priority s_priority; }; extern const struct dma_fence_ops amd_sched_fence_ops_scheduled; @@ -114,18 +128,6 @@ struct amd_sched_backend_ops { void (*free_job)(struct amd_sched_job *sched_job); }; -enum amd_sched_priority { - AMD_SCHED_PRIORITY_MIN, - AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN, - AMD_SCHED_PRIORITY_NORMAL, - AMD_SCHED_PRIORITY_HIGH_SW, - AMD_SCHED_PRIORITY_HIGH_HW, - AMD_SCHED_PRIORITY_KERNEL, - AMD_SCHED_PRIORITY_MAX, - AMD_SCHED_PRIORITY_INVALID = -1, - AMD_SCHED_PRIORITY_UNSET = -2 -}; - /** * One
[PATCH libdrm 1/2] headers: Sync amdgpu_drm.h with drm-next
Generated using make headers_install from: airlied/drm-next 282dc83 Merge tag 'drm-intel-next-2017-10-12' ... Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- include/drm/amdgpu_drm.h | 31 ++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h index 4c6e8c4..ff01818 100644 --- a/include/drm/amdgpu_drm.h +++ b/include/drm/amdgpu_drm.h @@ -53,6 +53,7 @@ extern "C" { #define DRM_AMDGPU_WAIT_FENCES 0x12 #define DRM_AMDGPU_VM 0x13 #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 +#define DRM_AMDGPU_SCHED 0x15 #define DRM_IOCTL_AMDGPU_GEM_CREATEDRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -69,6 +70,7 @@ extern "C" { #define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) #define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) +#define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 @@ -91,6 +93,8 @@ extern "C" { #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) /* Flag that BO is always valid in this VM */ #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) +/* Flag that BO sharing will be explicitly synchronized */ +#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC(1 << 7) struct drm_amdgpu_gem_create_in { /** the requested memory size */ @@ -166,13 +170,22 @@ union drm_amdgpu_bo_list { /* unknown cause */ #define AMDGPU_CTX_UNKNOWN_RESET 3 +/* Context priority level */ +#define AMDGPU_CTX_PRIORITY_UNSET -2048 +#define AMDGPU_CTX_PRIORITY_VERY_LOW-1023 +#define AMDGPU_CTX_PRIORITY_LOW -512 +#define AMDGPU_CTX_PRIORITY_NORMAL 0 +/* Selecting 
a priority above NORMAL requires CAP_SYS_NICE or DRM_MASTER */ +#define AMDGPU_CTX_PRIORITY_HIGH512 +#define AMDGPU_CTX_PRIORITY_VERY_HIGH 1023 + struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ __u32 op; /** For future use, no flags defined so far */ __u32 flags; __u32 ctx_id; - __u32 _pad; + __s32 priority; }; union drm_amdgpu_ctx_out { @@ -216,6 +229,21 @@ union drm_amdgpu_vm { struct drm_amdgpu_vm_out out; }; +/* sched ioctl */ +#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE 1 + +struct drm_amdgpu_sched_in { + /* AMDGPU_SCHED_OP_* */ + __u32 op; + __u32 fd; + __s32 priority; + __u32 flags; +}; + +union drm_amdgpu_sched { + struct drm_amdgpu_sched_in in; +}; + /* * This is not a reliable API and you should expect it to fail for any * number of reasons and have fallback path that do not use userptr to @@ -629,6 +657,7 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_SENSOR_VDDGFX 0x7 /* Number of VRAM page faults on CPU access. */ #define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E +#define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH libdrm 2/2] amdgpu: implement context priority for amdgpu_cs_ctx_create2 v3
Add a new context creation function that allows specifying the context priority. A high priority context has the potential of starving lower priority contexts. The current kernel driver implementation allows only apps that hold CAP_SYS_NICE or DRM_MASTER to acquire a priority above AMDGPU_CTX_PRIORITY_NORMAL. v2: corresponding changes for kernel patch v2 v3: Fixed 'make check' symbol error Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- amdgpu/amdgpu-symbol-check | 1 + amdgpu/amdgpu.h| 17 +++-- amdgpu/amdgpu_cs.c | 17 + 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/amdgpu/amdgpu-symbol-check b/amdgpu/amdgpu-symbol-check index d9f89ef..095c3a0 100755 --- a/amdgpu/amdgpu-symbol-check +++ b/amdgpu/amdgpu-symbol-check @@ -30,6 +30,7 @@ amdgpu_cs_chunk_fence_to_dep amdgpu_cs_create_semaphore amdgpu_cs_create_syncobj amdgpu_cs_ctx_create +amdgpu_cs_ctx_create2 amdgpu_cs_ctx_free amdgpu_cs_destroy_semaphore amdgpu_cs_destroy_syncobj diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h index 23cde10..ecc975f 100644 --- a/amdgpu/amdgpu.h +++ b/amdgpu/amdgpu.h @@ -798,8 +798,9 @@ int amdgpu_bo_list_update(amdgpu_bo_list_handle handle, * context will always be executed in order (first come, first serve). * * - * \param dev- \c [in] Device handle. See #amdgpu_device_initialize() - * \param context - \c [out] GPU Context handle + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param priority - \c [in] Context creation flags. See AMDGPU_CTX_PRIORITY_* + * \param context - \c [out] GPU Context handle * * \return 0 on success\n * <0 - Negative POSIX Error code @@ -807,6 +808,18 @@ int amdgpu_bo_list_update(amdgpu_bo_list_handle handle, * \sa amdgpu_cs_ctx_free() * */ +int amdgpu_cs_ctx_create2(amdgpu_device_handle dev, +uint32_t priority, +amdgpu_context_handle *context); +/** + * Create GPU execution Context + * + * Refer to amdgpu_cs_ctx_create2 for full documentation. This call + * is missing the priority parameter. 
+ * + * \sa amdgpu_cs_ctx_create2() + * +*/ int amdgpu_cs_ctx_create(amdgpu_device_handle dev, amdgpu_context_handle *context); diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c index 9577d5c..b9fc01e 100644 --- a/amdgpu/amdgpu_cs.c +++ b/amdgpu/amdgpu_cs.c @@ -46,13 +46,14 @@ static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem); /** * Create command submission context * - * \param dev - \c [in] amdgpu device handle - * \param context - \c [out] amdgpu context handle + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param priority - \c [in] Context creation flags. See AMDGPU_CTX_PRIORITY_* + * \param context - \c [out] GPU Context handle * * \return 0 on success otherwise POSIX Error code */ -int amdgpu_cs_ctx_create(amdgpu_device_handle dev, -amdgpu_context_handle *context) +int amdgpu_cs_ctx_create2(amdgpu_device_handle dev, uint32_t priority, + amdgpu_context_handle *context) { struct amdgpu_context *gpu_context; union drm_amdgpu_ctx args; @@ -75,6 +76,8 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev, /* Create the context */ memset(, 0, sizeof(args)); args.in.op = AMDGPU_CTX_OP_ALLOC_CTX; + args.in.priority = priority; + r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, , sizeof(args)); if (r) goto error; @@ -94,6 +97,12 @@ error: return r; } +int amdgpu_cs_ctx_create(amdgpu_device_handle dev, +amdgpu_context_handle *context) +{ + return amdgpu_cs_ctx_create2(dev, AMDGPU_CTX_PRIORITY_NORMAL, context); +} + /** * Release command submission context * -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: Upstream repo for libhsakmt
If I remember correctly, it should be John B. Regards, Andres On 2017-10-17 11:42 AM, Felix Kuehling wrote: I didn't even know about the freedesktop repository. Do you know who has commit access to that? Regards, Felix On 2017-10-16 10:44 PM, Tom Stellard wrote: Hi Felix, What do you think about merging your fxkamd/drm-next-wip into the master branch of the hsakmt repository of freedesktop[1]? Fedora is already packaging code from the freedesktop repository, and it might help to distinguish between the ROCm thunk and the upstream thunk by keeping them in separate repos. -Tom [1]https://cgit.freedesktop.org/amd/hsakmt/ ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: rename context priority levels
Don't leak implementation details about how each priority behaves to usermode. This allows greater flexibility in the future. Squash into c2636dc53abd8269a0930bccd564f2f195dba729 Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- Hey Alex, From some of the IRC discussions, I thought this would be appropriate. Regards, Andres drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 8 include/uapi/drm/amdgpu_drm.h | 8 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index cd12330..290cc3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -32,14 +32,14 @@ enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) { switch (amdgpu_priority) { - case AMDGPU_CTX_PRIORITY_HIGH_HW: + case AMDGPU_CTX_PRIORITY_VERY_HIGH: return AMD_SCHED_PRIORITY_HIGH_HW; - case AMDGPU_CTX_PRIORITY_HIGH_SW: + case AMDGPU_CTX_PRIORITY_HIGH: return AMD_SCHED_PRIORITY_HIGH_SW; case AMDGPU_CTX_PRIORITY_NORMAL: return AMD_SCHED_PRIORITY_NORMAL; - case AMDGPU_CTX_PRIORITY_LOW_SW: - case AMDGPU_CTX_PRIORITY_LOW_HW: + case AMDGPU_CTX_PRIORITY_LOW: + case AMDGPU_CTX_PRIORITY_VERY_LOW: return AMD_SCHED_PRIORITY_LOW; case AMDGPU_CTX_PRIORITY_UNSET: return AMD_SCHED_PRIORITY_UNSET; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index a332de1..d0a3ea6 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -172,12 +172,12 @@ union drm_amdgpu_bo_list { /* Context priority level */ #define AMDGPU_CTX_PRIORITY_UNSET -2048 -#define AMDGPU_CTX_PRIORITY_LOW_HW -1023 -#define AMDGPU_CTX_PRIORITY_LOW_SW -512 +#define AMDGPU_CTX_PRIORITY_VERY_LOW-1023 +#define AMDGPU_CTX_PRIORITY_LOW -512 #define AMDGPU_CTX_PRIORITY_NORMAL 0 /* Selecting a priority above NORMAL requires CAP_SYS_NICE or DRM_MASTER */ -#define AMDGPU_CTX_PRIORITY_HIGH_SW 512 -#define AMDGPU_CTX_PRIORITY_HIGH_HW 1023 +#define 
AMDGPU_CTX_PRIORITY_HIGH512 +#define AMDGPU_CTX_PRIORITY_VERY_HIGH 1023 struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 4/5] drm/amd/sched: NULL out the s_fence field after run_job
On 2017-09-28 10:55 AM, Nicolai Hähnle wrote: From: Nicolai Hähnle <nicolai.haeh...@amd.com> amd_sched_process_job drops the fence reference, so NULL out the s_fence field before adding it as a callback to guard against accidentally using s_fence after it may have been freed. Signed-off-by: Nicolai Hähnle <nicolai.haeh...@amd.com> Acked-by: Christian König <christian.koe...@amd.com> --- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index e793312e351c..54eb77cffd9b 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -604,20 +604,23 @@ static int amd_sched_main(void *param) if (!sched_job) continue; s_fence = sched_job->s_fence; atomic_inc(&sched->hw_rq_count); amd_sched_job_begin(sched_job); fence = sched->ops->run_job(sched_job); amd_sched_fence_scheduled(s_fence); + + sched_job->s_fence = NULL; Minor optional nitpick here. Could this be moved somewhere closer to where the fence reference is actually dropped? Alternatively, could a comment be added to specify which function call results in the reference ownership transfer? Whether a change is made or not, this series is Reviewed-by: Andres Rodriguez <andre...@gmail.com> Currently running piglit to check if this fixes the occasional soft hangs I was getting where all tests complete except one. + if (fence) { s_fence->parent = dma_fence_get(fence); r = dma_fence_add_callback(fence, &s_fence->cb, amd_sched_process_job); if (r == -ENOENT) amd_sched_process_job(fence, &s_fence->cb); else if (r) DRM_ERROR("fence add callback failed (%d)\n", r); dma_fence_put(fence); ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: map compute rings by least recently used pipe
This patch provides a guarantee that the first n queues allocated by an application will be on different pipes. Where n is the number of pipes available from the hardware. This helps avoid ring aliasing which can result in work executing in time-sliced mode instead of truly parallel mode. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 8 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 25 - drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++-- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c index befc09b..190e28c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c @@ -121,7 +121,7 @@ static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip) static int amdgpu_lru_map(struct amdgpu_device *adev, struct amdgpu_queue_mapper *mapper, - int user_ring, + int user_ring, bool lru_pipe_order, struct amdgpu_ring **out_ring) { int r, i, j; @@ -139,7 +139,7 @@ static int amdgpu_lru_map(struct amdgpu_device *adev, } r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist, - j, out_ring); + j, lru_pipe_order, out_ring); if (r) return r; @@ -284,8 +284,10 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, r = amdgpu_identity_map(adev, mapper, ring, out_ring); break; case AMDGPU_HW_IP_DMA: + r = amdgpu_lru_map(adev, mapper, ring, false, out_ring); + break; case AMDGPU_HW_IP_COMPUTE: - r = amdgpu_lru_map(adev, mapper, ring, out_ring); + r = amdgpu_lru_map(adev, mapper, ring, true, out_ring); break; default: *out_ring = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 5ce6528..019932a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -315,14 +315,16 @@ static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring, * @type: amdgpu_ring_type 
enum * @blacklist: blacklisted ring ids array * @num_blacklist: number of entries in @blacklist + * @lru_pipe_order: find a ring from the least recently used pipe * @ring: output ring * * Retrieve the amdgpu_ring structure for the least recently used ring of * a specific IP block (all asics). * Returns 0 on success, error on failure. */ -int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist, - int num_blacklist, struct amdgpu_ring **ring) +int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, + int *blacklist, int num_blacklist, + bool lru_pipe_order, struct amdgpu_ring **ring) { struct amdgpu_ring *entry; @@ -337,10 +339,23 @@ int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist, if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist)) continue; - *ring = entry; - amdgpu_ring_lru_touch_locked(adev, *ring); - break; + if (!*ring) { + *ring = entry; + + /* We are done for ring LRU */ + if (!lru_pipe_order) + break; + } + + /* Move all rings on the same pipe to the end of the list */ + if (entry->pipe == (*ring)->pipe) + amdgpu_ring_lru_touch_locked(adev, entry); } + + /* Move the ring we found to the end of the list */ + if (*ring) + amdgpu_ring_lru_touch_locked(adev, *ring); + spin_unlock(&adev->ring_lru_list_lock); if (!*ring) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 322d2529..491bd55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -201,8 +201,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned ring_size, struct amdgpu_irq_src *irq_src, unsigned irq_type); void amdgpu_ring_fini(struct amdgpu_ring *ring); -int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist, - int num_blacklist, struct amdgpu_ring **ring); +int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, + int *blacklist, int num_blacklist, + bool lru_pipe_order, 
struct amdgpu_ring **ring); void amdgpu_ring_l
[PATCH 1/2] drm/amdgpu: use multipipe compute policy on non PL11 asics
A performance regression for OpenCL tests on Polaris11 had this feature disabled for all asics. Instead, disable it selectively on the affected asics. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 4f6c68f..3d76e76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -109,9 +109,20 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s } } +static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev) +{ + /* FIXME: spreading the queues across pipes causes perf regressions +* on POLARIS11 compute workloads */ + if (adev->asic_type == CHIP_POLARIS11) + return false; + + return adev->gfx.mec.num_mec > 1; +} + void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) { int i, queue, pipe, mec; + bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev); /* policy for amdgpu compute queue ownership */ for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { @@ -125,8 +136,7 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) if (mec >= adev->gfx.mec.num_mec) break; - /* FIXME: spreading the queues across pipes causes perf regressions */ - if (0) { + if (multipipe_policy) { /* policy: amdgpu owns the first two queues of the first MEC */ if (mec == 0 && queue < 2) set_bit(i, adev->gfx.mec.queue_bitmap); -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] drm/amdgpu: add option for force enable multipipe policy for compute
Useful for testing the effects of multipipe compute without recompiling. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 6 ++ 3 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d62a35e..b2f0b5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -121,6 +121,7 @@ extern int amdgpu_cntl_sb_buf_per_se; extern int amdgpu_param_buf_per_se; extern int amdgpu_job_hang_limit; extern int amdgpu_lbpw; +extern int amdgpu_compute_multipipe; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 792b117..308749c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -122,6 +122,7 @@ int amdgpu_cntl_sb_buf_per_se = 0; int amdgpu_param_buf_per_se = 0; int amdgpu_job_hang_limit = 0; int amdgpu_lbpw = -1; +int amdgpu_compute_multipipe = -1; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -265,6 +266,9 @@ module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444); MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(lbpw, amdgpu_lbpw, int, 0444); +MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)"); +module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); + #ifdef CONFIG_DRM_AMDGPU_SI #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 3d76e76..48d94ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -111,6 +111,12 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev) { + if (amdgpu_compute_multipipe != -1) { + DRM_INFO("amdgpu: forcing compute pipe policy %d\n", +amdgpu_compute_multipipe); + return amdgpu_compute_multipipe == 1; + } + /* FIXME: spreading the queues across pipes causes perf regressions * on POLARIS11 compute workloads */ if (adev->asic_type == CHIP_POLARIS11) -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 0/2] Selectively spread compute rings across pipes
This was disabled due to an OCL perf regression as discussed on amd-gfx. This series re-enables the feature for ASICs that are not affected, and also introduces a boot parameter to force the policy on or off. This should help future effort of comparing performance with the feature enabled/disabled. Andres Rodriguez (2): drm/amdgpu: use multipipe compute policy on non PL11 asics drm/amdgpu: add option for force enable multipipe policy for compute drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 20 ++-- 3 files changed, 23 insertions(+), 2 deletions(-) -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 8/8] drm/amdgpu: add interface for editing a foreign process's priority v3
The AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE ioctls are used to set the priority of a different process in the current system. When a request is dropped, the process's contexts will be restored to the priority specified at context creation time. A request can be dropped by setting the override priority to AMDGPU_CTX_PRIORITY_UNSET. An fd is used to identify the remote process. This is simpler than passing a pid number, which is vulnerable to re-use, etc. This functionality is limited to DRM_MASTER since abuse of this interface can have a negative impact on the system's performance. v2: removed unused output structure v3: change refcounted interface for a regular set operation Signed-off-by: Andres Rodriguez <andre...@gmail.com> Acked-by: Christian König <christian.koe...@amd.com> --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 21 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 109 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h | 34 ++ include/uapi/drm/amdgpu_drm.h | 17 + 6 files changed, 164 insertions(+), 21 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 25a95c9..ef9a3b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ - amdgpu_queue_mgr.o amdgpu_vf_error.o + amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 
bf05180..97aafc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -25,6 +25,7 @@ #include #include #include "amdgpu.h" +#include "amdgpu_sched.h" static int amdgpu_ctx_priority_permit(struct drm_file *filp, enum amd_sched_priority priority) @@ -220,26 +221,6 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, return 0; } -static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) -{ - switch (amdgpu_priority) { - case AMDGPU_CTX_PRIORITY_HIGH_HW: - return AMD_SCHED_PRIORITY_HIGH_HW; - case AMDGPU_CTX_PRIORITY_HIGH_SW: - return AMD_SCHED_PRIORITY_HIGH_SW; - case AMDGPU_CTX_PRIORITY_NORMAL: - return AMD_SCHED_PRIORITY_NORMAL; - case AMDGPU_CTX_PRIORITY_LOW_SW: - case AMDGPU_CTX_PRIORITY_LOW_HW: - return AMD_SCHED_PRIORITY_LOW; - case AMDGPU_CTX_PRIORITY_UNSET: - return AMD_SCHED_PRIORITY_UNSET; - default: - WARN(1, "Invalid context priority %d\n", amdgpu_priority); - return AMD_SCHED_PRIORITY_INVALID; - } -} - int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 79d9ab4..dff4e54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -28,6 +28,7 @@ #include #include "amdgpu.h" #include +#include "amdgpu_sched.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" @@ -1020,6 +1021,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER), DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), /* KMS */ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), diff 
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c new file mode 100644 index 000..cd12330 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -0,0 +1,109 @@ +/* + * Copyright 2017 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including wi
[PATCH 5/8] drm/amd/sched: allow clients to edit an entity's rq v2
This is useful for changing an entity's priority at runtime. v2: don't modify the order of amd_sched_entity members Signed-off-by: Andres Rodriguez <andre...@gmail.com> Acked-by: Christian König <christian.koe...@amd.com> --- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 24 ++-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 +++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 97c94f9..a5adde1 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -133,6 +133,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, entity->rq = rq; entity->sched = sched; + spin_lock_init(&entity->rq_lock); spin_lock_init(&entity->queue_lock); r = kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL); if (r) @@ -204,7 +205,6 @@ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity) void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity) { - struct amd_sched_rq *rq = entity->rq; int r; if (!amd_sched_entity_is_initialized(sched, entity)) @@ -218,7 +218,7 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, else r = wait_event_killable(sched->job_scheduled, amd_sched_entity_is_idle(entity)); - amd_sched_rq_remove_entity(rq, entity); + amd_sched_entity_set_rq(entity, NULL); if (r) { struct amd_sched_job *job; @@ -251,6 +251,24 @@ static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb dma_fence_put(f); } +void amd_sched_entity_set_rq(struct amd_sched_entity *entity, +struct amd_sched_rq *rq) +{ + if (entity->rq == rq) + return; + + spin_lock(&entity->rq_lock); + + if (entity->rq) + amd_sched_rq_remove_entity(entity->rq, entity); + + entity->rq = rq; + if (rq) + amd_sched_rq_add_entity(rq, entity); + + spin_unlock(&entity->rq_lock); +} + bool amd_sched_dependency_optimized(struct dma_fence* fence, struct amd_sched_entity *entity) { @@ -348,7 
+366,9 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job) /* first job wakes up scheduler */ if (first) { /* Add the entity to the run queue */ + spin_lock(&entity->rq_lock); amd_sched_rq_add_entity(entity->rq, entity); + spin_unlock(&entity->rq_lock); amd_sched_wakeup(sched); } return added; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index da040bc..4b528f7 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -39,6 +39,7 @@ struct amd_sched_rq; struct amd_sched_entity { struct list_headlist; struct amd_sched_rq *rq; + spinlock_t rq_lock; struct amd_gpu_scheduler*sched; spinlock_t queue_lock; @@ -154,6 +155,8 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity); void amd_sched_entity_push_job(struct amd_sched_job *sched_job); +void amd_sched_entity_set_rq(struct amd_sched_entity *entity, +struct amd_sched_rq *rq); int amd_sched_fence_slab_init(void); void amd_sched_fence_slab_fini(void); -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 4/8] drm/amdgpu: make amdgpu_to_sched_priority detect invalid parameters
Returning invalid priorities as _NORMAL is a backwards compatibility quirk of amdgpu_ctx_ioctl(). Move this detail one layer up where it belongs. Signed-off-by: Andres Rodriguez <andre...@gmail.com> Acked-by: Christian König <christian.koe...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 +--- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 9ec85d5..fbf15dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -232,7 +232,7 @@ static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) return AMD_SCHED_PRIORITY_LOW; default: WARN(1, "Invalid context priority %d\n", amdgpu_priority); - return AMD_SCHED_PRIORITY_NORMAL; + return AMD_SCHED_PRIORITY_INVALID; } } @@ -251,8 +251,10 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, id = args->in.ctx_id; priority = amdgpu_to_sched_priority(args->in.priority); - if (priority >= AMD_SCHED_PRIORITY_MAX) - return -EINVAL; + /* For backwards compatibility reasons, we need to accept +* ioctls with garbage in the priority field */ + if (priority == AMD_SCHED_PRIORITY_INVALID) + priority = AMD_SCHED_PRIORITY_NORMAL; switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index dbcaa2e..da040bc 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -120,7 +120,8 @@ enum amd_sched_priority { AMD_SCHED_PRIORITY_HIGH_SW, AMD_SCHED_PRIORITY_HIGH_HW, AMD_SCHED_PRIORITY_KERNEL, - AMD_SCHED_PRIORITY_MAX + AMD_SCHED_PRIORITY_MAX, + AMD_SCHED_PRIORITY_INVALID = -1 }; /** -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 6/8] drm/amdgpu: introduce AMDGPU_CTX_PRIORITY_UNSET
Use _INVALID to identify bad parameters and _UNSET to represent the lack of interest in a specific value. Signed-off-by: Andres Rodriguez <andre...@gmail.com> Acked-by: Christian König <christian.koe...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 ++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 ++- include/uapi/drm/amdgpu_drm.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index fbf15dd..e4de0fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -230,6 +230,8 @@ static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) case AMDGPU_CTX_PRIORITY_LOW_SW: case AMDGPU_CTX_PRIORITY_LOW_HW: return AMD_SCHED_PRIORITY_LOW; + case AMDGPU_CTX_PRIORITY_UNSET: + return AMD_SCHED_PRIORITY_UNSET; default: WARN(1, "Invalid context priority %d\n", amdgpu_priority); return AMD_SCHED_PRIORITY_INVALID; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 4b528f7..52c8e54 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -122,7 +122,8 @@ enum amd_sched_priority { AMD_SCHED_PRIORITY_HIGH_HW, AMD_SCHED_PRIORITY_KERNEL, AMD_SCHED_PRIORITY_MAX, - AMD_SCHED_PRIORITY_INVALID = -1 + AMD_SCHED_PRIORITY_INVALID = -1, + AMD_SCHED_PRIORITY_UNSET = -2 }; /** diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index d93e988..127797a 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -167,6 +167,7 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_UNKNOWN_RESET 3 /* Context priority level */ +#define AMDGPU_CTX_PRIORITY_UNSET -2048 #define AMDGPU_CTX_PRIORITY_LOW_HW -1023 #define AMDGPU_CTX_PRIORITY_LOW_SW -512 #define AMDGPU_CTX_PRIORITY_NORMAL 0 -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/8] drm/amdgpu: add parameter to allocate high priority contexts v11
Add a new context creation parameter to express a global context priority. The priority ranking in descending order is as follows: * AMDGPU_CTX_PRIORITY_HIGH_HW * AMDGPU_CTX_PRIORITY_HIGH_SW * AMDGPU_CTX_PRIORITY_NORMAL * AMDGPU_CTX_PRIORITY_LOW_SW * AMDGPU_CTX_PRIORITY_LOW_HW The driver will attempt to schedule work to the hardware according to the priorities. No latency or throughput guarantees are provided by this patch. This interface intends to service the EGL_IMG_context_priority extension, and vulkan equivalents. Setting a priority above NORMAL requires CAP_SYS_NICE or DRM_MASTER. v2: Instead of using flags, repurpose __pad v3: Swap enum values of _NORMAL _HIGH for backwards compatibility v4: Validate usermode priority and store it v5: Move priority validation into amdgpu_ctx_ioctl(), headline reword v6: add UAPI note regarding priorities requiring CAP_SYS_ADMIN v7: remove ctx->priority v8: added AMDGPU_CTX_PRIORITY_LOW, s/CAP_SYS_ADMIN/CAP_SYS_NICE v9: change the priority parameter to __s32 v10: split priorities into _SW and _HW v11: Allow DRM_MASTER without CAP_SYS_NICE Reviewed-by: Emil Velikov <emil.l.veli...@gmail.com> Reviewed-by: Christian König <christian.koe...@amd.com> Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 61 +-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 5 ++- include/uapi/drm/amdgpu_drm.h | 10 - 3 files changed, 70 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index a11e443..9ec85d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -23,13 +23,40 @@ */ #include +#include #include "amdgpu.h" -static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) +static int amdgpu_ctx_priority_permit(struct drm_file *filp, + enum amd_sched_priority priority) +{ + /* NORMAL and below are accessible by everyone */ + if (priority <= 
AMD_SCHED_PRIORITY_NORMAL) + return 0; + + if (capable(CAP_SYS_NICE)) + return 0; + + if (drm_is_current_master(filp)) + return 0; + + return -EACCES; +} + +static int amdgpu_ctx_init(struct amdgpu_device *adev, + enum amd_sched_priority priority, + struct drm_file *filp, + struct amdgpu_ctx *ctx) { unsigned i, j; int r; + if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX) + return -EINVAL; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + memset(ctx, 0, sizeof(*ctx)); ctx->adev = adev; kref_init(&ctx->refcount); @@ -51,7 +78,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) struct amdgpu_ring *ring = adev->rings[i]; struct amd_sched_rq *rq; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + rq = &ring->sched.sched_rq[priority]; if (ring == &adev->gfx.kiq.ring) continue; @@ -100,6 +127,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) static int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, + struct drm_file *filp, + enum amd_sched_priority priority, uint32_t *id) { struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; @@ -117,8 +146,9 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, kfree(ctx); return r; } + *id = (uint32_t)r; - r = amdgpu_ctx_init(adev, ctx); + r = amdgpu_ctx_init(adev, priority, filp, ctx); if (r) { idr_remove(&mgr->ctx_handles, *id); *id = 0; @@ -188,11 +218,30 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, return 0; } +static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) +{ + switch (amdgpu_priority) { + case AMDGPU_CTX_PRIORITY_HIGH_HW: + return AMD_SCHED_PRIORITY_HIGH_HW; + case AMDGPU_CTX_PRIORITY_HIGH_SW: + return AMD_SCHED_PRIORITY_HIGH_SW; + case AMDGPU_CTX_PRIORITY_NORMAL: + return AMD_SCHED_PRIORITY_NORMAL; + case AMDGPU_CTX_PRIORITY_LOW_SW: + case AMDGPU_CTX_PRIORITY_LOW_HW: + return AMD_SCHED_PRIORITY_LOW; + default: + WARN(1, "Invalid context priority %d\n", amdgpu_priority); + return AMD_SCHED_PRIORITY_NORMAL; + } +} + int 
amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { int r;
[PATCH 3/8] drm/amdgpu: implement ring set_priority for gfx_v8 compute v9
Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over other queues on the same pipe. Multiple queues on a pipe are timesliced so this gives us full precedence over other queues. Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the wave as follows: 0x2: CS_H 0x1: CS_M 0x0: CS_L The SPI block will then dispatch work according to the policy set by SPI_ARB_PRIORITY. In the current policy CS_H is higher priority than gfx. In order to prevent getting stuck in loops of resources bouncing between GFX and high priority compute and introducing further latency, we statically reserve a portion of the pipe. v2: fix srbm_select to ring->queue and use ring->funcs->type v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_* v4: switch int to enum amd_sched_priority v5: corresponding changes for srbm_lock v6: change CU reservation to PIPE_PERCENT allocation v7: use kiq instead of MMIO v8: back to MMIO, and make the implementation sleep safe. v9: corresponding changes for splitting HIGH into _HW/_SW Acked-by: Christian König <christian.koe...@amd.com> Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 99 ++ 3 files changed, 105 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d62a35e..cedfc96 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1023,6 +1023,10 @@ struct amdgpu_gfx { boolin_suspend; /* NGG */ struct amdgpu_ngg ngg; + + /* pipe reservation */ + struct mutexpipe_reserve_mutex; + DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); }; int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b0109eb..e07750c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2012,6 +2012,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->vm_manager.vm_pte_num_rings = 0; adev->gart.gart_funcs = NULL; adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); + bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); adev->smc_rreg = &amdgpu_invalid_rreg; adev->smc_wreg = &amdgpu_invalid_wreg; @@ -2040,6 +2041,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->pm.mutex); mutex_init(&adev->gfx.gpu_clock_mutex); mutex_init(&adev->srbm_mutex); + mutex_init(&adev->gfx.pipe_reserve_mutex); mutex_init(&adev->grbm_idx_mutex); mutex_init(&adev->mn_lock); hash_init(adev->mn_hash); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 666a154..3b565df 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6365,6 +6365,104 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } +static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring, + bool acquire) +{ + struct amdgpu_device *adev = ring->adev; + int pipe_num, tmp, reg; + int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; + + pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; + + /* first me only has 2 entries, GFX and HP3D */ + if (ring->me > 0) + pipe_num -= 2; + + reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num; + tmp = RREG32(reg); + tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); + WREG32(reg, tmp); +} + +static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + bool acquire) +{ + int i, pipe; + bool reserve; + struct amdgpu_ring *iring; + + mutex_lock(>gfx.pipe_reserve_mutex); + pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); + if (acquire) + set_bit(pipe, adev->gfx.pipe_reserve_bitmap); + else + clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); + + if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { + /* Clear all reservations - everyone reacquires all resources */ + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) + gfx_v8_0_ring_set_pipe_percent(>gfx.gf
[PATCH 0/8] allow DRM_MASTER to change client's priorities v3
Just a rebase of v2 since it went a little stale. Series available in the wip-process-priorities-v3 branch at: git://people.freedesktop.org/~lostgoat/linux Gpuvis trace with normal priority running steamvr, hellovr_vulkan and ssao demo: https://drive.google.com/open?id=0B2ygSoZuj3IMdFJ4bTZPcmh5aHc Gpuvis trace with compute at high priority: https://drive.google.com/open?id=0B2ygSoZuj3IMYzBFUUN0dXV4bGs Full trace data vailable here: https://drive.google.com/open?id=0B2ygSoZuj3IMWGUzV25yNGtyRFU For validation, use the wip-process-priorities-v3 branch of: mesa: g...@github.com:lostgoat/mesa.git libdrm: g...@github.com:lostgoat/libdrm.git vk-demos: g...@github.com:lostgoat/Vulkan.git Run the following two vk-demos simultaneously: ./ssao ./computeparticles -high-priority Andres Rodriguez (8): drm/amdgpu: add parameter to allocate high priority contexts v11 drm/amdgpu: add framework for HW specific priority settings v9 drm/amdgpu: implement ring set_priority for gfx_v8 compute v9 drm/amdgpu: make amdgpu_to_sched_priority detect invalid parameters drm/amd/sched: allow clients to edit an entity's rq v2 drm/amdgpu: introduce AMDGPU_CTX_PRIORITY_UNSET drm/amdgpu: add plumbing for ctx priority changes v2 drm/amdgpu: add interface for editing a foreign process's priority v3 drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 10 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 75 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 76 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 15 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 109 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h | 34 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 99 +++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 24 +- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 19 - include/uapi/drm/amdgpu_drm.h | 28 ++- 15 files 
changed, 489 insertions(+), 12 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 0/2] [reference] Disable implicit sync for non-wsi bos
This is a reference patch series for the kernel series: "drm/amdgpu: introduce AMDGPU_GEM_CREATE_EXPLICIT_SYNC v2" Andres Rodriguez (2): radv: factor out radv_alloc_memory radv: disable implicit sync for radv allocated bos v2 src/amd/vulkan/radv_device.c | 22 +- src/amd/vulkan/radv_private.h | 11 +++ src/amd/vulkan/radv_radeon_winsys.h | 1 + src/amd/vulkan/radv_wsi.c | 3 ++- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 2 ++ 5 files changed, 33 insertions(+), 6 deletions(-) -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] radv: disable implicit sync for radv allocated bos v2
Implicit sync kicks in when a buffer is used by two different amdgpu contexts simultaneously. Jobs that use explicit synchronization mechanisms end up needlessly waiting to be scheduled for long periods of time in order to achieve serialized execution. This patch disables implicit synchronization for all radv allocations except for wsi bos. The only systems that require implicit synchronization are DRI2/3 and PRIME. v2: mark wsi bos as RADV_MEM_IMPLICIT_SYNC Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- src/amd/vulkan/radv_device.c | 3 +++ src/amd/vulkan/radv_radeon_winsys.h | 1 + src/amd/vulkan/radv_wsi.c | 3 ++- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 2 ++ 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 106eaf6..26944d2 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -2309,6 +2309,9 @@ VkResult radv_alloc_memory(VkDevice _device, if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE) flags |= RADEON_FLAG_GTT_WC; + if (mem_flags & RADV_MEM_IMPLICIT_SYNC) + flags |= RADEON_FLAG_IMPLICIT_SYNC; + mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment, domain, flags); diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index 82ec5fe..02e0243 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -53,6 +53,7 @@ enum radeon_bo_flag { /* bitfield */ RADEON_FLAG_NO_CPU_ACCESS = (1 << 2), RADEON_FLAG_VIRTUAL = (1 << 3), RADEON_FLAG_VA_UNCACHED = (1 << 4), + RADEON_FLAG_IMPLICIT_SYNC = (1 << 5), }; enum radeon_bo_usage { /* bitfield */ diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c index aa44b7d..f8307ee 100644 --- a/src/amd/vulkan/radv_wsi.c +++ b/src/amd/vulkan/radv_wsi.c @@ -193,7 +193,7 @@ radv_wsi_image_create(VkDevice device_h, .image = image_h }; - result = 
radv_AllocateMemory(device_h, + result = radv_alloc_memory(device_h, &(VkMemoryAllocateInfo) { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .pNext = _alloc, @@ -201,6 +201,7 @@ radv_wsi_image_create(VkDevice device_h, .memoryTypeIndex = linear ? 1 : 0, }, NULL /* XXX: pAllocator */, +RADV_MEM_IMPLICIT_SYNC, _h); if (result != VK_SUCCESS) goto fail_create_image; diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c index 325f875..cd0efbe 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c @@ -330,6 +330,8 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; if (flags & RADEON_FLAG_GTT_WC) request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; + if (!(flags & RADEON_FLAG_IMPLICIT_SYNC)) + request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC; r = amdgpu_bo_alloc(ws->dev, , _handle); if (r) { -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] radv: factor out radv_alloc_memory
This allows us to pass extra parameters to the memory allocation operation that are not defined in the vulkan spec. This is useful for internal usage. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- src/amd/vulkan/radv_device.c | 19 ++- src/amd/vulkan/radv_private.h | 11 +++ 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 5d96070..106eaf6 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -2242,11 +2242,11 @@ bool radv_get_memory_fd(struct radv_device *device, pFD); } -VkResult radv_AllocateMemory( - VkDevice_device, - const VkMemoryAllocateInfo* pAllocateInfo, - const VkAllocationCallbacks*pAllocator, - VkDeviceMemory* pMem) +VkResult radv_alloc_memory(VkDevice_device, + const VkMemoryAllocateInfo* pAllocateInfo, + const VkAllocationCallbacks*pAllocator, + enum radv_mem_flags_bitsmem_flags, + VkDeviceMemory* pMem) { RADV_FROM_HANDLE(radv_device, device, _device); struct radv_device_memory *mem; @@ -2328,6 +2328,15 @@ fail: return result; } +VkResult radv_AllocateMemory( + VkDevice_device, + const VkMemoryAllocateInfo* pAllocateInfo, + const VkAllocationCallbacks*pAllocator, + VkDeviceMemory* pMem) +{ + return radv_alloc_memory(_device, pAllocateInfo, pAllocator, 0, pMem); +} + void radv_FreeMemory( VkDevice_device, VkDeviceMemory _mem, diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index a9da9e7..8cb3807 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -105,6 +105,11 @@ enum radv_mem_type { RADV_MEM_TYPE_COUNT }; +enum radv_mem_flags_bits { + /* enable implicit synchronization when accessing the underlying bo */ + RADV_MEM_IMPLICIT_SYNC = 1 << 0, +}; + #define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) static inline uint32_t @@ -935,6 +940,12 @@ void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer); bool radv_get_memory_fd(struct radv_device 
*device, struct radv_device_memory *memory, int *pFD); +VkResult radv_alloc_memory(VkDevice _device, + const VkMemoryAllocateInfo* pAllocateInfo, + const VkAllocationCallbacks* pAllocator, + enum radv_mem_flags_bits flags, + VkDeviceMemory* pMem); + /* * Takes x,y,z as exact numbers of invocations, instead of blocks. * -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: introduce AMDGPU_GEM_CREATE_EXPLICIT_SYNC v2
Correct. The idea is to only set AMDGPU_GEM_CREATE_EXPLICIT_SYNC for buffers that are not associated with dri2/3 or PRIME. Regards, Andres On 2017-09-19 10:10 AM, Mao, David wrote: Hi Andres, The explicit sync should not be used for DRI3 and DRI2 but for cross process memory sharing, right? We still have to rely on implicit sync to guarantee the correct order of rendering and present. Could you confirm? Thanks. Sent from my iPhone On 19 Sep 2017, at 9:57 PM, Andres Rodriguez <andre...@gmail.com <mailto:andre...@gmail.com>> wrote: On 2017-09-19 09:24 AM, Christian König wrote: Am 19.09.2017 um 14:59 schrieb Andres Rodriguez: Introduce a flag to signal that access to a BO will be synchronized through an external mechanism. Currently all buffers shared between contexts are subject to implicit synchronization. However, this is only required for protocols that currently don't support an explicit synchronization mechanism (DRI2/3). This patch introduces the AMDGPU_GEM_CREATE_EXPLICIT_SYNC, so that users can specify when it is safe to disable implicit sync. v2: only disable explicit sync in amdgpu_cs_ioctl Signed-off-by: Andres Rodriguez <andre...@gmail.com <mailto:andre...@gmail.com>> --- Hey Christian, I kept the amdgpu_bo_explicit_sync() function since it makes it easier to maintain an 80 line wrap in amdgpu_cs_sync_rings() Looks good to me, but I would like to see the matching user space code as well. Especially I have no idea how you want to have DRI3 compatibility with that? No problem. I'm fixing the radv patch atm and I'll re-send it for your reference. Regards, Andres Regards, Christian. 
Regards, Andres drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 7 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 include/uapi/drm/amdgpu_drm.h | 2 ++ 8 files changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index db97e78..bc8a403 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -704,7 +704,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) list_for_each_entry(e, >validated, tv.head) { struct reservation_object *resv = e->robj->tbo.resv; - r = amdgpu_sync_resv(p->adev, >job->sync, resv, p->filp); + r = amdgpu_sync_resv(p->adev, >job->sync, resv, p->filp, + amdgpu_bo_explicit_sync(e->robj)); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b0d45c8..21e9936 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -212,7 +212,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_VRAM_CLEARED | - AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)) + AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | + AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) + return -EINVAL; /* reject invalid gem domains */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index c26ef53..428aae0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -193,6 +193,14 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) } } +/** + * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced + */ +static inline bool 
amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) +{ + return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; +} + int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index c586f44..a4bf21f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -169,14 +169,14 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, * * @sync: sync object to add fences from reservation object to * @resv: reservation object with embedded fence - * @shared: true if we should only sync to the exclusive fence + * @explicit_sync: true if we should only sync to the exclusive fence * * Sync to the fence */ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation
[PATCH] drm/amdgpu: introduce AMDGPU_GEM_CREATE_EXPLICIT_SYNC v2
Introduce a flag to signal that access to a BO will be synchronized through an external mechanism. Currently all buffers shared between contexts are subject to implicit synchronization. However, this is only required for protocols that currently don't support an explicit synchronization mechanism (DRI2/3). This patch introduces the AMDGPU_GEM_CREATE_EXPLICIT_SYNC, so that users can specify when it is safe to disable implicit sync. v2: only disable explicit sync in amdgpu_cs_ioctl Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- Hey Christian, I kept the amdgpu_bo_explicit_sync() function since it makes it easier to maintain an 80 line wrap in amdgpu_cs_sync_rings() Regards, Andres drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c| 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 7 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 include/uapi/drm/amdgpu_drm.h | 2 ++ 8 files changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index db97e78..bc8a403 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -704,7 +704,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) list_for_each_entry(e, >validated, tv.head) { struct reservation_object *resv = e->robj->tbo.resv; - r = amdgpu_sync_resv(p->adev, >job->sync, resv, p->filp); + r = amdgpu_sync_resv(p->adev, >job->sync, resv, p->filp, +amdgpu_bo_explicit_sync(e->robj)); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b0d45c8..21e9936 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -212,7 +212,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, 
AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_VRAM_CLEARED | - AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)) + AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | + AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) + return -EINVAL; /* reject invalid gem domains */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index c26ef53..428aae0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -193,6 +193,14 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) } } +/** + * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced + */ +static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) +{ + return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; +} + int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index c586f44..a4bf21f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -169,14 +169,14 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, * * @sync: sync object to add fences from reservation object to * @resv: reservation object with embedded fence - * @shared: true if we should only sync to the exclusive fence + * @explicit_sync: true if we should only sync to the exclusive fence * * Sync to the fence */ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, -void *owner) +void *owner, bool explicit_sync) { struct reservation_object_list *flist; struct dma_fence *f; @@ -191,6 +191,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, f = reservation_object_get_excl(resv); r = amdgpu_sync_fence(adev, sync, f); + if (explicit_sync) + return r; + flist = reservation_object_get_list(resv); if (!flist || r) return r; diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index dc76879..70d7e3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -45,7 +45,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, int amdgpu_sync_resv(struct amdgpu_device *ad
Re: [PATCH] drm/amdgpu: add helper to convert a ttm bo to amdgpu_bo
This is a small cleanup patch from my initial naive attempt at extracting a TTM bo in amdgpu_sync_resv(). It didn't end up being useful in that specific case, but I thought I'd send it out anyways in case you find it useful. Regards, Andres On 2017-09-18 11:17 PM, Andres Rodriguez wrote: Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 8 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 9 + 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 726a662..73eedd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -40,9 +40,7 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); - struct amdgpu_bo *bo; - - bo = container_of(tbo, struct amdgpu_bo, tbo); + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); amdgpu_bo_kunmap(bo); @@ -891,7 +889,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) return; - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); amdgpu_vm_bo_invalidate(adev, abo, evict); amdgpu_bo_kunmap(abo); @@ -918,7 +916,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) return 0; - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); /* Remember that this BO was accessed by the CPU */ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 39b6bf6..c26ef53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -94,6 +94,11 @@ struct amdgpu_bo { }; }; +static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) +{ + 
return container_of(tbo, struct amdgpu_bo, tbo); +} + /** * amdgpu_mem_type_to_domain - return domain corresponding to mem_type * @mem_type: ttm memory type diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b2b11e1..c9c059d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -44,6 +44,7 @@ #include #include #include "amdgpu.h" +#include "amdgpu_object.h" #include "amdgpu_trace.h" #include "bif/bif_4_1_d.h" @@ -209,7 +210,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, placement->num_busy_placement = 1; return; } - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); switch (bo->mem.mem_type) { case TTM_PL_VRAM: if (adev->mman.buffer_funcs && @@ -257,7 +258,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) { - struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); if (amdgpu_ttm_tt_get_usermm(bo->ttm)) return -EPERM; @@ -484,7 +485,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, int r; /* Can't move a pinned BO */ - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); if (WARN_ON_ONCE(abo->pin_count > 0)) return -EINVAL; @@ -1172,7 +1173,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, unsigned long offset, void *buf, int len, int write) { - struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); struct drm_mm_node *nodes = abo->tbo.mem.mm_node; uint32_t value = 0; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: add helper to convert a ttm bo to amdgpu_bo
Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 8 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 9 + 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 726a662..73eedd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -40,9 +40,7 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); - struct amdgpu_bo *bo; - - bo = container_of(tbo, struct amdgpu_bo, tbo); + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); amdgpu_bo_kunmap(bo); @@ -891,7 +889,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) return; - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); amdgpu_vm_bo_invalidate(adev, abo, evict); amdgpu_bo_kunmap(abo); @@ -918,7 +916,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) return 0; - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); /* Remember that this BO was accessed by the CPU */ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 39b6bf6..c26ef53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -94,6 +94,11 @@ struct amdgpu_bo { }; }; +static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) +{ + return container_of(tbo, struct amdgpu_bo, tbo); +} + /** * amdgpu_mem_type_to_domain - return domain corresponding to mem_type * @mem_type: ttm memory type diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b2b11e1..c9c059d 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -44,6 +44,7 @@ #include #include #include "amdgpu.h" +#include "amdgpu_object.h" #include "amdgpu_trace.h" #include "bif/bif_4_1_d.h" @@ -209,7 +210,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, placement->num_busy_placement = 1; return; } - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); switch (bo->mem.mem_type) { case TTM_PL_VRAM: if (adev->mman.buffer_funcs && @@ -257,7 +258,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) { - struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); if (amdgpu_ttm_tt_get_usermm(bo->ttm)) return -EPERM; @@ -484,7 +485,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, int r; /* Can't move a pinned BO */ - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); if (WARN_ON_ONCE(abo->pin_count > 0)) return -EINVAL; @@ -1172,7 +1173,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, unsigned long offset, void *buf, int len, int write) { - struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); struct drm_mm_node *nodes = abo->tbo.mem.mm_node; uint32_t value = 0; -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: introduce AMDGPU_GEM_CREATE_EXPLICIT_SYNC
On 2017-09-18 10:47 PM, zhoucm1 wrote: On 2017年09月19日 07:16, Andres Rodriguez wrote: Introduce a flag to signal that access to a BO will be synchronized through an external mechanism. Currently all buffers shared between contexts are subject to implicit synchronization. However, this is only required for protocols that currently don't support an explicit synchronization mechanism (DRI2/3). This patch introduces the AMDGPU_GEM_CREATE_EXPLICIT_SYNC, so that users can specify when it is safe to disable implicit sync. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 8 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 -- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++ include/uapi/drm/amdgpu_drm.h | 2 ++ 8 files changed, 34 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index db97e78..107533f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -704,7 +704,9 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) list_for_each_entry(e, >validated, tv.head) { struct reservation_object *resv = e->robj->tbo.resv; - r = amdgpu_sync_resv(p->adev, >job->sync, resv, p->filp); + r = amdgpu_sync_resv(p->adev, >job->sync, resv, + p->filp, + amdgpu_bo_explicit_sync(e->robj)); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b0d45c8..21e9936 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -212,7 +212,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_VRAM_CLEARED | - 
AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)) + AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | + AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) + return -EINVAL; /* reject invalid gem domains */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index c26ef53..428aae0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -193,6 +193,14 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) } } +/** + * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced + */ +static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) +{ + return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; +} + int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index c586f44..6bf4bed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -169,14 +169,15 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, * * @sync: sync object to add fences from reservation object to * @resv: reservation object with embedded fence - * @shared: true if we should only sync to the exclusive fence + * @explicit_sync: true if we should only sync to the exclusive fence * * Sync to the fence */ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, - void *owner) + void *owner, + bool explicit_sync) Could you move explicit_sync inside function? like: bool explicit_sync = bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; I was thinking of doing something like this originally. Extract the ttm bo from the resv object, and then get the abo from that. Would've been a pretty tiny and clean patch. However, the reservation object is a pointer instead of being embedded inside the ttm bo. So that path doesn't work. 
Passing in a pointer to the full bo is overkill I think. And the function might be used in cases where the reservation object is not associated with a specific bo (at least the current interface allows for that). That is why I ended up choosing this interface, even though it made the patch a lot more verbose. But if you, or anyone, has any suggestions on how to simplify this patch let me know. { struct reservation_object_list *flist; struct dma_fence *f; @@ -191,6 +192,9 @@ int amdgpu_sync_resv(struct amdgpu_
Re: [PATCH] radv: disable implicit sync for radv allocated bos
Got some feedback from Dave, and this patch doesn't handle dri3 use cases correctly. Regards, Andres On 2017-09-18 07:16 PM, Andres Rodriguez wrote: Implicit sync kicks in when a buffer is used by two different amdgpu contexts simultaneously. Jobs that use explicit synchronization mechanisms end up needlessly waiting to be scheduled for long periods of time in order to achieve serialized execution. This patch disables implicit synchronization for all radv allocations. The only systems that require implicit synchronization are DRI2/3 and PRIME. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c index 325f875..9dc7559 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c @@ -330,6 +330,7 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; if (flags & RADEON_FLAG_GTT_WC) request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; + request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC; r = amdgpu_bo_alloc(ws->dev, , _handle); if (r) { ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: introduce AMDGPU_GEM_CREATE_EXPLICIT_SYNC
Introduce a flag to signal that access to a BO will be synchronized through an external mechanism. Currently all buffers shared between contexts are subject to implicit synchronization. However, this is only required for protocols that currently don't support an explicit synchronization mechanism (DRI2/3). This patch introduces the AMDGPU_GEM_CREATE_EXPLICIT_SYNC, so that users can specify when it is safe to disable implicit sync. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c| 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 8 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 6 -- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++ include/uapi/drm/amdgpu_drm.h | 2 ++ 8 files changed, 34 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index db97e78..107533f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -704,7 +704,9 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) list_for_each_entry(e, >validated, tv.head) { struct reservation_object *resv = e->robj->tbo.resv; - r = amdgpu_sync_resv(p->adev, >job->sync, resv, p->filp); + r = amdgpu_sync_resv(p->adev, >job->sync, resv, +p->filp, +amdgpu_bo_explicit_sync(e->robj)); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b0d45c8..21e9936 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -212,7 +212,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_VRAM_CLEARED | - AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)) + AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | + AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) + 
return -EINVAL; /* reject invalid gem domains */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index c26ef53..428aae0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -193,6 +193,14 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) } } +/** + * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced + */ +static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) +{ + return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; +} + int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index c586f44..6bf4bed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -169,14 +169,15 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, * * @sync: sync object to add fences from reservation object to * @resv: reservation object with embedded fence - * @shared: true if we should only sync to the exclusive fence + * @explicit_sync: true if we should only sync to the exclusive fence * * Sync to the fence */ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, -void *owner) +void *owner, +bool explicit_sync) { struct reservation_object_list *flist; struct dma_fence *f; @@ -191,6 +192,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, f = reservation_object_get_excl(resv); r = amdgpu_sync_fence(adev, sync, f); + if (explicit_sync) + return r; + flist = reservation_object_get_list(resv); if (!flist || r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index dc76879..70d7e3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -45,7 +45,8 
@@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, -
[PATCH] Add flag to disable implicit synchronization
Implicit synchronization of jobs that access a shared bo is always enabled. Currently this behaviour is required for DRI2/3 and PRIME use cases, where the protocol doesn't provide a mechanism to share a synchronization primitive alongside the surface. This patch series aims to provide a mechanism to allow userspace to disable implicit synchronization when it is not required. Following is an example of some async compute work getting delayed for 2.12ms due to implicit synchronization: https://drive.google.com/open?id=0B2ygSoZuj3IMRzFCYzBxaDFpaFk Following is the same workload but with AMDGPU_GEM_CREATE_EXPLICIT_SYNC enabled: https://drive.google.com/open?id=0B2ygSoZuj3IMb0pTZEJRQmNwVHM In the second case we can see that hellovr_vulkan and the steamvr compositor can access the same surface simultaneously, without the gpu scheduler introducing any implicit waits. Gpuvis traces for these two scenarios can be found here: https://drive.google.com/open?id=0B2ygSoZuj3IMRklfM1llbTJqTnc The libdrm and radv patches are included for reference. Andres Rodriguez (1): drm/amdgpu: introduce AMDGPU_GEM_CREATE_EXPLICIT_SYNC drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c| 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 8 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 6 -- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++ include/uapi/drm/amdgpu_drm.h | 2 ++ 8 files changed, 34 insertions(+), 11 deletions(-) -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH libdrm] amdgpu: update headers
For testing the kernel commit --- include/drm/amdgpu_drm.h | 4 1 file changed, 4 insertions(+) diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h index da2ade6..c01abaa 100644 --- a/include/drm/amdgpu_drm.h +++ b/include/drm/amdgpu_drm.h @@ -89,6 +89,10 @@ extern "C" { #define AMDGPU_GEM_CREATE_SHADOW (1 << 4) /* Flag that allocating the BO should use linear VRAM */ #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) +/* Flag that BO is always valid in this VM */ +#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) +/* Flag that BO sharing will be explicitely sync'd */ +#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC(1 << 7) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] radv: disable implicit sync for radv allocated bos
Implicit sync kicks in when a buffer is used by two different amdgpu contexts simultaneously. Jobs that use explicit synchronization mechanisms end up needlessly waiting to be scheduled for long periods of time in order to achieve serialized execution. This patch disables implicit synchronization for all radv allocations. The only systems that require implicit synchronization are DRI2/3 and PRIME. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c index 325f875..9dc7559 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c @@ -330,6 +330,7 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; if (flags & RADEON_FLAG_GTT_WC) request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; + request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC; r = amdgpu_bo_alloc(ws->dev, , _handle); if (r) { -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH resend] allow DRM_MASTER to change client's priorities v2
Hey David, I'm not sure if it's been merged yet (I don't have push permissions). I just had a little baby girl, so I'm taking a couple of weeks off :) Regards, Andres On Jul 24, 2017 3:25 AM, "zhoucm1" <david1.z...@amd.com> wrote: Hi Andres, How about your this set? Have you pushed? Regards, David Zhou On 2017年07月08日 03:26, Andres Rodriguez wrote: > Little ping on this series for review. > > Also added recent reviews/acks and rebased. > > Andres Rodriguez (8): >drm/amdgpu: add parameter to allocate high priority contexts v11 >drm/amdgpu: add framework for HW specific priority settings v9 >drm/amdgpu: implement ring set_priority for gfx_v8 compute v9 >drm/amdgpu: make amdgpu_to_sched_priority detect invalid parameters >drm/amd/sched: allow clients to edit an entity's rq v2 >drm/amdgpu: introduce AMDGPU_CTX_PRIORITY_UNSET >drm/amdgpu: add plumbing for ctx priority changes v2 >drm/amdgpu: add interface for editing a foreign process's priority v3 > > drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 10 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 2 + > drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 75 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 2 + > drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 + > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 + > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 76 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 15 > drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 109 > ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h | 34 > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 105 > + > drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 26 +- > drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 19 - > include/uapi/drm/amdgpu_drm.h | 28 ++- > 15 files changed, 496 insertions(+), 13 deletions(-) > create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c > create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h > > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v2] drm/amdgpu: Fix KFD oversubscription by tracking queues correctly
Reviewed-by: Andres Rodriguez <andre...@gmail.com> On 2017-07-13 04:23 PM, Jay Cornwall wrote: The number of compute queues available to the KFD was erroneously calculated as 64. Only the first MEC can execute compute queues and it has 32 queue slots. This caused the oversubscription limit to be calculated incorrectly, leading to a missing chained runlist command at the end of an oversubscribed runlist. v2: Remove unused num_mec field to avoid duplicate logic Change-Id: Ic4a139c04b8a6d025fbb831a0a67e98728bfe461 Signed-off-by: Jay Cornwall <jay.cornw...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 3 +-- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 7 --- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 3 --- drivers/gpu/drm/radeon/radeon_kfd.c | 1 - 5 files changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 7060daf..8c710f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -116,7 +116,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) if (adev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { .compute_vmid_bitmap = global_compute_vmid_bitmap, - .num_mec = adev->gfx.mec.num_mec, .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, .gpuvm_size = (uint64_t)amdgpu_vm_size << 30 @@ -140,7 +139,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) /* According to linux/bitmap.h we shouldn't use bitmap_clear if * nbits is not compile time constant */ - last_valid_bit = adev->gfx.mec.num_mec + last_valid_bit = 1 /* only first MEC can have compute queues */ * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 
1cf00d4..95f9396 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -494,10 +494,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, } else kfd->max_proc_per_quantum = hws_max_conc_proc; - /* We only use the first MEC */ - if (kfd->shared_resources.num_mec > 1) - kfd->shared_resources.num_mec = 1; - /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * kfd->device_info->mqd_size_aligned; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 7607989..306144f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -82,13 +82,6 @@ static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) return false; } -unsigned int get_mec_num(struct device_queue_manager *dqm) -{ - BUG_ON(!dqm || !dqm->dev); - - return dqm->dev->shared_resources.num_mec; -} - unsigned int get_queues_num(struct device_queue_manager *dqm) { BUG_ON(!dqm || !dqm->dev); diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index a4d2fee..10794b3 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -107,9 +107,6 @@ struct kgd2kfd_shared_resources { /* Bit n == 1 means VMID n is available for KFD. 
*/ unsigned int compute_vmid_bitmap; - /* number of mec available from the hardware */ - uint32_t num_mec; - /* number of pipes per mec */ uint32_t num_pipe_per_mec; diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index 719ea51..8f8c7c1 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -251,7 +251,6 @@ void radeon_kfd_device_init(struct radeon_device *rdev) if (rdev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { .compute_vmid_bitmap = 0xFF00, - .num_mec = 1, .num_pipe_per_mec = 4, .num_queue_per_pipe = 8, .gpuvm_size = (uint64_t)radeon_vm_size << 30 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: Fix KFD oversubscription by tracking queues correctly
On 2017-07-13 03:35 PM, Felix Kuehling wrote: On 17-07-13 03:15 PM, Jay Cornwall wrote: On Thu, Jul 13, 2017, at 13:36, Andres Rodriguez wrote: On 2017-07-12 02:26 PM, Jay Cornwall wrote: The number of compute queues available to the KFD was erroneously calculated as 64. Only the first MEC can execute compute queues and it has 32 queue slots. This caused the oversubscription limit to be calculated incorrectly, leading to a missing chained runlist command at the end of an oversubscribed runlist. Change-Id: Ic4a139c04b8a6d025fbb831a0a67e98728bfe461 Signed-off-by: Jay Cornwall <jay.cornw...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 7060daf..aa4006a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -140,7 +140,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) /* According to linux/bitmap.h we shouldn't use bitmap_clear if * nbits is not compile time constant */ - last_valid_bit = adev->gfx.mec.num_mec + last_valid_bit = 1 /* only first MEC can have compute queues */ Hey Jay, Minor nitpick. We already have some similar resource patching in kgd2kfd_device_init(), and I think it would be good to keep all of these together. OK. I see shared_resources.num_mec is set to 1 in kgd2kfd_device_init. That's not very clear (the number of MECs doesn't change) and num_mec doesn't appear to be used anywhere except in dead code in kfd_device.c. That code also runs after the queue bitmap setup. How about I remove that field entirely? Yeah, that's fine with me. Good with me as well. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: Fix KFD oversubscription by tracking queues correctly
On 2017-07-13 02:36 PM, Andres Rodriguez wrote: On 2017-07-12 02:26 PM, Jay Cornwall wrote: The number of compute queues available to the KFD was erroneously calculated as 64. Only the first MEC can execute compute queues and it has 32 queue slots. This caused the oversubscription limit to be calculated incorrectly, leading to a missing chained runlist command at the end of an oversubscribed runlist. Change-Id: Ic4a139c04b8a6d025fbb831a0a67e98728bfe461 Signed-off-by: Jay Cornwall <jay.cornw...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 7060daf..aa4006a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -140,7 +140,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) /* According to linux/bitmap.h we shouldn't use bitmap_clear if * nbits is not compile time constant */ -last_valid_bit = adev->gfx.mec.num_mec +last_valid_bit = 1 /* only first MEC can have compute queues */ Hey Jay, Minor nitpick. We already have some similar resource patching in kgd2kfd_device_init(), and I think it would be good to keep all of these together. Otherwise, looks good to me. Just re-read my reply and wanted to clarify. I don't really have a strong opining on which side does the resource availability patched. Whether it happens here or on the KFD side it is fine. I just don't think it is good to keep it in different two places. Regards, Andres Regards, Andres * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: Fix KFD oversubscription by tracking queues correctly
On 2017-07-12 02:26 PM, Jay Cornwall wrote: The number of compute queues available to the KFD was erroneously calculated as 64. Only the first MEC can execute compute queues and it has 32 queue slots. This caused the oversubscription limit to be calculated incorrectly, leading to a missing chained runlist command at the end of an oversubscribed runlist. Change-Id: Ic4a139c04b8a6d025fbb831a0a67e98728bfe461 Signed-off-by: Jay Cornwall--- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 7060daf..aa4006a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -140,7 +140,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) /* According to linux/bitmap.h we shouldn't use bitmap_clear if * nbits is not compile time constant */ - last_valid_bit = adev->gfx.mec.num_mec + last_valid_bit = 1 /* only first MEC can have compute queues */ Hey Jay, Minor nitpick. We already have some similar resource patching in kgd2kfd_device_init(), and I think it would be good to keep all of these together. Otherwise, looks good to me. Regards, Andres * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu/gfx: keep all compute queues on the same pipe
Hey Alex, Which apps are having perf problems? Also, is the issue present when the queue gets mapped to pipe0? (i.e. is the perf regression only happening on pipe1+) Regards, Andres On 2017-07-11 11:13 AM, Alex Deucher wrote: Spreading them causes performance regressions using compute queues. Cc: Jim Qu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index e26108a..4f6c68f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -125,7 +125,8 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) if (mec >= adev->gfx.mec.num_mec) break; - if (adev->gfx.mec.num_mec > 1) { + /* FIXME: spreading the queues across pipes causes perf regressions */ + if (0) { /* policy: amdgpu owns the first two queues of the first MEC */ if (mec == 0 && queue < 2) set_bit(i, adev->gfx.mec.queue_bitmap); ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 0/2] Add support for context priorities
On 2017-07-07 04:31 PM, Alex Deucher wrote: On Fri, Jul 7, 2017 at 3:28 PM, Andres Rodriguez <andre...@gmail.com> wrote: As requested, for validation of the kernel series. Andres Rodriguez (2): headers: sync amdgpu_drm.h with the kernel amdgpu: implement context priority for amdgpu_cs_ctx_create2 v3 Do you have a test app or mesa patches that actually use this that you could share? Yeah. The mesa portion is a little messy since the extension isn't registered yet. But it can be found on the wip-high-priority branch here: https://github.com/lostgoat/mesa.git For a test app you can download this other project (wip-high-priority branch as well): https://github.com/lostgoat/Vulkan.git Once everything is setup I run one session of the SSAO sample: QUEUE_NUM=0 ./ssao Then you can simultaneously run the computeparticles with or without high priority: sudo QUEUE_NUM=0 nice -n 20 ./computeparticles -high-priority Or sudo QUEUE_NUM=0 nice -n 20 ./computeparticles Note: QUEUE_NUM is required. If you have any issues building/running these let me know. For reference, following are the output of these apps on my system on an RX480: ssao: Max: 2.917ms Min: 2.583ms MaxH: 2.917ms MinH: 2.583ms Med: 2.806ms Avg: 2.797ms Frames: 58 Cur: 2.745ms computeparticles with regular priority: Max: 2.021ms Min: 0.058ms MaxH: 1.899ms MinH: 0.932ms Med: 1.187ms Avg: 1.236ms Frames: 3017 Cur: 1.339ms computeparticles with high priority: Max: 0.298ms Min: 0.054ms MaxH: 0.077ms MinH: 0.054ms Med: 0.062ms Avg: 0.061ms Frames: 5999 Cur: 0.063ms Regards, Andres Alex amdgpu/amdgpu-symbol-check | 1 + amdgpu/amdgpu.h| 17 +++-- amdgpu/amdgpu_cs.c | 17 + include/drm/amdgpu_drm.h | 31 ++- 4 files changed, 59 insertions(+), 7 deletions(-) -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] headers: sync amdgpu_drm.h with the kernel
Generated using make headers_install from: [TODO: Pending kernel patches] Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- include/drm/amdgpu_drm.h | 31 ++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h index d9aa4a3..da2ade6 100644 --- a/include/drm/amdgpu_drm.h +++ b/include/drm/amdgpu_drm.h @@ -52,6 +52,7 @@ extern "C" { #define DRM_AMDGPU_GEM_USERPTR 0x11 #define DRM_AMDGPU_WAIT_FENCES 0x12 #define DRM_AMDGPU_VM 0x13 +#define DRM_AMDGPU_SCHED 0x14 #define DRM_IOCTL_AMDGPU_GEM_CREATEDRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -67,6 +68,7 @@ extern "C" { #define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) #define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) #define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) +#define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 @@ -162,13 +164,22 @@ union drm_amdgpu_bo_list { /* unknown cause */ #define AMDGPU_CTX_UNKNOWN_RESET 3 +/* Context priority level */ +#define AMDGPU_CTX_PRIORITY_UNSET -2048 +#define AMDGPU_CTX_PRIORITY_LOW_HW -1023 +#define AMDGPU_CTX_PRIORITY_LOW_SW -512 +#define AMDGPU_CTX_PRIORITY_NORMAL 0 +/* Selecting a priority above NORMAL requires CAP_SYS_NICE or DRM_MASTER */ +#define AMDGPU_CTX_PRIORITY_HIGH_SW 512 +#define AMDGPU_CTX_PRIORITY_HIGH_HW 1023 + struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ __u32 op; /** For future use, no flags defined so far */ __u32 flags; __u32 ctx_id; - __u32 _pad; + __s32 priority; }; union drm_amdgpu_ctx_out { @@ -212,6 +223,21 @@ union drm_amdgpu_vm { struct 
drm_amdgpu_vm_out out; }; +/* sched ioctl */ +#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE 1 + +struct drm_amdgpu_sched_in { + /* AMDGPU_SCHED_OP_* */ + __u32 op; + __u32 fd; + __s32 priority; + __u32 flags; +}; + +union drm_amdgpu_sched { + struct drm_amdgpu_sched_in in; +}; + /* * This is not a reliable API and you should expect it to fail for any * number of reasons and have fallback path that do not use userptr to @@ -764,6 +790,7 @@ struct drm_amdgpu_info_device { __u64 max_memory_clock; /* cu information */ __u32 cu_active_number; + /* NOTE: cu_ao_mask is INVALID, DON'T use it */ __u32 cu_ao_mask; __u32 cu_bitmap[4][4]; /** Render backend pipe mask. One render backend is CB+DB. */ @@ -818,6 +845,8 @@ struct drm_amdgpu_info_device { /* max gs wavefront per vgt*/ __u32 max_gs_waves_per_vgt; __u32 _pad1; + /* always on cu bitmap */ + __u32 cu_ao_bitmap[4][4]; }; struct drm_amdgpu_info_hw_ip { -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 0/2] Add support for context priorities
As requested, for validation of the kernel series. Andres Rodriguez (2): headers: sync amdgpu_drm.h with the kernel amdgpu: implement context priority for amdgpu_cs_ctx_create2 v3 amdgpu/amdgpu-symbol-check | 1 + amdgpu/amdgpu.h| 17 +++-- amdgpu/amdgpu_cs.c | 17 + include/drm/amdgpu_drm.h | 31 ++- 4 files changed, 59 insertions(+), 7 deletions(-) -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 6/8] drm/amdgpu: introduce AMDGPU_CTX_PRIORITY_UNSET
Use _INVALID to identify bad parameters and _UNSET to represent the lack of interest in a specific value. Signed-off-by: Andres Rodriguez <andre...@gmail.com> Acked-by: Christian König <christian.koe...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 ++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 ++- include/uapi/drm/amdgpu_drm.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index fbf15dd..e4de0fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -230,6 +230,8 @@ static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) case AMDGPU_CTX_PRIORITY_LOW_SW: case AMDGPU_CTX_PRIORITY_LOW_HW: return AMD_SCHED_PRIORITY_LOW; + case AMDGPU_CTX_PRIORITY_UNSET: + return AMD_SCHED_PRIORITY_UNSET; default: WARN(1, "Invalid context priority %d\n", amdgpu_priority); return AMD_SCHED_PRIORITY_INVALID; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 4b528f7..52c8e54 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -122,7 +122,8 @@ enum amd_sched_priority { AMD_SCHED_PRIORITY_HIGH_HW, AMD_SCHED_PRIORITY_KERNEL, AMD_SCHED_PRIORITY_MAX, - AMD_SCHED_PRIORITY_INVALID = -1 + AMD_SCHED_PRIORITY_INVALID = -1, + AMD_SCHED_PRIORITY_UNSET = -2 }; /** diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 977903c..2bf6569 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -163,6 +163,7 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_UNKNOWN_RESET 3 /* Context priority level */ +#define AMDGPU_CTX_PRIORITY_UNSET -2048 #define AMDGPU_CTX_PRIORITY_LOW_HW -1023 #define AMDGPU_CTX_PRIORITY_LOW_SW -512 #define AMDGPU_CTX_PRIORITY_NORMAL 0 -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 5/8] drm/amd/sched: allow clients to edit an entity's rq v2
This is useful for changing an entity's priority at runtime. v2: don't modify the order of amd_sched_entity members Signed-off-by: Andres Rodriguez <andre...@gmail.com> Acked-by: Christian König <christian.koe...@amd.com> --- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 26 +++--- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 +++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 38cea6f..0166620 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -133,6 +133,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, entity->rq = rq; entity->sched = sched; + spin_lock_init(>rq_lock); spin_lock_init(>queue_lock); r = kfifo_alloc(>job_queue, jobs * sizeof(void *), GFP_KERNEL); if (r) @@ -204,8 +205,6 @@ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity) void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity) { - struct amd_sched_rq *rq = entity->rq; - if (!amd_sched_entity_is_initialized(sched, entity)) return; @@ -215,7 +214,8 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, */ wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity)); - amd_sched_rq_remove_entity(rq, entity); + amd_sched_entity_set_rq(entity, NULL); + kfifo_free(>job_queue); } @@ -236,6 +236,24 @@ static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb dma_fence_put(f); } +void amd_sched_entity_set_rq(struct amd_sched_entity *entity, +struct amd_sched_rq *rq) +{ + if (entity->rq == rq) + return; + + spin_lock(>rq_lock); + + if (entity->rq) + amd_sched_rq_remove_entity(entity->rq, entity); + + entity->rq = rq; + if (rq) + amd_sched_rq_add_entity(rq, entity); + + spin_unlock(>rq_lock); +} + bool amd_sched_dependency_optimized(struct dma_fence* fence, struct amd_sched_entity *entity) { @@ -333,7 +351,9 @@ static bool 
amd_sched_entity_in(struct amd_sched_job *sched_job) /* first job wakes up scheduler */ if (first) { /* Add the entity to the run queue */ + spin_lock(>rq_lock); amd_sched_rq_add_entity(entity->rq, entity); + spin_unlock(>rq_lock); amd_sched_wakeup(sched); } return added; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index da040bc..4b528f7 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -39,6 +39,7 @@ struct amd_sched_rq; struct amd_sched_entity { struct list_headlist; struct amd_sched_rq *rq; + spinlock_t rq_lock; struct amd_gpu_scheduler*sched; spinlock_t queue_lock; @@ -154,6 +155,8 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity); void amd_sched_entity_push_job(struct amd_sched_job *sched_job); +void amd_sched_entity_set_rq(struct amd_sched_entity *entity, +struct amd_sched_rq *rq); int amd_sched_fence_slab_init(void); void amd_sched_fence_slab_fini(void); -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 8/8] drm/amdgpu: add interface for editing a foreign process's priority v3
The AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE ioctls are used to set the priority of a different process in the current system. When a request is dropped, the process's contexts will be restored to the priority specified at context creation time. A request can be dropped by setting the override priority to AMDGPU_CTX_PRIORITY_UNSET. An fd is used to identify the remote process. This is simpler than passing a pid number, which is vulnerable to re-use, etc. This functionality is limited to DRM_MASTER since abuse of this interface can have a negative impact on the system's performance. v2: removed unused output structure v3: change refcounted interface for a regular set operation Signed-off-by: Andres Rodriguez <andre...@gmail.com> Acked-by: Christian König <christian.koe...@amd.com> --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 21 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 109 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h | 34 ++ include/uapi/drm/amdgpu_drm.h | 17 + 6 files changed, 164 insertions(+), 21 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 658bac0..c6fd886 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ - amdgpu_queue_mgr.o amdgpu_vf_error.o + amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 
bf05180..97aafc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -25,6 +25,7 @@ #include #include #include "amdgpu.h" +#include "amdgpu_sched.h" static int amdgpu_ctx_priority_permit(struct drm_file *filp, enum amd_sched_priority priority) @@ -220,26 +221,6 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, return 0; } -static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) -{ - switch (amdgpu_priority) { - case AMDGPU_CTX_PRIORITY_HIGH_HW: - return AMD_SCHED_PRIORITY_HIGH_HW; - case AMDGPU_CTX_PRIORITY_HIGH_SW: - return AMD_SCHED_PRIORITY_HIGH_SW; - case AMDGPU_CTX_PRIORITY_NORMAL: - return AMD_SCHED_PRIORITY_NORMAL; - case AMDGPU_CTX_PRIORITY_LOW_SW: - case AMDGPU_CTX_PRIORITY_LOW_HW: - return AMD_SCHED_PRIORITY_LOW; - case AMDGPU_CTX_PRIORITY_UNSET: - return AMD_SCHED_PRIORITY_UNSET; - default: - WARN(1, "Invalid context priority %d\n", amdgpu_priority); - return AMD_SCHED_PRIORITY_INVALID; - } -} - int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b0b2310..733c4ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -28,6 +28,7 @@ #include #include "amdgpu.h" #include +#include "amdgpu_sched.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" @@ -1019,6 +1020,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER), DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), /* KMS */ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), diff 
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c new file mode 100644 index 000..cd12330 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -0,0 +1,109 @@ +/* + * Copyright 2017 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including wi
[PATCH 4/8] drm/amdgpu: make amdgpu_to_sched_priority detect invalid parameters
Returning invalid priorities as _NORMAL is a backwards compatibility quirk of amdgpu_ctx_ioctl(). Move this detail one layer up where it belongs. Signed-off-by: Andres Rodriguez <andre...@gmail.com> Acked-by: Christian König <christian.koe...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 +--- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 9ec85d5..fbf15dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -232,7 +232,7 @@ static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) return AMD_SCHED_PRIORITY_LOW; default: WARN(1, "Invalid context priority %d\n", amdgpu_priority); - return AMD_SCHED_PRIORITY_NORMAL; + return AMD_SCHED_PRIORITY_INVALID; } } @@ -251,8 +251,10 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, id = args->in.ctx_id; priority = amdgpu_to_sched_priority(args->in.priority); - if (priority >= AMD_SCHED_PRIORITY_MAX) - return -EINVAL; + /* For backwards compatibility reasons, we need to accept +* ioctls with garbage in the priority field */ + if (priority == AMD_SCHED_PRIORITY_INVALID) + priority = AMD_SCHED_PRIORITY_NORMAL; switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index dbcaa2e..da040bc 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -120,7 +120,8 @@ enum amd_sched_priority { AMD_SCHED_PRIORITY_HIGH_SW, AMD_SCHED_PRIORITY_HIGH_HW, AMD_SCHED_PRIORITY_KERNEL, - AMD_SCHED_PRIORITY_MAX + AMD_SCHED_PRIORITY_MAX, + AMD_SCHED_PRIORITY_INVALID = -1 }; /** -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/8] drm/amdgpu: implement ring set_priority for gfx_v8 compute v9
Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over other queues on the same pipe. Multiple queues on a pipe are timesliced so this gives us full precedence over other queues. Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the wave as follows: 0x2: CS_H 0x1: CS_M 0x0: CS_L The SPI block will then dispatch work according to the policy set by SPI_ARB_PRIORITY. In the current policy CS_H is higher priority than gfx. In order to prevent getting stuck in loops of resources bouncing between GFX and high priority compute and introducing further latency, we statically reserve a portion of the pipe. v2: fix srbm_select to ring->queue and use ring->funcs->type v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_* v4: switch int to enum amd_sched_priority v5: corresponding changes for srbm_lock v6: change CU reservation to PIPE_PERCENT allocation v7: use kiq instead of MMIO v8: back to MMIO, and make the implementation sleep safe. v9: corresponding changes for splitting HIGH into _HW/_SW Acked-by: Christian König <christian.koe...@amd.com> Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 105 + 3 files changed, 111 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 20e0ed9..479a1e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1117,6 +1117,10 @@ struct amdgpu_gfx { boolin_suspend; /* NGG */ struct amdgpu_ngg ngg; + + /* pipe reservation */ + struct mutexpipe_reserve_mutex; + DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); }; int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 88e45c6..bbad84c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2020,6 +2020,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->vm_manager.vm_pte_num_rings = 0; adev->gart.gart_funcs = NULL; adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); + bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); adev->smc_rreg = _invalid_rreg; adev->smc_wreg = _invalid_wreg; @@ -2048,6 +2049,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(>pm.mutex); mutex_init(>gfx.gpu_clock_mutex); mutex_init(>srbm_mutex); + mutex_init(>gfx.pipe_reserve_mutex); mutex_init(>grbm_idx_mutex); mutex_init(>mn_lock); hash_init(adev->mn_hash); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index a1ef7f6..b39eac8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6468,6 +6468,110 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } +static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring, + bool acquire) +{ + struct amdgpu_device *adev = ring->adev; + int pipe_num, tmp, reg; + int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; + + pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; + + /* first me only has 2 entries, GFX and HP3D */ + if (ring->me > 0) + pipe_num -= 2; + + /* There is a bug in the GFX pipes that results in a HS +* deadlock if the pipe is restricted to a percentage +* lower than 17 */ + if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE && pipe_percent < 17) + pipe_percent = 17; + + reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num; + tmp = RREG32(reg); + tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); + WREG32(reg, tmp); +} + +static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + bool acquire) +{ + int i, pipe; + bool reserve; + struct amdgpu_ring *iring; + + mutex_lock(>gfx.pipe_reserve_mutex); + pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); + if (acquire) + set_bit(pipe, adev->gfx.pipe_reserve_bitmap); + else + clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); + + if (!bitmap_weigh
[PATCH resend] allow DRM_MASTER to change client's priorities v2
Little ping on this series for review. Also added recent reviews/acks and rebased. Andres Rodriguez (8): drm/amdgpu: add parameter to allocate high priority contexts v11 drm/amdgpu: add framework for HW specific priority settings v9 drm/amdgpu: implement ring set_priority for gfx_v8 compute v9 drm/amdgpu: make amdgpu_to_sched_priority detect invalid parameters drm/amd/sched: allow clients to edit an entity's rq v2 drm/amdgpu: introduce AMDGPU_CTX_PRIORITY_UNSET drm/amdgpu: add plumbing for ctx priority changes v2 drm/amdgpu: add interface for editing a foreign process's priority v3 drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 10 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 75 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 76 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 15 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 109 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h | 34 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 105 + drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 26 +- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 19 - include/uapi/drm/amdgpu_drm.h | 28 ++- 15 files changed, 496 insertions(+), 13 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/8] drm/amdgpu: add framework for HW specific priority settings v9
Add an initial framework for changing the HW priorities of rings. The framework allows requesting priority changes for the lifetime of an amdgpu_job. After the job completes the priority will decay to the next lowest priority for which a request is still valid. A new ring function set_priority() can now be populated to take care of the HW specific programming sequence for priority changes. v2: set priority before emitting IB, and take a ref on amdgpu_job v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_* v4: plug amdgpu_ring_restore_priority_cb into amdgpu_job_free_cb v5: use atomic for tracking job priorities instead of last_job v6: rename amdgpu_ring_priority_[get/put]() and align parameters v7: replace spinlocks with mutexes for KIQ compatibility v8: raise ring priority during cs_ioctl, instead of job_run v9: priority_get() before push_job() Acked-by: Christian König <christian.koe...@amd.com> Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 76 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 15 ++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 7 +++ 5 files changed, 103 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 546a77e..763d74e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1102,6 +1102,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->uf_sequence = cs->out.handle; amdgpu_job_free_resources(job); amdgpu_cs_parser_fini(p, 0, true); + amdgpu_ring_priority_get(job->ring, +amd_sched_get_job_priority(>base)); trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(>base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 3d641e1..63b0f3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -101,6 +101,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); + amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job)); dma_fence_put(job->fence); amdgpu_sync_free(>sync); amdgpu_sync_free(>dep_sync); @@ -137,6 +138,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, job->fence_ctx = entity->fence_context; *f = dma_fence_get(>base.s_fence->finished); amdgpu_job_free_resources(job); + amdgpu_ring_priority_get(job->ring, +amd_sched_get_job_priority(>base)); amd_sched_entity_push_job(>base); return 0; @@ -201,6 +204,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) /* if gpu reset, hw fence will be replaced here */ dma_fence_put(job->fence); job->fence = dma_fence_get(fence); + amdgpu_job_free_resources(job); return fence; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 75165e0..2d8b20a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -155,6 +155,75 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) } /** + * amdgpu_ring_priority_put - restore a ring's priority + * + * @ring: amdgpu_ring structure holding the information + * @priority: target priority + * + * Release a request for executing at @priority + */ +void amdgpu_ring_priority_put(struct amdgpu_ring *ring, + enum amd_sched_priority priority) +{ + int i; + + if (!ring->funcs->set_priority) + return; + + if (atomic_dec_return(>num_jobs[priority]) > 0) + return; + + /* no need to restore if the job is already at the lowest priority */ + if (priority == AMD_SCHED_PRIORITY_NORMAL) + return; + + mutex_lock(>priority_mutex); + /* something higher prio is executing, no need to decay */ + if (ring->priority > priority) + goto out_unlock; + + /* decay priority to the next level with a job available */ + for (i 
= priority; i >= AMD_SCHED_PRIORITY_MIN; i--) { + if (i == AMD_SCHED_PRIORITY_NORMAL + || atomic_read(>num_jobs[i])) { + ring->priority = i; + ring->funcs->set_priority(ring, i); + break; + } + } + +out_unlock: + mutex_unlock(>priority_mutex); +} + +/** + * amdgpu_ring_priority_get - change the ring's priority +
[PATCH 1/8] drm/amdgpu: add parameter to allocate high priority contexts v11
Add a new context creation parameter to express a global context priority. The priority ranking in descending order is as follows: * AMDGPU_CTX_PRIORITY_HIGH_HW * AMDGPU_CTX_PRIORITY_HIGH_SW * AMDGPU_CTX_PRIORITY_NORMAL * AMDGPU_CTX_PRIORITY_LOW_SW * AMDGPU_CTX_PRIORITY_LOW_HW The driver will attempt to schedule work to the hardware according to the priorities. No latency or throughput guarantees are provided by this patch. This interface intends to service the EGL_IMG_context_priority extension, and vulkan equivalents. Setting a priority above NORMAL requires CAP_SYS_NICE or DRM_MASTER. v2: Instead of using flags, repurpose __pad v3: Swap enum values of _NORMAL _HIGH for backwards compatibility v4: Validate usermode priority and store it v5: Move priority validation into amdgpu_ctx_ioctl(), headline reword v6: add UAPI note regarding priorities requiring CAP_SYS_ADMIN v7: remove ctx->priority v8: added AMDGPU_CTX_PRIORITY_LOW, s/CAP_SYS_ADMIN/CAP_SYS_NICE v9: change the priority parameter to __s32 v10: split priorities into _SW and _HW v11: Allow DRM_MASTER without CAP_SYS_NICE Reviewed-by: Emil Velikov <emil.l.veli...@gmail.com> Reviewed-by: Christian König <christian.koe...@amd.com> Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 61 +-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 5 ++- include/uapi/drm/amdgpu_drm.h | 10 - 3 files changed, 70 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index a11e443..9ec85d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -23,13 +23,40 @@ */ #include +#include #include "amdgpu.h" -static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) +static int amdgpu_ctx_priority_permit(struct drm_file *filp, + enum amd_sched_priority priority) +{ + /* NORMAL and below are accessible by everyone */ + if (priority <= 
AMD_SCHED_PRIORITY_NORMAL) + return 0; + + if (capable(CAP_SYS_NICE)) + return 0; + + if (drm_is_current_master(filp)) + return 0; + + return -EACCES; +} + +static int amdgpu_ctx_init(struct amdgpu_device *adev, + enum amd_sched_priority priority, + struct drm_file *filp, + struct amdgpu_ctx *ctx) { unsigned i, j; int r; + if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX) + return -EINVAL; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + memset(ctx, 0, sizeof(*ctx)); ctx->adev = adev; kref_init(>refcount); @@ -51,7 +78,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) struct amdgpu_ring *ring = adev->rings[i]; struct amd_sched_rq *rq; - rq = >sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + rq = >sched.sched_rq[priority]; if (ring == >gfx.kiq.ring) continue; @@ -100,6 +127,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) static int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, + struct drm_file *filp, + enum amd_sched_priority priority, uint32_t *id) { struct amdgpu_ctx_mgr *mgr = >ctx_mgr; @@ -117,8 +146,9 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, kfree(ctx); return r; } + *id = (uint32_t)r; - r = amdgpu_ctx_init(adev, ctx); + r = amdgpu_ctx_init(adev, priority, filp, ctx); if (r) { idr_remove(>ctx_handles, *id); *id = 0; @@ -188,11 +218,30 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, return 0; } +static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) +{ + switch (amdgpu_priority) { + case AMDGPU_CTX_PRIORITY_HIGH_HW: + return AMD_SCHED_PRIORITY_HIGH_HW; + case AMDGPU_CTX_PRIORITY_HIGH_SW: + return AMD_SCHED_PRIORITY_HIGH_SW; + case AMDGPU_CTX_PRIORITY_NORMAL: + return AMD_SCHED_PRIORITY_NORMAL; + case AMDGPU_CTX_PRIORITY_LOW_SW: + case AMDGPU_CTX_PRIORITY_LOW_HW: + return AMD_SCHED_PRIORITY_LOW; + default: + WARN(1, "Invalid context priority %d\n", amdgpu_priority); + return AMD_SCHED_PRIORITY_NORMAL; + } +} + int 
amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { int r;
[PATCH 7/8] drm/amdgpu: add plumbing for ctx priority changes v2
Introduce amdgpu_ctx_priority_override(). A mechanism to override a context's priority. An override can be terminated by setting the override to AMD_SCHED_PRIORITY_UNSET. v2: change refcounted interface for a direct set Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 29 + 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 7c6fca6..0a24c1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -825,7 +825,9 @@ struct amdgpu_ctx { spinlock_t ring_lock; struct dma_fence**fences; struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; - bool preamble_presented; + boolpreamble_presented; + enum amd_sched_priority init_priority; + enum amd_sched_priority override_priority; }; struct amdgpu_ctx_mgr { @@ -842,6 +844,8 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, struct dma_fence *fence); struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, uint64_t seq); +void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, + enum amd_sched_priority priority); int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index e4de0fb..bf05180 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -72,6 +72,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, } ctx->reset_counter = atomic_read(>gpu_reset_counter); + ctx->init_priority = priority; + ctx->override_priority = AMD_SCHED_PRIORITY_UNSET; /* create context entity for each ring */ for (i = 0; i < adev->num_rings; i++) { @@ -360,6 +362,33 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, return fence; } +void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, 
+ enum amd_sched_priority priority) +{ + int i; + struct amdgpu_device *adev = ctx->adev; + struct amd_sched_rq *rq; + struct amd_sched_entity *entity; + struct amdgpu_ring *ring; + enum amd_sched_priority ctx_prio; + + ctx->override_priority = priority; + + ctx_prio = (ctx->override_priority == AMD_SCHED_PRIORITY_UNSET) ? + ctx->init_priority : ctx->override_priority; + + for (i = 0; i < adev->num_rings; i++) { + ring = adev->rings[i]; + entity = >rings[i].entity; + rq = >sched.sched_rq[ctx_prio]; + + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + continue; + + amd_sched_entity_set_rq(entity, rq); + } +} + void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) { mutex_init(>lock); -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 8/8] drm/amdgpu: add interface for editing a foreign process's priority v3
The AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE ioctls are used to set the priority of a different process in the current system. When a request is dropped, the process's contexts will be restored to the priority specified at context creation time. A request can be dropped by setting the override priority to AMDGPU_CTX_PRIORITY_UNSET. An fd is used to identify the remote process. This is simpler than passing a pid number, which is vulnerable to re-use, etc. This functionality is limited to DRM_MASTER since abuse of this interface can have a negative impact on the system's performance. v2: removed unused output structure v3: change refcounted interface for a regular set operation Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 21 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 109 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h | 34 ++ include/uapi/drm/amdgpu_drm.h | 17 + 6 files changed, 164 insertions(+), 21 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index faea634..f039d88 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ - amdgpu_queue_mgr.o + amdgpu_queue_mgr.o amdgpu_sched.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index bf05180..97aafc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -25,6 +25,7 @@ #include #include #include "amdgpu.h" +#include "amdgpu_sched.h" static int amdgpu_ctx_priority_permit(struct drm_file *filp, enum amd_sched_priority priority) @@ -220,26 +221,6 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, return 0; } -static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) -{ - switch (amdgpu_priority) { - case AMDGPU_CTX_PRIORITY_HIGH_HW: - return AMD_SCHED_PRIORITY_HIGH_HW; - case AMDGPU_CTX_PRIORITY_HIGH_SW: - return AMD_SCHED_PRIORITY_HIGH_SW; - case AMDGPU_CTX_PRIORITY_NORMAL: - return AMD_SCHED_PRIORITY_NORMAL; - case AMDGPU_CTX_PRIORITY_LOW_SW: - case AMDGPU_CTX_PRIORITY_LOW_HW: - return AMD_SCHED_PRIORITY_LOW; - case AMDGPU_CTX_PRIORITY_UNSET: - return AMD_SCHED_PRIORITY_UNSET; - default: - WARN(1, "Invalid context priority %d\n", amdgpu_priority); - return AMD_SCHED_PRIORITY_INVALID; - } -} - int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 12497a4..80483ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -28,6 +28,7 @@ #include #include "amdgpu.h" #include +#include "amdgpu_sched.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" @@ -1017,6 +1018,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER), DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), /* KMS */ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c new file mode 100644 index 000..cd12330 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -0,0 +1,109 @@ +/* + * Copyright 2017 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or se
[PATCH 1/8] drm/amdgpu: add parameter to allocate high priority contexts v11
Add a new context creation parameter to express a global context priority. The priority ranking in descending order is as follows: * AMDGPU_CTX_PRIORITY_HIGH_HW * AMDGPU_CTX_PRIORITY_HIGH_SW * AMDGPU_CTX_PRIORITY_NORMAL * AMDGPU_CTX_PRIORITY_LOW_SW * AMDGPU_CTX_PRIORITY_LOW_HW The driver will attempt to schedule work to the hardware according to the priorities. No latency or throughput guarantees are provided by this patch. This interface intends to service the EGL_IMG_context_priority extension, and vulkan equivalents. Setting a priority above NORMAL requires CAP_SYS_NICE or DRM_MASTER. v2: Instead of using flags, repurpose __pad v3: Swap enum values of _NORMAL _HIGH for backwards compatibility v4: Validate usermode priority and store it v5: Move priority validation into amdgpu_ctx_ioctl(), headline reword v6: add UAPI note regarding priorities requiring CAP_SYS_ADMIN v7: remove ctx->priority v8: added AMDGPU_CTX_PRIORITY_LOW, s/CAP_SYS_ADMIN/CAP_SYS_NICE v9: change the priority parameter to __s32 v10: split priorities into _SW and _HW v11: Allow DRM_MASTER without CAP_SYS_NICE Reviewed-by: Emil Velikov <emil.l.veli...@gmail.com> Reviewed-by: Christian König <christian.koe...@amd.com> Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 61 +-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 5 ++- include/uapi/drm/amdgpu_drm.h | 10 - 3 files changed, 70 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index a11e443..9ec85d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -23,13 +23,40 @@ */ #include +#include #include "amdgpu.h" -static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) +static int amdgpu_ctx_priority_permit(struct drm_file *filp, + enum amd_sched_priority priority) +{ + /* NORMAL and below are accessible by everyone */ + if (priority <= 
AMD_SCHED_PRIORITY_NORMAL) + return 0; + + if (capable(CAP_SYS_NICE)) + return 0; + + if (drm_is_current_master(filp)) + return 0; + + return -EACCES; +} + +static int amdgpu_ctx_init(struct amdgpu_device *adev, + enum amd_sched_priority priority, + struct drm_file *filp, + struct amdgpu_ctx *ctx) { unsigned i, j; int r; + if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX) + return -EINVAL; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + memset(ctx, 0, sizeof(*ctx)); ctx->adev = adev; kref_init(>refcount); @@ -51,7 +78,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) struct amdgpu_ring *ring = adev->rings[i]; struct amd_sched_rq *rq; - rq = >sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + rq = >sched.sched_rq[priority]; if (ring == >gfx.kiq.ring) continue; @@ -100,6 +127,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) static int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, + struct drm_file *filp, + enum amd_sched_priority priority, uint32_t *id) { struct amdgpu_ctx_mgr *mgr = >ctx_mgr; @@ -117,8 +146,9 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, kfree(ctx); return r; } + *id = (uint32_t)r; - r = amdgpu_ctx_init(adev, ctx); + r = amdgpu_ctx_init(adev, priority, filp, ctx); if (r) { idr_remove(>ctx_handles, *id); *id = 0; @@ -188,11 +218,30 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, return 0; } +static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) +{ + switch (amdgpu_priority) { + case AMDGPU_CTX_PRIORITY_HIGH_HW: + return AMD_SCHED_PRIORITY_HIGH_HW; + case AMDGPU_CTX_PRIORITY_HIGH_SW: + return AMD_SCHED_PRIORITY_HIGH_SW; + case AMDGPU_CTX_PRIORITY_NORMAL: + return AMD_SCHED_PRIORITY_NORMAL; + case AMDGPU_CTX_PRIORITY_LOW_SW: + case AMDGPU_CTX_PRIORITY_LOW_HW: + return AMD_SCHED_PRIORITY_LOW; + default: + WARN(1, "Invalid context priority %d\n", amdgpu_priority); + return AMD_SCHED_PRIORITY_NORMAL; + } +} + int 
amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { int r;
[PATCH 3/8] drm/amdgpu: implement ring set_priority for gfx_v8 compute v9
Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over other queues on the same pipe. Multiple queues on a pipe are timesliced so this gives us full precedence over other queues. Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the wave as follows: 0x2: CS_H 0x1: CS_M 0x0: CS_L The SPI block will then dispatch work according to the policy set by SPI_ARB_PRIORITY. In the current policy CS_H is higher priority than gfx. In order to prevent getting stuck in loops of resources bouncing between GFX and high priority compute and introducing further latency, we statically reserve a portion of the pipe. v2: fix srbm_select to ring->queue and use ring->funcs->type v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_* v4: switch int to enum amd_sched_priority v5: corresponding changes for srbm_lock v6: change CU reservation to PIPE_PERCENT allocation v7: use kiq instead of MMIO v8: back to MMIO, and make the implementation sleep safe. v9: corresponding changes for splitting HIGH into _HW/_SW Acked-by: Christian König <christian.koe...@amd.com> Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 105 + 3 files changed, 111 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b1e8cd9..7c6fca6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1114,6 +1114,10 @@ struct amdgpu_gfx { boolin_suspend; /* NGG */ struct amdgpu_ngg ngg; + + /* pipe reservation */ + struct mutexpipe_reserve_mutex; + DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); }; int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ae4387f..d7303d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2019,6 +2019,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->vm_manager.vm_pte_num_rings = 0; adev->gart.gart_funcs = NULL; adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); + bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); adev->smc_rreg = _invalid_rreg; adev->smc_wreg = _invalid_wreg; @@ -2047,6 +2048,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(>pm.mutex); mutex_init(>gfx.gpu_clock_mutex); mutex_init(>srbm_mutex); + mutex_init(>gfx.pipe_reserve_mutex); mutex_init(>grbm_idx_mutex); mutex_init(>mn_lock); hash_init(adev->mn_hash); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 1429242..bee5ef4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6461,6 +6461,110 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } +static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring, + bool acquire) +{ + struct amdgpu_device *adev = ring->adev; + int pipe_num, tmp, reg; + int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; + + pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; + + /* first me only has 2 entries, GFX and HP3D */ + if (ring->me > 0) + pipe_num -= 2; + + /* There is a bug in the GFX pipes that results in a HS +* deadlock if the pipe is restricted to a percentage +* lower than 17 */ + if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE && pipe_percent < 17) + pipe_percent = 17; + + reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num; + tmp = RREG32(reg); + tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); + WREG32(reg, tmp); +} + +static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + bool acquire) +{ + int i, pipe; + bool reserve; + struct amdgpu_ring *iring; + + mutex_lock(>gfx.pipe_reserve_mutex); + pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); + if (acquire) + set_bit(pipe, adev->gfx.pipe_reserve_bitmap); + else + clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); + + if (!bitmap_weigh
[PATCH] allow DRM_MASTER to change client's priorities v2
Updated with Christian's request to simplify the process priority override interface. Series available in the wip-process-priorities-v2 branch of: git://people.freedesktop.org/~lostgoat/linux ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 5/8] drm/amd/sched: allow clients to edit an entity's rq v2
This is useful for changing an entity's priority at runtime. v2: don't modify the order of amd_sched_entity members Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 26 +++--- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 +++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 38cea6f..0166620 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -133,6 +133,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, entity->rq = rq; entity->sched = sched; + spin_lock_init(>rq_lock); spin_lock_init(>queue_lock); r = kfifo_alloc(>job_queue, jobs * sizeof(void *), GFP_KERNEL); if (r) @@ -204,8 +205,6 @@ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity) void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity) { - struct amd_sched_rq *rq = entity->rq; - if (!amd_sched_entity_is_initialized(sched, entity)) return; @@ -215,7 +214,8 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, */ wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity)); - amd_sched_rq_remove_entity(rq, entity); + amd_sched_entity_set_rq(entity, NULL); + kfifo_free(>job_queue); } @@ -236,6 +236,24 @@ static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb dma_fence_put(f); } +void amd_sched_entity_set_rq(struct amd_sched_entity *entity, +struct amd_sched_rq *rq) +{ + if (entity->rq == rq) + return; + + spin_lock(>rq_lock); + + if (entity->rq) + amd_sched_rq_remove_entity(entity->rq, entity); + + entity->rq = rq; + if (rq) + amd_sched_rq_add_entity(rq, entity); + + spin_unlock(>rq_lock); +} + bool amd_sched_dependency_optimized(struct dma_fence* fence, struct amd_sched_entity *entity) { @@ -333,7 +351,9 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job) 
/* first job wakes up scheduler */ if (first) { /* Add the entity to the run queue */ + spin_lock(>rq_lock); amd_sched_rq_add_entity(entity->rq, entity); + spin_unlock(>rq_lock); amd_sched_wakeup(sched); } return added; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index da040bc..4b528f7 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -39,6 +39,7 @@ struct amd_sched_rq; struct amd_sched_entity { struct list_headlist; struct amd_sched_rq *rq; + spinlock_t rq_lock; struct amd_gpu_scheduler*sched; spinlock_t queue_lock; @@ -154,6 +155,8 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity); void amd_sched_entity_push_job(struct amd_sched_job *sched_job); +void amd_sched_entity_set_rq(struct amd_sched_entity *entity, +struct amd_sched_rq *rq); int amd_sched_fence_slab_init(void); void amd_sched_fence_slab_fini(void); -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 6/8] drm/amdgpu: introduce AMDGPU_CTX_PRIORITY_UNSET
Use _INVALID to identify bad parameters and _UNSET to represent the lack of interest in a specific value. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 ++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 ++- include/uapi/drm/amdgpu_drm.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index fbf15dd..e4de0fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -230,6 +230,8 @@ static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) case AMDGPU_CTX_PRIORITY_LOW_SW: case AMDGPU_CTX_PRIORITY_LOW_HW: return AMD_SCHED_PRIORITY_LOW; + case AMDGPU_CTX_PRIORITY_UNSET: + return AMD_SCHED_PRIORITY_UNSET; default: WARN(1, "Invalid context priority %d\n", amdgpu_priority); return AMD_SCHED_PRIORITY_INVALID; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 4b528f7..52c8e54 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -122,7 +122,8 @@ enum amd_sched_priority { AMD_SCHED_PRIORITY_HIGH_HW, AMD_SCHED_PRIORITY_KERNEL, AMD_SCHED_PRIORITY_MAX, - AMD_SCHED_PRIORITY_INVALID = -1 + AMD_SCHED_PRIORITY_INVALID = -1, + AMD_SCHED_PRIORITY_UNSET = -2 }; /** diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 6e2d92e..53aa9b5 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -163,6 +163,7 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_UNKNOWN_RESET 3 /* Context priority level */ +#define AMDGPU_CTX_PRIORITY_UNSET -2048 #define AMDGPU_CTX_PRIORITY_LOW_HW -1023 #define AMDGPU_CTX_PRIORITY_LOW_SW -512 #define AMDGPU_CTX_PRIORITY_NORMAL 0 -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/8] drm/amdgpu: add framework for HW specific priority settings v9
Add an initial framework for changing the HW priorities of rings. The framework allows requesting priority changes for the lifetime of an amdgpu_job. After the job completes the priority will decay to the next lowest priority for which a request is still valid. A new ring function set_priority() can now be populated to take care of the HW specific programming sequence for priority changes. v2: set priority before emitting IB, and take a ref on amdgpu_job v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_* v4: plug amdgpu_ring_restore_priority_cb into amdgpu_job_free_cb v5: use atomic for tracking job priorities instead of last_job v6: rename amdgpu_ring_priority_[get/put]() and align parameters v7: replace spinlocks with mutexes for KIQ compatibility v8: raise ring priority during cs_ioctl, instead of job_run v9: priority_get() before push_job() Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 76 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 15 ++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 7 +++ 5 files changed, 103 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index aeee684..2d2d59b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1102,6 +1102,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->uf_sequence = cs->out.handle; amdgpu_job_free_resources(job); amdgpu_cs_parser_fini(p, 0, true); + amdgpu_ring_priority_get(job->ring, +amd_sched_get_job_priority(>base)); trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(>base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 3d641e1..63b0f3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -101,6 +101,7 @@ static void 
amdgpu_job_free_cb(struct amd_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); + amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job)); dma_fence_put(job->fence); amdgpu_sync_free(>sync); amdgpu_sync_free(>dep_sync); @@ -137,6 +138,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, job->fence_ctx = entity->fence_context; *f = dma_fence_get(>base.s_fence->finished); amdgpu_job_free_resources(job); + amdgpu_ring_priority_get(job->ring, +amd_sched_get_job_priority(>base)); amd_sched_entity_push_job(>base); return 0; @@ -201,6 +204,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) /* if gpu reset, hw fence will be replaced here */ dma_fence_put(job->fence); job->fence = dma_fence_get(fence); + amdgpu_job_free_resources(job); return fence; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 75165e0..2d8b20a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -155,6 +155,75 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) } /** + * amdgpu_ring_priority_put - restore a ring's priority + * + * @ring: amdgpu_ring structure holding the information + * @priority: target priority + * + * Release a request for executing at @priority + */ +void amdgpu_ring_priority_put(struct amdgpu_ring *ring, + enum amd_sched_priority priority) +{ + int i; + + if (!ring->funcs->set_priority) + return; + + if (atomic_dec_return(>num_jobs[priority]) > 0) + return; + + /* no need to restore if the job is already at the lowest priority */ + if (priority == AMD_SCHED_PRIORITY_NORMAL) + return; + + mutex_lock(>priority_mutex); + /* something higher prio is executing, no need to decay */ + if (ring->priority > priority) + goto out_unlock; + + /* decay priority to the next level with a job available */ + for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) { + if (i == 
AMD_SCHED_PRIORITY_NORMAL + || atomic_read(>num_jobs[i])) { + ring->priority = i; + ring->funcs->set_priority(ring, i); + break; + } + } + +out_unlock: + mutex_unlock(>priority_mutex); +} + +/** + * amdgpu_ring_priority_get - change the ring's priority + * + * @ring: amdgpu_ring structure holding the information + *
Re: [PATCH 5/8] drm/amdgpu: move priority decay logic into amd_sched_priority_ctr
On 2017-06-26 09:32 AM, Christian König wrote: Sorry for the delay, back from a rather long sick leave today and trying to catch up with my work. Don't worry too much. Get well soon! Quick ping on the query above. The query can be summarized as: which ioctl interface do we prefer for the override? * AMDGPU_SCHED_OP_PROCESS_PRIORITY_GET/PUT - refcounted override or * AMDGPU_SCHED_OP_PROCESS_PRIORITY_SET - simple set Please keep it simple and stupid. No extra fiddling here, that stuff is complicated enough. So I would strongly suggest to have a simple set interface which the DRM master can use to set the priority. If we have multiple DRM masters which feel responsible for this they should sync up in userspace what to do. Sounds good to me. I'll send out a _SET version of this series. Regards, Andres Regards, Christian. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: disable vga render in dce hw_init
This fixes display regressions on my MSI RX480 Gaming X Tested-by: Andres Rodriguez <andre...@gmail.com> On 2017-06-19 05:29 PM, Alex Deucher wrote: This got dropped accidently with the fb location changes, but for some reason, this doesn't seem to cause an issue on all cards which is why I never saw it despite extensive testing. I suspect it may only be an issue on systems with a legacy sbios that enable vga. Signed-off-by: Alex Deucher <alexander.deuc...@amd.com> --- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 2 ++ 4 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 884f22f..712dfd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2894,6 +2894,8 @@ static int dce_v10_0_hw_init(void *handle) dce_v10_0_init_golden_registers(adev); + /* disable vga render */ + dce_v10_0_set_vga_render_state(adev, false); /* init dig PHYs, disp eng pll */ amdgpu_atombios_encoder_init_dig(adev); amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 15208a5..406fe66 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3010,6 +3010,8 @@ static int dce_v11_0_hw_init(void *handle) dce_v11_0_init_golden_registers(adev); + /* disable vga render */ + dce_v11_0_set_vga_render_state(adev, false); /* init dig PHYs, disp eng pll */ amdgpu_atombios_crtc_powergate_init(adev); amdgpu_atombios_encoder_init_dig(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index af0b96c..1e28f14 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2759,6 +2759,8 @@ static int dce_v6_0_hw_init(void 
*handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* disable vga render */ + dce_v6_0_set_vga_render_state(adev, false); /* init dig PHYs, disp eng pll */ amdgpu_atombios_encoder_init_dig(adev); amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index b7079f8..e34335f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2792,6 +2792,8 @@ static int dce_v8_0_hw_init(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* disable vga render */ + dce_v8_0_set_vga_render_state(adev, false); /* init dig PHYs, disp eng pll */ amdgpu_atombios_encoder_init_dig(adev); amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk); ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 5/8] drm/amdgpu: move priority decay logic into amd_sched_priority_ctr
On 2017-06-09 01:05 PM, Andres Rodriguez wrote: On 2017-06-09 06:54 AM, Christian König wrote: Am 09.06.2017 um 00:06 schrieb Andres Rodriguez: So that it can be re-used. A priority counter can be used to track priority requests. If no active requests exists, the counter will default to ()->default_priority. The re-factored version is now allowed to decay below NORMAL. This allows us to create requests that lower the priority. This didn't matter for rings, as no priority below NORMAL has an effect. NAK to the whole approach. This handling is amdgpu specific and not related to the gpu scheduler in any way. Hey Christian, I moved this to gpu_scheduler.c since it's original purpose is to track the scheduler's min_priority. Sorry I didn't provide any context for that in the original cover letter. In my previous patch, "[PATCH 3/3] drm/amdgpu: add a mechanism to acquire gpu exclusivity", the functions amd_sched_min_priority_get/put() are almost identical copies of amdgpu_ring_priority_get/put(). To remove the duplication I introduced amd_sched_priority_ctr. I later realized that I could re-use the same mechanism to track the context priority changes if I added a default_priority parameter. It also has the added benefit of keeping the requests refcounted. I agree the usage of amd_sched_priority_ctr seems a little overkill. I originally used the approach of combining a ctx->init_priority with a ctx->master_priority, and that was pretty simple. However, re-using a concept that was already implemented, instead of introducing a new one had its own arguments for simplicity as well. There is also one theoretical future scenario where the refcounting could be useful. Most VR apps output a 'mirror' window to the system compositor. Therefore they are clients of the system compositor and the VR compositor simultaneously. If both compositors were to use the ctx_set_priority() API on this app, the second request would override the first. 
With amd_sched_priority_ctr we would honor the highest of the two requests. In combination with the ability to set a minimum required priority to schedule gpu work, we can potentially run into undesired consequences. Anyways, I only have a slight preference for this approach. So if you'd like me to go back to the muxing of two priorities I'm happy to go for it (and move this patch to the followup series for min_priority tracking). Quick ping on the query above. The query can be summarized as: which ioctl interface do we prefer for the override? * AMDGPU_SCHED_OP_PROCESS_PRIORITY_GET/PUT - refcounted override or * AMDGPU_SCHED_OP_PROCESS_PRIORITY_SET - simple set Regards, Andres Regards, Andres Christian. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 69 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 6 +- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 122 ++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 21 + 4 files changed, 164 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 2d8b20a7..159ab0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -142,115 +142,86 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) /** * amdgpu_ring_undo - reset the wptr * * @ring: amdgpu_ring structure holding ring information * * Reset the driver's copy of the wptr (all asics). 
*/ void amdgpu_ring_undo(struct amdgpu_ring *ring) { ring->wptr = ring->wptr_old; if (ring->funcs->end_use) ring->funcs->end_use(ring); } +static void amdgpu_ring_priority_set(struct amd_sched_priority_ctr *p, + enum amd_sched_priority priority) +{ +struct amdgpu_ring *ring = container_of(p, struct amdgpu_ring, +priority_ctr); + +if (ring->funcs->set_priority) +ring->funcs->set_priority(ring, priority); +} + /** * amdgpu_ring_priority_put - restore a ring's priority * * @ring: amdgpu_ring structure holding the information * @priority: target priority * * Release a request for executing at @priority */ void amdgpu_ring_priority_put(struct amdgpu_ring *ring, enum amd_sched_priority priority) { -int i; - -if (!ring->funcs->set_priority) -return; - -if (atomic_dec_return(>num_jobs[priority]) > 0) -return; - -/* no need to restore if the job is already at the lowest priority */ -if (priority == AMD_SCHED_PRIORITY_NORMAL) -return; - -mutex_lock(>priority_mutex); -/* something higher prio is executing, no need to decay */ -if (ring->priority > priority) -goto out_unlock; - -/* decay priority to the next level wi
Re: [PATCH] drm/amd/sched: print sched job id in amd_sched_job trace
On 2017-06-13 04:14 PM, Nicolai Hähnle wrote: From: Nicolai Hähnle <nicolai.haeh...@amd.com> This makes it easier to correlate amd_sched_job with with other trace points that don't log the job pointer. Signed-off-by: Nicolai Hähnle <nicolai.haeh...@amd.com> --- drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h index dbd4fd3a..09c4230 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h @@ -12,36 +12,39 @@ #define TRACE_INCLUDE_FILE gpu_sched_trace TRACE_EVENT(amd_sched_job, TP_PROTO(struct amd_sched_job *sched_job), TP_ARGS(sched_job), TP_STRUCT__entry( __field(struct amd_sched_entity *, entity) __field(struct amd_sched_job *, sched_job) Change looks good. One minor suggestion would be to get rid of the sched_job pointer altogether. Since we have an id the sched_job* becomes superfluous, and it can lead to confusion if we hit the pointer re-use case. I think this is also the only trace that still prints the pointer, so it isn't actually very useful in the first place. 
With that change you can add: Reviewed-by: Andres Rodriguez <andre...@gmail.com> Regards, Andres __field(struct dma_fence *, fence) __field(const char *, name) +__field(uint64_t, id) __field(u32, job_count) __field(int, hw_job_count) ), TP_fast_assign( __entry->entity = sched_job->s_entity; __entry->sched_job = sched_job; + __entry->id = sched_job->id; __entry->fence = _job->s_fence->finished; __entry->name = sched_job->sched->name; __entry->job_count = kfifo_len( _job->s_entity->job_queue) / sizeof(sched_job); __entry->hw_job_count = atomic_read( _job->sched->hw_rq_count); ), - TP_printk("entity=%p, sched job=%p, fence=%p, ring=%s, job count:%u, hw job count:%d", - __entry->entity, __entry->sched_job, __entry->fence, __entry->name, + TP_printk("entity=%p, sched job=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", + __entry->entity, __entry->sched_job, __entry->id, + __entry->fence, __entry->name, __entry->job_count, __entry->hw_job_count) ); TRACE_EVENT(amd_sched_process_job, TP_PROTO(struct amd_sched_fence *fence), TP_ARGS(fence), TP_STRUCT__entry( __field(struct dma_fence *, fence) ), ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] drm/amdgpu/gfx: fix MEC interrupt enablement for pipes != 0
On 2017-06-09 11:16 PM, Deucher, Alexander wrote: -Original Message- From: Andres Rodriguez [mailto:andre...@gmail.com] Sent: Friday, June 09, 2017 7:49 PM To: Alex Deucher; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander Subject: Re: [PATCH 1/2] drm/amdgpu/gfx: fix MEC interrupt enablement for pipes != 0 I'm a little curious about the failures test cases. Is it related to a specific ASIC? Terrible performance on a range of gfx8 parts. >> Using CPC_INT_CNTL seemed to be working well for me on polaris10 (I was getting terrible perf on pipes 1+ without the original patch). Weird, not sure. Maybe the indexing works on Polaris. Maybe it only works on some parts and not others? This is also how current kfd and ROCM enable interrupts: https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/radeon/radeon_kfd.c#L441 https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/master/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c#L328 Might want to keep an eye on that if kfd ends up supporting those ASICs. Regards, Andres Alex Regards, Andres On 2017-06-09 08:49 AM, Alex Deucher wrote: The interrupt registers are not indexed. Fixes: 763a47b8e (drm/amdgpu: teach amdgpu how to enable interrupts for any pipe v3) Signed-off-by: Alex Deucher <alexander.deuc...@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 57 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 57 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 59 +-- 3 files changed, 124 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index e30c7d0..fb0a94c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -5015,28 +5015,51 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, int me, int pipe, enum amdgpu_interrupt_state state) { - /* Me 0 is for graphics and Me 2 is reserved for HW scheduling -* So we should only really be configuring ME 1 i.e. 
MEC0 + u32 mec_int_cntl, mec_int_cntl_reg; + + /* +* amdgpu controls only the first MEC. That's why this function only +* handles the setting of interrupts for this specific MEC. All other +* pipes' interrupts are set by amdkfd. */ - if (me != 1) { - DRM_ERROR("Ignoring request to enable interrupts for invalid me:%d\n", me); - return; - } - if (pipe >= adev->gfx.mec.num_pipe_per_mec) { - DRM_ERROR("Ignoring request to enable interrupts for invalid " - "me:%d pipe:%d\n", pipe, me); + if (me == 1) { + switch (pipe) { + case 0: + mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; + break; + case 1: + mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL; + break; + case 2: + mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL; + break; + case 3: + mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL; + break; + default: + DRM_DEBUG("invalid pipe %d\n", pipe); + return; + } + } else { + DRM_DEBUG("invalid me %d\n", me); return; } - mutex_lock(>srbm_mutex); - cik_srbm_select(adev, me, pipe, 0, 0); - - WREG32_FIELD(CPC_INT_CNTL, TIME_STAMP_INT_ENABLE, - state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); - - cik_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(>srbm_mutex); + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + case AMDGPU_IRQ_STATE_ENABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + default: + break; + } } static int gfx_v7_0_set_priv_reg_fault_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 6e541af..1a75ab1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6610,26 +6610,51 @@ static void gfx_v8_0_set_compute_eop_interrupt_state(struct
[PATCH 8/8] drm/amdgpu: add interface for editing a foreign process's priority v2
The AMDGPU_SCHED_OP_PROCESS_PRIORITY_GET/PUT ioctls are used to set the priority of a different process in the current system. When all requests are dropped, the foreign process's contexts will be restored to the priority specified at context creation time. An fd is used to identify the remote process. This is simpler than passing a pid number, which is vulnerable to re-use, etc. This functionality is limited to DRM_MASTER since abuse of this interface can have a negative impact on the system's performance. v2: removed unused output structure Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 19 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 117 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h | 34 + include/uapi/drm/amdgpu_drm.h | 18 + 6 files changed, 173 insertions(+), 19 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index b62d9e9..e4d3b07 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ - amdgpu_queue_mgr.o + amdgpu_queue_mgr.o amdgpu_sched.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index dca483f..0f1c174 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -25,6 +25,7 @@ #include #include #include "amdgpu.h" +#include "amdgpu_sched.h" static int amdgpu_ctx_priority_permit(struct 
drm_file *filp, enum amd_sched_priority priority) @@ -247,24 +248,6 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, return 0; } -static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) -{ - switch (amdgpu_priority) { - case AMDGPU_CTX_PRIORITY_HIGH_HW: - return AMD_SCHED_PRIORITY_HIGH_HW; - case AMDGPU_CTX_PRIORITY_HIGH_SW: - return AMD_SCHED_PRIORITY_HIGH_SW; - case AMDGPU_CTX_PRIORITY_NORMAL: - return AMD_SCHED_PRIORITY_NORMAL; - case AMDGPU_CTX_PRIORITY_LOW_SW: - case AMDGPU_CTX_PRIORITY_LOW_HW: - return AMD_SCHED_PRIORITY_LOW; - default: - WARN(1, "Invalid context priority %d\n", amdgpu_priority); - return AMD_SCHED_PRIORITY_INVALID; - } -} - int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 92e93b3..6a33c61 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -28,6 +28,7 @@ #include #include "amdgpu.h" #include +#include "amdgpu_sched.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" @@ -1016,6 +1017,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER), DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), /* KMS */ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c new file mode 100644 index 000..4966af2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -0,0 +1,117 @@ +/* + * Copyright 2017 Valve Corporation + * + * Permission is hereby granted, free of charge, to any 
person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in +
[PATCH 6/8] drm/amd/sched: allow clients to edit an entity's rq v2
This is useful for changing an entity's priority at runtime. v2: don't modify the order of amd_sched_entity members Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 26 +++--- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 +++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index a203736..c19bb85 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -133,6 +133,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, entity->rq = rq; entity->sched = sched; + spin_lock_init(>rq_lock); spin_lock_init(>queue_lock); r = kfifo_alloc(>job_queue, jobs * sizeof(void *), GFP_KERNEL); if (r) @@ -204,8 +205,6 @@ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity) void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity) { - struct amd_sched_rq *rq = entity->rq; - if (!amd_sched_entity_is_initialized(sched, entity)) return; @@ -215,7 +214,8 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, */ wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity)); - amd_sched_rq_remove_entity(rq, entity); + amd_sched_entity_set_rq(entity, NULL); + kfifo_free(>job_queue); } @@ -236,6 +236,24 @@ static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb dma_fence_put(f); } +void amd_sched_entity_set_rq(struct amd_sched_entity *entity, +struct amd_sched_rq *rq) +{ + if (entity->rq == rq) + return; + + spin_lock(>rq_lock); + + if (entity->rq) + amd_sched_rq_remove_entity(entity->rq, entity); + + entity->rq = rq; + if (rq) + amd_sched_rq_add_entity(rq, entity); + + spin_unlock(>rq_lock); +} + bool amd_sched_dependency_optimized(struct dma_fence* fence, struct amd_sched_entity *entity) { @@ -333,7 +351,9 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job) 
/* first job wakes up scheduler */ if (first) { /* Add the entity to the run queue */ + spin_lock(>rq_lock); amd_sched_rq_add_entity(entity->rq, entity); + spin_unlock(>rq_lock); amd_sched_wakeup(sched); } return added; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index b9283b5..558d3a3 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -40,6 +40,7 @@ struct amd_sched_priority_ctr; struct amd_sched_entity { struct list_headlist; struct amd_sched_rq *rq; + spinlock_t rq_lock; struct amd_gpu_scheduler*sched; spinlock_t queue_lock; @@ -166,6 +167,8 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity); void amd_sched_entity_push_job(struct amd_sched_job *sched_job); +void amd_sched_entity_set_rq(struct amd_sched_entity *entity, +struct amd_sched_rq *rq); int amd_sched_fence_slab_init(void); void amd_sched_fence_slab_fini(void); -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/8] drm/amdgpu: add framework for HW specific priority settings v9
Add an initial framework for changing the HW priorities of rings. The framework allows requesting priority changes for the lifetime of an amdgpu_job. After the job completes the priority will decay to the next lowest priority for which a request is still valid. A new ring function set_priority() can now be populated to take care of the HW specific programming sequence for priority changes. v2: set priority before emitting IB, and take a ref on amdgpu_job v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_* v4: plug amdgpu_ring_restore_priority_cb into amdgpu_job_free_cb v5: use atomic for tracking job priorities instead of last_job v6: rename amdgpu_ring_priority_[get/put]() and align parameters v7: replace spinlocks with mutexes for KIQ compatibility v8: raise ring priority during cs_ioctl, instead of job_run v9: priority_get() before push_job() Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 76 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 15 ++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 7 +++ 5 files changed, 103 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b17635c..6eda0f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1005,6 +1005,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->uf_sequence = cs->out.handle; amdgpu_job_free_resources(job); amdgpu_cs_parser_fini(p, 0, true); + amdgpu_ring_priority_get(job->ring, +amd_sched_get_job_priority(>base)); trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(>base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 3d641e1..63b0f3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -101,6 +101,7 @@ static void 
amdgpu_job_free_cb(struct amd_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); + amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job)); dma_fence_put(job->fence); amdgpu_sync_free(>sync); amdgpu_sync_free(>dep_sync); @@ -137,6 +138,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, job->fence_ctx = entity->fence_context; *f = dma_fence_get(>base.s_fence->finished); amdgpu_job_free_resources(job); + amdgpu_ring_priority_get(job->ring, +amd_sched_get_job_priority(>base)); amd_sched_entity_push_job(>base); return 0; @@ -201,6 +204,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) /* if gpu reset, hw fence will be replaced here */ dma_fence_put(job->fence); job->fence = dma_fence_get(fence); + amdgpu_job_free_resources(job); return fence; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 75165e0..2d8b20a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -155,6 +155,75 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) } /** + * amdgpu_ring_priority_put - restore a ring's priority + * + * @ring: amdgpu_ring structure holding the information + * @priority: target priority + * + * Release a request for executing at @priority + */ +void amdgpu_ring_priority_put(struct amdgpu_ring *ring, + enum amd_sched_priority priority) +{ + int i; + + if (!ring->funcs->set_priority) + return; + + if (atomic_dec_return(>num_jobs[priority]) > 0) + return; + + /* no need to restore if the job is already at the lowest priority */ + if (priority == AMD_SCHED_PRIORITY_NORMAL) + return; + + mutex_lock(>priority_mutex); + /* something higher prio is executing, no need to decay */ + if (ring->priority > priority) + goto out_unlock; + + /* decay priority to the next level with a job available */ + for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) { + if (i == 
AMD_SCHED_PRIORITY_NORMAL + || atomic_read(>num_jobs[i])) { + ring->priority = i; + ring->funcs->set_priority(ring, i); + break; + } + } + +out_unlock: + mutex_unlock(>priority_mutex); +} + +/** + * amdgpu_ring_priority_get - change the ring's priority + * + * @ring: amdgpu_ring structure holding the information + *
Re: [PATCH 7/8] drm/amdgpu: add plumbing for ctx priority changes
On 2017-06-09 07:11 AM, Christian König wrote: Am 09.06.2017 um 00:06 schrieb Andres Rodriguez: Introduce amdgpu_ctx_priority_get/put(). This a refcounted mechanism to change a context's priority. A context's priority will be set to the highest priority for which a request exists. If no active requests exist, the context will default to the priority requested at context allocation time. Note that the priority requested at allocation does not create a request, therefore it can be overridden by a get() request for a lower priority. So you just use the maximum priority between the per process and the per context priority? Not exactly. If we use the maximum of the two, then we won't be able to reduce the priority. As long as we have a valid process priority, it will force override the context priority. The actual priority of a context would be: priority = (process_prio != 'INVALID') ? process_prio : context_prio Where process_prio is the maximum level for which a DRM_MASTER has an active request. Sounds logical, since the per context priority can only be used by the DRM master as well. I think there is a small typo here (s/context/process). Based on your usage of the context priority in the next paragraph I think we are on the same page. But you don't need the complicated amd_sched_priority_ctr handling for this. Just add a min priority field into amdgpu_ctx_mgr and use that to override the context priority. Agreed. My initial implementation was pretty much the one line of code above. I just happened to have amd_sched_priority_ctr around for tracking the minimum scheduler priority, and it seems to fit this use case pretty well. Regards, Andres Christian. 
Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 43 + 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 04ea1b9..b998f42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -813,47 +813,53 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, struct amdgpu_ctx_ring { uint64_tsequence; struct dma_fence**fences; struct amd_sched_entityentity; }; struct amdgpu_ctx { struct krefrefcount; struct amdgpu_device*adev; struct amdgpu_queue_mgr queue_mgr; unsignedreset_counter; spinlock_tring_lock; struct dma_fence**fences; struct amdgpu_ctx_ringrings[AMDGPU_MAX_RINGS]; -bool preamble_presented; +bool preamble_presented; + +struct amd_sched_priority_ctrpriority_ctr; }; struct amdgpu_ctx_mgr { struct amdgpu_device*adev; struct mutexlock; /* protected by lock */ struct idrctx_handles; }; struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); int amdgpu_ctx_put(struct amdgpu_ctx *ctx); uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, struct dma_fence *fence); struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, uint64_t seq); +void amdgpu_ctx_priority_get(struct amdgpu_ctx *ctx, + enum amd_sched_priority priority); +void amdgpu_ctx_priority_put(struct amdgpu_ctx *ctx, + enum amd_sched_priority priority); int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); /* * file private structure */ struct amdgpu_fpriv { struct amdgpu_vmvm; struct amdgpu_bo_va*prt_va; struct mutexbo_list_lock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 0285eef..cc15b7e 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -29,30 +29,53 @@ static amdgpu_ctx_priority_permit(struct drm_file *filp, enum amd_sched_priority priority) { /* NORMAL and below are accessible by everyone */ if (priority <= AMD_SCHED_PRIORITY_NORMAL) return 0; if (capable(CAP_SYS_NICE)) return 0; if (drm_is_current_master(filp)) return 0; return -EACCES; } +static void amdgpu_ctx_priority_set(struct amd_sched_priority_ctr *p, +enum amd_sched_priority priority) +{ +int i; +struct amd_sched_rq *rq; +struct amd_sched_entity *entity; +struct amdgpu_ring *ring; +struct amdgpu_ctx *ctx = container_of(p, struct amdgpu_ctx, + priority_ctr); +struct amdgpu_device *adev = ctx->adev; +
Re: [PATCH 5/8] drm/amdgpu: move priority decay logic into amd_sched_priority_ctr
On 2017-06-09 06:54 AM, Christian König wrote: Am 09.06.2017 um 00:06 schrieb Andres Rodriguez: So that it can be re-used. A priority counter can be used to track priority requests. If no active requests exists, the counter will default to ()->default_priority. The re-factored version is now allowed to decay below NORMAL. This allows us to create requests that lower the priority. This didn't matter for rings, as no priority below NORMAL has an effect. NAK to the whole approach. This handling is amdgpu specific and not related to the gpu scheduler in any way. Hey Christian, I moved this to gpu_scheduler.c since it's original purpose is to track the scheduler's min_priority. Sorry I didn't provide any context for that in the original cover letter. In my previous patch, "[PATCH 3/3] drm/amdgpu: add a mechanism to acquire gpu exclusivity", the functions amd_sched_min_priority_get/put() are almost identical copies of amdgpu_ring_priority_get/put(). To remove the duplication I introduced amd_sched_priority_ctr. I later realized that I could re-use the same mechanism to track the context priority changes if I added a default_priority parameter. It also has the added benefit of keeping the requests refcounted. I agree the usage of amd_sched_priority_ctr seems a little overkill. I originally used the approach of combining a ctx->init_priority with a ctx->master_priority, and that was pretty simple. However, re-using a concept that was already implemented, instead of introducing a new one had its own arguments for simplicity as well. There is also one theoretical future scenario where the refcounting could be useful. Most VR apps output a 'mirror' window to the system compositor. Therefore they are clients of the system compositor and the VR compositor simultaneously. If both compositors were to use the ctx_set_priority() API on this app, the second request would override the first. With amd_sched_priority_ctr we would honor the highest of the two requests. 
In combination with the ability to set a minimum required priority to schedule gpu work, we can potentially run into undesired consequences. Anyways, I only have a slight preference for this approach. So if you'd like me to go back to the muxing of two priorities I'm happy to go for it (and move this patch to the followup series for min_priority tracking). Regards, Andres Christian. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 69 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 6 +- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 122 ++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 21 + 4 files changed, 164 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 2d8b20a7..159ab0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -142,115 +142,86 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) /** * amdgpu_ring_undo - reset the wptr * * @ring: amdgpu_ring structure holding ring information * * Reset the driver's copy of the wptr (all asics). 
*/ void amdgpu_ring_undo(struct amdgpu_ring *ring) { ring->wptr = ring->wptr_old; if (ring->funcs->end_use) ring->funcs->end_use(ring); } +static void amdgpu_ring_priority_set(struct amd_sched_priority_ctr *p, + enum amd_sched_priority priority) +{ +struct amdgpu_ring *ring = container_of(p, struct amdgpu_ring, +priority_ctr); + +if (ring->funcs->set_priority) +ring->funcs->set_priority(ring, priority); +} + /** * amdgpu_ring_priority_put - restore a ring's priority * * @ring: amdgpu_ring structure holding the information * @priority: target priority * * Release a request for executing at @priority */ void amdgpu_ring_priority_put(struct amdgpu_ring *ring, enum amd_sched_priority priority) { -int i; - -if (!ring->funcs->set_priority) -return; - -if (atomic_dec_return(>num_jobs[priority]) > 0) -return; - -/* no need to restore if the job is already at the lowest priority */ -if (priority == AMD_SCHED_PRIORITY_NORMAL) -return; - -mutex_lock(>priority_mutex); -/* something higher prio is executing, no need to decay */ -if (ring->priority > priority) -goto out_unlock; - -/* decay priority to the next level with a job available */ -for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) { -if (i == AMD_SCHED_PRIORITY_NORMAL -|| atomic_read(>num_jobs[i])) { -ring->priority = i; -ring->funcs->set_priority(ring, i); -break; -} -
Re: [PATCH] drm/amdgpu: add parameter to allocate high priority contexts v11
I had forgotten to squash some fixups to the original patch. This should be the correct one. Regards, Andres On 2017-06-08 06:20 PM, Andres Rodriguez wrote: Add a new context creation parameter to express a global context priority. The priority ranking in descending order is as follows: * AMDGPU_CTX_PRIORITY_HIGH_HW * AMDGPU_CTX_PRIORITY_HIGH_SW * AMDGPU_CTX_PRIORITY_NORMAL * AMDGPU_CTX_PRIORITY_LOW_SW * AMDGPU_CTX_PRIORITY_LOW_HW The driver will attempt to schedule work to the hardware according to the priorities. No latency or throughput guarantees are provided by this patch. This interface intends to service the EGL_IMG_context_priority extension, and vulkan equivalents. Setting a priority above NORMAL requires CAP_SYS_NICE or DRM_MASTER. v2: Instead of using flags, repurpose __pad v3: Swap enum values of _NORMAL _HIGH for backwards compatibility v4: Validate usermode priority and store it v5: Move priority validation into amdgpu_ctx_ioctl(), headline reword v6: add UAPI note regarding priorities requiring CAP_SYS_ADMIN v7: remove ctx->priority v8: added AMDGPU_CTX_PRIORITY_LOW, s/CAP_SYS_ADMIN/CAP_SYS_NICE v9: change the priority parameter to __s32 v10: split priorities into _SW and _HW v11: Allow DRM_MASTER without CAP_SYS_NICE Reviewed-by: Emil Velikov <emil.l.veli...@gmail.com> Reviewed-by: Christian König <christian.koe...@amd.com> Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 61 +-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 5 ++- include/uapi/drm/amdgpu_drm.h | 10 - 3 files changed, 70 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index a11e443..9ec85d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -11,59 +11,86 @@ * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: monk liu <monk@amd.com> */ #include +#include #include "amdgpu.h" -static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) +static int amdgpu_ctx_priority_permit(struct drm_file *filp, + enum amd_sched_priority priority) +{ + /* NORMAL and below are accessible by everyone */ + if (priority <= AMD_SCHED_PRIORITY_NORMAL) + return 0; + + if (capable(CAP_SYS_NICE)) + return 0; + + if (drm_is_current_master(filp)) + return 0; + + return -EACCES; +} + +static int amdgpu_ctx_init(struct amdgpu_device *adev, + enum amd_sched_priority priority, + struct drm_file *filp, + struct amdgpu_ctx *ctx) { unsigned i, j; int r; + if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX) + return -EINVAL; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + memset(ctx, 0, sizeof(*ctx)); ctx->adev = adev; kref_init(>refcount); spin_lock_init(>ring_lock); ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS, sizeof(struct dma_fence*), GFP_KERNEL); if (!ctx->fences) return -ENOMEM; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { ctx->rings[i].sequence = 1; ctx->rings[i].fences = >fences[amdgpu_sched_jobs * i]; } ctx->reset_counter = atomic_read(>gpu_reset_counter); /* create context entity for each ring */ for (i = 0; i < adev->num_rings; i++) { struct amdgpu_ring *ring = adev->rings[i]; struct amd_sched_rq *rq; - rq = >sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + rq = >sched.sched_rq[priority]; if (ring == >gfx.kiq.ring) continue; r = 
amd_sched_entity_init(>sched, >rings[i].entity, rq, amdgpu_sched_jobs); if (r) goto failed; } r = amdgpu_queue_mgr_init(adev, >queue_mgr); if (r) goto failed; return 0
[PATCH] drm/amdgpu: add parameter to allocate high priority contexts v11
Add a new context creation parameter to express a global context priority. The priority ranking in descending order is as follows: * AMDGPU_CTX_PRIORITY_HIGH_HW * AMDGPU_CTX_PRIORITY_HIGH_SW * AMDGPU_CTX_PRIORITY_NORMAL * AMDGPU_CTX_PRIORITY_LOW_SW * AMDGPU_CTX_PRIORITY_LOW_HW The driver will attempt to schedule work to the hardware according to the priorities. No latency or throughput guarantees are provided by this patch. This interface intends to service the EGL_IMG_context_priority extension, and vulkan equivalents. Setting a priority above NORMAL requires CAP_SYS_NICE or DRM_MASTER. v2: Instead of using flags, repurpose __pad v3: Swap enum values of _NORMAL _HIGH for backwards compatibility v4: Validate usermode priority and store it v5: Move priority validation into amdgpu_ctx_ioctl(), headline reword v6: add UAPI note regarding priorities requiring CAP_SYS_ADMIN v7: remove ctx->priority v8: added AMDGPU_CTX_PRIORITY_LOW, s/CAP_SYS_ADMIN/CAP_SYS_NICE v9: change the priority parameter to __s32 v10: split priorities into _SW and _HW v11: Allow DRM_MASTER without CAP_SYS_NICE Reviewed-by: Emil Velikov <emil.l.veli...@gmail.com> Reviewed-by: Christian König <christian.koe...@amd.com> Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 61 +-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 5 ++- include/uapi/drm/amdgpu_drm.h | 10 - 3 files changed, 70 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index a11e443..9ec85d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -11,59 +11,86 @@ * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: monk liu <monk@amd.com> */ #include +#include #include "amdgpu.h" -static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) +static int amdgpu_ctx_priority_permit(struct drm_file *filp, + enum amd_sched_priority priority) +{ + /* NORMAL and below are accessible by everyone */ + if (priority <= AMD_SCHED_PRIORITY_NORMAL) + return 0; + + if (capable(CAP_SYS_NICE)) + return 0; + + if (drm_is_current_master(filp)) + return 0; + + return -EACCES; +} + +static int amdgpu_ctx_init(struct amdgpu_device *adev, + enum amd_sched_priority priority, + struct drm_file *filp, + struct amdgpu_ctx *ctx) { unsigned i, j; int r; + if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX) + return -EINVAL; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + memset(ctx, 0, sizeof(*ctx)); ctx->adev = adev; kref_init(>refcount); spin_lock_init(>ring_lock); ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS, sizeof(struct dma_fence*), GFP_KERNEL); if (!ctx->fences) return -ENOMEM; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { ctx->rings[i].sequence = 1; ctx->rings[i].fences = >fences[amdgpu_sched_jobs * i]; } ctx->reset_counter = atomic_read(>gpu_reset_counter); /* create context entity for each ring */ for (i = 0; i < adev->num_rings; i++) { struct amdgpu_ring *ring = adev->rings[i]; struct amd_sched_rq *rq; - rq = >sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + rq = >sched.sched_rq[priority]; if (ring == >gfx.kiq.ring) continue; r = 
amd_sched_entity_init(>sched, >rings[i].entity, rq, amdgpu_sched_jobs); if (r) goto failed; } r = amdgpu_queue_mgr_init(adev, >queue_mgr); if (r) goto failed; return 0; @@ -88,49 +115,52 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
[PATCH 7/8] drm/amdgpu: add plumbing for ctx priority changes
Introduce amdgpu_ctx_priority_get/put(). This a refcounted mechanism to change a context's priority. A context's priority will be set to the highest priority for which a request exists. If no active requests exist, the context will default to the priority requested at context allocation time. Note that the priority requested at allocation does not create a request, therefore it can be overridden by a get() request for a lower priority. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 43 + 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 04ea1b9..b998f42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -813,47 +813,53 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, struct amdgpu_ctx_ring { uint64_tsequence; struct dma_fence**fences; struct amd_sched_entity entity; }; struct amdgpu_ctx { struct kref refcount; struct amdgpu_device*adev; struct amdgpu_queue_mgr queue_mgr; unsignedreset_counter; spinlock_t ring_lock; struct dma_fence**fences; struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; - bool preamble_presented; + boolpreamble_presented; + + struct amd_sched_priority_ctr priority_ctr; }; struct amdgpu_ctx_mgr { struct amdgpu_device*adev; struct mutexlock; /* protected by lock */ struct idr ctx_handles; }; struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); int amdgpu_ctx_put(struct amdgpu_ctx *ctx); uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, struct dma_fence *fence); struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, uint64_t seq); +void amdgpu_ctx_priority_get(struct amdgpu_ctx *ctx, +enum amd_sched_priority priority); +void amdgpu_ctx_priority_put(struct amdgpu_ctx *ctx, +enum amd_sched_priority priority); int 
amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); /* * file private structure */ struct amdgpu_fpriv { struct amdgpu_vmvm; struct amdgpu_bo_va *prt_va; struct mutexbo_list_lock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 0285eef..cc15b7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -29,30 +29,53 @@ static amdgpu_ctx_priority_permit(struct drm_file *filp, enum amd_sched_priority priority) { /* NORMAL and below are accessible by everyone */ if (priority <= AMD_SCHED_PRIORITY_NORMAL) return 0; if (capable(CAP_SYS_NICE)) return 0; if (drm_is_current_master(filp)) return 0; return -EACCES; } +static void amdgpu_ctx_priority_set(struct amd_sched_priority_ctr *p, + enum amd_sched_priority priority) +{ + int i; + struct amd_sched_rq *rq; + struct amd_sched_entity *entity; + struct amdgpu_ring *ring; + struct amdgpu_ctx *ctx = container_of(p, struct amdgpu_ctx, + priority_ctr); + struct amdgpu_device *adev = ctx->adev; + + for (i = 0; i < adev->num_rings; i++) { + ring = adev->rings[i]; + entity = >rings[i].entity; + rq = >sched.sched_rq[priority]; + + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + continue; + + amd_sched_entity_set_rq(entity, rq); + } +} + static int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority priority, struct drm_file *filp, struct amdgpu_ctx *ctx) { unsigned i, j; int r; if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX) return -EINVAL; r = amdgpu_ctx_priority_permit(filp, priority); if (r) return r; @@ -76,53 +99,59 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, for (i = 0; i < adev->num_rings; i++) { struct amdgpu_ring *ring = adev->rings[i];
[PATCH 8/8] drm/amdgpu: add interface for editing a foreign process's priority
The AMDGPU_SCHED_OP_PROCESS_PRIORITY_GET/PUT ioctls are used to set the priority of a different process in the current system. When all requests are dropped, the foreign process's contexts will be restored to the priority specified at context creation time. An fd is used to identify the remote process. This is simpler than passing a pid number, which is vulnerable to re-use, etc. This functionality is limited to DRM_MASTER since abuse of this interface can have a negative impact on the system's performance. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 19 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 117 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h | 34 + include/uapi/drm/amdgpu_drm.h | 25 +++ 6 files changed, 180 insertions(+), 19 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index b62d9e9..e4d3b07 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -13,31 +13,31 @@ ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \ amdgpu-y := amdgpu_drv.o # add KMS driver amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_atombios.o atombios_crtc.o amdgpu_connectors.o \ atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \ amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \ amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \ amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ - amdgpu_queue_mgr.o + 
amdgpu_queue_mgr.o amdgpu_sched.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \ amdgpu_amdkfd_gfx_v7.o amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o # add GMC block amdgpu-y += \ gmc_v7_0.o \ gmc_v8_0.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index cc15b7e..a578557 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -12,30 +12,31 @@ * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: monk liu <monk@amd.com> */ #include #include "amdgpu.h" +#include "amdgpu_sched.h" static amdgpu_ctx_priority_permit(struct drm_file *filp, enum amd_sched_priority priority) { /* NORMAL and below are accessible by everyone */ if (priority <= AMD_SCHED_PRIORITY_NORMAL) return 0; if (capable(CAP_SYS_NICE)) return 0; if (drm_is_current_master(filp)) return 0; return -EACCES; @@ -234,48 +235,30 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, out->state.hangs = 0x0; /* determine if a GPU reset has occured since the last call */ reset_counter = atomic_read(>gpu_reset_counter); /* TODO: this should ideally return NO, GUILTY, or INNOCENT. 
*/ if (ctx->reset_counter == reset_counter) out->state.reset_status = AMDGPU_CTX_NO_RESET; else out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET; ctx->reset_counter = reset_counter; mutex_unlock(>lock); return 0; } -static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) -{ - switch (amdgpu_priority) { - case AMDGPU_CTX_PRIORITY_HIGH_HW: - return AMD_SCHED_PRIORITY_HIGH_HW; - case AMDGPU_CTX_PRIORITY_HIGH_SW: - return AMD_SCHED_PRIORITY_HIGH_S
[PATCH] allow DRM_MASTER to change client's priorities
Hey Everyone, This series is the first spin-off from my RFC "Exclusive gpu access for SteamVR usecases". It also includes an updated version of my priority patchset. Some notable changes to the old priority patches: * Patch 1: DRM_MASTER is now allowed to allocate high priority contexts * Patch 1: Split HIGH into HIGH_SW and HIGH_HW, same for LOW_* * Patch 2: raise ring priority when a job is submitted, instead of job_run I've also fixed the problem pointed out in the RFC thread. When all requests to change a process's priority end, that process's contexts will return to the priority specified at allocation time. Changes to raise the minimum required priority for the scheduler to schedule a job will follow in a separate series. Series available in the wip-process-priorities-v1 branch of: git://people.freedesktop.org/~lostgoat/linux ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 6/8] drm/amd/sched: allow clients to edit an entity's rq
This is useful for changing an entity's priority at runtime. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 26 +++--- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 6 +- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index a203736..c19bb85 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -121,30 +121,31 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq) int amd_sched_entity_init(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity, struct amd_sched_rq *rq, uint32_t jobs) { int r; if (!(sched && entity && rq)) return -EINVAL; memset(entity, 0, sizeof(struct amd_sched_entity)); INIT_LIST_HEAD(>list); entity->rq = rq; entity->sched = sched; + spin_lock_init(>rq_lock); spin_lock_init(>queue_lock); r = kfifo_alloc(>job_queue, jobs * sizeof(void *), GFP_KERNEL); if (r) return r; atomic_set(>fence_seq, 0); entity->fence_context = dma_fence_context_alloc(2); return 0; } /** * Query if entity is initialized * * @sched Pointer to scheduler instance @@ -192,62 +193,79 @@ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity) return true; } /** * Destroy a context entity * * @sched Pointer to scheduler instance * @entity The pointer to a valid scheduler entity * * Cleanup and free the allocated resources. 
*/ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity) { - struct amd_sched_rq *rq = entity->rq; - if (!amd_sched_entity_is_initialized(sched, entity)) return; /** * The client will not queue more IBs during this fini, consume existing * queued IBs */ wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity)); - amd_sched_rq_remove_entity(rq, entity); + amd_sched_entity_set_rq(entity, NULL); + kfifo_free(>job_queue); } static void amd_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb) { struct amd_sched_entity *entity = container_of(cb, struct amd_sched_entity, cb); entity->dependency = NULL; dma_fence_put(f); amd_sched_wakeup(entity->sched); } static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb *cb) { struct amd_sched_entity *entity = container_of(cb, struct amd_sched_entity, cb); entity->dependency = NULL; dma_fence_put(f); } +void amd_sched_entity_set_rq(struct amd_sched_entity *entity, +struct amd_sched_rq *rq) +{ + if (entity->rq == rq) + return; + + spin_lock(>rq_lock); + + if (entity->rq) + amd_sched_rq_remove_entity(entity->rq, entity); + + entity->rq = rq; + if (rq) + amd_sched_rq_add_entity(rq, entity); + + spin_unlock(>rq_lock); +} + bool amd_sched_dependency_optimized(struct dma_fence* fence, struct amd_sched_entity *entity) { struct amd_gpu_scheduler *sched = entity->sched; struct amd_sched_fence *s_fence; if (!fence || dma_fence_is_signaled(fence)) return false; if (fence->context == entity->fence_context) return true; s_fence = to_amd_sched_fence(fence); if (s_fence && s_fence->sched == sched) return true; return false; @@ -321,31 +339,33 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job) struct amd_sched_entity *entity = sched_job->s_entity; bool added, first = false; spin_lock(>queue_lock); added = kfifo_in(>job_queue, _job, sizeof(sched_job)) == sizeof(sched_job); if (added && kfifo_len(>job_queue) == sizeof(sched_job)) first = true; 
spin_unlock(&entity->queue_lock); /* first job wakes up scheduler */ if (first) { /* Add the entity to the run queue */ + spin_lock(&entity->rq_lock); amd_sched_rq_add_entity(entity->rq, entity); + spin_unlock(&entity->rq_lock); amd_sched_wakeup(sched); } return added; } /* job_finish is called after hw fence signaled, and * the job had already been deleted from ring_mirror_list */ stati
[PATCH 5/8] drm/amdgpu: move priority decay logic into amd_sched_priority_ctr
So that it can be re-used. A priority counter can be used to track priority requests. If no active requests exists, the counter will default to ()->default_priority. The re-factored version is now allowed to decay below NORMAL. This allows us to create requests that lower the priority. This didn't matter for rings, as no priority below NORMAL has an effect. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 69 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 6 +- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 122 ++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 21 + 4 files changed, 164 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 2d8b20a7..159ab0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -142,115 +142,86 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) /** * amdgpu_ring_undo - reset the wptr * * @ring: amdgpu_ring structure holding ring information * * Reset the driver's copy of the wptr (all asics). 
*/ void amdgpu_ring_undo(struct amdgpu_ring *ring) { ring->wptr = ring->wptr_old; if (ring->funcs->end_use) ring->funcs->end_use(ring); } +static void amdgpu_ring_priority_set(struct amd_sched_priority_ctr *p, +enum amd_sched_priority priority) +{ + struct amdgpu_ring *ring = container_of(p, struct amdgpu_ring, + priority_ctr); + + if (ring->funcs->set_priority) + ring->funcs->set_priority(ring, priority); +} + /** * amdgpu_ring_priority_put - restore a ring's priority * * @ring: amdgpu_ring structure holding the information * @priority: target priority * * Release a request for executing at @priority */ void amdgpu_ring_priority_put(struct amdgpu_ring *ring, enum amd_sched_priority priority) { - int i; - - if (!ring->funcs->set_priority) - return; - - if (atomic_dec_return(>num_jobs[priority]) > 0) - return; - - /* no need to restore if the job is already at the lowest priority */ - if (priority == AMD_SCHED_PRIORITY_NORMAL) - return; - - mutex_lock(>priority_mutex); - /* something higher prio is executing, no need to decay */ - if (ring->priority > priority) - goto out_unlock; - - /* decay priority to the next level with a job available */ - for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) { - if (i == AMD_SCHED_PRIORITY_NORMAL - || atomic_read(>num_jobs[i])) { - ring->priority = i; - ring->funcs->set_priority(ring, i); - break; - } - } - -out_unlock: - mutex_unlock(>priority_mutex); + if (ring->funcs->set_priority) + amd_sched_priority_ctr_put(>priority_ctr, priority); } /** * amdgpu_ring_priority_get - change the ring's priority * * @ring: amdgpu_ring structure holding the information * @priority: target priority * * Request a ring's priority to be raised to @priority (refcounted). 
*/ void amdgpu_ring_priority_get(struct amdgpu_ring *ring, enum amd_sched_priority priority) { - if (!ring->funcs->set_priority) - return; - - atomic_inc(>num_jobs[priority]); - - mutex_lock(>priority_mutex); - if (priority <= ring->priority) - goto out_unlock; - - ring->priority = priority; - ring->funcs->set_priority(ring, priority); - -out_unlock: - mutex_unlock(>priority_mutex); + if (ring->funcs->set_priority) + amd_sched_priority_ctr_get(>priority_ctr, priority); } /** * amdgpu_ring_init - init driver ring struct. * * @adev: amdgpu_device pointer * @ring: amdgpu_ring structure holding ring information * @max_ndw: maximum number of dw for ring alloc * @nop: nop packet for this ring * * Initialize the driver information for the selected ring (all asics). * Returns 0 on success, error on failure. */ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned max_dw, struct amdgpu_irq_src *irq_src, unsigned irq_type) { - int r, i; + int r; if (ring->adev == NULL) { if (adev->num_rings >= AMDGPU_MAX_RINGS) return -EINVAL; ring->adev = adev; ring->idx = adev->num_rings++; adev->rings[ri
[PATCH 4/8] drm/amdgpu: make amdgpu_to_sched_priority detect invalid parameters
Returning invalid priorities as _NORMAL is a backwards compatibility quirk of amdgpu_ctx_ioctl(). Move this detail one layer up where it belongs. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 +--- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 68a852f..0285eef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -219,51 +219,53 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) { switch (amdgpu_priority) { case AMDGPU_CTX_PRIORITY_HIGH_HW: return AMD_SCHED_PRIORITY_HIGH_HW; case AMDGPU_CTX_PRIORITY_HIGH_SW: return AMD_SCHED_PRIORITY_HIGH_SW; case AMDGPU_CTX_PRIORITY_NORMAL: return AMD_SCHED_PRIORITY_NORMAL; case AMDGPU_CTX_PRIORITY_LOW_SW: case AMDGPU_CTX_PRIORITY_LOW_HW: return AMD_SCHED_PRIORITY_LOW; default: WARN(1, "Invalid context priority %d\n", amdgpu_priority); - return AMD_SCHED_PRIORITY_NORMAL; + return AMD_SCHED_PRIORITY_INVALID; } } int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { int r; uint32_t id; enum amd_sched_priority priority; union drm_amdgpu_ctx *args = data; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_fpriv *fpriv = filp->driver_priv; r = 0; id = args->in.ctx_id; priority = amdgpu_to_sched_priority(args->in.priority); - if (priority >= AMD_SCHED_PRIORITY_MAX) - return -EINVAL; + /* For backwards compatibility reasons, we need to accept +* ioctls with garbage in the priority field */ + if (priority == AMD_SCHED_PRIORITY_INVALID) + priority = AMD_SCHED_PRIORITY_NORMAL; switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: r = amdgpu_ctx_alloc(adev, fpriv, priority, filp, ); args->out.alloc.ctx_id = id; break; case AMDGPU_CTX_OP_FREE_CTX: r = 
amdgpu_ctx_free(fpriv, id); break; case AMDGPU_CTX_OP_QUERY_STATE: r = amdgpu_ctx_query(adev, fpriv, id, >out); break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index dbcaa2e..da040bc 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -108,31 +108,32 @@ static inline bool amd_sched_invalidate_job(struct amd_sched_job *s_job, int thr */ struct amd_sched_backend_ops { struct dma_fence *(*dependency)(struct amd_sched_job *sched_job); struct dma_fence *(*run_job)(struct amd_sched_job *sched_job); void (*timedout_job)(struct amd_sched_job *sched_job); void (*free_job)(struct amd_sched_job *sched_job); }; enum amd_sched_priority { AMD_SCHED_PRIORITY_MIN, AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN, AMD_SCHED_PRIORITY_NORMAL, AMD_SCHED_PRIORITY_HIGH_SW, AMD_SCHED_PRIORITY_HIGH_HW, AMD_SCHED_PRIORITY_KERNEL, - AMD_SCHED_PRIORITY_MAX + AMD_SCHED_PRIORITY_MAX, + AMD_SCHED_PRIORITY_INVALID = -1 }; /** * One scheduler is implemented for each hardware ring */ struct amd_gpu_scheduler { const struct amd_sched_backend_ops *ops; uint32_thw_submission_limit; longtimeout; const char *name; struct amd_sched_rq sched_rq[AMD_SCHED_PRIORITY_MAX]; wait_queue_head_t wake_up_worker; wait_queue_head_t job_scheduled; atomic_thw_rq_count; atomic64_t job_id_count; -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/8] drm/amdgpu: implement ring set_priority for gfx_v8 compute v9
Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over other queues on the same pipe. Multiple queues on a pipe are timesliced so this gives us full precedence over other queues. Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the wave as follows: 0x2: CS_H 0x1: CS_M 0x0: CS_L The SPI block will then dispatch work according to the policy set by SPI_ARB_PRIORITY. In the current policy CS_H is higher priority than gfx. In order to prevent getting stuck in loops of resources bouncing between GFX and high priority compute and introducing further latency, we statically reserve a portion of the pipe. v2: fix srbm_select to ring->queue and use ring->funcs->type v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_* v4: switch int to enum amd_sched_priority v5: corresponding changes for srbm_lock v6: change CU reservation to PIPE_PERCENT allocation v7: use kiq instead of MMIO v8: back to MMIO, and make the implementation sleep safe. v9: corresponding changes for splitting HIGH into _HW/_SW Acked-by: Christian König <christian.koe...@amd.com> Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 105 + 3 files changed, 111 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3a0561c..04ea1b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1097,30 +1097,34 @@ struct amdgpu_gfx { /* gfx status */ uint32_tgfx_current_status; /* ce ram size*/ unsignedce_ram_size; struct amdgpu_cu_info cu_info; const struct amdgpu_gfx_funcs *funcs; /* reset mask */ uint32_tgrbm_soft_reset; uint32_tsrbm_soft_reset; boolin_reset; /* s3/s4 mask */ boolin_suspend; /* NGG */ struct amdgpu_ngg ngg; + + /* pipe reservation */ + struct mutexpipe_reserve_mutex; + DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); }; int 
amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, struct amdgpu_ib *ib); void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct dma_fence *f); int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_ib *ibs, struct amdgpu_job *job, struct dma_fence **f); int amdgpu_ib_pool_init(struct amdgpu_device *adev); void amdgpu_ib_pool_fini(struct amdgpu_device *adev); int amdgpu_ib_ring_tests(struct amdgpu_device *adev); /* * CS. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0296c9e..424ac3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2012,58 +2012,60 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->dev = >dev; adev->ddev = ddev; adev->pdev = pdev; adev->flags = flags; adev->asic_type = flags & AMD_ASIC_MASK; adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; adev->mc.gtt_size = 512 * 1024 * 1024; adev->accel_working = false; adev->num_rings = 0; adev->mman.buffer_funcs = NULL; adev->mman.buffer_funcs_ring = NULL; adev->vm_manager.vm_pte_funcs = NULL; adev->vm_manager.vm_pte_num_rings = 0; adev->gart.gart_funcs = NULL; adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); + bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); adev->smc_rreg = _invalid_rreg; adev->smc_wreg = _invalid_wreg; adev->pcie_rreg = _invalid_rreg; adev->pcie_wreg = _invalid_wreg; adev->pciep_rreg = _invalid_rreg; adev->pciep_wreg = _invalid_wreg; adev->uvd_ctx_rreg = _invalid_rreg; adev->uvd_ctx_wreg = _invalid_wreg; adev->didt_rreg = _invalid_rreg; adev->didt_wreg = _invalid_wreg; adev->gc_cac_rreg = _invalid_rreg; adev->gc_cac_wreg = _invalid_wreg; adev->audio_endpt_rreg = _block_invalid_rreg; adev->audio_endpt_wreg = _block_invalid_wreg; DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", amdgpu_asic_name[adev->asic_type], 
pdev->vendor, pdev->device, pdev->subsystem_vendor
[PATCH 1/8] drm/amdgpu: add parameter to allocate high priority contexts v11
Add a new context creation parameter to express a global context priority. The priority ranking in descending order is as follows: * AMDGPU_CTX_PRIORITY_HIGH_HW * AMDGPU_CTX_PRIORITY_HIGH_SW * AMDGPU_CTX_PRIORITY_NORMAL * AMDGPU_CTX_PRIORITY_LOW_SW * AMDGPU_CTX_PRIORITY_LOW_HW The driver will attempt to schedule work to the hardware according to the priorities. No latency or throughput guarantees are provided by this patch. This interface intends to service the EGL_IMG_context_priority extension, and vulkan equivalents. Setting a priority above NORMAL requires CAP_SYS_NICE or DRM_MASTER. v2: Instead of using flags, repurpose __pad v3: Swap enum values of _NORMAL _HIGH for backwards compatibility v4: Validate usermode priority and store it v5: Move priority validation into amdgpu_ctx_ioctl(), headline reword v6: add UAPI note regarding priorities requiring CAP_SYS_ADMIN v7: remove ctx->priority v8: added AMDGPU_CTX_PRIORITY_LOW, s/CAP_SYS_ADMIN/CAP_SYS_NICE v9: change the priority parameter to __s32 v10: split priorities into _SW and _HW v11: Allow DRM_MASTER without CAP_SYS_NICE Reviewed-by: Emil Velikov <emil.l.veli...@gmail.com> Reviewed-by: Christian König <christian.koe...@amd.com> Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 60 +-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 5 ++- include/uapi/drm/amdgpu_drm.h | 10 - 3 files changed, 69 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index a11e443..68a852f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -13,57 +13,83 @@ * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: monk liu <monk@amd.com> */ #include #include "amdgpu.h" -static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) +static amdgpu_ctx_priority_permit(struct drm_file *filp, + enum amd_sched_priority priority) +{ + /* NORMAL and below are accessible by everyone */ + if (priority <= AMD_SCHED_PRIORITY_NORMAL) + return 0; + + if (capable(CAP_SYS_NICE)) + return 0; + + if (drm_is_current_master(filp)) + return 0; + + return -EACCES; +} + +static int amdgpu_ctx_init(struct amdgpu_device *adev, + enum amd_sched_priority priority, + struct drm_file *filp, + struct amdgpu_ctx *ctx) { unsigned i, j; int r; + if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX) + return -EINVAL; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + memset(ctx, 0, sizeof(*ctx)); ctx->adev = adev; kref_init(>refcount); spin_lock_init(>ring_lock); ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS, sizeof(struct dma_fence*), GFP_KERNEL); if (!ctx->fences) return -ENOMEM; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { ctx->rings[i].sequence = 1; ctx->rings[i].fences = >fences[amdgpu_sched_jobs * i]; } ctx->reset_counter = atomic_read(>gpu_reset_counter); /* create context entity for each ring */ for (i = 0; i < adev->num_rings; i++) { struct amdgpu_ring *ring = adev->rings[i]; struct amd_sched_rq *rq; - rq = >sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + rq = >sched.sched_rq[priority]; if (ring == >gfx.kiq.ring) continue; r = amd_sched_entity_init(>sched, >rings[i].entity, rq, amdgpu_sched_jobs); if (r) goto failed; } r = amdgpu_queue_mgr_init(adev, >queue_mgr); if (r) goto failed; return 0; @@ -88,49 +114,52 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) for (i = 
0; i < AMDGPU_MAX_RINGS; ++i) for (j = 0; j < amdgpu_sched_jobs; ++j) dma_fence_put(ctx->rings[i].fences[j]); kfree(ctx->fences);
[PATCH 2/8] drm/amdgpu: add framework for HW specific priority settings v8
Add an initial framework for changing the HW priorities of rings. The framework allows requesting priority changes for the lifetime of an amdgpu_job. After the job completes the priority will decay to the next lowest priority for which a request is still valid. A new ring function set_priority() can now be populated to take care of the HW specific programming sequence for priority changes. v2: set priority before emitting IB, and take a ref on amdgpu_job v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_* v4: plug amdgpu_ring_restore_priority_cb into amdgpu_job_free_cb v5: use atomic for tracking job priorities instead of last_job v6: rename amdgpu_ring_priority_[get/put]() and align parameters v7: replace spinlocks with mutexes for KIQ compatibility v8: raise ring priority during cs_ioctl, instead of job_run Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 76 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 15 ++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 7 +++ 5 files changed, 103 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b17635c..d84d026 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -996,30 +996,32 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) { amdgpu_job_free(job); return r; } job->owner = p->filp; job->fence_ctx = entity->fence_context; p->fence = dma_fence_get(>base.s_fence->finished); cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); job->uf_sequence = cs->out.handle; amdgpu_job_free_resources(job); amdgpu_cs_parser_fini(p, 0, true); trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(>base); + amdgpu_ring_priority_get(job->ring, +amd_sched_get_job_priority(>base)); return 0; } int amdgpu_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_cs *cs = data; struct amdgpu_cs_parser parser = {}; bool reserved_buffers = false; int i, r; if (!adev->accel_working) return -EBUSY; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 3d641e1..f10ce0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -89,30 +89,31 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) { struct dma_fence *f; unsigned i; /* use sched fence if available */ f = job->base.s_fence ? >base.s_fence->finished : job->fence; for (i = 0; i < job->num_ibs; ++i) amdgpu_ib_free(job->adev, >ibs[i], f); } static void amdgpu_job_free_cb(struct amd_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); + amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job)); dma_fence_put(job->fence); amdgpu_sync_free(>sync); amdgpu_sync_free(>dep_sync); amdgpu_sync_free(>sched_sync); kfree(job); } void amdgpu_job_free(struct amdgpu_job *job) { amdgpu_job_free_resources(job); dma_fence_put(job->fence); amdgpu_sync_free(>sync); amdgpu_sync_free(>dep_sync); amdgpu_sync_free(>sched_sync); @@ -126,30 +127,32 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, int r; job->ring = ring; if (!f) return -EINVAL; r = amd_sched_job_init(>base, >sched, entity, owner); if (r) return r; job->owner = owner; job->fence_ctx = entity->fence_context; *f = dma_fence_get(>base.s_fence->finished); amdgpu_job_free_resources(job); amd_sched_entity_push_job(>base); + amdgpu_ring_priority_get(job->ring, +amd_sched_get_job_priority(>base)); return 0; } static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) { struct amdgpu_job *job = to_amdgpu_job(sched_job); struct amdgpu_vm *vm = job->vm; struct dma_fence *fence = amdgpu_sync_get_fence(>dep_sync); int r; 
if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) { r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence);
Re: [PATCH] drm/amdgpu/gfx8: drop per-APU CU limits
On 2017-05-31 10:16 AM, Alex Deucher wrote: Always use the max for the family rather than the per sku limits. This makes sure the mask is always the max size to avoid reporting the wrong number of CUs. Cc: sta...@vger.kernel.org Signed-off-by: Alex Deucher <alexander.deuc...@amd.com> Reviewed-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 71 +-- 1 file changed, 2 insertions(+), 69 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 58cc585..b596486 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1915,46 +1915,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.max_tile_pipes = 2; adev->gfx.config.max_sh_per_se = 1; adev->gfx.config.max_backends_per_se = 2; - - switch (adev->pdev->revision) { - case 0xc4: - case 0x84: - case 0xc8: - case 0xcc: - case 0xe1: - case 0xe3: - /* B10 */ - adev->gfx.config.max_cu_per_sh = 8; - break; - case 0xc5: - case 0x81: - case 0x85: - case 0xc9: - case 0xcd: - case 0xe2: - case 0xe4: - /* B8 */ - adev->gfx.config.max_cu_per_sh = 6; - break; - case 0xc6: - case 0xca: - case 0xce: - case 0x88: - case 0xe6: - /* B6 */ - adev->gfx.config.max_cu_per_sh = 6; - break; - case 0xc7: - case 0x87: - case 0xcb: - case 0xe5: - case 0x89: - default: - /* B4 */ - adev->gfx.config.max_cu_per_sh = 4; - break; - } - + adev->gfx.config.max_cu_per_sh = 8; adev->gfx.config.max_texture_channel_caches = 2; adev->gfx.config.max_gprs = 256; adev->gfx.config.max_gs_threads = 32; @@ -1971,35 +1932,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.max_tile_pipes = 2; adev->gfx.config.max_sh_per_se = 1; adev->gfx.config.max_backends_per_se = 1; - - switch (adev->pdev->revision) { - case 0x80: - case 0x81: - case 0xc0: - case 0xc1: - case 0xc2: - case 0xc4: - case 0xc8: - case 0xc9: - case 0xd6: - case 0xda: - case 0xe9: - case 0xea: - 
adev->gfx.config.max_cu_per_sh = 3; - break; - case 0x83: - case 0xd0: - case 0xd1: - case 0xd2: - case 0xd4: - case 0xdb: - case 0xe1: - case 0xe2: - default: - adev->gfx.config.max_cu_per_sh = 2; - break; - } - + adev->gfx.config.max_cu_per_sh = 3; adev->gfx.config.max_texture_channel_caches = 2; adev->gfx.config.max_gprs = 256; adev->gfx.config.max_gs_threads = 16; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: . [PATCH] drm/amd/amdgpu: Fix ring initialization for GFX9
On 2017-06-05 03:48 PM, Xie, AlexBin wrote: Hi Andres, I think the original patch was written by you. Would you comment? Is it a bug or intentional? Thank you. Alex Bin Xie Message: 1 Date: Mon, 5 Jun 2017 10:36:59 -0400 From: Tom St DenisTo: amd-gfx@lists.freedesktop.org Subject: Re: amd-gfx Digest, Vol 13, Issue 29 Message-ID: <58773ac0-93a7-e494-d447-88e93ded9...@amd.com> Content-Type: text/plain; charset=utf-8; format=flowed On 05/06/17 10:24 AM, Xie, AlexBin wrote: Hi, Tom, You have found a bug. Your patch looks fine for me. Have you confirmed the deleted part is older version? Perhaps search email list or git history to confirm? It looks like the edits to the older GFX files (7/8) simply changed that block of code whereas the gfx9 version they pasted in the fixed block moving the old block down. Yeah there was a problem with the rebase here. Tom/Alex's fix on the mailing list is the intended solution. Regards, Andres Tom ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdgpu/gfx9: new queue policy, take first 2 queues of each pipe
Reviewed-by: Andres Rodriguez <andre...@gmail.com> -Andres On 2017-06-05 11:06 AM, Alex Deucher wrote: Instead of taking the first pipe and giving the rest to kfd, take the first 2 queues of each pipe. Effectively, amdgpu and amdkfd own the same number of queues. But because the queues are spread over multiple pipes the hardware will be able to better handle concurrent compute workloads. amdgpu goes from 1 pipe to 4 pipes, i.e. from 1 compute threads to 4 amdkfd goes from 3 pipe to 4 pipes, i.e. from 3 compute threads to 4 gfx9 was missed when this patch set was rebased to include gfx9. Signed-off-by: Alex Deucher <alexander.deuc...@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 0c48f6c..276dc06 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -873,8 +873,8 @@ static void gfx_v9_0_compute_queue_acquire(struct amdgpu_device *adev) if (mec >= adev->gfx.mec.num_mec) break; - /* policy: amdgpu owns all queues in the first pipe */ - if (mec == 0 && pipe == 0) + /* policy: amdgpu owns the first two queues of the first MEC */ + if (mec == 0 && queue < 2) set_bit(i, adev->gfx.mec.queue_bitmap); } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [RFC] Exclusive gpu access for SteamVR usecases
On 2017-05-31 02:53 AM, Christian König wrote: 2. How are the priorities from processes supposed to interact with the per context priority? Do you mean process niceness? There isn't any relationship between niceness and gpu priority. Let me know if you meant something different here. I meant something different. The application controls the per context priority when creating the context and the compositor should control the per process priority. Those two needs to be handled separately, otherwise we would could override the context priority from the compositor and so confuse the application. I suggest to cleanly separate the two. Would you then b be okay with having a list of amdgpu_process data structures stored in adev? The list would be searchable by struct pid. However, I'm not 100% convinced on having two separate priorities. It duplicates the concept and we need to introduce relationship semantics between the two. Alternatively, since the priority patches still aren't part of drm-next we still have a chance make some changes there. If we make the priority requests reference counted we should be able to remember any old requests. (ii) Job dependencies between two processes. This case is mentioned separately as it is probably the most common use case we will encounter for this feature. Most graphics applications enter producer/consumer relationship with the compositor process (window swapchain). In this case the compositor should already have all the information required to avoid a deadlock. That is nonsense. The kernel is the supervisor of resource management, so only the kernel knows how to avoid a deadlock. Let's imagine the following example: Process A is a low priority task (for example updating the clock bitmap) which needs a resource for it's command submission. Process B is a high priority task (rendering of the VR) which needs a bunch of memory for it's command submission. 
Now the kernel memory management decides that it needs to evict process A from VRAM to make room for the command submission of process B. To do this all command submissions of process A need to finish. In this moment the compositor hands over exclusive access to process B and never gives process A a chance to run. Now B depends on A, but A can never run because B has exclusive access -> deadlock. We somehow need to handle this inside the kernel or this whole approach won't work. Thanks for pointing that out. I thought cases like these would work okay since we always allow PRIORITY_KERNEL work to execute. But as you pointed out, I overlooked the dependency that is created once the command buffers have their final memory addresses attached. Let me read and think about this a bit more. Regards, Andres Regards, Christian. Am 30.05.2017 um 23:38 schrieb Andres Rodriguez: On 2017-05-30 11:19 AM, Christian König wrote: Looks like a good start, but a few notes in general: 1. Split the patches into two sets. One for implementing changing the priorities and one for limiting the priorities. No problem. 2. How are the priorities from processes supposed to interact with the per context priority? Do you mean process niceness? There isn't any relationship between niceness and gpu priority. Let me know if you meant something different here. 3. Thinking more about it we can't limit the minimum priority in the scheduler. For example a low priority job might block resources the high priority job needs to run. E.g. VRAM memory. We avoid deadlocks by making sure that all dependencies of an exclusive task are also elevated to the same priority as said task. Usermode (the DRM_MASTER) is responsible to maintain this guarantee. The kernel does provide an ioctl that makes this task simple, amdgpu_sched_process_priority_set(). Lets take a look at this issue through three different scenarios. (i) Job dependencies are all process internal, i.e. multiple contexts in one process. 
This is the trivial case. A call to amdgpu_sched_process_priority_set() will change the priority of all contexts belonging to a process in lockstep. Once amdgpu_sched_process_priority_set() returns, it is safe to raise the minimum priority using amdgpu_sched_min_priority_get(). At this point we have a guarantee that all contexts belonging to the process will be in a runnable state, or all the contexts will be in a not-runnable state. There won't be a mix of runnable and non-runnable processes. Getting into that mixed state is what could cause a deadlock, a runnable context depends on a non-runnable one. Note: the current patchset needs a fix to provide this guarantee in multi-gpu systems. (ii) Job dependencies between two processes. This case is mentioned separately as it is probably the most common use case we will encounter for this feature. Most graphics applications enter producer/consumer relationship with the compositor process (win
Re: [RFC] Exclusive gpu access for SteamVR usecases
On 2017-05-30 11:19 AM, Christian König wrote: Looks like a good start, but a few notes in general: 1. Split the patches into two sets. One for implementing changing the priorities and one for limiting the priorities. No problem. 2. How are the priorities from processes supposed to interact with the per context priority? Do you mean process niceness? There isn't any relationship between niceness and gpu priority. Let me know if you meant something different here. 3. Thinking more about it we can't limit the minimum priority in the scheduler. For example a low priority job might block resources the high priority job needs to run. E.g. VRAM memory. We avoid deadlocks by making sure that all dependencies of an exclusive task are also elevated to the same priority as said task. Usermode (the DRM_MASTER) is responsible to maintain this guarantee. The kernel does provide an ioctl that makes this task simple, amdgpu_sched_process_priority_set(). Lets take a look at this issue through three different scenarios. (i) Job dependencies are all process internal, i.e. multiple contexts in one process. This is the trivial case. A call to amdgpu_sched_process_priority_set() will change the priority of all contexts belonging to a process in lockstep. Once amdgpu_sched_process_priority_set() returns, it is safe to raise the minimum priority using amdgpu_sched_min_priority_get(). At this point we have a guarantee that all contexts belonging to the process will be in a runnable state, or all the contexts will be in a not-runnable state. There won't be a mix of runnable and non-runnable processes. Getting into that mixed state is what could cause a deadlock, a runnable context depends on a non-runnable one. Note: the current patchset needs a fix to provide this guarantee in multi-gpu systems. (ii) Job dependencies between two processes. This case is mentioned separately as it is probably the most common use case we will encounter for this feature. 
Most graphics applications enter producer/consumer relationship with the compositor process (window swapchain). In this case the compositor should already have all the information required to avoid a deadlock. It knows: - Itself (as a process) - The application process - The dependencies between both processes At this stage it is simple for the compositor to understand that if it wishes to perform an exclusive mode transition, all dependencies (which are known) should also be part of the exclusive group. We should be able to implement this feature without modifying a game/application. (iii) Job dependencies between multiple (3+) processes. This scenario is very uncommon for games. For example, if a game or application is split into multiple processes. Process A interacts with the compositor. Process B does some physics/compute calculations and send the results to Process A. To support this use case, we would require an interface for the application to communicate to the compositor its dependencies. I.e. Process A would say, "Also keep Process B's priority in sync with mine". This should be a simple bit of plumbing to allow Process A to share an fd from Process B with the compositor. B --[pipe_send(fdB)]--> A --[compositor_ext_priority_group_add(fdB)]--> Compositor Once the compositor is aware of all of A's dependencies, this can be handled in the same fashion as (ii). A special extension would be required for compositor protocols to communicate the dependencies fd. Applications would also need to be updated to use this extension. I think this case would be very uncommon. But it is something that we would be able to handle if the need would arise. > We need something like blocking the submitter instead (bad) or detection > of dependencies in the scheduler (good, but tricky to implement). > I definitely agree that detecting dependencies is tricky. Which is why I prefer an approach where usermode defines the dependencies. 
It is simple for both the kernel and usermode to implement. > Otherwise we can easily run into a deadlock situation with that approach. > The current API does allow you to deadlock yourself pretty easily if misused. But so do many other APIs, like having a thread trying to grab the same lock twice :) Thanks for the comments, Andres Regards, Christian. Am 25.05.2017 um 02:00 schrieb Andres Rodriguez: When multiple environments are running simultaneously on a system, e.g. an X desktop + a SteamVR game session, it may be useful to sacrifice performance in one environment in order to boost it on the other. This series provides a mechanism for a DRM_MASTER to provide exclusive gpu access to a group of processes. Note: This series is built on the assumption that the drm lease patch series will extend DRM_MASTER status to lesees. The libdrm we intend to provide is as follows: /** * Set the priority of all contexts in
Re: [RFC] Exclusive gpu access for SteamVR usecases
On 2017-05-26 05:02 AM, Mao, David wrote: Hi Andres, Why the fd is needed for this interface? The fd is used to identify the process for which we wish to raise the priority. It can be any fd from the target process, it doesn't have to be a drm file descriptor at all. The fd is used to retrieve the (struct pid*) of the target process on the kernel side. In effect, it is a replacement for passing a pid number across process boundaries. For reference, amdgpu_sched_process_priority_set() in patch 3 Why not just using the dev->fd instead of IIRC, if there are more than one fds opened in the process upon the same device, they will share the same amdgpu_device_handle which is guaranteed by amdgpu_device_initialize. Thanks for pointing that out. I wasn't aware that the amdgpu drm layer would always perform all command submission through the same fd (dev->fd) for the same amdgpu_device. Your suggestion actually makes it a lot simpler to deal with this issue at a file level instead of at a process level. Since only one fd per device is used for command submission. For a multi-gpu setup we would still need to share multiple fds, across the process boundaries. This also helped me realize that my current implementation doesn't deal with multi-gpu cases correctly. As I iterate over the fds belonging to a single drm device. In other word, we should not run into the case that user creates more contexts with newly opened fd after tuning the priority of existing context in the same process unless the previous fd is closed. Thanks. Best Regards, David On 25 May 2017, at 8:00 AM, Andres Rodriguez <andre...@gmail.com <mailto:andre...@gmail.com>> wrote: When multiple environments are running simultaneously on a system, e.g. an X desktop + a SteamVR game session, it may be useful to sacrifice performance in one environment in order to boost it on the other. This series provides a mechanism for a DRM_MASTER to provide exclusive gpu access to a group of processes. 
Note: This series is built on the assumption that the drm lease patch series will extend DRM_MASTER status to lesees. The libdrm we intend to provide is as follows: /** * Set the priority of all contexts in a process * * This function will change the priority of all contexts owned by * the process identified by fd. * * \param dev - \c [in] device handle * \param fd - \c [in] fd from target process * \param priority- \c [in] target priority AMDGPU_CTX_PRIORITY_* * * \return 0 on success\n * <0 - Negative POSIX error code * * \notes @fd can be *any* file descriptor from the target process. * \notes this function requires DRM_MASTER */ int amdgpu_sched_process_priority_set(amdgpu_device_handle dev, int fd, int32_t priority); /** * Request to raise the minimum required priority to schedule a gpu job * * Submit a request to increase the minimum required priority to schedule * a gpu job. Once this function returns, the gpu scheduler will no longer * consider jobs from contexts with priority lower than @priority. * * The minimum priority considered by the scheduler will be the highest from * all currently active requests. * * Requests are refcounted, and must be balanced using * amdgpu_sched_min_priority_put() * * \param dev - \c [in] device handle * \param priority- \c [in] target priority AMDGPU_CTX_PRIORITY_* * * \return 0 on success\n * <0 - Negative POSIX error code * * \notes this function requires DRM_MASTER */ int amdgpu_sched_min_priority_get(amdgpu_device_handle dev, int32_t priority); /** * Drop a request to raise the minimum required scheduler priority * * This call balances amdgpu_sched_min_priority_get() * * If no other active requests exists for @priority, the minimum required * priority will decay to a lower level until one is reached with an active * request or the lowest priority is reached. 
* * \param dev - \c [in] device handle * \param priority- \c [in] target priority AMDGPU_CTX_PRIORITY_* * * \return 0 on success\n * <0 - Negative POSIX error code * * \notes this function requires DRM_MASTER */ int amdgpu_sched_min_priority_put(amdgpu_device_handle dev, int32_t priority); Using this app, VRComposer can raise the priority of the VRapp and itself. Then it can restrict the minimum scheduler priority in order to become exclusive gpu clients. One of the areas I'd like feedback is the following scenario. If a VRapp opens a new fd and creates a new context after a call to set_priority, this specific context will be lower priority than the rest. If the minimum required priority is then raised, it is possible that this new context will be starved and deadlock the VRapp. One solution I had in mind to address this situation, is to make set_priority also raise the priority of future contexts created by the VRapp. However, that would require keeping track of the requeste
[PATCH 3/3] drm/amdgpu: add a mechanism to acquire gpu exclusivity
A DRM_MASTER may wish to restrict gpu job submission to only a limited set of clients. To enable this use case we provide the following new IOCTL APIs: * A mechanism to change a process's ctx priorities * A mechanism to limit the minimum priority required for the gpu scheduler to queue a job to the HW This functionality is useful in VR use cases, where two compositors are operating simultaneously, e.g. X + SteamVRComposer. In this case SteamVRComposer can limit gpu access to itself + the relevant clients. Once critical work is complete, and if enough time is available until the next HMD vblank, general access to the gpu can be restored. The operation is limited to DRM_MASTER since it may lead to starvation. The implementation of drm leases is required to extend DRM_MASTER status to the SteamVRComposer. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 39 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 131 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h | 34 +++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 81 ++-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 14 ++- include/uapi/drm/amdgpu_drm.h | 26 + 9 files changed, 306 insertions(+), 26 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index b62d9e9..e4d3b07 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ - amdgpu_queue_mgr.o + amdgpu_queue_mgr.o amdgpu_sched.o # add asic specific 
block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3722352..9681de7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -833,6 +833,9 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, struct dma_fence *fence); struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, uint64_t seq); +void amdgpu_ctx_set_priority(struct amdgpu_device *adev, +struct amdgpu_ctx *ctx, +enum amd_sched_priority priority); int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 43fe5ae..996434f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -24,6 +24,7 @@ #include #include "amdgpu.h" +#include "amdgpu_sched.h" static int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority priority, @@ -198,23 +199,6 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, return 0; } -static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) -{ - switch (amdgpu_priority) { - case AMDGPU_CTX_PRIORITY_HIGH_HW: - return AMD_SCHED_PRIORITY_HIGH_HW; - case AMDGPU_CTX_PRIORITY_HIGH_SW: - return AMD_SCHED_PRIORITY_HIGH_SW; - case AMDGPU_CTX_PRIORITY_NORMAL: - return AMD_SCHED_PRIORITY_NORMAL; - case AMDGPU_CTX_PRIORITY_LOW: - return AMD_SCHED_PRIORITY_LOW; - default: - WARN(1, "Invalid context priority %d\n", amdgpu_priority); - return AMD_SCHED_PRIORITY_INVALID; - } -} - int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -337,6 +321,27 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, return fence; } +void amdgpu_ctx_set_priority(struct amdgpu_device *adev, +struct amdgpu_ctx *ctx, +enum amd_sched_priority priority) +{ + int i; + 
struct amd_sched_rq *rq; + struct amd_sched_entity *entity; + struct amdgpu_ring *ring; + + spin_lock(>ring_lock); + for (i = 0; i < adev->num_rings; i++) { + ring = adev->rings[i]; + entity = >rings[i].entity; + rq = >sched.sched_rq[priority]; + +
[RFC] Exclusive gpu access for SteamVR usecases
When multiple environments are running simultaneously on a system, e.g. an X desktop + a SteamVR game session, it may be useful to sacrifice performance in one environment in order to boost it on the other. This series provides a mechanism for a DRM_MASTER to provide exclusive gpu access to a group of processes. Note: This series is built on the assumption that the drm lease patch series will extend DRM_MASTER status to lesees. The libdrm we intend to provide is as follows: /** * Set the priority of all contexts in a process * * This function will change the priority of all contexts owned by * the process identified by fd. * * \param dev - \c [in] device handle * \param fd - \c [in] fd from target process * \param priority- \c [in] target priority AMDGPU_CTX_PRIORITY_* * * \return 0 on success\n * <0 - Negative POSIX error code * * \notes @fd can be *any* file descriptor from the target process. * \notes this function requires DRM_MASTER */ int amdgpu_sched_process_priority_set(amdgpu_device_handle dev, int fd, int32_t priority); /** * Request to raise the minimum required priority to schedule a gpu job * * Submit a request to increase the minimum required priority to schedule * a gpu job. Once this function returns, the gpu scheduler will no longer * consider jobs from contexts with priority lower than @priority. * * The minimum priority considered by the scheduler will be the highest from * all currently active requests. 
* * Requests are refcounted, and must be balanced using * amdgpu_sched_min_priority_put() * * \param dev - \c [in] device handle * \param priority- \c [in] target priority AMDGPU_CTX_PRIORITY_* * * \return 0 on success\n * <0 - Negative POSIX error code * * \notes this function requires DRM_MASTER */ int amdgpu_sched_min_priority_get(amdgpu_device_handle dev, int32_t priority); /** * Drop a request to raise the minimum required scheduler priority * * This call balances amdgpu_sched_min_priority_get() * * If no other active requests exists for @priority, the minimum required * priority will decay to a lower level until one is reached with an active * request or the lowest priority is reached. * * \param dev - \c [in] device handle * \param priority- \c [in] target priority AMDGPU_CTX_PRIORITY_* * * \return 0 on success\n * <0 - Negative POSIX error code * * \notes this function requires DRM_MASTER */ int amdgpu_sched_min_priority_put(amdgpu_device_handle dev, int32_t priority); Using this app, VRComposer can raise the priority of the VRapp and itself. Then it can restrict the minimum scheduler priority in order to become exclusive gpu clients. One of the areas I'd like feedback is the following scenario. If a VRapp opens a new fd and creates a new context after a call to set_priority, this specific context will be lower priority than the rest. If the minimum required priority is then raised, it is possible that this new context will be starved and deadlock the VRapp. One solution I had in mind to address this situation, is to make set_priority also raise the priority of future contexts created by the VRapp. However, that would require keeping track of the requested priority on a per-process data structure. The current design appears to steer clean of keeping any process specific data, and everything instead of stored on a per-file basis. Which is why I did not pursue this approach. But if this is something you'd like me to implement let me know. 
One could also argue that preventing an application deadlock should be handled between the VRComposer and the VRApp. It is not the kernel's responsibility to babysit userspace applications and prevent them from shooting themselves in the foot. The same could be achieved by improper usage of shared fences between processes. Thoughts/feedback/comments on this issue, or others, are appreciated. Regards, Andres ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/3] drm/amdgpu: add a new scheduler priority AMD_SCHED_PRIORITY_HIGH_SW
Add a new priority level to the gpu scheduler *_HIGH_SW. This level intends to provide elevated entity priority at the sw scheduler level without the negative side effects of an elevated HW priority. Some of the negative effects of HW priorities can include stealing resources from other queues. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 +--- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 18 ++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 ++- include/uapi/drm/amdgpu_drm.h | 3 ++- 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index cc00110..48d0d1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -35,7 +35,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX) return -EINVAL; - if (priority >= AMD_SCHED_PRIORITY_HIGH && !capable(CAP_SYS_NICE)) + if (priority > AMD_SCHED_PRIORITY_NORMAL && !capable(CAP_SYS_NICE)) return -EACCES; memset(ctx, 0, sizeof(*ctx)); @@ -201,8 +201,10 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) { switch (amdgpu_priority) { - case AMDGPU_CTX_PRIORITY_HIGH: - return AMD_SCHED_PRIORITY_HIGH; + case AMDGPU_CTX_PRIORITY_HIGH_HW: + return AMD_SCHED_PRIORITY_HIGH_HW; + case AMDGPU_CTX_PRIORITY_HIGH_SW: + return AMD_SCHED_PRIORITY_HIGH_SW; case AMDGPU_CTX_PRIORITY_NORMAL: return AMD_SCHED_PRIORITY_NORMAL; case AMDGPU_CTX_PRIORITY_LOW: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 6147c94..396d3e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6749,19 +6749,12 @@ static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev, mutex_lock(>srbm_mutex); vi_srbm_select(adev, ring->me, ring->pipe, 
ring->queue, 0); - switch (priority) { - case AMD_SCHED_PRIORITY_NORMAL: - WREG32(mmCP_HQD_PIPE_PRIORITY, 0x0); - WREG32(mmCP_HQD_QUEUE_PRIORITY, 0x0); - break; - case AMD_SCHED_PRIORITY_HIGH: + if (priority >= AMD_SCHED_PRIORITY_HIGH_HW) { WREG32(mmCP_HQD_PIPE_PRIORITY, 0x2); WREG32(mmCP_HQD_QUEUE_PRIORITY, 0xf); - break; - default: - WARN(1, "Attempt to set invalid SPI priority:%d for ring:%d\n", - priority, ring->idx); - break; + } else { + WREG32(mmCP_HQD_PIPE_PRIORITY, 0x0); + WREG32(mmCP_HQD_QUEUE_PRIORITY, 0x0); } vi_srbm_select(adev, 0, 0, 0, 0); @@ -6776,7 +6769,8 @@ static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring, return; gfx_v8_0_hqd_set_priority(adev, ring, priority); - gfx_v8_0_pipe_reserve_resources(adev, ring, priority >= AMD_SCHED_PRIORITY_HIGH); + gfx_v8_0_pipe_reserve_resources(adev, ring, + priority >= AMD_SCHED_PRIORITY_HIGH_HW); } static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 46c18424..dbcaa2e 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -117,7 +117,8 @@ enum amd_sched_priority { AMD_SCHED_PRIORITY_MIN, AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN, AMD_SCHED_PRIORITY_NORMAL, - AMD_SCHED_PRIORITY_HIGH, + AMD_SCHED_PRIORITY_HIGH_SW, + AMD_SCHED_PRIORITY_HIGH_HW, AMD_SCHED_PRIORITY_KERNEL, AMD_SCHED_PRIORITY_MAX }; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 88b2a52..27d0a822 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -166,7 +166,8 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_PRIORITY_LOW -1023 #define AMDGPU_CTX_PRIORITY_NORMAL 0 /* Selecting a priority above NORMAL requires CAP_SYS_ADMIN */ -#define AMDGPU_CTX_PRIORITY_HIGH1023 +#define AMDGPU_CTX_PRIORITY_HIGH_SW 512 +#define AMDGPU_CTX_PRIORITY_HIGH_HW 1023 struct drm_amdgpu_ctx_in { 
/** AMDGPU_CTX_OP_* */ -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/3] drm/amdgpu: make amdgpu_to_sched_priority detect invalid parameters
Returning invalid priorities as _NORMAL is a backwards compatibility quirk of amdgpu_ctx_ioctl(). Move this detail one layer up where it belongs. Signed-off-by: Andres Rodriguez <andre...@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 +--- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 48d0d1e..43fe5ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -211,7 +211,7 @@ static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) return AMD_SCHED_PRIORITY_LOW; default: WARN(1, "Invalid context priority %d\n", amdgpu_priority); - return AMD_SCHED_PRIORITY_NORMAL; + return AMD_SCHED_PRIORITY_INVALID; } } @@ -230,8 +230,10 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, id = args->in.ctx_id; priority = amdgpu_to_sched_priority(args->in.priority); - if (priority >= AMD_SCHED_PRIORITY_MAX) - return -EINVAL; + /* For backwards compatibility reasons, we need to accept +* ioctls with garbage in the priority field */ + if (priority == AMD_SCHED_PRIORITY_INVALID) + priority = AMD_SCHED_PRIORITY_NORMAL; switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index dbcaa2e..da040bc 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -120,7 +120,8 @@ enum amd_sched_priority { AMD_SCHED_PRIORITY_HIGH_SW, AMD_SCHED_PRIORITY_HIGH_HW, AMD_SCHED_PRIORITY_KERNEL, - AMD_SCHED_PRIORITY_MAX + AMD_SCHED_PRIORITY_MAX, + AMD_SCHED_PRIORITY_INVALID = -1 }; /** -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx