Am 29.01.19 um 14:32 schrieb Marek Olšák:
On Tue, Jan 29, 2019, 3:01 AM Christian König <ckoenig.leichtzumer...@gmail.com<mailto:ckoenig.leichtzumer...@gmail.com> wrote: Am 28.01.19 um 22:52 schrieb Marek Olšák: > From: Marek Olšák <marek.ol...@amd.com<mailto:marek.ol...@amd.com>> > > Normal syncobjs signal when an IB finishes. Start syncobjs signal when > an IB starts. That approach has quite a number of problems (for example you can't allocate memory at this point). Better add a flag that we should only sync on scheduling for a dependency/syncobj instead. I don't understand. Can you give me an example of the interface and how the implementation would look? For example we add a new chunk type AMDGPU_CHUNK_ID_SCHEDULED which is handled the same way as AMDGPU_CHUNK_ID_DEPENDENCIES. Then in amdgpu_cs_process_fence_dep() we check if its a AMDGPU_CHUNK_ID_SCHEDULED and if yes extract the scheduled fence from the fence we got from amdgpu_ctx_get_fence(). Christian. Thanks, Marek Christian. > > Signed-off-by: Marek Olšák <marek.ol...@amd.com<mailto:marek.ol...@amd.com>> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 18 ++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- > include/uapi/drm/amdgpu_drm.h | 13 ++++++++++++- > 4 files changed, 33 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index d67f8b1dfe80..8e2f7e558bc9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -453,20 +453,21 @@ struct amdgpu_cs_parser { > struct dma_fence *fence; > uint64_t bytes_moved_threshold; > uint64_t bytes_moved_vis_threshold; > uint64_t bytes_moved; > uint64_t bytes_moved_vis; > struct amdgpu_bo_list_entry *evictable; > > /* user fence */ > struct amdgpu_bo_list_entry uf_entry; > > + bool get_start_syncobj; > unsigned num_post_dep_syncobjs; > struct drm_syncobj **post_dep_syncobjs; > }; > > static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, > uint32_t ib_idx, int idx) > { > return p->job->ibs[ib_idx].ptr[idx]; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index 1c49b8266d69..917f3818c61c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -1022,20 +1022,23 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device > *adev, > r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type, > chunk_ib->ip_instance, chunk_ib->ring, > &entity); > if (r) > return r; > > if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) > parser->job->preamble_status |= > AMDGPU_PREAMBLE_IB_PRESENT; > > + if (chunk_ib->flags & AMDGPU_IB_FLAG_GET_START_SYNCOBJ) > + parser->get_start_syncobj = true; > + > if (parser->entity && parser->entity != entity) > return -EINVAL; > > parser->entity = entity; > > ring = to_amdgpu_ring(entity->rq->sched); > r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ? > chunk_ib->ib_bytes : 0, ib); > if (r) { > DRM_ERROR("Failed to get ib !\n"); > @@ -1227,20 +1230,35 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser > *p, > amdgpu_mn_lock(p->mn); > amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { > struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo<http://tv.bo>); > > if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { > r = -ERESTARTSYS; > goto error_abort; > } > } > > + if (p->get_start_syncobj) { > + struct drm_syncobj *syncobj; > + > + r = drm_syncobj_create(&syncobj, 0, > + &job->base.s_fence->scheduled); > + if (r) > + goto error_abort; > + > + r = drm_syncobj_get_handle(p->filp, syncobj, > + &cs->out.start_syncobj); > + if (r) > + goto error_abort; > + drm_syncobj_put(syncobj); > + } > + > job->owner = p->filp; > p->fence = dma_fence_get(&job->base.s_fence->finished); > > amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq); > amdgpu_cs_post_dependencies(p); > > if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) && > !p->ctx->preamble_presented) { > job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST; > p->ctx->preamble_presented = true; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index c806f984bcc5..a230a30722d4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -64,23 +64,24 @@ > * - 3.18.0 - Export gpu always on cu bitmap > * - 3.19.0 - Add support for UVD MJPEG decode > * - 3.20.0 - Add support for local BOs > * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl > * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl > * - 3.23.0 - Add query for VRAM lost counter > * - 3.24.0 - Add high priority compute support for gfx9 > * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). > * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. > * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. > + * - 3.28.0 - AMDGPU_IB_FLAG_GET_START_SYNCOBJ > */ > #define KMS_DRIVER_MAJOR 3 > -#define KMS_DRIVER_MINOR 27 > +#define KMS_DRIVER_MINOR 28 > #define KMS_DRIVER_PATCHLEVEL 0 > > int amdgpu_vram_limit = 0; > int amdgpu_vis_vram_limit = 0; > int amdgpu_gart_size = -1; /* auto */ > int amdgpu_gtt_size = -1; /* auto */ > int amdgpu_moverate = -1; /* auto */ > int amdgpu_benchmarking = 0; > int amdgpu_testing = 0; > int amdgpu_audio = -1; > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h > index 662d379ea624..d0e0c99cea32 100644 > --- a/include/uapi/drm/amdgpu_drm.h > +++ b/include/uapi/drm/amdgpu_drm.h > @@ -538,21 +538,23 @@ struct drm_amdgpu_cs_in { > __u32 ctx_id; > /** Handle of resource list associated with CS */ > __u32 bo_list_handle; > __u32 num_chunks; > __u32 _pad; > /** this points to __u64 * which point to cs chunks */ > __u64 chunks; > }; > > struct drm_amdgpu_cs_out { > - __u64 handle; > + __u64 handle; /* sequence number */ > + __u32 start_syncobj; /* signalled when IB execution begins */ > + __u32 _pad; > }; > > union drm_amdgpu_cs { > struct drm_amdgpu_cs_in in; > struct drm_amdgpu_cs_out out; > }; > > /* Specify flags to be used for IB */ > > /* This IB should be submitted to CE */ > @@ -566,20 +568,29 @@ union drm_amdgpu_cs { > > /* The IB fence should do the L2 writeback but not invalidate any shader > * caches (L2/vL1/sL1/I$). */ > #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) > > /* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER. > * This will reset wave ID counters for the IB. > */ > #define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) > > +/* The CS ioctl will return a syncobj representing when all IBs begin > + * execution. If set, this applies to all IBs. The returned syncobj can be > + * used as an IB dependency for other IBs. > + * > + * This is used for GPU deadlock prevention when userspace uses mid-IB fences > + * to wait for mid-IB work on other rings. > + */ > +#define AMDGPU_IB_FLAG_GET_START_SYNCOBJ (1 << 5) > + > struct drm_amdgpu_cs_chunk_ib { > __u32 _pad; > /** AMDGPU_IB_FLAG_* */ > __u32 flags; > /** Virtual address to begin IB execution */ > __u64 va_start; > /** Size of submission */ > __u32 ib_bytes; > /** HW IP to submit to */ > __u32 ip_type;
_______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx