Re: [PATCH] drm/amdgpu:implement CONTEXT_CONTROL (v3)

Bas Nieuwenhuizen Thu, 01 Sep 2016 01:19:31 -0700

On Thu, Sep 1, 2016 at 9:37 AM, Liu, Monk <[email protected]> wrote:
>
>
> -----Original Message-----
> From: amd-gfx [mailto:[email protected]] On Behalf Of 
> Christian K?nig
> Sent: Wednesday, August 31, 2016 7:53 PM
> To: Liu, Monk <[email protected]>; [email protected]
> Subject: Re: [PATCH] drm/amdgpu:implement CONTEXT_CONTROL (v3)
>
> Looks good to me in general, a few nit picks and sugegstions below.
>
> Am 31.08.2016 um 05:49 schrieb Monk Liu:
>> v1:
>> for gfx8, use CONTEXT_CONTROL package to dynamically skip preamble
>> CEIB and other load_xxx command in sequence.
>>
>> v2:
>> support GFX7 as well, and bump up version.
>> remove cntxcntl in compute ring funcs because CPC doesn't support this
>> packet.
>>
>> v3: fix reduntant judgement in cntxcntl.
>>
>> Change-Id: I4b87ca84ea8c11ba4f7fb4c0e8a5be537ccde851
>> Signed-off-by: Monk Liu <[email protected]>
>>
>> Change-Id: I5d24c1bb5c14190ce4adeb6a331ee3d92b3d5c83
>> Signed-off-by: Monk Liu <[email protected]>
>
> Only one signed of by line is enough and remove the change-ids.
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  9 +++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 12 ++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  | 16 +++++++++-------
>>   drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   | 20 ++++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   | 30 ++++++++++++++++++++++++++++++
>>   6 files changed, 82 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 1254410..0de5f08 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -321,6 +321,7 @@ struct amdgpu_ring_funcs {
>>       void (*begin_use)(struct amdgpu_ring *ring);
>>       void (*end_use)(struct amdgpu_ring *ring);
>>       void (*emit_switch_buffer) (struct amdgpu_ring *ring);
>> +     void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
>>   };
>>
>>   /*
>> @@ -965,6 +966,7 @@ struct amdgpu_ctx {
>>       spinlock_t              ring_lock;
>>       struct fence            **fences;
>>       struct amdgpu_ctx_ring  rings[AMDGPU_MAX_RINGS];
>> +     bool preamble_presented;
>>   };
>>
>>   struct amdgpu_ctx_mgr {
>> @@ -1227,8 +1229,13 @@ struct amdgpu_cs_parser {
>>
>>       /* user fence */
>>       struct amdgpu_bo_list_entry     uf_entry;
>> +     bool preamble_present; /* True means this command submit involves a
>> +preamble IB */
>
> We only need this in amdgpu_cs_ib_fill() don't we? See below as well.
>
> [ML] seems good advice
>
>>   };
>>
>> +#define PREAMBLE_IB_PRESENT          (1 << 0) /* bit set means command 
>> submit involves a preamble IB */
>> +#define PREAMBLE_IB_PRESENT_FIRST    (1 << 1) /* bit set means preamble IB 
>> is first presented in belonging context */
>
> Why does that makes a difference if it is seen for the first time?
>
> [ml] if it is presented for the first time for belonging ctx, means even 
> current CS do not involve context switch, we still need keep the actions in 
> preamble IB.
> Usually if current CS is from the same cntx of previous CS, that means no ctx 
> switch occurs, so we can skip the actions in preamble IB. but above case is 
> the exception.


Can't userspace just not set the preamble flag for the first submit
with a preamble? I think that would result in the same behavior,
unless having two non-preamble CE IB's in a single submit is an issue.

- Bas

>
>> +#define HAVE_CTX_SWITCH              (1 << 2) /* bit set means context 
>> switch occured */
>> +
>>   struct amdgpu_job {
>>       struct amd_sched_job    base;
>>       struct amdgpu_device    *adev;
>> @@ -1237,6 +1244,7 @@ struct amdgpu_job {
>>       struct amdgpu_sync      sync;
>>       struct amdgpu_ib        *ibs;
>>       struct fence            *fence; /* the hw fence */
>> +     uint32_t                preamble_status;
>>       uint32_t                num_ibs;
>>       void                    *owner;
>>       uint64_t                fence_ctx; /* the fence_context this job uses 
>> */
>> @@ -2264,6 +2272,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>>   #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
>>   #define amdgpu_ring_emit_hdp_invalidate(r) 
>> (r)->funcs->emit_hdp_invalidate((r))
>>   #define amdgpu_ring_emit_switch_buffer(r)
>> (r)->funcs->emit_switch_buffer((r))
>> +#define amdgpu_ring_emit_cntxcntl(r, d)
>> +(r)->funcs->emit_cntxcntl((r), (d))
>>   #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>>   #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>>   #define amdgpu_ring_patch_cond_exec(r,o)
>> (r)->funcs->patch_cond_exec((r),(o))
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 2d4e005..6d8c050 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -792,6 +792,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>>               if (r)
>>                       return r;
>>
>> +             if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
>> +                     parser->preamble_present = true;
>> +
>>               if (parser->job->ring && parser->job->ring != ring)
>>                       return -EINVAL;
>>
>> @@ -930,6 +933,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>               return r;
>>       }
>>
>> +     if (p->preamble_present) {
>> +             job->preamble_status |= PREAMBLE_IB_PRESENT;
>> +             if (!p->ctx->preamble_presented)
>> +                     job->preamble_status |= PREAMBLE_IB_PRESENT_FIRST;
>> +     }
>> +
>
> Better move this to the end of amdgpu_cs_ib_fill() where we allocate the IBs 
> as well.
> [ML] okay, good change.
>
>
>
>>       job->owner = p->filp;
>>       job->fence_ctx = entity->fence_context;
>>       p->fence = fence_get(&job->base.s_fence->finished);
>> @@ -940,6 +949,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>       trace_amdgpu_cs_ioctl(job);
>>       amd_sched_entity_push_job(&job->base);
>>
>> +     if (p->preamble_present)
>> +             p->ctx->preamble_presented = true;
>> +
>>       return 0;
>>   }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 56c85e6..44db0ab 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -55,9 +55,10 @@
>>    * - 3.3.0 - Add VM support for UVD on supported hardware.
>>    * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS.
>>    * - 3.5.0 - Add support for new UVD_NO_OP register.
>> + * - 3.6.0 - UMD doesn't/shouldn't need to use CONTEXT_CONTROL in IB,
>> + KMD should do it
>>    */
>>   #define KMS_DRIVER_MAJOR    3
>> -#define KMS_DRIVER_MINOR     5
>> +#define KMS_DRIVER_MINOR     6
>>   #define KMS_DRIVER_PATCHLEVEL       0
>>
>>   int amdgpu_vram_limit = 0;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> index 04263f0..b12b5ba 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> @@ -121,10 +121,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
>> unsigned num_ibs,
>>   {
>>       struct amdgpu_device *adev = ring->adev;
>>       struct amdgpu_ib *ib = &ibs[0];
>> -     bool skip_preamble, need_ctx_switch;
>> +     bool need_ctx_switch;
>>       unsigned patch_offset = ~0;
>>       struct amdgpu_vm *vm;
>>       uint64_t fence_ctx;
>> +     uint32_t status = 0;
>>
>>       unsigned i;
>>       int r = 0;
>> @@ -174,15 +175,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
>> unsigned num_ibs,
>>       /* always set cond_exec_polling to CONTINUE */
>>       *ring->cond_exe_cpu_addr = 1;
>>
>> -     skip_preamble = ring->current_ctx == fence_ctx;
>>       need_ctx_switch = ring->current_ctx != fence_ctx;
>> +     if (job && ring->funcs->emit_cntxcntl) {
>> +             if (need_ctx_switch)
>> +                     status |= HAVE_CTX_SWITCH;
>> +             status |= job->preamble_status;
>> +             amdgpu_ring_emit_cntxcntl(ring, status);
>> +     }
>> +
>>       for (i = 0; i < num_ibs; ++i) {
>>               ib = &ibs[i];
>> -
>> -             /* drop preamble IBs if we don't have a context switch */
>> -             if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
>> -                     continue;
>> -
>
> Would be nice to keep this functionality for cases where we don't support 
> emit_cntxcntl (e.g. SI?).
> [ML] SI support CONTEXT_CONTROL as well, and the package structure is exactly 
> the same as CI.
>
>>               amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
>>                                   need_ctx_switch);
>>               need_ctx_switch = false;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>> index f055d49..0d5addb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>> @@ -2096,6 +2096,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct 
>> amdgpu_ring *ring,
>>       amdgpu_ring_write(ring, control);
>>   }
>>
>> +static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring,
>> +uint32_t flags) {
>> +     uint32_t dw2 = 0;
>> +
>> +     dw2 |= 0x80000000; /* set load_enable otherwise this package is just 
>> NOPs */
>> +     if (flags & HAVE_CTX_SWITCH) {
>> +             /* set load_global_config & load_global_uconfig */
>> +             dw2 |= 0x8001;
>> +             /* set load_cs_sh_regs */
>> +             dw2 |= 0x01000000;
>> +             /* set load_per_context_state & load_gfx_sh_regs */
>> +             dw2 |= 0x10002;
>
> Better define some constants for those.
>
> [ML] I'll leave it to other guys when doing cleanups, a little hurry for 
> other jobs now ...
>
> Regards,
> Christian.
>
>> +     }
>> +
>> +     amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
>> +     amdgpu_ring_write(ring, dw2);
>> +     amdgpu_ring_write(ring, 0);
>> +}
>> +
>>   /**
>>    * gfx_v7_0_ring_test_ib - basic ring IB test
>>    *
>> @@ -4929,6 +4948,7 @@ static const struct amdgpu_ring_funcs 
>> gfx_v7_0_ring_funcs_gfx = {
>>       .test_ib = gfx_v7_0_ring_test_ib,
>>       .insert_nop = amdgpu_ring_insert_nop,
>>       .pad_ib = amdgpu_ring_generic_pad_ib,
>> +     .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
>>   };
>>
>>   static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute =
>> { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> index 8ba8e42..73f6ffa 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> @@ -6085,6 +6085,35 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring 
>> *ring)
>>       amdgpu_ring_write(ring, 0);
>>   }
>>
>> +static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring,
>> +uint32_t flags) {
>> +     uint32_t dw2 = 0;
>> +
>> +     dw2 |= 0x80000000; /* set load_enable otherwise this package is just 
>> NOPs */
>> +     if (flags & HAVE_CTX_SWITCH) {
>> +             /* set load_global_config & load_global_uconfig */
>> +             dw2 |= 0x8001;
>> +             /* set load_cs_sh_regs */
>> +             dw2 |= 0x01000000;
>> +             /* set load_per_context_state & load_gfx_sh_regs for GFX */
>> +             dw2 |= 0x10002;
>> +
>> +             /* set load_ce_ram if preamble presented */
>> +             if (PREAMBLE_IB_PRESENT & flags)
>> +                     dw2 |= 0x10000000;
>> +     } else {
>> +             /* still load_ce_ram if this is the first time preamble 
>> presented
>> +              * although there is no context switch happens.
>> +              */
>> +             if (PREAMBLE_IB_PRESENT_FIRST & flags)
>> +                     dw2 |= 0x10000000;
>> +     }
>> +
>> +     amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
>> +     amdgpu_ring_write(ring, dw2);
>> +     amdgpu_ring_write(ring, 0);
>> +}
>> +
>>   static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device 
>> *adev,
>>                                                enum amdgpu_interrupt_state 
>> state)
>>   {
>> @@ -6267,6 +6296,7 @@ static const struct amdgpu_ring_funcs 
>> gfx_v8_0_ring_funcs_gfx = {
>>       .insert_nop = amdgpu_ring_insert_nop,
>>       .pad_ib = amdgpu_ring_generic_pad_ib,
>>       .emit_switch_buffer = gfx_v8_ring_emit_sb,
>> +     .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
>>   };
>>
>>   static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute =
>> {
>
>
> _______________________________________________
> amd-gfx mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> _______________________________________________
> amd-gfx mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu:implement CONTEXT_CONTROL (v3)

Reply via email to