On 5/19/2026 12:36 AM, Rob Clark wrote:
> This is needed so that SEL reg values are restored on exit from IFPC.
> 
> Signed-off-by: Rob Clark <[email protected]>
> Reviewed-by: Anna Maniscalco <[email protected]>
> ---
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 82 +++++++++++++++++++++++++--
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 11 +++-
>  drivers/gpu/drm/msm/adreno/a8xx_gpu.c |  1 +
>  3 files changed, 87 insertions(+), 7 deletions(-)
> 

<< snip >>

>  static void
>  a6xx_perfcntr_configure(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
>                       const struct msm_perfcntr_stream *stream)
>  {
> +     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> +     struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
>       enum adreno_pipe pipe = PIPE_NONE;
> +     uint32_t *reglist = NULL;
> +     uint32_t *reglist_sel_start;
> +
> +     if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) {
> +             WARN_ON(!a6xx_gpu->pwrup_reglist_emitted);
> +
> +             struct cpu_gpu_lock *lock = a6xx_gpu->pwrup_reglist_ptr;
> +             int off = (2 * lock->ifpc_list_len) +
> +                       (2 * lock->preemption_list_len) +
> +                       (3 * a6xx_gpu->dynamic_sel_reglist_offset);
> +
> +             reglist = (uint32_t *)&lock->regs[0];
> +             reglist += off;
> +             reglist_sel_start = reglist;
> +
> +             /* Clear any previously configured SEL reg entries: */
> +             lock->dynamic_list_len = a6xx_gpu->dynamic_sel_reglist_offset;
> +
> +             /*
> +              * Ensure CP sees the dynamic_list_len update before we
> +              * start modifying the SEL entries:
> +              */
> +             wmb();

dma_wmb() is a lighter barrier than wmb() and provides the same ordering for
memory writes that must be visible to the device, so it should be sufficient here.

> +     }
>  
>       for (unsigned i = 0; i < stream->nr_groups; i++) {
>               unsigned group_idx = msm_perfcntr_group_idx(stream, i);
> @@ -2567,17 +2617,15 @@ a6xx_perfcntr_configure(struct msm_gpu *gpu, struct 
> msm_ringbuffer *ring,
>  
>               const struct msm_perfcntr_counter *counter = 
> &group->counters[base];
>               unsigned nr = group_state->allocated_counters;
> -             OUT_PKT4(ring, counter->select_reg, nr);
> -             for (unsigned c = 0; c < nr; c++)
> -                     OUT_RING(ring, group_state->countables[c]);
> +             perfcntr_select(ring, pipe, counter->select_reg,
> +                             group_state->countables, nr, &reglist);
>  
>               for (unsigned s = 0; s < 
> ARRAY_SIZE(counter->slice_select_regs); s++) {
>                       if (!counter->slice_select_regs[s])
>                               break;
>  
> -                     OUT_PKT4(ring, counter->slice_select_regs[s], nr);
> -                     for (unsigned c = 0; c < nr; c++)
> -                             OUT_RING(ring, group_state->countables[c]);
> +                     perfcntr_select(ring, pipe, 
> counter->slice_select_regs[s],
> +                                     group_state->countables, nr, &reglist);
>               }
>       }
>  
> @@ -2591,6 +2639,28 @@ a6xx_perfcntr_configure(struct msm_gpu *gpu, struct 
> msm_ringbuffer *ring,
>       OUT_RING(ring, upper_32_bits(rbmemptr(ring, perfcntr_fence)));
>       OUT_RING(ring, stream->sel_fence);
>  
> +     /*
> +      * Update the pwrup reglist size before flushing.  Kgsl does a shared-
> +      * memory spinlock dance with SQE to avoid racing with IFPC exit.  But
> +      * we can skip that since the ringbuffer programming will be executed
> +      * by SQE after dynamic reglist size is updated.  So even if we lose
> +      * the race, the register programming in the rb will overwrite/correct
> +      * the SEL regs restored by SQE on IFPC exit, before sampling begins.
> +      */
> +     if (reglist) {
> +             struct cpu_gpu_lock *lock = a6xx_gpu->pwrup_reglist_ptr;
> +             unsigned nr_regs = (reglist - reglist_sel_start) / 3;
> +
> +             /*
> +              * Ensure CP sees updates to the pwrup_reglist before it
> +              * sees the new (increased) length:
> +              */
> +             wmb();

Same as above: dma_wmb() should be sufficient here as well.

-Akhil


Reply via email to