On 5/19/2026 12:36 AM, Rob Clark wrote:
> This is needed so that SEL reg values are restored on exit from IFPC.
>
> Signed-off-by: Rob Clark <[email protected]>
> Reviewed-by: Anna Maniscalco <[email protected]>
> ---
> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 82 +++++++++++++++++++++++++--
> drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 11 +++-
> drivers/gpu/drm/msm/adreno/a8xx_gpu.c | 1 +
> 3 files changed, 87 insertions(+), 7 deletions(-)
>
<< snip >>
> static void
> a6xx_perfcntr_configure(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
> const struct msm_perfcntr_stream *stream)
> {
> + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> enum adreno_pipe pipe = PIPE_NONE;
> + uint32_t *reglist = NULL;
> + uint32_t *reglist_sel_start;
> +
> + if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) {
> + WARN_ON(!a6xx_gpu->pwrup_reglist_emitted);
> +
> + struct cpu_gpu_lock *lock = a6xx_gpu->pwrup_reglist_ptr;
> + int off = (2 * lock->ifpc_list_len) +
> + (2 * lock->preemption_list_len) +
> + (3 * a6xx_gpu->dynamic_sel_reglist_offset);
> +
> + reglist = (uint32_t *)&lock->regs[0];
> + reglist += off;
> + reglist_sel_start = reglist;
> +
> + /* Clear any previously configured SEL reg entries: */
> + lock->dynamic_list_len = a6xx_gpu->dynamic_sel_reglist_offset;
> +
> + /*
> + * Ensure CP sees the dynamic_list_len update before we
> + * start modifying the SEL entries:
> + */
> + wmb();
dma_wmb() is a lighter barrier and provides the same ordering for memory
writes visible to devices.
> + }
>
> for (unsigned i = 0; i < stream->nr_groups; i++) {
> unsigned group_idx = msm_perfcntr_group_idx(stream, i);
> @@ -2567,17 +2617,15 @@ a6xx_perfcntr_configure(struct msm_gpu *gpu, struct
> msm_ringbuffer *ring,
>
> const struct msm_perfcntr_counter *counter =
> &group->counters[base];
> unsigned nr = group_state->allocated_counters;
> - OUT_PKT4(ring, counter->select_reg, nr);
> - for (unsigned c = 0; c < nr; c++)
> - OUT_RING(ring, group_state->countables[c]);
> + perfcntr_select(ring, pipe, counter->select_reg,
> group_state->countables, nr, &reglist);
>
> for (unsigned s = 0; s <
> ARRAY_SIZE(counter->slice_select_regs); s++) {
> if (!counter->slice_select_regs[s])
> break;
>
> - OUT_PKT4(ring, counter->slice_select_regs[s], nr);
> - for (unsigned c = 0; c < nr; c++)
> - OUT_RING(ring, group_state->countables[c]);
> + perfcntr_select(ring, pipe,
> counter->slice_select_regs[s],
> group_state->countables, nr, &reglist);
> }
> }
>
> @@ -2591,6 +2639,28 @@ a6xx_perfcntr_configure(struct msm_gpu *gpu, struct
> msm_ringbuffer *ring,
> OUT_RING(ring, upper_32_bits(rbmemptr(ring, perfcntr_fence)));
> OUT_RING(ring, stream->sel_fence);
>
> + /*
> + * Update the pwrup reglist size before flushing. Kgsl does a shared-
> + * memory spinlock dance with SQE to avoid racing with IFPC exit. But
> + * we can skip that since the ringbuffer programming will be executed
> + * by SQE after dynamic reglist size is updated. So even if we lose
> + * the race, the register programming in the rb will overwrite/correct
> + * the SEL regs restored by SQE on IFPC exit, before sampling begins.
> + */
> + if (reglist) {
> + struct cpu_gpu_lock *lock = a6xx_gpu->pwrup_reglist_ptr;
> + unsigned nr_regs = (reglist - reglist_sel_start) / 3;
> +
> + /*
> + * Ensure CP sees updates to the pwrup_reglist before it
> + * sees the new (increased) length:
> + */
> + wmb();
Same as above: dma_wmb() is a lighter barrier and should be sufficient here too.
-Akhil