On Mon, Sep 8, 2025 at 4:27 AM Akhil P Oommen <akhi...@oss.qualcomm.com> wrote: > > There are some special registers which are accessible even when GX power > domain is collapsed during an IFPC sleep. Accessing these registers > wakes up GPU from power collapse and allow programming these registers > without additional handshake with GMU. This patch adds support for this > special register write sequence. > > Signed-off-by: Akhil P Oommen <akhi...@oss.qualcomm.com> > --- > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 80 > ++++++++++++++++++++++++++++++- > drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 1 + > drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 20 ++++---- > 3 files changed, 90 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > index > 45dd5fd1c2bfcb0a01b71a326c7d95b0f9496d99..a63dad80ef461da45d5c41a042ed4f19d8282ef5 > 100644 > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > @@ -16,6 +16,84 @@ > > #define GPU_PAS_ID 13 > > +static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, > u32 status, u32 mask) > +{ > + /* Success if !writedropped0/1 */ > + if (!(status & mask)) > + return true; > + > + udelay(10); > + > + /* Try to update fenced register again */ > + gpu_write(gpu, offset, value); > + > + /* We can't do a posted write here because the power domain could be > + * in collapse state. So use the heaviest barrier instead > + */ > + mb(); > + return false; > +} > + > +static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, > u32 mask) > +{ > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > + struct msm_gpu *gpu = &adreno_gpu->base; > + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; > + u32 status; > + > + gpu_write(gpu, offset, value); > + > + /* Nothing else to be done in the case of no-GMU */ > + if (adreno_has_gmu_wrapper(adreno_gpu)) > + return 0; > + > + /* We can't do a posted write here because the power domain could be > + * in collapse state. So use the heaviest barrier instead > + */ > + mb(); > + > + if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status, > + fence_status_check(gpu, offset, value, status, mask), > 0, 1000)) > + return 0; > + > + /* Try again for another 1ms before failing */ > + gpu_write(gpu, offset, value); > + mb(); > + > + if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status, > + fence_status_check(gpu, offset, value, status, mask), > 0, 1000)) { > + /* > + * The 'delay' warning is here because the pause to print this > + * warning will allow gpu to move to power collapse which > + * defeats the purpose of continuous polling for 2 ms > + */ > + dev_err_ratelimited(gmu->dev, "delay in fenced register write > (0x%x)\n", > + offset); > + return 0; > + } > + > + dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n", > + offset); > + > + return -ETIMEDOUT; > +} > + > +int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 > mask, bool is_64b) > +{ > + int ret; > + > + ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask); > + if (ret) > + return ret; > + > + if (!is_64b) > + return 0; > + > + ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask); > + > + return ret; > +} > + > static inline bool _a6xx_check_idle(struct msm_gpu *gpu) > { > struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > @@ -86,7 +164,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct > msm_ringbuffer *ring) > /* Update HW if this is the current ring and we are not in preempt*/ > if (!a6xx_in_preempt(a6xx_gpu)) { > if (a6xx_gpu->cur_ring == ring) > - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); > + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, > wptr, BIT(0), false); > else > ring->restore_wptr = true; > } else { > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > index > 6e71f617fc3d0d564e51650dfed63a18f31042ac..e736c59d566b3fcf8c62a212494e3b110c09caa9 > 100644 > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > @@ -295,5 +295,6 @@ int a6xx_gpu_state_put(struct msm_gpu_state *state); > > void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool > gx_off); > void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert); > +int a6xx_fenced_write(struct a6xx_gpu *gpu, u32 offset, u64 value, u32 mask, > bool is_64b);
"mask" makes it sound like it's the mask for a masked write, which it isn't. At least in the public API I'd name it something more explicit like "fence_status_mask". Also it would be nice to add defines like GMU_FENCE_STATUS_WPTR/CONTEXT_SWITCH to make the parameter values in callsites less magical. Finally, this might be personal preference, but it's not immediately obvious what the "true"/"false" in callsites mean, so it would make users clearer to add a separate "a6xx_fenced_write64" and make 64-bit reg writes use that instead of is_64b. Connor > > #endif /* __A6XX_GPU_H__ */ > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c > b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c > index > 6a12a35dabff1e64aae8440c2a8c88f5feb4803e..10625ffbc4cfc26edc36efcf11dbb4efd55ab3e0 > 100644 > --- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c > +++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c > @@ -41,7 +41,7 @@ static inline void set_preempt_state(struct a6xx_gpu *gpu, > } > > /* Write the most recent wptr for the given ring into the hardware */ > -static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer > *ring) > +static inline void update_wptr(struct a6xx_gpu *a6xx_gpu, struct > msm_ringbuffer *ring) > { > unsigned long flags; > uint32_t wptr; > @@ -51,7 +51,7 @@ static inline void update_wptr(struct msm_gpu *gpu, struct > msm_ringbuffer *ring) > if (ring->restore_wptr) { > wptr = get_wptr(ring); > > - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); > + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, > BIT(0), false); > > ring->restore_wptr = false; > } > @@ -172,7 +172,7 @@ void a6xx_preempt_irq(struct msm_gpu *gpu) > > set_preempt_state(a6xx_gpu, PREEMPT_FINISH); > > - update_wptr(gpu, a6xx_gpu->cur_ring); > + update_wptr(a6xx_gpu, a6xx_gpu->cur_ring); > > set_preempt_state(a6xx_gpu, PREEMPT_NONE); > > @@ -268,7 +268,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu) > */ > if (!ring || (a6xx_gpu->cur_ring == ring)) { > set_preempt_state(a6xx_gpu, PREEMPT_FINISH); > - update_wptr(gpu, a6xx_gpu->cur_ring); > + update_wptr(a6xx_gpu, a6xx_gpu->cur_ring); > set_preempt_state(a6xx_gpu, PREEMPT_NONE); > spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags); > return; > @@ -302,13 +302,13 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu) > > spin_unlock_irqrestore(&ring->preempt_lock, flags); > > - gpu_write64(gpu, > - REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, > - a6xx_gpu->preempt_smmu_iova[ring->id]); > + a6xx_fenced_write(a6xx_gpu, > + REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, > a6xx_gpu->preempt_smmu_iova[ring->id], > + BIT(1), true); > > - gpu_write64(gpu, > + a6xx_fenced_write(a6xx_gpu, > REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR, > - a6xx_gpu->preempt_iova[ring->id]); > + a6xx_gpu->preempt_iova[ring->id], BIT(1), true); > > a6xx_gpu->next_ring = ring; > > @@ -328,7 +328,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu) > set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED); > > /* Trigger the preemption */ > - gpu_write(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl); > + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl, > BIT(1), false); > } > > static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu, > > -- > 2.50.1 >