On 7/22/2025 7:09 PM, Dmitry Baryshkov wrote:
> On Sun, Jul 20, 2025 at 05:46:08PM +0530, Akhil P Oommen wrote:
>> There are some special registers which are accessible even when GX power
>> domain is collapsed during an IFPC sleep. Accessing these registers
>> wakes up GPU from power collapse and allow programming these registers
>> without additional handshake with GMU. This patch adds support for this
>> special register write sequence.
>>
>> Signed-off-by: Akhil P Oommen <akhi...@oss.qualcomm.com>
>> ---
>>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c     | 63 
>> ++++++++++++++++++++++++++++++-
>>  drivers/gpu/drm/msm/adreno/a6xx_gpu.h     |  1 +
>>  drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 20 +++++-----
>>  3 files changed, 73 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
>> b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> index 
>> 491fde0083a202bec7c6b3bca88d0e5a717a6560..8c004fc3abd2896d467a9728b34e99e4ed944dc4
>>  100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> @@ -16,6 +16,67 @@
>>  
>>  #define GPU_PAS_ID 13
>>  
>> +static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, 
>> u32 status, u32 mask)
>> +{
>> +    /* Success if !writedropped0/1 */>> +   if (!(status & mask))
>> +            return true;
>> +
>> +    udelay(10);
> 
> Why do we need udelay() here? Why can't we use interval setting inside
> gmu_poll_timeout()?

Then the delay won't be in the right place if we rely on
gmu_poll_timeout's interval setting. We need the below sequence:
1. reg write
2. check for status
        2.1 Done if success
3. wait
4. reg write again
5. goto 2

Another option is to avoid gmu_poll_timeout and just open-code the
polling loop here.

> 
>> +
>> +    /* Try to update fenced register again */
>> +    gpu_write(gpu, offset, value);
>> +    return false;
>> +}
>> +
>> +static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, 
>> u32 mask)
>> +{
>> +    struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
>> +    struct msm_gpu *gpu = &adreno_gpu->base;
>> +    struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
>> +    u32 status;
>> +
>> +    gpu_write(gpu, offset, value);
>> +
>> +    /* Nothing else to be done in the case of no-GMU */
>> +    if (adreno_has_gmu_wrapper(adreno_gpu))
>> +            return 0;
>> +

I think we should add an mb() here like downstream just to be cautious.

>> +    if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
>> +                    fence_status_check(gpu, offset, value, status, mask), 
>> 0, 1000))
>> +            return 0;
>> +
>> +    dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
>> +                    offset);
>> +
>> +    /* Try again for another 1ms before failing */
>> +    gpu_write(gpu, offset, value);
>> +    if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
>> +                    fence_status_check(gpu, offset, value, status, mask), 
>> 0, 1000))
>> +            return 0;
>> +
>> +    dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n",
>> +                    offset);
>> +
>> +    return -ETIMEDOUT;
>> +}
>> +
>> +int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 
>> mask, bool is_64b)
>> +{
>> +    int ret;
>> +
>> +    ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask);
>> +    if (ret)
>> +            return ret;
>> +
>> +    if (!is_64b)
>> +            return 0;
>> +
>> +    ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);
> 
> no need for a separate ret assignment.

Ack

> 
>> +
>> +    return ret;
>> +}
>> +
>>  static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
>>  {
>>      struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>> @@ -86,7 +147,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct 
>> msm_ringbuffer *ring)
>>      /* Update HW if this is the current ring and we are not in preempt*/
>>      if (!a6xx_in_preempt(a6xx_gpu)) {
>>              if (a6xx_gpu->cur_ring == ring)
>> -                    gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
>> +                    a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, 
>> BIT(0), false);
> 
> I can't stop but notice that we don't handle a6xx_fenced_write() errors.
> Is it fine? Or will it result in some sort of crash / reset?

Recover_worker will kick in indirectly, e.g. due to a hangcheck timeout
here. The chances of failure here on production devices are low, though.

-Akhil

> 
>>              else
>>                      ring->restore_wptr = true;
>>      } else {
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h 
>> b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> index 
>> 9201a53dd341bf432923ffb44947e015208a3d02..2be036a3faca58b4b559c30881e4b31d5929592a
>>  100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> @@ -291,5 +291,6 @@ int a6xx_gpu_state_put(struct msm_gpu_state *state);
>>  
>>  void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, 
>> bool gx_off);
>>  void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert);
>> +int a6xx_fenced_write(struct a6xx_gpu *gpu, u32 offset, u64 value, u32 
>> mask, bool is_64b);
>>  
>>  #endif /* __A6XX_GPU_H__ */
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c 
>> b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
>> index 
>> 3b17fd2dba89115a8e48ba9469e52e4305b0cdbb..5b0fd510ff58d989ab285f1a2497f6f522a6b187
>>  100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
>> @@ -41,7 +41,7 @@ static inline void set_preempt_state(struct a6xx_gpu *gpu,
>>  }
>>  
>>  /* Write the most recent wptr for the given ring into the hardware */
>> -static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer 
>> *ring)
>> +static inline void update_wptr(struct a6xx_gpu *a6xx_gpu, struct 
>> msm_ringbuffer *ring)
>>  {
>>      unsigned long flags;
>>      uint32_t wptr;
>> @@ -51,7 +51,7 @@ static inline void update_wptr(struct msm_gpu *gpu, struct 
>> msm_ringbuffer *ring)
>>      if (ring->restore_wptr) {
>>              wptr = get_wptr(ring);
>>  
>> -            gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
>> +            a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), 
>> false);
>>  
>>              ring->restore_wptr = false;
>>      }
>> @@ -172,7 +172,7 @@ void a6xx_preempt_irq(struct msm_gpu *gpu)
>>  
>>      set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
>>  
>> -    update_wptr(gpu, a6xx_gpu->cur_ring);
>> +    update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);
>>  
>>      set_preempt_state(a6xx_gpu, PREEMPT_NONE);
>>  
>> @@ -268,7 +268,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
>>       */
>>      if (!ring || (a6xx_gpu->cur_ring == ring)) {
>>              set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
>> -            update_wptr(gpu, a6xx_gpu->cur_ring);
>> +            update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);
>>              set_preempt_state(a6xx_gpu, PREEMPT_NONE);
>>              spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
>>              return;
>> @@ -302,13 +302,13 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
>>  
>>      spin_unlock_irqrestore(&ring->preempt_lock, flags);
>>  
>> -    gpu_write64(gpu,
>> -            REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO,
>> -            a6xx_gpu->preempt_smmu_iova[ring->id]);
>> +    a6xx_fenced_write(a6xx_gpu,
>> +            REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, 
>> a6xx_gpu->preempt_smmu_iova[ring->id],
>> +            BIT(1), true);
>>  
>> -    gpu_write64(gpu,
>> +    a6xx_fenced_write(a6xx_gpu,
>>              REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR,
>> -            a6xx_gpu->preempt_iova[ring->id]);
>> +            a6xx_gpu->preempt_iova[ring->id], BIT(1), true);
>>  
>>      a6xx_gpu->next_ring = ring;
>>  
>> @@ -328,7 +328,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
>>      set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED);
>>  
>>      /* Trigger the preemption */
>> -    gpu_write(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl);
>> +    a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl, 
>> BIT(1), false);
>>  }
>>  
>>  static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu,
>>
>> -- 
>> 2.50.1
>>
> 

Reply via email to