On Tue, Sep 30, 2025 at 11:18:17AM +0530, Akhil P Oommen wrote:
> A8x is the next generation of Adreno GPUs, featuring a significant
> hardware design change. A major update to the design is the introduction
> of Slice architecture. Slices are sort of mini-GPUs within the GPU which
> are more independent in processing Graphics and compute workloads. Also,
> in addition to the BV and BR pipe we saw in A7x, CP has more concurrency
> with additional pipes.
> 
> From a software interface perspective, these changes have a significant
> impact on the KMD side. First, the GPU register space has been extensively
> reorganized. Second, to avoid  a register space explosion caused by the
> new slice architecture and additional pipes, many registers are now
> virtualized, instead of duplicated as in A7x. KMD must configure an
> aperture register with the appropriate slice and pipe ID before accessing
> these virtualized registers.
> 
> This patch adds only a skeleton support for the A8x family. An A8x GPU
> support will be added in an upcoming patch.

Consider that this text will land in the commit message. What would
"an upcoming patch" mean to someone reading the Git history?

> 
> Signed-off-by: Akhil P Oommen <[email protected]>
> ---
>  drivers/gpu/drm/msm/Makefile                      |    1 +
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c             |  103 +-
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.h             |   21 +
>  drivers/gpu/drm/msm/adreno/a8xx_gpu.c             | 1238 
> +++++++++++++++++++++
>  drivers/gpu/drm/msm/adreno/adreno_gpu.h           |    7 +
>  drivers/gpu/drm/msm/registers/adreno/a6xx.xml     |    1 -
>  drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml |    1 +
>  7 files changed, 1344 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
> index 
> 7acf2cc13cd047eb7f5b3f14e1a42a1cc145e087..8aa7d07303fb0cd66869767cb6298b38a621b366
>  100644
> --- a/drivers/gpu/drm/msm/Makefile
> +++ b/drivers/gpu/drm/msm/Makefile
> @@ -24,6 +24,7 @@ adreno-y := \
>       adreno/a6xx_gmu.o \
>       adreno/a6xx_hfi.o \
>       adreno/a6xx_preempt.o \
> +     adreno/a8xx_gpu.o \
>  
>  adreno-$(CONFIG_DEBUG_FS) += adreno/a5xx_debugfs.o \
>  
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
> b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index 
> bd4f98b5457356c5454d0316e59d7e8253401712..4aeeaceb1fb30a9d68ac636c14249e3853ef73ac
>  100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -239,14 +239,21 @@ static void a6xx_set_pagetable(struct a6xx_gpu 
> *a6xx_gpu,
>       }
>  
>       if (!sysprof) {
> -             if (!adreno_is_a7xx(adreno_gpu)) {
> +             if (!(adreno_is_a7xx(adreno_gpu) || 
> adreno_is_a8xx(adreno_gpu))) {

Here and in several other similar places:

                if (!adreno_is_a7xx(adreno_gpu) &&
                    !adreno_is_a8xx(adreno_gpu)) {

>                       /* Turn off protected mode to write to special 
> registers */
>                       OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
>                       OUT_RING(ring, 0);
>               }
>  
> -             OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
> -             OUT_RING(ring, 1);
> +             if (adreno_is_a8xx(adreno_gpu)) {
> +                     OUT_PKT4(ring, REG_A8XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
> +                     OUT_RING(ring, 1);
> +                     OUT_PKT4(ring, 
> REG_A8XX_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1);
> +                     OUT_RING(ring, 1);
> +             } else {
> +                     OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
> +                     OUT_RING(ring, 1);
> +             }
>       }
>  
>       /* Execute the table update */
> @@ -275,7 +282,7 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
>        * to make sure BV doesn't race ahead while BR is still switching
>        * pagetables.
>        */
> -     if (adreno_is_a7xx(&a6xx_gpu->base)) {
> +     if (adreno_is_a7xx(&a6xx_gpu->base) && adreno_is_a8xx(&a6xx_gpu->base)) 
> {
>               OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
>               OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | 
> CP_SET_THREAD_BR);
>       }
> @@ -289,20 +296,22 @@ static void a6xx_set_pagetable(struct a6xx_gpu 
> *a6xx_gpu,
>       OUT_RING(ring, CACHE_INVALIDATE);
>  
>       if (!sysprof) {
> +             u32 reg_status = adreno_is_a8xx(adreno_gpu) ?
> +                     REG_A8XX_RBBM_PERFCTR_SRAM_INIT_STATUS :
> +                     REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS;
>               /*
>                * Wait for SRAM clear after the pgtable update, so the
>                * two can happen in parallel:
>                */
>               OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
>               OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
> -             OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(
> -                             REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
> +             OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(reg_status));
>               OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0));
>               OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
>               OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
>               OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
>  
> -             if (!adreno_is_a7xx(adreno_gpu)) {
> +             if (!(adreno_is_a7xx(adreno_gpu) || 
> adreno_is_a8xx(adreno_gpu))) {
>                       /* Re-enable protected mode: */
>                       OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
>                       OUT_RING(ring, 1);
> @@ -441,6 +450,7 @@ static void a7xx_submit(struct msm_gpu *gpu, struct 
> msm_gem_submit *submit)
>       struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
>       struct msm_ringbuffer *ring = submit->ring;
>       unsigned int i, ibs = 0;
> +     u32 rbbm_perfctr_cp0, cp_always_on_counter;
>  
>       adreno_check_and_reenable_stall(adreno_gpu);
>  
> @@ -460,10 +470,16 @@ static void a7xx_submit(struct msm_gpu *gpu, struct 
> msm_gem_submit *submit)
>       if (gpu->nr_rings > 1)
>               a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue);
>  
> -     get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
> -             rbmemptr_stats(ring, index, cpcycles_start));
> -     get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
> -             rbmemptr_stats(ring, index, alwayson_start));
> +     if (adreno_is_a8xx(adreno_gpu)) {
> +             rbbm_perfctr_cp0 = REG_A8XX_RBBM_PERFCTR_CP(0);
> +             cp_always_on_counter = REG_A8XX_CP_ALWAYS_ON_COUNTER;
> +     } else {
> +             rbbm_perfctr_cp0 = REG_A7XX_RBBM_PERFCTR_CP(0);
> +             cp_always_on_counter = REG_A6XX_CP_ALWAYS_ON_COUNTER;
> +     }
> +
> +     get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, 
> cpcycles_start));
> +     get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, 
> index, alwayson_start));
>  
>       OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
>       OUT_RING(ring, CP_SET_THREAD_BOTH);
> @@ -510,10 +526,8 @@ static void a7xx_submit(struct msm_gpu *gpu, struct 
> msm_gem_submit *submit)
>               OUT_RING(ring, 0x00e); /* IB1LIST end */
>       }
>  
> -     get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
> -             rbmemptr_stats(ring, index, cpcycles_end));
> -     get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
> -             rbmemptr_stats(ring, index, alwayson_end));
> +     get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, 
> cpcycles_end));
> +     get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, 
> index, alwayson_end));
>  
>       /* Write the fence to the scratch register */
>       OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
> @@ -706,8 +720,11 @@ static int a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
>       /* Copy the data into the internal struct to drop the const qualifier 
> (temporarily) */
>       *cfg = *common_cfg;
>  
> -     cfg->ubwc_swizzle = 0x6;
> -     cfg->highest_bank_bit = 15;
> +     /* Use common config as is for A8x */
> +     if (!adreno_is_a8xx(gpu)) {
> +             cfg->ubwc_swizzle = 0x6;
> +             cfg->highest_bank_bit = 15;
> +     }
>  
>       if (adreno_is_a610(gpu)) {
>               cfg->highest_bank_bit = 13;
> @@ -818,7 +835,7 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
>                 cfg->macrotile_mode);
>  }
>  
> -static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
> +void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
>  {
>       struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>       struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> @@ -868,7 +885,7 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
>       lock->dynamic_list_len = 0;
>  }
>  
> -static int a7xx_preempt_start(struct msm_gpu *gpu)
> +int a7xx_preempt_start(struct msm_gpu *gpu)
>  {
>       struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>       struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> @@ -925,7 +942,7 @@ static int a6xx_cp_init(struct msm_gpu *gpu)
>       return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
>  }
>  
> -static int a7xx_cp_init(struct msm_gpu *gpu)
> +int a7xx_cp_init(struct msm_gpu *gpu)
>  {
>       struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>       struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> @@ -993,7 +1010,7 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu 
> *a6xx_gpu,
>               return false;
>  
>       /* A7xx is safe! */
> -     if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu))
> +     if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu) || 
> adreno_is_a8xx(adreno_gpu))
>               return true;
>  
>       /*
> @@ -2161,7 +2178,7 @@ void a6xx_bus_clear_pending_transactions(struct 
> adreno_gpu *adreno_gpu, bool gx_
>  void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert)
>  {
>       /* 11nm chips (e.g. ones with A610) have hw issues with the reset line! 
> */
> -     if (adreno_is_a610(to_adreno_gpu(gpu)))
> +     if (adreno_is_a610(to_adreno_gpu(gpu)) || 
> adreno_is_a8xx(to_adreno_gpu(gpu)))
>               return;
>  
>       gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert);
> @@ -2192,7 +2209,12 @@ static int a6xx_gmu_pm_resume(struct msm_gpu *gpu)
>  
>       msm_devfreq_resume(gpu);
>  
> -     adreno_is_a7xx(adreno_gpu) ? a7xx_llc_activate(a6xx_gpu) : 
> a6xx_llc_activate(a6xx_gpu);
> +     if (adreno_is_a8xx(adreno_gpu))
> +             a8xx_llc_activate(a6xx_gpu);
> +     else if (adreno_is_a7xx(adreno_gpu))
> +             a7xx_llc_activate(a6xx_gpu);
> +     else
> +             a6xx_llc_activate(a6xx_gpu);
>  
>       return ret;
>  }
> @@ -2561,10 +2583,8 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device 
> *dev)
>       adreno_gpu->base.hw_apriv =
>               !!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV);
>  
> -     /* gpu->info only gets assigned in adreno_gpu_init() */
> -     is_a7xx = config->info->family == ADRENO_7XX_GEN1 ||
> -               config->info->family == ADRENO_7XX_GEN2 ||
> -               config->info->family == ADRENO_7XX_GEN3;
> +     /* gpu->info only gets assigned in adreno_gpu_init(). A8x is included 
> intentionally */
> +     is_a7xx = config->info->family >= ADRENO_7XX_GEN1;

Is A8xx also a part of is_a7xx? What about A9XX, which will come at
some point in the future?

>  
>       a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx);
>  
> +
> +int a8xx_gpu_feature_probe(struct msm_gpu *gpu)
> +{
> +     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> +     struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> +     u32 fuse_val;
> +     int ret;
> +
> +     /*
> +      * Assume that if qcom scm isn't available, that whatever
> +      * replacement allows writing the fuse register ourselves.
> +      * Users of alternative firmware need to make sure this
> +      * register is writeable or indicate that it's not somehow.
> +      * Print a warning because if you mess this up you're about to
> +      * crash horribly.
> +      */
> +     if (!qcom_scm_is_available()) {

How can it not be available here?

> +             dev_warn_once(gpu->dev->dev,
> +                     "SCM is not available, poking fuse register\n");
> +             a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE,
> +                     A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
> +                     A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND |
> +                     A7XX_CX_MISC_SW_FUSE_VALUE_LPAC);
> +             adreno_gpu->has_ray_tracing = true;
> +             return 0;
> +     }
> +
> +     ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ |
> +                                  QCOM_SCM_GPU_TSENSE_EN_REQ);
> +     if (ret)
> +             return ret;
> +
> +     /*
> +      * On a750 raytracing may be disabled by the firmware, find out

It's a8xx-related code, why do you have a750 in the comment?

> +      * whether that's the case. The scm call above sets the fuse
> +      * register.
> +      */
> +     fuse_val = a6xx_llc_read(a6xx_gpu,
> +                              REG_A7XX_CX_MISC_SW_FUSE_VALUE);
> +     adreno_gpu->has_ray_tracing =
> +             !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING);
> +
> +     return 0;
> +}
> +
> +
> +#define GBIF_CLIENT_HALT_MASK                BIT(0)
> +#define GBIF_ARB_HALT_MASK           BIT(1)
> +#define VBIF_XIN_HALT_CTRL0_MASK     GENMASK(3, 0)
> +#define VBIF_RESET_ACK_MASK          0xF0
> +#define GPR0_GBIF_HALT_REQUEST               0x1E0
> +
> +void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool 
> gx_off)
> +{
> +     struct msm_gpu *gpu = &adreno_gpu->base;
> +
> +     if (gx_off) {
> +             /* Halt the gx side of GBIF */
> +             gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 1);
> +             spin_until(gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT_ACK) & 1);
> +     }
> +
> +     /* Halt new client requests on GBIF */
> +     gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
> +     spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
> +                     (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
> +
> +     /* Halt all AXI requests on GBIF */
> +     gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
> +     spin_until((gpu_read(gpu,  REG_A6XX_GBIF_HALT_ACK) &
> +                     (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
> +
> +     /* The GBIF halt needs to be explicitly cleared */
> +     gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
> +}
> +
> +int a8xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
> +{
> +     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> +     struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> +
> +     mutex_lock(&a6xx_gpu->gmu.lock);
> +
> +     /* Force the GPU power on so we can read this register */
> +     a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
> +
> +     *value = gpu_read64(gpu, REG_A8XX_CP_ALWAYS_ON_COUNTER);
> +
> +     a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
> +
> +     mutex_unlock(&a6xx_gpu->gmu.lock);
> +
> +     return 0;
> +}
> +
> +u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
> +{
> +     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> +     struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> +     u64 busy_cycles;
> +
> +     /* 19.2MHz */
> +     *out_sample_rate = 19200000;
> +
> +     busy_cycles = gmu_read64(&a6xx_gpu->gmu,
> +                     REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
> +                     REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
> +
> +     return busy_cycles;
> +}
> +
> +bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
> +{
> +     return true;
> +}
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h 
> b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> index 
> 9831401c3bc865b803c2f9759d5e2ffcd79d19f8..6a2157f31122ba0c2f2a7005c98e3e4f1ada6acc
>  100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> @@ -90,6 +90,13 @@ struct adreno_reglist {
>       u32 value;
>  };
>  
> +/* Reglist with pipe information */
> +struct adreno_reglist_pipe {
> +     u32 offset;
> +     u32 value;
> +     u32 pipe;
> +};
> +
>  struct adreno_speedbin {
>       uint16_t fuse;
>       uint16_t speedbin;
> diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml 
> b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml
> index 
> ddde2e03b748f447b5e57571e2b04c68f8f2efc2..c3a202c8dce65d414c89bf76f1cb458b206b4eca
>  100644
> --- a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml
> +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml
> @@ -4876,7 +4876,6 @@ by a particular renderpass/blit.
>  <domain name="A6XX_CX_MISC" width="32" prefix="variant" varset="chip">
>       <reg32 offset="0x0001" name="SYSTEM_CACHE_CNTL_0"/>
>       <reg32 offset="0x0002" name="SYSTEM_CACHE_CNTL_1"/>
> -     <reg32 offset="0x0087" name="SLICE_ENABLE_FINAL" variants="A8XX-"/>

Why?

>       <reg32 offset="0x0039" name="CX_MISC_TCM_RET_CNTL" variants="A7XX-"/>
>       <reg32 offset="0x0087" name="CX_MISC_SLICE_ENABLE_FINAL" 
> variants="A8XX"/>
>       <reg32 offset="0x0400" name="CX_MISC_SW_FUSE_VALUE" variants="A7XX-">
> diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml 
> b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml
> index 
> 5dce7934056dd6472c368309b4894f0ed4a4d960..c4e00b1263cda65dce89c2f16860e5bf6f1c6244
>  100644
> --- a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml
> +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml
> @@ -60,6 +60,7 @@ 
> xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
>       <reg32 offset="0x1f400" name="GMU_ICACHE_CONFIG"/>
>       <reg32 offset="0x1f401" name="GMU_DCACHE_CONFIG"/>
>       <reg32 offset="0x1f40f" name="GMU_SYS_BUS_CONFIG"/>
> +     <reg32 offset="0x1f50b" name="GMU_MRC_GBIF_QOS_CTRL"/>
>       <reg32 offset="0x1f800" name="GMU_CM3_SYSRESET"/>
>       <reg32 offset="0x1f801" name="GMU_CM3_BOOT_CONFIG"/>
>       <reg32 offset="0x1f81a" name="GMU_CM3_FW_BUSY"/>
> 
> -- 
> 2.51.0
> 

-- 
With best wishes
Dmitry

Reply via email to