a6xx: Add support for using system cache(LLC)

Jordan Crouse Wed, 23 Sep 2020 08:04:55 -0700

On Tue, Sep 22, 2020 at 11:48:17AM +0530, Sai Prakash Ranjan wrote:
> From: Sharat Masetty <smase...@codeaurora.org>
> 
> The last level system cache can be partitioned to 32 different
> slices of which GPU has two slices preallocated. One slice is
> used for caching GPU buffers and the other slice is used for
> caching the GPU SMMU pagetables. This patch talks to the core system
> cache driver to acquire the slice handles, configures the SCIDs
> for those slices, and activates and deactivates the slices upon
> GPU restore and power collapse, respectively.
> 
> Some support from the IOMMU driver is also needed to set the right
> TCR attributes so that the system cache can be used. The GPU can
> then override a few cacheability parameters; it uses this to change
> write-allocate to write-no-allocate, as the GPU hardware does not
> benefit much from write-allocate.
> 
> DOMAIN_ATTR_SYS_CACHE is another domain level attribute used by the
> IOMMU driver to set the right attributes to cache the hardware
> pagetables into the system cache.
> 
> Signed-off-by: Sharat Masetty <smase...@codeaurora.org>
> [saiprakash.ranjan: fix to set attr before device attach to iommu and rebase]
> Signed-off-by: Sai Prakash Ranjan <saiprakash.ran...@codeaurora.org>
> ---
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c   | 83 +++++++++++++++++++++++++
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.h   |  4 ++
>  drivers/gpu/drm/msm/adreno/adreno_gpu.c | 17 +++++
>  3 files changed, 104 insertions(+)
> 
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
> b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index 8915882e4444..151190ff62f7 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -8,7 +8,9 @@
>  #include "a6xx_gpu.h"
>  #include "a6xx_gmu.xml.h"
>  
> +#include <linux/bitfield.h>
>  #include <linux/devfreq.h>
> +#include <linux/soc/qcom/llcc-qcom.h>
>  
>  #define GPU_PAS_ID 13
>  
> @@ -1022,6 +1024,79 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
>       return IRQ_HANDLED;
>  }
>  
> +static void a6xx_llc_rmw(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 mask, u32 
> or)
> +{
> +     return msm_rmw(a6xx_gpu->llc_mmio + (reg << 2), mask, or);
> +}
> +
> +static void a6xx_llc_write(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 value)
> +{
> +     return msm_writel(value, a6xx_gpu->llc_mmio + (reg << 2));
> +}
> +
> +static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
> +{
> +     llcc_slice_deactivate(a6xx_gpu->llc_slice);
> +     llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
> +}
> +
> +static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
> +{
> +     u32 cntl1_regval = 0;
> +
> +     if (IS_ERR(a6xx_gpu->llc_mmio))
> +             return;
> +
> +     if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
> +             u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
> +
> +             gpu_scid &= 0x1f;
> +             cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 
> 10) |
> +                            (gpu_scid << 15) | (gpu_scid << 20);
> +     }
> +
> +     if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
> +             u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
> +
> +             gpuhtw_scid &= 0x1f;
> +             cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
> +     }
> +
> +     if (cntl1_regval) {
> +             /*
> +              * Program the slice IDs for the various GPU blocks and GPU MMU
> +              * pagetables
> +              */
> +             a6xx_llc_write(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, 
> cntl1_regval);
> +
> +             /*
> +              * Program cacheability overrides to not allocate cache lines on
> +              * a write miss
> +              */
> +             a6xx_llc_rmw(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 
> 0xF, 0x03);
> +     }
> +}


This code has been around long enough that it pre-dates a650. On a650 and other
MMU-500 targets, the htw_llc is configured by the firmware and the llc_slice is
configured in a different register.

I don't think we need to pause everything and add support for the MMU-500 path,
but we do need a way to disallow LLCC on affected targets until such time that
we can get it fixed up.

Jordan

> +
> +static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
> +{
> +     llcc_slice_putd(a6xx_gpu->llc_slice);
> +     llcc_slice_putd(a6xx_gpu->htw_llc_slice);
> +}
> +
> +static void a6xx_llc_slices_init(struct platform_device *pdev,
> +             struct a6xx_gpu *a6xx_gpu)
> +{
> +     a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
> +     if (IS_ERR(a6xx_gpu->llc_mmio))
> +             return;
> +
> +     a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
> +     a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
> +
> +     if (IS_ERR(a6xx_gpu->llc_slice) && IS_ERR(a6xx_gpu->htw_llc_slice))
> +             a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
> +}
> +
>  static int a6xx_pm_resume(struct msm_gpu *gpu)
>  {
>       struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> @@ -1038,6 +1113,8 @@ static int a6xx_pm_resume(struct msm_gpu *gpu)
>  
>       msm_gpu_resume_devfreq(gpu);
>  
> +     a6xx_llc_activate(a6xx_gpu);
> +
>       return 0;
>  }
>  
> @@ -1048,6 +1125,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
>  
>       trace_msm_gpu_suspend(0);
>  
> +     a6xx_llc_deactivate(a6xx_gpu);
> +
>       devfreq_suspend_device(gpu->devfreq.devfreq);
>  
>       return a6xx_gmu_stop(a6xx_gpu);
> @@ -1091,6 +1170,8 @@ static void a6xx_destroy(struct msm_gpu *gpu)
>               drm_gem_object_put(a6xx_gpu->shadow_bo);
>       }
>  
> +     a6xx_llc_slices_destroy(a6xx_gpu);
> +
>       a6xx_gmu_remove(a6xx_gpu);
>  
>       adreno_gpu_cleanup(adreno_gpu);
> @@ -1209,6 +1290,8 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
>       if (info && info->revn == 650)
>               adreno_gpu->base.hw_apriv = true;
>  
> +     a6xx_llc_slices_init(pdev, a6xx_gpu);
> +
>       ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
>       if (ret) {
>               a6xx_destroy(&(a6xx_gpu->base.base));
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h 
> b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> index 3eeebf6a754b..9e6079af679c 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> @@ -28,6 +28,10 @@ struct a6xx_gpu {
>       uint32_t *shadow;
>  
>       bool has_whereami;
> +
> +     void __iomem *llc_mmio;
> +     void *llc_slice;
> +     void *htw_llc_slice;
>  };
>  
>  #define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base)
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
> b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> index fd8f491f2e48..86c4fe667225 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> @@ -16,6 +16,7 @@
>  #include <linux/soc/qcom/mdt_loader.h>
>  #include <soc/qcom/ocmem.h>
>  #include "adreno_gpu.h"
> +#include "a6xx_gpu.h"
>  #include "msm_gem.h"
>  #include "msm_mmu.h"
>  
> @@ -189,6 +190,8 @@ struct msm_gem_address_space *
>  adreno_iommu_create_address_space(struct msm_gpu *gpu,
>               struct platform_device *pdev)
>  {
> +     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> +     struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
>       struct iommu_domain *iommu;
>       struct msm_mmu *mmu;
>       struct msm_gem_address_space *aspace;
> @@ -198,7 +201,21 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu,
>       if (!iommu)
>               return NULL;
>  
> +     /*
> +      * This allows GPU to set the bus attributes required to use system
> +      * cache on behalf of the iommu page table walker.
> +      */
> +     if (!IS_ERR(a6xx_gpu->htw_llc_slice)) {
> +             int gpu_htw_llc = 1;
> +
> +             iommu_domain_set_attr(iommu, DOMAIN_ATTR_SYS_CACHE, 
> &gpu_htw_llc);
> +     }
> +
>       mmu = msm_iommu_new(&pdev->dev, iommu);
> +     if (IS_ERR(mmu)) {
> +             iommu_domain_free(iommu);
> +             return ERR_CAST(mmu);
> +     }
>  
>       /*
>        * Use the aperture start or SZ_16M, whichever is greater. This will
> -- 
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
> of Code Aurora Forum, hosted by The Linux Foundation
> 

-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCHv5 4/6] drm/msm/a6xx: Add support for using system cache(LLC)

Reply via email to