Re: [Freedreno] [PATCH 5/5] drm/msm/a6xx: Add support for using system cache(LLC)

2019-12-20 Thread Jordan Crouse
On Fri, Dec 20, 2019 at 03:40:59PM +0530, smase...@codeaurora.org wrote:
> On 2019-12-20 01:28, Jordan Crouse wrote:
> >On Thu, Dec 19, 2019 at 06:44:46PM +0530, Sharat Masetty wrote:
> >>The last level system cache can be partitioned into 32 different slices,
> >>of which the GPU has two preallocated. One slice is used for caching GPU
> >>buffers and the other for caching the GPU SMMU pagetables. This patch
> >>talks to the core system cache driver to acquire the slice handles,
> >>configures the SCIDs for those slices, and activates and deactivates the
> >>slices upon GPU power collapse and restore.
> >>
> >>Some support from the IOMMU driver is also needed to make use of the
> >>system cache. IOMMU_QCOM_SYS_CACHE is a buffer protection flag which
> >>enables caching GPU data buffers in the system cache with memory
> >>attributes such as outer cacheable, read-allocate, write-allocate. The
> >>GPU can then override a few cacheability parameters, which it uses to
> >>downgrade write-allocate to write-no-allocate, as the GPU hardware does
> >>not benefit much from it.
> >>
> >>Similarly, DOMAIN_ATTR_QCOM_SYS_CACHE is a domain-level attribute used
> >>by the IOMMU driver to set the right attributes for caching the hardware
> >>pagetables in the system cache.
> >>
> >>Signed-off-by: Sharat Masetty 
> >>---
> >> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 122 +-
> >> drivers/gpu/drm/msm/adreno/a6xx_gpu.h |   9 +++
> >> drivers/gpu/drm/msm/msm_iommu.c   |  13 
> >> drivers/gpu/drm/msm/msm_mmu.h |   3 +
> >> 4 files changed, 146 insertions(+), 1 deletion(-)
> >>
> >>diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> >>index faff6ff..0c7fdee 100644
> >>--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> >>+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> >>@@ -9,6 +9,7 @@
> >> #include "a6xx_gmu.xml.h"
> >>
> >> #include 
> >>+#include <linux/soc/qcom/llcc-qcom.h>
> >>
> >> #define GPU_PAS_ID 13
> >>
> >>@@ -781,6 +782,117 @@ static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu)
> >>gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
> >> }
> >>
> >>+#define A6XX_LLC_NUM_GPU_SCIDS 5
> >>+#define A6XX_GPU_LLC_SCID_NUM_BITS 5
> >
> >As I mention below, I'm not sure if we need these
> >
> >>+#define A6XX_GPU_LLC_SCID_MASK \
> >>+   ((1 << (A6XX_LLC_NUM_GPU_SCIDS * A6XX_GPU_LLC_SCID_NUM_BITS)) - 1)
> >>+
> >>+#define A6XX_GPUHTW_LLC_SCID_SHIFT 25
> >>+#define A6XX_GPUHTW_LLC_SCID_MASK \
> >>+   (((1 << A6XX_GPU_LLC_SCID_NUM_BITS) - 1) << A6XX_GPUHTW_LLC_SCID_SHIFT)
> >>+
> >
> >Normally these go into the envytools regmap but if we're going to do these
> >guys let's use the power of <linux/bitfield.h> for good.
> >
> >#define A6XX_GPU_LLC_SCID GENMASK(24, 0)
> >#define A6XX_GPUHTW_LLC_SCID GENMASK(29, 25)
> >
> >>+static inline void a6xx_gpu_cx_rmw(struct a6xx_llc *llc,
> >
> >Don't mark C functions as inline - let the compiler figure it out for you.
> >
> >>+   u32 reg, u32 mask, u32 or)
> >>+{
> >>+   msm_rmw(llc->mmio + (reg << 2), mask, or);
> >>+}
> >>+
> >>+static void a6xx_llc_deactivate(struct a6xx_llc *llc)
> >>+{
> >>+   llcc_slice_deactivate(llc->gpu_llc_slice);
> >>+   llcc_slice_deactivate(llc->gpuhtw_llc_slice);
> >>+}
> >>+
> >>+static void a6xx_llc_activate(struct a6xx_llc *llc)
> >>+{
> >>+   if (!llc->mmio)
> >>+   return;
> >>+
> >>+   /* Program the sub-cache ID for all GPU blocks */
> >>+   if (!llcc_slice_activate(llc->gpu_llc_slice))
> >>+   a6xx_gpu_cx_rmw(llc,
> >>+   REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1,
> >>+   A6XX_GPU_LLC_SCID_MASK,
> >>+   (llc->cntl1_regval &
> >>+A6XX_GPU_LLC_SCID_MASK));
> >
> >This is out of order with the comments below, but if we store the slice id
> >then you could calculate regval here and not have to store it.
> >
> >>+
> >>+   /* Program the sub-cache ID for the GPU pagetables */
> >>+   if (!llcc_slice_activate(llc->gpuhtw_llc_slice))
> >
> >val |= FIELD_SET(A6XX_GPUHTW_LLC_SCID, htw_llc_sliceid);
> >
> >>+   a6xx_gpu_cx_rmw(llc,
> >>+   REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1,
> >>+   A6XX_GPUHTW_LLC_SCID_MASK,
> >>+   (llc->cntl1_regval &
> >>+A6XX_GPUHTW_LLC_SCID_MASK));
> >
> >And this could be FIELD_SET(A6XX_GPUHTW_LLC_SCID, sliceid);
> >
> >In theory you could just calculate the u32 and write it directly without a
> >rmw. In fact, that might be preferable - if the slice activate failed, you
> >don't want to run the risk that the scid for htw is still populated.
> >
> >>+
> >>+   /* Program cacheability overrides */
> >>+   a6xx_gpu_cx_rmw(llc, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF,
> >>+   llc->cntl0_regval);
> >
> >As below, this could easily be a constant.
> >
> >>+}
> >>+
> 

Re: [PATCH 5/5] drm/msm/a6xx: Add support for using system cache(LLC)

2019-12-19 Thread Jordan Crouse
On Thu, Dec 19, 2019 at 12:58:15PM -0700, Jordan Crouse wrote:
> On Thu, Dec 19, 2019 at 06:44:46PM +0530, Sharat Masetty wrote:




> > +
> > +   /*
> > +* CNTL1 is used to specify SCID for (CP, TP, VFD, CCU and UBWC
> > +* FLAG cache) GPU blocks. This value will be passed along with
> > +* the address for any memory transaction from GPU to identify
> > +* the sub-cache for that transaction.
> > +*/
> > +   if (!IS_ERR(llc->gpu_llc_slice)) {
> > +   u32 gpu_scid = llcc_get_slice_id(llc->gpu_llc_slice);
> > +   int i;
> > +
> > +   for (i = 0; i < A6XX_LLC_NUM_GPU_SCIDS; i++)
> > +   llc->cntl1_regval |=
> > +   gpu_scid << (A6XX_GPU_LLC_SCID_NUM_BITS * i);
> 
> As above, I'm not sure a loop is better than just:
> 
> gpu_scid &= 0x1f;
> 
> llc->cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10)
>  | (gpu_scid << 15) | (gpu_scid << 20);
> 
> And I'm not even sure we need to do this math here in the first place.

One more question - can you get a valid slice id before activation?



Jordan
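
For reference, the five-way replication above can also be written without a
loop or an OR chain - a hypothetical sketch, assuming the slice id is already
valid at this point (which is exactly the open question above). Multiplying a
5-bit value by 0x108421 places a copy of it at bit positions 0, 5, 10, 15
and 20:

	/* Sketch only: 0x108421 has bits 0, 5, 10, 15 and 20 set, so the
	 * multiply replicates the 5-bit SCID into all five block fields.
	 */
	u32 gpu_scid = llcc_get_slice_id(llc->gpu_llc_slice) & 0x1f;

	llc->cntl1_regval = gpu_scid * 0x108421;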



Re: [PATCH 5/5] drm/msm/a6xx: Add support for using system cache(LLC)

2019-12-19 Thread Jordan Crouse
On Thu, Dec 19, 2019 at 06:44:46PM +0530, Sharat Masetty wrote:
> The last level system cache can be partitioned into 32 different slices,
> of which the GPU has two preallocated. One slice is used for caching GPU
> buffers and the other for caching the GPU SMMU pagetables. This patch
> talks to the core system cache driver to acquire the slice handles,
> configures the SCIDs for those slices, and activates and deactivates the
> slices upon GPU power collapse and restore.
> 
> Some support from the IOMMU driver is also needed to make use of the
> system cache. IOMMU_QCOM_SYS_CACHE is a buffer protection flag which
> enables caching GPU data buffers in the system cache with memory
> attributes such as outer cacheable, read-allocate, write-allocate. The
> GPU can then override a few cacheability parameters, which it uses to
> downgrade write-allocate to write-no-allocate, as the GPU hardware does
> not benefit much from it.
> 
> Similarly, DOMAIN_ATTR_QCOM_SYS_CACHE is a domain-level attribute used
> by the IOMMU driver to set the right attributes for caching the hardware
> pagetables in the system cache.
> 
> Signed-off-by: Sharat Masetty 
> ---
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 122 +-
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.h |   9 +++
>  drivers/gpu/drm/msm/msm_iommu.c   |  13 
>  drivers/gpu/drm/msm/msm_mmu.h |   3 +
>  4 files changed, 146 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index faff6ff..0c7fdee 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -9,6 +9,7 @@
>  #include "a6xx_gmu.xml.h"
> 
>  #include 
> +#include <linux/soc/qcom/llcc-qcom.h>
> 
>  #define GPU_PAS_ID 13
> 
> @@ -781,6 +782,117 @@ static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu)
>   gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
>  }
> 
> +#define A6XX_LLC_NUM_GPU_SCIDS   5
> +#define A6XX_GPU_LLC_SCID_NUM_BITS   5

As I mention below, I'm not sure if we need these 

> +#define A6XX_GPU_LLC_SCID_MASK \
> + ((1 << (A6XX_LLC_NUM_GPU_SCIDS * A6XX_GPU_LLC_SCID_NUM_BITS)) - 1)
> +
> +#define A6XX_GPUHTW_LLC_SCID_SHIFT   25
> +#define A6XX_GPUHTW_LLC_SCID_MASK \
> + (((1 << A6XX_GPU_LLC_SCID_NUM_BITS) - 1) << A6XX_GPUHTW_LLC_SCID_SHIFT)
> +

Normally these go into the envytools regmap but if we're going to do these
guys let's use the power of <linux/bitfield.h> for good.

#define A6XX_GPU_LLC_SCID GENMASK(24, 0)
#define A6XX_GPUHTW_LLC_SCID GENMASK(29, 25)

> +static inline void a6xx_gpu_cx_rmw(struct a6xx_llc *llc,

Don't mark C functions as inline - let the compiler figure it out for you.

> + u32 reg, u32 mask, u32 or)
> +{
> + msm_rmw(llc->mmio + (reg << 2), mask, or);
> +}
> +
> +static void a6xx_llc_deactivate(struct a6xx_llc *llc)
> +{
> + llcc_slice_deactivate(llc->gpu_llc_slice);
> + llcc_slice_deactivate(llc->gpuhtw_llc_slice);
> +}
> +
> +static void a6xx_llc_activate(struct a6xx_llc *llc)
> +{
> + if (!llc->mmio)
> + return;
> +
> + /* Program the sub-cache ID for all GPU blocks */
> + if (!llcc_slice_activate(llc->gpu_llc_slice))
> + a6xx_gpu_cx_rmw(llc,
> + REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1,
> + A6XX_GPU_LLC_SCID_MASK,
> + (llc->cntl1_regval &
> +  A6XX_GPU_LLC_SCID_MASK));

This is out of order with the comments below, but if we store the slice id then
you could calculate regval here and not have to store it.

> +
> + /* Program the sub-cache ID for the GPU pagetables */
> + if (!llcc_slice_activate(llc->gpuhtw_llc_slice))

val |= FIELD_SET(A6XX_GPUHTW_LLC_SCID, htw_llc_sliceid);

> + a6xx_gpu_cx_rmw(llc,
> + REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1,
> + A6XX_GPUHTW_LLC_SCID_MASK,
> + (llc->cntl1_regval &
> +  A6XX_GPUHTW_LLC_SCID_MASK));

And this could be FIELD_SET(A6XX_GPUHTW_LLC_SCID, sliceid);

In theory you could just calculate the u32 and write it directly without a rmw.
In fact, that might be preferable - if the slice activate failed, you don't want
to run the risk that the scid for htw is still populated.
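
Putting those two suggestions together, a minimal sketch of what the activate
path might look like. Assumptions: FIELD_PREP from <linux/bitfield.h> (the
in-tree macro; FIELD_SET as written above is not in mainline), the GENMASK
fields proposed earlier, msm_writel() and llcc_get_slice_id() with their usual
signatures, and the 0x108421 multiplier replicating a 5-bit SCID into bits
0, 5, 10, 15 and 20:

#include <linux/bitfield.h>

#define A6XX_GPU_LLC_SCID	GENMASK(24, 0)
#define A6XX_GPUHTW_LLC_SCID	GENMASK(29, 25)

static void a6xx_llc_activate(struct a6xx_llc *llc)
{
	u32 cntl1 = 0;

	if (!llc->mmio)
		return;

	/* Replicate the 5-bit GPU SCID across all five block fields */
	if (!llcc_slice_activate(llc->gpu_llc_slice))
		cntl1 |= FIELD_PREP(A6XX_GPU_LLC_SCID,
			llcc_get_slice_id(llc->gpu_llc_slice) * 0x108421);

	/* The pagetable-walker SCID has its own field */
	if (!llcc_slice_activate(llc->gpuhtw_llc_slice))
		cntl1 |= FIELD_PREP(A6XX_GPUHTW_LLC_SCID,
			llcc_get_slice_id(llc->gpuhtw_llc_slice));

	/*
	 * A plain write instead of a rmw: if either activation failed,
	 * its field stays zero and no stale SCID lingers in the register.
	 */
	msm_writel(cntl1, llc->mmio +
		(REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1 << 2));
}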

> +
> + /* Program cacheability overrides */
> + a6xx_gpu_cx_rmw(llc, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF,
> + llc->cntl0_regval);

As below, this could easily be a constant.

> +}
> +
> +static void a6xx_llc_slices_destroy(struct a6xx_llc *llc)
> +{
> + if (llc->mmio)
> + iounmap(llc->mmio);

msm_ioremap returns a devm_ managed resource, so do not use iounmap() to free
it. Best to just leave it and let the gpu device handle it when it goes boom.
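
In other words, the destroy path shrinks to just the slice handles - a sketch
under that assumption:

static void a6xx_llc_slices_destroy(struct a6xx_llc *llc)
{
	/*
	 * llc->mmio comes from msm_ioremap(), which is devm-managed:
	 * it is released automatically when the GPU device is torn
	 * down, so there is no iounmap() here.
	 */
	llcc_slice_putd(llc->gpu_llc_slice);
	llcc_slice_putd(llc->gpuhtw_llc_slice);
}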

> +
> + llcc_slice_putd(llc->gpu_llc_slice);
> + 

[PATCH 5/5] drm/msm/a6xx: Add support for using system cache(LLC)

2019-12-19 Thread Sharat Masetty
The last level system cache can be partitioned into 32 different slices,
of which the GPU has two preallocated. One slice is used for caching GPU
buffers and the other for caching the GPU SMMU pagetables. This patch
talks to the core system cache driver to acquire the slice handles,
configures the SCIDs for those slices, and activates and deactivates the
slices upon GPU power collapse and restore.

Some support from the IOMMU driver is also needed to make use of the
system cache. IOMMU_QCOM_SYS_CACHE is a buffer protection flag which
enables caching GPU data buffers in the system cache with memory
attributes such as outer cacheable, read-allocate, write-allocate. The
GPU can then override a few cacheability parameters, which it uses to
downgrade write-allocate to write-no-allocate, as the GPU hardware does
not benefit much from it.

Similarly, DOMAIN_ATTR_QCOM_SYS_CACHE is a domain-level attribute used
by the IOMMU driver to set the right attributes for caching the hardware
pagetables in the system cache.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 122 +-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h |   9 +++
 drivers/gpu/drm/msm/msm_iommu.c   |  13 
 drivers/gpu/drm/msm/msm_mmu.h |   3 +
 4 files changed, 146 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index faff6ff..0c7fdee 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -9,6 +9,7 @@
 #include "a6xx_gmu.xml.h"

 #include 
+#include <linux/soc/qcom/llcc-qcom.h>

 #define GPU_PAS_ID 13

@@ -781,6 +782,117 @@ static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu)
gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
 }

+#define A6XX_LLC_NUM_GPU_SCIDS 5
+#define A6XX_GPU_LLC_SCID_NUM_BITS 5
+
+#define A6XX_GPU_LLC_SCID_MASK \
+   ((1 << (A6XX_LLC_NUM_GPU_SCIDS * A6XX_GPU_LLC_SCID_NUM_BITS)) - 1)
+
+#define A6XX_GPUHTW_LLC_SCID_SHIFT 25
+#define A6XX_GPUHTW_LLC_SCID_MASK \
+   (((1 << A6XX_GPU_LLC_SCID_NUM_BITS) - 1) << A6XX_GPUHTW_LLC_SCID_SHIFT)
+
+static inline void a6xx_gpu_cx_rmw(struct a6xx_llc *llc,
+   u32 reg, u32 mask, u32 or)
+{
+   msm_rmw(llc->mmio + (reg << 2), mask, or);
+}
+
+static void a6xx_llc_deactivate(struct a6xx_llc *llc)
+{
+   llcc_slice_deactivate(llc->gpu_llc_slice);
+   llcc_slice_deactivate(llc->gpuhtw_llc_slice);
+}
+
+static void a6xx_llc_activate(struct a6xx_llc *llc)
+{
+   if (!llc->mmio)
+   return;
+
+   /* Program the sub-cache ID for all GPU blocks */
+   if (!llcc_slice_activate(llc->gpu_llc_slice))
+   a6xx_gpu_cx_rmw(llc,
+   REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1,
+   A6XX_GPU_LLC_SCID_MASK,
+   (llc->cntl1_regval &
+A6XX_GPU_LLC_SCID_MASK));
+
+   /* Program the sub-cache ID for the GPU pagetables */
+   if (!llcc_slice_activate(llc->gpuhtw_llc_slice))
+   a6xx_gpu_cx_rmw(llc,
+   REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1,
+   A6XX_GPUHTW_LLC_SCID_MASK,
+   (llc->cntl1_regval &
+A6XX_GPUHTW_LLC_SCID_MASK));
+
+   /* Program cacheability overrides */
+   a6xx_gpu_cx_rmw(llc, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF,
+   llc->cntl0_regval);
+}
+
+static void a6xx_llc_slices_destroy(struct a6xx_llc *llc)
+{
+   if (llc->mmio)
+   iounmap(llc->mmio);
+
+   llcc_slice_putd(llc->gpu_llc_slice);
+   llcc_slice_putd(llc->gpuhtw_llc_slice);
+}
+
+static int a6xx_llc_slices_init(struct platform_device *pdev,
+   struct a6xx_llc *llc)
+{
+   llc->mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
+   if (IS_ERR_OR_NULL(llc->mmio))
+   return -ENODEV;
+
+   llc->gpu_llc_slice = llcc_slice_getd(LLCC_GPU);
+   llc->gpuhtw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
+   if (IS_ERR(llc->gpu_llc_slice) && IS_ERR(llc->gpuhtw_llc_slice))
+   return -ENODEV;
+
+   /*
+* CNTL0 provides options to override the settings for the
+* read and write allocation policies for the LLC. These
+* overrides are global for all memory transactions from
+* the GPU.
+*
+* 0x3: read-no-alloc-overridden = 0
+*  read-no-alloc = 0 - Allocate lines on read miss
+*  write-no-alloc-overridden = 1
+*  write-no-alloc = 1 - Do not allocate lines on write miss
+*/
+   llc->cntl0_regval = 0x03;
+
+   /*
+* CNTL1 is used to specify SCID for (CP, TP, VFD, CCU and UBWC
+* FLAG cache) GPU blocks. This value will be passed along with
+* the address for any memory transaction from GPU to identify
+
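
On the "this could easily be a constant" remark earlier in the thread: the
0x03 override above could also be spelled symbolically. A sketch with
hypothetical macro names - the bit positions are inferred from the CNTL0
comment in the patch, not from any register documentation:

/* Hypothetical names; bit positions inferred from the comment above */
#define A6XX_LLC_WR_NO_ALLOC_OVERRIDE	BIT(1)	/* override the write policy */
#define A6XX_LLC_WR_NO_ALLOC		BIT(0)	/* no allocate on write miss */

#define A6XX_LLC_CNTL0_REGVAL \
	(A6XX_LLC_WR_NO_ALLOC_OVERRIDE | A6XX_LLC_WR_NO_ALLOC)

The msm_iommu.c and msm_mmu.h hunks are truncated in this archive. Going by
the commit message, the domain attribute would plausibly be set before
attaching the device - a sketch, not the actual hunk:

static int msm_iommu_attach(struct msm_mmu *mmu, const char * const *names,
		int cnt)
{
	struct msm_iommu *iommu = to_msm_iommu(mmu);
	int gpu_htw_llc = 1;

	/* Ask the SMMU driver to cache pagetable walks in the LLC */
	if (iommu_domain_set_attr(iommu->domain,
			DOMAIN_ATTR_QCOM_SYS_CACHE, &gpu_htw_llc))
		dev_warn(mmu->dev,
			 "unable to use the system cache for pagetables\n");

	return iommu_attach_device(iommu->domain, mmu->dev);
}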

Re: [PATCH 5/5] drm/msm/A6xx: Add support for using system cache(llc)

2018-04-05 Thread Vivek Gautam

Hi Sharat,


On 3/23/2018 12:49 PM, Sharat Masetty wrote:

The last level system cache can be partitioned into 32
different slices, of which the GPU has two preallocated.
The "gpu" slice is used for caching GPU buffers and
the "gpuhtw" slice is used for caching the GPU SMMU
pagetables. This patch talks to the core system cache
driver to acquire the slice handles, configures the SCIDs
for those slices, and activates and deactivates the slices
upon GPU power collapse and restore.

Some support from the IOMMU driver is also needed to
make use of the system cache. IOMMU_UPSTREAM_HINT is
a buffer protection flag which enables caching GPU data
buffers in the system cache with memory attributes such
as outer cacheable, read-allocate, write-allocate.
The GPU can then override a few cacheability parameters,
which it uses to downgrade write-allocate to
write-no-allocate, as the GPU hardware does not benefit
much from it.
Similarly, DOMAIN_ATTR_USE_UPSTREAM_HINT is a domain-level
attribute used by the IOMMU driver to set the right
attributes for caching the hardware pagetables in the
system cache.

Signed-off-by: Sharat Masetty 
---


Couple of minor nits. Please see comments inline below.


  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 162 +-
  drivers/gpu/drm/msm/adreno/a6xx_gpu.h |   9 ++
  drivers/gpu/drm/msm/msm_iommu.c   |  13 +++
  drivers/gpu/drm/msm/msm_mmu.h |   3 +
  4 files changed, 186 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index bd50674..e4554eb 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -13,6 +13,7 @@
  
  #include 

  #include 
+#include <linux/soc/qcom/llcc-qcom.h>
  
  #include "msm_gem.h"

  #include "msm_mmu.h"
@@ -913,6 +914,154 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
~0
  };
  
+#define A6XX_LLC_NUM_GPU_SCIDS		5

+#define A6XX_GPU_LLC_SCID_NUM_BITS 5
+
+#define A6XX_GPU_LLC_SCID_MASK \
+   ((1 << (A6XX_LLC_NUM_GPU_SCIDS * A6XX_GPU_LLC_SCID_NUM_BITS)) - 1)
+
+#define A6XX_GPUHTW_LLC_SCID_SHIFT 25
+#define A6XX_GPUHTW_LLC_SCID_MASK \
+   (((1 << A6XX_GPU_LLC_SCID_NUM_BITS) - 1) << A6XX_GPUHTW_LLC_SCID_SHIFT)
+
+static inline void a6xx_gpu_cx_rmw(struct a6xx_llc *llc,
+   u32 reg, u32 mask, u32 or)
+{
+   msm_rmw(llc->mmio + (reg << 2), mask, or);
+}
+
+static void a6xx_llc_deactivate(struct msm_gpu *gpu)
+{
+   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+   struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+   struct a6xx_llc *llc = &a6xx_gpu->llc;
+
+   llcc_slice_deactivate(llc->gpu_llc_slice);
+   llcc_slice_deactivate(llc->gpuhtw_llc_slice);
+}
+
+static void a6xx_llc_activate(struct msm_gpu *gpu)
+{
+   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+   struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+   struct a6xx_llc *llc = &a6xx_gpu->llc;
+
+   if (!llc->mmio)
+   return;
+
+   if (llc->gpu_llc_slice)
+   if (!llcc_slice_activate(llc->gpu_llc_slice))
+   /* Program the sub-cache ID for all GPU blocks */
+   a6xx_gpu_cx_rmw(llc,
+   REG_A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1,
+   A6XX_GPU_LLC_SCID_MASK,
+   (llc->cntl1_regval &
+   A6XX_GPU_LLC_SCID_MASK));
+
+   if (llc->gpuhtw_llc_slice)
+   if (!llcc_slice_activate(llc->gpuhtw_llc_slice))
+   /* Program the sub-cache ID for GPU pagetables */
+   a6xx_gpu_cx_rmw(llc,
+   REG_A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1,
+   A6XX_GPUHTW_LLC_SCID_MASK,
+   (llc->cntl1_regval &
+   A6XX_GPUHTW_LLC_SCID_MASK));
+
+   /* Program cacheability overrides */
+   a6xx_gpu_cx_rmw(llc, REG_A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF,
+   llc->cntl0_regval);
+}
+
+void a6xx_llc_slices_destroy(struct a6xx_llc *llc)


static?


+{
+   if (llc->mmio) {
+   iounmap(llc->mmio);
+   llc->mmio = NULL;
+   }
+
+   llcc_slice_putd(llc->gpu_llc_slice);
+   llc->gpu_llc_slice = NULL;
+
+   llcc_slice_putd(llc->gpuhtw_llc_slice);
+   llc->gpuhtw_llc_slice = NULL;
+}
+
+static int a6xx_llc_slices_init(struct platform_device *pdev,
+   struct a6xx_llc *llc)
+{
+   int i;
+
+   /* Get the system cache slice descriptor for GPU and GPUHTWs */
+   llc->gpu_llc_slice = llcc_slice_getd(&pdev->dev, "gpu");
+   if (IS_ERR(llc->gpu_llc_slice))
+   llc->gpu_llc_slice = NULL;
+
+   llc->gpuhtw_llc_slice = llcc_slice_getd(&pdev->dev, "gpuhtw");
+   if (IS_ERR(llc->gpuhtw_llc_slice))
+   llc->gpuhtw_llc_slice = NULL;
+
+   

Re: [Freedreno] [PATCH 5/5] drm/msm/A6xx: Add support for using system cache(llc)

2018-04-04 Thread Sharat Masetty



On 4/4/2018 2:54 AM, Jordan Crouse wrote:

On Fri, Mar 23, 2018 at 12:49:51PM +0530, Sharat Masetty wrote:

The last level system cache can be partitioned into 32
different slices, of which the GPU has two preallocated.
The "gpu" slice is used for caching GPU buffers and
the "gpuhtw" slice is used for caching the GPU SMMU
pagetables. This patch talks to the core system cache
driver to acquire the slice handles, configures the SCIDs
for those slices, and activates and deactivates the slices
upon GPU power collapse and restore.

Some support from the IOMMU driver is also needed to
make use of the system cache. IOMMU_UPSTREAM_HINT is
a buffer protection flag which enables caching GPU data
buffers in the system cache with memory attributes such
as outer cacheable, read-allocate, write-allocate.
The GPU can then override a few cacheability parameters,
which it uses to downgrade write-allocate to
write-no-allocate, as the GPU hardware does not benefit
much from it.
Similarly, DOMAIN_ATTR_USE_UPSTREAM_HINT is a domain-level
attribute used by the IOMMU driver to set the right
attributes for caching the hardware pagetables in the
system cache.


This has a dependency on the LLCC driver and the API to that may change (it is
under review now). When it does, this will have to naturally change as well,
but that'll be a minor tweak and won't affect the functionality of this driver,
so pending those changes..


Thanks for the review, Jordan. Vivek will also submit the SMMU changes
for the UPSTREAM_HINT support to the mailing list soon. So once the
dependencies are sorted out, I will review and submit a fresh patch set
if needed.


Reviewed-by: Jordan Crouse 

Signed-off-by: Sharat Masetty 
---
  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 162 +-
  drivers/gpu/drm/msm/adreno/a6xx_gpu.h |   9 ++
  drivers/gpu/drm/msm/msm_iommu.c   |  13 +++
  drivers/gpu/drm/msm/msm_mmu.h |   3 +
  4 files changed, 186 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index bd50674..e4554eb 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -13,6 +13,7 @@
  
  #include 

  #include 
+#include <linux/soc/qcom/llcc-qcom.h>
  
  #include "msm_gem.h"

  #include "msm_mmu.h"
@@ -913,6 +914,154 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
~0
  };
  
+#define A6XX_LLC_NUM_GPU_SCIDS		5

+#define A6XX_GPU_LLC_SCID_NUM_BITS 5
+
+#define A6XX_GPU_LLC_SCID_MASK \
+   ((1 << (A6XX_LLC_NUM_GPU_SCIDS * A6XX_GPU_LLC_SCID_NUM_BITS)) - 1)
+
+#define A6XX_GPUHTW_LLC_SCID_SHIFT 25
+#define A6XX_GPUHTW_LLC_SCID_MASK \
+   (((1 << A6XX_GPU_LLC_SCID_NUM_BITS) - 1) << A6XX_GPUHTW_LLC_SCID_SHIFT)
+
+static inline void a6xx_gpu_cx_rmw(struct a6xx_llc *llc,
+   u32 reg, u32 mask, u32 or)
+{
+   msm_rmw(llc->mmio + (reg << 2), mask, or);
+}
+
+static void a6xx_llc_deactivate(struct msm_gpu *gpu)
+{
+   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+   struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+   struct a6xx_llc *llc = &a6xx_gpu->llc;
+
+   llcc_slice_deactivate(llc->gpu_llc_slice);
+   llcc_slice_deactivate(llc->gpuhtw_llc_slice);
+}
+
+static void a6xx_llc_activate(struct msm_gpu *gpu)
+{
+   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+   struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+   struct a6xx_llc *llc = &a6xx_gpu->llc;
+
+   if (!llc->mmio)
+   return;
+
+   if (llc->gpu_llc_slice)
+   if (!llcc_slice_activate(llc->gpu_llc_slice))
+   /* Program the sub-cache ID for all GPU blocks */
+   a6xx_gpu_cx_rmw(llc,
+   REG_A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1,
+   A6XX_GPU_LLC_SCID_MASK,
+   (llc->cntl1_regval &
+   A6XX_GPU_LLC_SCID_MASK));
+
+   if (llc->gpuhtw_llc_slice)
+   if (!llcc_slice_activate(llc->gpuhtw_llc_slice))
+   /* Program the sub-cache ID for GPU pagetables */
+   a6xx_gpu_cx_rmw(llc,
+   REG_A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1,
+   A6XX_GPUHTW_LLC_SCID_MASK,
+   (llc->cntl1_regval &
+   A6XX_GPUHTW_LLC_SCID_MASK));
+
+   /* Program cacheability overrides */
+   a6xx_gpu_cx_rmw(llc, REG_A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF,
+   llc->cntl0_regval);
+}
+
+void a6xx_llc_slices_destroy(struct a6xx_llc *llc)
+{
+   if (llc->mmio) {
+   iounmap(llc->mmio);
+   llc->mmio = NULL;
+   }
+
+   llcc_slice_putd(llc->gpu_llc_slice);
+   llc->gpu_llc_slice = NULL;
+
+   llcc_slice_putd(llc->gpuhtw_llc_slice);
+   

Re: [Freedreno] [PATCH 5/5] drm/msm/A6xx: Add support for using system cache(llc)

2018-04-03 Thread Jordan Crouse
On Fri, Mar 23, 2018 at 12:49:51PM +0530, Sharat Masetty wrote:
> The last level system cache can be partitioned into 32
> different slices, of which the GPU has two preallocated.
> The "gpu" slice is used for caching GPU buffers and
> the "gpuhtw" slice is used for caching the GPU SMMU
> pagetables. This patch talks to the core system cache
> driver to acquire the slice handles, configures the SCIDs
> for those slices, and activates and deactivates the slices
> upon GPU power collapse and restore.
> 
> Some support from the IOMMU driver is also needed to
> make use of the system cache. IOMMU_UPSTREAM_HINT is
> a buffer protection flag which enables caching GPU data
> buffers in the system cache with memory attributes such
> as outer cacheable, read-allocate, write-allocate.
> The GPU can then override a few cacheability parameters,
> which it uses to downgrade write-allocate to
> write-no-allocate, as the GPU hardware does not benefit
> much from it.
> Similarly, DOMAIN_ATTR_USE_UPSTREAM_HINT is a domain-level
> attribute used by the IOMMU driver to set the right
> attributes for caching the hardware pagetables in the
> system cache.

This has a dependency on the LLCC driver and the API to that may change (it is
under review now). When it does, this will have to naturally change as well,
but that'll be a minor tweak and won't affect the functionality of this driver,
so pending those changes..

Reviewed-by: Jordan Crouse 

> Signed-off-by: Sharat Masetty 
> ---
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 162 +-
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.h |   9 ++
>  drivers/gpu/drm/msm/msm_iommu.c   |  13 +++
>  drivers/gpu/drm/msm/msm_mmu.h |   3 +
>  4 files changed, 186 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index bd50674..e4554eb 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -13,6 +13,7 @@
>  
>  #include 
>  #include 
> +#include <linux/soc/qcom/llcc-qcom.h>
>  
>  #include "msm_gem.h"
>  #include "msm_mmu.h"
> @@ -913,6 +914,154 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
>   ~0
>  };
>  
> +#define A6XX_LLC_NUM_GPU_SCIDS   5
> +#define A6XX_GPU_LLC_SCID_NUM_BITS   5
> +
> +#define A6XX_GPU_LLC_SCID_MASK \
> + ((1 << (A6XX_LLC_NUM_GPU_SCIDS * A6XX_GPU_LLC_SCID_NUM_BITS)) - 1)
> +
> +#define A6XX_GPUHTW_LLC_SCID_SHIFT   25
> +#define A6XX_GPUHTW_LLC_SCID_MASK \
> + (((1 << A6XX_GPU_LLC_SCID_NUM_BITS) - 1) << A6XX_GPUHTW_LLC_SCID_SHIFT)
> +
> +static inline void a6xx_gpu_cx_rmw(struct a6xx_llc *llc,
> + u32 reg, u32 mask, u32 or)
> +{
> + msm_rmw(llc->mmio + (reg << 2), mask, or);
> +}
> +
> +static void a6xx_llc_deactivate(struct msm_gpu *gpu)
> +{
> + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> + struct a6xx_llc *llc = &a6xx_gpu->llc;
> +
> + llcc_slice_deactivate(llc->gpu_llc_slice);
> + llcc_slice_deactivate(llc->gpuhtw_llc_slice);
> +}
> +
> +static void a6xx_llc_activate(struct msm_gpu *gpu)
> +{
> + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> + struct a6xx_llc *llc = &a6xx_gpu->llc;
> +
> + if (!llc->mmio)
> + return;
> +
> + if (llc->gpu_llc_slice)
> + if (!llcc_slice_activate(llc->gpu_llc_slice))
> + /* Program the sub-cache ID for all GPU blocks */
> + a6xx_gpu_cx_rmw(llc,
> + REG_A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1,
> + A6XX_GPU_LLC_SCID_MASK,
> + (llc->cntl1_regval &
> + A6XX_GPU_LLC_SCID_MASK));
> +
> + if (llc->gpuhtw_llc_slice)
> + if (!llcc_slice_activate(llc->gpuhtw_llc_slice))
> + /* Program the sub-cache ID for GPU pagetables */
> + a6xx_gpu_cx_rmw(llc,
> + REG_A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1,
> + A6XX_GPUHTW_LLC_SCID_MASK,
> + (llc->cntl1_regval &
> + A6XX_GPUHTW_LLC_SCID_MASK));
> +
> + /* Program cacheability overrides */
> + a6xx_gpu_cx_rmw(llc, REG_A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF,
> + llc->cntl0_regval);
> +}
> +
> +void a6xx_llc_slices_destroy(struct a6xx_llc *llc)
> +{
> + if (llc->mmio) {
> + iounmap(llc->mmio);
> + llc->mmio = NULL;
> + }
> +
> + llcc_slice_putd(llc->gpu_llc_slice);
> + llc->gpu_llc_slice = NULL;
> +
> + llcc_slice_putd(llc->gpuhtw_llc_slice);
> + llc->gpuhtw_llc_slice = NULL;
> +}
> +
> +static int a6xx_llc_slices_init(struct platform_device *pdev,
> + struct a6xx_llc *llc)
> 

[PATCH 5/5] drm/msm/A6xx: Add support for using system cache(llc)

2018-03-23 Thread Sharat Masetty
The last level system cache can be partitioned into 32
different slices, of which the GPU has two preallocated.
The "gpu" slice is used for caching GPU buffers and
the "gpuhtw" slice is used for caching the GPU SMMU
pagetables. This patch talks to the core system cache
driver to acquire the slice handles, configures the SCIDs
for those slices, and activates and deactivates the slices
upon GPU power collapse and restore.

Some support from the IOMMU driver is also needed to
make use of the system cache. IOMMU_UPSTREAM_HINT is
a buffer protection flag which enables caching GPU data
buffers in the system cache with memory attributes such
as outer cacheable, read-allocate, write-allocate.
The GPU can then override a few cacheability parameters,
which it uses to downgrade write-allocate to
write-no-allocate, as the GPU hardware does not benefit
much from it.
Similarly, DOMAIN_ATTR_USE_UPSTREAM_HINT is a domain-level
attribute used by the IOMMU driver to set the right
attributes for caching the hardware pagetables in the
system cache.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 162 +-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h |   9 ++
 drivers/gpu/drm/msm/msm_iommu.c   |  13 +++
 drivers/gpu/drm/msm/msm_mmu.h |   3 +
 4 files changed, 186 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index bd50674..e4554eb 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -13,6 +13,7 @@
 
 #include 
 #include 
+#include <linux/soc/qcom/llcc-qcom.h>
 
 #include "msm_gem.h"
 #include "msm_mmu.h"
@@ -913,6 +914,154 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
~0
 };
 
+#define A6XX_LLC_NUM_GPU_SCIDS 5
+#define A6XX_GPU_LLC_SCID_NUM_BITS 5
+
+#define A6XX_GPU_LLC_SCID_MASK \
+   ((1 << (A6XX_LLC_NUM_GPU_SCIDS * A6XX_GPU_LLC_SCID_NUM_BITS)) - 1)
+
+#define A6XX_GPUHTW_LLC_SCID_SHIFT 25
+#define A6XX_GPUHTW_LLC_SCID_MASK \
+   (((1 << A6XX_GPU_LLC_SCID_NUM_BITS) - 1) << A6XX_GPUHTW_LLC_SCID_SHIFT)
+
+static inline void a6xx_gpu_cx_rmw(struct a6xx_llc *llc,
+   u32 reg, u32 mask, u32 or)
+{
+   msm_rmw(llc->mmio + (reg << 2), mask, or);
+}
+
+static void a6xx_llc_deactivate(struct msm_gpu *gpu)
+{
+   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+   struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+   struct a6xx_llc *llc = &a6xx_gpu->llc;
+
+   llcc_slice_deactivate(llc->gpu_llc_slice);
+   llcc_slice_deactivate(llc->gpuhtw_llc_slice);
+}
+
+static void a6xx_llc_activate(struct msm_gpu *gpu)
+{
+   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+   struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+   struct a6xx_llc *llc = &a6xx_gpu->llc;
+
+   if (!llc->mmio)
+   return;
+
+   if (llc->gpu_llc_slice)
+   if (!llcc_slice_activate(llc->gpu_llc_slice))
+   /* Program the sub-cache ID for all GPU blocks */
+   a6xx_gpu_cx_rmw(llc,
+   REG_A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1,
+   A6XX_GPU_LLC_SCID_MASK,
+   (llc->cntl1_regval &
+   A6XX_GPU_LLC_SCID_MASK));
+
+   if (llc->gpuhtw_llc_slice)
+   if (!llcc_slice_activate(llc->gpuhtw_llc_slice))
+   /* Program the sub-cache ID for GPU pagetables */
+   a6xx_gpu_cx_rmw(llc,
+   REG_A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1,
+   A6XX_GPUHTW_LLC_SCID_MASK,
+   (llc->cntl1_regval &
+   A6XX_GPUHTW_LLC_SCID_MASK));
+
+   /* Program cacheability overrides */
+   a6xx_gpu_cx_rmw(llc, REG_A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF,
+   llc->cntl0_regval);
+}
+
+void a6xx_llc_slices_destroy(struct a6xx_llc *llc)
+{
+   if (llc->mmio) {
+   iounmap(llc->mmio);
+   llc->mmio = NULL;
+   }
+
+   llcc_slice_putd(llc->gpu_llc_slice);
+   llc->gpu_llc_slice = NULL;
+
+   llcc_slice_putd(llc->gpuhtw_llc_slice);
+   llc->gpuhtw_llc_slice = NULL;
+}
+
+static int a6xx_llc_slices_init(struct platform_device *pdev,
+   struct a6xx_llc *llc)
+{
+   int i;
+
+   /* Get the system cache slice descriptor for GPU and GPUHTWs */
+   llc->gpu_llc_slice = llcc_slice_getd(&pdev->dev, "gpu");
+   if (IS_ERR(llc->gpu_llc_slice))
+   llc->gpu_llc_slice = NULL;
+
+   llc->gpuhtw_llc_slice = llcc_slice_getd(&pdev->dev, "gpuhtw");
+   if (IS_ERR(llc->gpuhtw_llc_slice))
+   llc->gpuhtw_llc_slice = NULL;
+
+   if (llc->gpu_llc_slice == NULL && llc->gpuhtw_llc_slice == NULL)
+   return -1;
+
+   /* Map registers */
+   llc->mmio =
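
One last gap worth noting: every copy of the patch in this archive truncates
before the hunks that hook activate/deactivate into the power paths ("upon
GPU power collapse and restore"). A sketch of the plausible wiring, following
the 2019 revision's names and the existing a6xx_gmu_resume()/a6xx_gmu_stop()
hooks - not the actual hunk:

static int a6xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int ret;

	ret = a6xx_gmu_resume(a6xx_gpu);
	if (ret)
		return ret;

	/* Re-program and re-activate the slices on every power restore */
	a6xx_llc_activate(&a6xx_gpu->llc);

	return 0;
}

static int a6xx_pm_suspend(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Deactivate the slices before power collapse */
	a6xx_llc_deactivate(&a6xx_gpu->llc);

	return a6xx_gmu_stop(a6xx_gpu);
}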