[Intel-gfx] [PATCH v2] drm/i915: Initialize the obj flags for shmem objects

2023-02-03 Thread Aravind Iddamsetty
Obj flags for shmem objects are not being set correctly. This fixes the
setting of the BO_ALLOC_USER flag, which applies to shmem objs as well.

Fixes: 13d29c823738 ("drm/i915/ehl: unconditionally flush the pages on acquire")
Cc: <stable@vger.kernel.org> # v5.15+

v2: Add fixes tag (Tvrtko, Matt A)

Cc: Matthew Auld 
Cc: Tvrtko Ursulin 
Reviewed-by: Matthew Auld 
Signed-off-by: Aravind Iddamsetty 
---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 114443096841..37d1efcd3ca6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -596,7 +596,7 @@ static int shmem_object_init(struct intel_memory_region 
*mem,
mapping_set_gfp_mask(mapping, mask);
GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
 
-   i915_gem_object_init(obj, _gem_shmem_ops, _class, 0);
+   i915_gem_object_init(obj, _gem_shmem_ops, _class, flags);
obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
-- 
2.25.1



[Intel-gfx] [PATCH] Initialize the obj flags for shmem objects

2023-02-03 Thread Aravind Iddamsetty
Obj flags for shmem objects are not being set correctly.

Cc: Matthew Auld 
Signed-off-by: Aravind Iddamsetty 
---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 114443096841..37d1efcd3ca6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -596,7 +596,7 @@ static int shmem_object_init(struct intel_memory_region 
*mem,
mapping_set_gfp_mask(mapping, mask);
GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
 
-   i915_gem_object_init(obj, _gem_shmem_ops, _class, 0);
+   i915_gem_object_init(obj, _gem_shmem_ops, _class, flags);
obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
-- 
2.25.1



[Intel-gfx] [PATCH 4/4] drm/i915/mtl/UAPI: Disable GET/SET_CACHING IOCTL for MTL+

2022-12-05 Thread Aravind Iddamsetty
From: Pallavi Mishra 

It's a no-op on all new platforms starting from MTL.
Refer: e7737b67ab46 ("drm/i915/uapi: reject caching ioctls for discrete")

v2:
1. block get caching ioctl
2. return ENODEV similar to DGFX
3. update the doc in i915_drm.h

Cc: Lucas De Marchi 
Cc: Matt Roper 
Cc: Joonas Lahtinen 

Signed-off-by: Pallavi Mishra 
Signed-off-by: Aravind Iddamsetty 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 4 ++--
 include/uapi/drm/i915_drm.h| 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index d44a152ce680..cf817ee0aa01 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -291,7 +291,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void 
*data,
struct drm_i915_gem_object *obj;
int err = 0;
 
-   if (IS_DGFX(to_i915(dev)))
+   if (IS_DGFX(to_i915(dev)) || GRAPHICS_VER_FULL(to_i915(dev)) >= 
IP_VER(12, 70))
return -ENODEV;
 
rcu_read_lock();
@@ -329,7 +329,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void 
*data,
enum i915_cache_level level;
int ret = 0;
 
-   if (IS_DGFX(i915))
+   if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
return -ENODEV;
 
switch (args->caching) {
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 8df261c5ab9b..3467fd879427 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1626,6 +1626,9 @@ struct drm_i915_gem_busy {
  * - Everything else is always allocated and mapped as write-back, with the
  *   guarantee that everything is also coherent with the GPU.
  *
+ * Starting from MTL even on integrated platforms set/get caching is no longer
+ * supported and object will be mapped as write-combined only.
+ *
  * Note that this is likely to change in the future again, where we might need
  * more flexibility on future devices, so making this all explicit as part of a
  * new _i915_gem_create_ext extension is probable.
-- 
2.25.1



[Intel-gfx] [PATCH 3/4] drm/i915/mtl: Define new PTE encode for MTL

2022-12-05 Thread Aravind Iddamsetty
Add a separate PTE encode function for MTL. The number of PAT registers
has increased to 16 on MTL. All 16 PAT registers are available for
PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
pages.

BSPEC: 63884

Cc: Lucas De Marchi 
Cc: Matt Roper 
Co-developed-by: Fei Yang 
Signed-off-by: Fei Yang 
Signed-off-by: Aravind Iddamsetty 
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 33 +++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 32 ++-
 drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +--
 4 files changed, 78 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 31e838eee2ef..4197b43150cc 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
+static u64 mtl_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+   gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+   if (unlikely(flags & PTE_READ_ONLY))
+   pte &= ~GEN8_PAGE_RW;
+
+   if (flags & PTE_LM)
+   pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
+
+   switch (level) {
+   case I915_CACHE_NONE:
+   pte |= GEN12_PPGTT_PTE_PAT1;
+   break;
+   case I915_CACHE_LLC:
+   case I915_CACHE_L3_LLC:
+   pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
+   break;
+   case I915_CACHE_WT:
+   pte |= GEN12_PPGTT_PTE_PAT0;
+   break;
+   }
+
+   return pte;
+}
+
 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
struct drm_i915_private *i915 = ppgtt->vm.i915;
@@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   ppgtt->vm.pte_encode = gen8_pte_encode;
+   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+   ppgtt->vm.pte_encode = mtl_pte_encode;
+   else
+   ppgtt->vm.pte_encode = gen8_pte_encode;
 
ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
ppgtt->vm.insert_entries = gen8_ppgtt_insert;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
index f541d19264b4..c48f1fc32909 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
@@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 enum i915_cache_level level,
 u32 flags);
 
+u64 mtl_ggtt_pte_encode(dma_addr_t addr,
+   enum i915_cache_level level,
+   u32 flags);
+
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 82203ad85b0e..3b6f1f6f780a 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -246,6 +246,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
}
 }
 
+u64 mtl_ggtt_pte_encode(dma_addr_t addr,
+   enum i915_cache_level level,
+   u32 flags)
+{
+   gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
+
+   GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
+
+   if (flags & PTE_LM)
+   pte |= GEN12_GGTT_PTE_LM;
+
+   switch (level) {
+   case I915_CACHE_NONE:
+   pte |= MTL_GGTT_PTE_PAT1;
+   break;
+   case I915_CACHE_LLC:
+   case I915_CACHE_L3_LLC:
+   pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
+   break;
+   case I915_CACHE_WT:
+   pte |= MTL_GGTT_PTE_PAT0;
+   break;
+   }
+
+   return pte;
+}
+
 u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 enum i915_cache_level level,
 u32 flags)
@@ -993,7 +1020,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.bind_vma= intel_ggtt_bind_vma;
ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
 
-   ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
+   else
+   ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
 
return ggtt_probe_common(ggtt, size);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h 
b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 8a3e0a6793dd..4bb7a4005452 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
 #define BYT_PTE_SNOOPED_BY_CPU_CACHES  REG_BIT(2)
 #define BYT_PTE_WRITEABLE  REG_BIT(1)
 
+#define GEN12_PPGTT_PTE_PAT3B

[Intel-gfx] [PATCH 2/4] drm/i915: Reference pte_encode through vm pointer

2022-12-05 Thread Aravind Iddamsetty
New platforms will use different encode functions.

Cc: Lucas De Marchi 
Cc: Matt Roper 
Signed-off-by: Aravind Iddamsetty 
---
 drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c |  4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index ad1a37b515fb..cb8ed9bfb240 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -298,7 +298,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
vm->vma_ops.bind_vma= dpt_bind_vma;
vm->vma_ops.unbind_vma  = dpt_unbind_vma;
 
-   vm->pte_encode = gen8_ggtt_pte_encode;
+   vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
 
dpt->obj = dpt_obj;
 
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4daaa6f55668..31e838eee2ef 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -427,7 +427,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
  u32 flags)
 {
struct i915_page_directory *pd;
-   const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+   const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, 
flags);
gen8_pte_t *vaddr;
 
pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
@@ -580,7 +580,7 @@ static void gen8_ppgtt_insert_huge(struct 
i915_address_space *vm,
   enum i915_cache_level cache_level,
   u32 flags)
 {
-   const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
unsigned int rem = sg_dma_len(iter->sg);
u64 start = vma_res->start;
 
@@ -743,7 +743,7 @@ static void gen8_ppgtt_insert_entry(struct 
i915_address_space *vm,
GEM_BUG_ON(pt->is_compact);
 
vaddr = px_vaddr(pt);
-   vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
+   vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
drm_clflush_virt_range([gen8_pd_index(idx, 0)], sizeof(*vaddr));
 }
 
@@ -773,7 +773,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct 
i915_address_space *vm,
}
 
vaddr = px_vaddr(pt);
-   vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
+   vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags);
 }
 
 static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
@@ -820,7 +820,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
pte_flags |= PTE_LM;
 
vm->scratch[0]->encode =
-   gen8_pte_encode(px_dma(vm->scratch[0]),
+   vm->pte_encode(px_dma(vm->scratch[0]),
I915_CACHE_NONE, pte_flags);
 
for (i = 1; i <= vm->top; i++) {
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 7644738b9cdb..82203ad85b0e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -273,7 +273,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space 
*vm,
gen8_pte_t __iomem *pte =
(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
 
-   gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
+   gen8_set_pte(pte, ggtt->vm.pte_encode(addr, level, flags));
 
ggtt->invalidate(ggtt);
 }
@@ -283,8 +283,8 @@ static void gen8_ggtt_insert_entries(struct 
i915_address_space *vm,
 enum i915_cache_level level,
 u32 flags)
 {
-   const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+   const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, level, flags);
gen8_pte_t __iomem *gte;
gen8_pte_t __iomem *end;
struct sgt_iter iter;
-- 
2.25.1



[Intel-gfx] [PATCH 1/4] drm/i915/mtl: Define MOCS and PAT tables for MTL

2022-12-05 Thread Aravind Iddamsetty
From: Madhumitha Tolakanahalli Pradeep 


On MTL, due to the introduction of the L4 cache, coherency and cacheability
selections are different, and the GT can no longer allocate on LLC. The
MOCS/PAT tables need an update.

BSpec: 44509, 45101, 44235

Cc: Matt Roper 
Cc: Lucas De Marchi 
Signed-off-by: Madhumitha Tolakanahalli Pradeep 

Signed-off-by: Aravind Iddamsetty 
---
 drivers/gpu/drm/i915/gt/intel_gtt.c | 23 +++-
 drivers/gpu/drm/i915/gt/intel_gtt.h |  9 +++
 drivers/gpu/drm/i915/gt/intel_mocs.c| 76 +++--
 drivers/gpu/drm/i915/gt/selftest_mocs.c |  2 +-
 drivers/gpu/drm/i915/i915_pci.c |  1 +
 5 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c 
b/drivers/gpu/drm/i915/gt/intel_gtt.c
index e37164a60d37..428849248c34 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -467,6 +467,25 @@ void gtt_write_workarounds(struct intel_gt *gt)
}
 }
 
+static void mtl_setup_private_ppat(struct intel_uncore *uncore)
+{
+   intel_uncore_write(uncore, GEN12_PAT_INDEX(0),
+  MTL_PPAT_L4_0_WB);
+   intel_uncore_write(uncore, GEN12_PAT_INDEX(1),
+  MTL_PPAT_L4_1_WT | MTL_2_COH_1W);
+   intel_uncore_write(uncore, GEN12_PAT_INDEX(2),
+  MTL_PPAT_L4_3_UC | MTL_2_COH_1W);
+   intel_uncore_write(uncore, GEN12_PAT_INDEX(3),
+  MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
+   intel_uncore_write(uncore, GEN12_PAT_INDEX(4),
+  MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
+
+   /*
+* Remaining PAT entries are left at the hardware-default
+* fully-cached setting
+*/
+}
+
 static void tgl_setup_private_ppat(struct intel_uncore *uncore)
 {
/* TGL doesn't support LLC or AGE settings */
@@ -602,7 +621,9 @@ void setup_private_pat(struct intel_gt *gt)
 
GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
 
-   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+   if (IS_METEORLAKE(i915))
+   mtl_setup_private_ppat(uncore);
+   else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
xehp_setup_private_ppat(gt);
else if (GRAPHICS_VER(i915) >= 12)
tgl_setup_private_ppat(uncore);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h 
b/drivers/gpu/drm/i915/gt/intel_gtt.h
index d1900fec6cd1..8a3e0a6793dd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -147,6 +147,15 @@ typedef u64 gen8_pte_t;
 #define GEN8_PDE_IPS_64K BIT(11)
 #define GEN8_PDE_PS_2M   BIT(7)
 
+#define MTL_PPAT_L4_CACHE_POLICY_MASK  REG_GENMASK(3, 2)
+#define MTL_PAT_INDEX_COH_MODE_MASKREG_GENMASK(1, 0)
+#define MTL_PPAT_L4_3_UC   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
+#define MTL_PPAT_L4_1_WT   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
+#define MTL_PPAT_L4_0_WB   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
+#define MTL_3_COH_2W   REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
+#define MTL_2_COH_1W   REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
+#define MTL_0_COH_NON  REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
+
 enum i915_cache_level;
 
 struct drm_i915_gem_object;
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 69b489e8dfed..89570f137b2c 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -40,6 +40,10 @@ struct drm_i915_mocs_table {
 #define LE_COS(value)  ((value) << 15)
 #define LE_SSE(value)  ((value) << 17)
 
+/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
+#define _L4_CACHEABILITY(value)((value) << 2)
+#define IG_PAT(value)  ((value) << 8)
+
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)  ((value) << 0)
 #define L3_SCC(value)  ((value) << 1)
@@ -50,6 +54,7 @@ struct drm_i915_mocs_table {
 /* Helper defines */
 #define GEN9_NUM_MOCS_ENTRIES  64  /* 63-64 are reserved, but configured. */
 #define PVC_NUM_MOCS_ENTRIES   3
+#define MTL_NUM_MOCS_ENTRIES   16
 
 /* (e)LLC caching options */
 /*
@@ -73,6 +78,12 @@ struct drm_i915_mocs_table {
 #define L3_2_RESERVED  _L3_CACHEABILITY(2)
 #define L3_3_WB_L3_CACHEABILITY(3)
 
+/* L4 caching options */
+#define L4_0_WB_L4_CACHEABILITY(0)
+#define L4_1_WT_L4_CACHEABILITY(1)
+#define L4_2_RESERVED  _L4_CACHEABILITY(2)
+#define L4_3_UC_L4_CACHEABILITY(3)
+
 #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
[__idx] = { \
.control_value = __control_value, \
@@ -416,6 +427,57 @@ static const struct drm_i915_mocs_entry pvc_mocs_table[] = 
{
MOCS_ENTRY(2, 0, L3_3_WB),
 };
 
+static const struct drm_i915_mocs_entry 

[Intel-gfx] [PATCH 1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL

2022-11-28 Thread Aravind Iddamsetty
From: Madhumitha Tolakanahalli Pradeep 


On MTL, due to the introduction of the L4 cache, coherency and cacheability
selections are different, and the GT can no longer allocate on LLC. The
MOCS/PAT tables need an update.

BSpec: 44509, 45101, 44235

Cc: Matt Roper 
Cc: Lucas De Marchi 
Signed-off-by: Madhumitha Tolakanahalli Pradeep 

Signed-off-by: Aravind Iddamsetty 
---
 drivers/gpu/drm/i915/gt/intel_gtt.c | 23 +++-
 drivers/gpu/drm/i915/gt/intel_gtt.h |  9 +++
 drivers/gpu/drm/i915/gt/intel_mocs.c| 76 +++--
 drivers/gpu/drm/i915/gt/selftest_mocs.c |  2 +-
 drivers/gpu/drm/i915/i915_pci.c |  1 +
 5 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c 
b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 2ba3983984b9..41248029d03d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -467,6 +467,25 @@ void gtt_write_workarounds(struct intel_gt *gt)
}
 }
 
+static void mtl_setup_private_ppat(struct intel_uncore *uncore)
+{
+   intel_uncore_write(uncore, GEN12_PAT_INDEX(0),
+  MTL_PPAT_L4_0_WB);
+   intel_uncore_write(uncore, GEN12_PAT_INDEX(1),
+  MTL_PPAT_L4_1_WT | MTL_2_COH_1W);
+   intel_uncore_write(uncore, GEN12_PAT_INDEX(2),
+  MTL_PPAT_L4_3_UC | MTL_2_COH_1W);
+   intel_uncore_write(uncore, GEN12_PAT_INDEX(3),
+  MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
+   intel_uncore_write(uncore, GEN12_PAT_INDEX(4),
+  MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
+
+   /*
+* Remaining PAT entries are left at the hardware-default
+* fully-cached setting
+*/
+}
+
 static void tgl_setup_private_ppat(struct intel_uncore *uncore)
 {
/* TGL doesn't support LLC or AGE settings */
@@ -591,7 +610,9 @@ void setup_private_pat(struct intel_gt *gt)
 
GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
 
-   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+   if (IS_METEORLAKE(i915))
+   mtl_setup_private_ppat(uncore);
+   else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
xehp_setup_private_ppat(gt);
else if (GRAPHICS_VER(i915) >= 12)
tgl_setup_private_ppat(uncore);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h 
b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 4d75ba4bb41d..43bf9188ffef 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -147,6 +147,15 @@ typedef u64 gen8_pte_t;
 #define GEN8_PDE_IPS_64K BIT(11)
 #define GEN8_PDE_PS_2M   BIT(7)
 
+#define MTL_PPAT_L4_CACHE_POLICY_MASK  REG_GENMASK(3, 2)
+#define MTL_PAT_INDEX_COH_MODE_MASKREG_GENMASK(1, 0)
+#define MTL_PPAT_L4_3_UC   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
+#define MTL_PPAT_L4_1_WT   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
+#define MTL_PPAT_L4_0_WB   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
+#define MTL_3_COH_2W   REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
+#define MTL_2_COH_1W   REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
+#define MTL_0_COH_NON  REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
+
 enum i915_cache_level;
 
 struct drm_i915_gem_object;
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 49fdd509527a..27de7f57ff3d 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -40,6 +40,10 @@ struct drm_i915_mocs_table {
 #define LE_COS(value)  ((value) << 15)
 #define LE_SSE(value)  ((value) << 17)
 
+/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
+#define _L4_CACHEABILITY(value)((value) << 2)
+#define IG_PAT(value)  ((value) << 8)
+
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)  ((value) << 0)
 #define L3_SCC(value)  ((value) << 1)
@@ -50,6 +54,7 @@ struct drm_i915_mocs_table {
 /* Helper defines */
 #define GEN9_NUM_MOCS_ENTRIES  64  /* 63-64 are reserved, but configured. */
 #define PVC_NUM_MOCS_ENTRIES   3
+#define MTL_NUM_MOCS_ENTRIES   16
 
 /* (e)LLC caching options */
 /*
@@ -73,6 +78,12 @@ struct drm_i915_mocs_table {
 #define L3_2_RESERVED  _L3_CACHEABILITY(2)
 #define L3_3_WB_L3_CACHEABILITY(3)
 
+/* L4 caching options */
+#define L4_0_WB_L4_CACHEABILITY(0)
+#define L4_1_WT_L4_CACHEABILITY(1)
+#define L4_2_RESERVED  _L4_CACHEABILITY(2)
+#define L4_3_UC_L4_CACHEABILITY(3)
+
 #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
[__idx] = { \
.control_value = __control_value, \
@@ -416,6 +427,57 @@ static const struct drm_i915_mocs_entry pvc_mocs_table[] = 
{
MOCS_ENTRY(2, 0, L3_3_WB),
 };
 
+static const struct drm_i915_mocs_entry 

[Intel-gfx] [PATCH 2/3] drm/i915/mtl: Define new PTE encode for MTL

2022-11-28 Thread Aravind Iddamsetty
Add a separate PTE encode function for MTL. The number of PAT registers
has increased to 16 on MTL. All 16 PAT registers are available for
PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
pages.

BSPEC: 63884

Cc: Lucas De Marchi 
Cc: Matt Roper 
Co-developed-by: Fei Yang 
Signed-off-by: Fei Yang 
Signed-off-by: Aravind Iddamsetty 
---
 drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 43 
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 +++
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 36 ++--
 drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +--
 5 files changed, 86 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index ad1a37b515fb..cb8ed9bfb240 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -298,7 +298,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
vm->vma_ops.bind_vma= dpt_bind_vma;
vm->vma_ops.unbind_vma  = dpt_unbind_vma;
 
-   vm->pte_encode = gen8_ggtt_pte_encode;
+   vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
 
dpt->obj = dpt_obj;
 
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4daaa6f55668..4197b43150cc 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
+static u64 mtl_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+   gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+   if (unlikely(flags & PTE_READ_ONLY))
+   pte &= ~GEN8_PAGE_RW;
+
+   if (flags & PTE_LM)
+   pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
+
+   switch (level) {
+   case I915_CACHE_NONE:
+   pte |= GEN12_PPGTT_PTE_PAT1;
+   break;
+   case I915_CACHE_LLC:
+   case I915_CACHE_L3_LLC:
+   pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
+   break;
+   case I915_CACHE_WT:
+   pte |= GEN12_PPGTT_PTE_PAT0;
+   break;
+   }
+
+   return pte;
+}
+
 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
struct drm_i915_private *i915 = ppgtt->vm.i915;
@@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
  u32 flags)
 {
struct i915_page_directory *pd;
-   const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+   const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, 
flags);
gen8_pte_t *vaddr;
 
pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
@@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct 
i915_address_space *vm,
   enum i915_cache_level cache_level,
   u32 flags)
 {
-   const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
unsigned int rem = sg_dma_len(iter->sg);
u64 start = vma_res->start;
 
@@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct 
i915_address_space *vm,
GEM_BUG_ON(pt->is_compact);
 
vaddr = px_vaddr(pt);
-   vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
+   vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
drm_clflush_virt_range([gen8_pd_index(idx, 0)], sizeof(*vaddr));
 }
 
@@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct 
i915_address_space *vm,
}
 
vaddr = px_vaddr(pt);
-   vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
+   vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags);
 }
 
 static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
@@ -820,7 +848,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
pte_flags |= PTE_LM;
 
vm->scratch[0]->encode =
-   gen8_pte_encode(px_dma(vm->scratch[0]),
+   vm->pte_encode(px_dma(vm->scratch[0]),
I915_CACHE_NONE, pte_flags);
 
for (i = 1; i <= vm->top; i++) {
@@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   ppgtt->vm.pte_encode = gen8_pte_encode;
+   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+   ppgtt->vm.pte_encode = mtl_pte_encode;
+   else
+   ppgtt->vm.pte_encode = gen8_pte_encode;
 
ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
ppgtt->vm

[Intel-gfx] [PATCH 3/3] drm/i915/mtl/UAPI: Disable SET_CACHING IOCTL for MTL+

2022-11-28 Thread Aravind Iddamsetty
From: Pallavi Mishra 

The caching mode for an object shall be selected via the upcoming VM_BIND
interface.

Cc: Lucas De Marchi 
Cc: Matt Roper 
Cc: Joonas Lahtinen 

Signed-off-by: Pallavi Mishra 
Signed-off-by: Aravind Iddamsetty 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index d44a152ce680..aebbfe186143 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -332,6 +332,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void 
*data,
if (IS_DGFX(i915))
return -ENODEV;
 
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   return -EOPNOTSUPP;
+
switch (args->caching) {
case I915_CACHING_NONE:
level = I915_CACHE_NONE;
-- 
2.25.1



[Intel-gfx] [PATCH v4] drm/i915/mtl: Media GT and Render GT share common GGTT

2022-11-21 Thread Aravind Iddamsetty
On XE_LPM+ platforms the media engines are carved out into a separate
GT but have a common GGTMMADR address range, which essentially makes
the GGTT address space shared between the media and render GTs. As a
result, any update to the GGTT shall invalidate the TLBs of all GTs sharing
it, and similarly any operation on the GGTT requiring an action on a GT will
have to involve all GTs sharing it. setup_private_pat was being done on a
per-GGTT basis; as it doesn't touch any GGTT structures, move it to a
per-GT basis.

BSPEC: 63834

v2:
1. Add details to commit msg
2. includes fix for failure to add item to ggtt->gt_list, as suggested
by Lucas
3. As ggtt_flush() is used only for ggtt, drop the i915_is_ggtt check
within it.
4. setup_private_pat moved out of intel_gt_tiles_init

v3:
1. Move out for_each_gt from i915_driver.c (Jani Nikula)

v4: drop using RCU primitives on ggtt->gt_list as it is not an RCU list
(Matt Roper)

Cc: Matt Roper 
Signed-off-by: Aravind Iddamsetty 
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 54 +--
 drivers/gpu/drm/i915/gt/intel_gt.c| 13 +-
 drivers/gpu/drm/i915/gt/intel_gt_types.h  |  3 ++
 drivers/gpu/drm/i915/gt/intel_gtt.h   |  4 ++
 drivers/gpu/drm/i915/i915_driver.c| 12 ++---
 drivers/gpu/drm/i915/i915_gem.c   |  2 +
 drivers/gpu/drm/i915/i915_gem_evict.c | 51 +++--
 drivers/gpu/drm/i915/i915_vma.c   |  5 ++-
 drivers/gpu/drm/i915/selftests/i915_gem.c |  2 +
 9 files changed, 111 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 8145851ad23d..7644738b9cdb 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 
@@ -196,10 +197,13 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
 
 void i915_ggtt_suspend(struct i915_ggtt *ggtt)
 {
+   struct intel_gt *gt;
+
i915_ggtt_suspend_vm(>vm);
ggtt->invalidate(ggtt);
 
-   intel_gt_check_and_clear_faults(ggtt->vm.gt);
+   list_for_each_entry(gt, >gt_list, ggtt_link)
+   intel_gt_check_and_clear_faults(gt);
 }
 
 void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
@@ -225,16 +229,21 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
 
 static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 {
-   struct intel_uncore *uncore = ggtt->vm.gt->uncore;
struct drm_i915_private *i915 = ggtt->vm.i915;
 
gen8_ggtt_invalidate(ggtt);
 
-   if (GRAPHICS_VER(i915) >= 12)
-   intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
- GEN12_GUC_TLB_INV_CR_INVALIDATE);
-   else
-   intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   if (GRAPHICS_VER(i915) >= 12) {
+   struct intel_gt *gt;
+
+   list_for_each_entry(gt, >gt_list, ggtt_link)
+   intel_uncore_write_fw(gt->uncore,
+ GEN12_GUC_TLB_INV_CR,
+ GEN12_GUC_TLB_INV_CR_INVALIDATE);
+   } else {
+   intel_uncore_write_fw(ggtt->vm.gt->uncore,
+ GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   }
 }
 
 u64 gen8_ggtt_pte_encode(dma_addr_t addr,
@@ -986,8 +995,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 
ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
 
-   setup_private_pat(ggtt->vm.gt);
-
return ggtt_probe_common(ggtt, size);
 }
 
@@ -1196,7 +1203,14 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct 
intel_gt *gt)
  */
 int i915_ggtt_probe_hw(struct drm_i915_private *i915)
 {
-   int ret;
+   struct intel_gt *gt;
+   int ret, i;
+
+   for_each_gt(gt, i915, i) {
+   ret = intel_gt_assign_ggtt(gt);
+   if (ret)
+   return ret;
+   }
 
ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
if (ret)
@@ -1208,6 +1222,19 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
return 0;
 }
 
+struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915)
+{
+   struct i915_ggtt *ggtt;
+
+   ggtt = drmm_kzalloc(>drm, sizeof(*ggtt), GFP_KERNEL);
+   if (!ggtt)
+   return ERR_PTR(-ENOMEM);
+
+   INIT_LIST_HEAD(>gt_list);
+
+   return ggtt;
+}
+
 int i915_ggtt_enable_hw(struct drm_i915_private *i915)
 {
if (GRAPHICS_VER(i915) < 6)
@@ -1296,9 +1323,11 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
 
 void i915_ggtt_resume(struct i915_ggtt *ggtt)
 {
+   struct intel_gt *gt;
bool flush;
 
-   intel_gt_check_and_clear_faults(ggtt->vm.gt);
+   list_for_each_entry(gt, >gt_list, ggtt_link)
+   intel_gt_check_and_clear_faults(gt);
 
flush = i915_ggtt_resume_vm(>v

[Intel-gfx] [PATCH v3] drm/i915/mtl: Media GT and Render GT share common GGTT

2022-11-15 Thread Aravind Iddamsetty
On XE_LPM+ platforms the media engines are carved out into a separate
GT but have a common GGTMMADR address range, which essentially makes
the GGTT address space shared between the media and render GTs. As a
result, any update to the GGTT shall invalidate the TLBs of all GTs
sharing it, and similarly any operation on the GGTT requiring an action
on a GT will have to involve all GTs sharing it. setup_private_pat was
being done on a per-GGTT basis; as it doesn't touch any GGTT structures,
it is moved to a per-GT basis.

BSPEC: 63834

v2:
1. Add details to commit msg
2. includes fix for failure to add item to ggtt->gt_list, as suggested
by Lucas
3. As ggtt_flush() is used only for the GGTT, drop the i915_is_ggtt check
within it.
4. setup_private_pat moved out of intel_gt_tiles_init

v3:
1. Move out for_each_gt from i915_driver.c (Jani Nikula)

Cc: Matt Roper 
Signed-off-by: Aravind Iddamsetty 
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 54 +--
 drivers/gpu/drm/i915/gt/intel_gt.c| 13 +-
 drivers/gpu/drm/i915/gt/intel_gt_types.h  |  3 ++
 drivers/gpu/drm/i915/gt/intel_gtt.h   |  4 ++
 drivers/gpu/drm/i915/i915_driver.c| 12 ++---
 drivers/gpu/drm/i915/i915_gem.c   |  2 +
 drivers/gpu/drm/i915/i915_gem_evict.c | 51 +++--
 drivers/gpu/drm/i915/i915_vma.c   |  5 ++-
 drivers/gpu/drm/i915/selftests/i915_gem.c |  2 +
 9 files changed, 111 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 8145851ad23d..7644738b9cdb 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 
@@ -196,10 +197,13 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
 
 void i915_ggtt_suspend(struct i915_ggtt *ggtt)
 {
+   struct intel_gt *gt;
+
i915_ggtt_suspend_vm(>vm);
ggtt->invalidate(ggtt);
 
-   intel_gt_check_and_clear_faults(ggtt->vm.gt);
+   list_for_each_entry(gt, >gt_list, ggtt_link)
+   intel_gt_check_and_clear_faults(gt);
 }
 
 void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
@@ -225,16 +229,21 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
 
 static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 {
-   struct intel_uncore *uncore = ggtt->vm.gt->uncore;
struct drm_i915_private *i915 = ggtt->vm.i915;
 
gen8_ggtt_invalidate(ggtt);
 
-   if (GRAPHICS_VER(i915) >= 12)
-   intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
- GEN12_GUC_TLB_INV_CR_INVALIDATE);
-   else
-   intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   if (GRAPHICS_VER(i915) >= 12) {
+   struct intel_gt *gt;
+
+   list_for_each_entry(gt, >gt_list, ggtt_link)
+   intel_uncore_write_fw(gt->uncore,
+ GEN12_GUC_TLB_INV_CR,
+ GEN12_GUC_TLB_INV_CR_INVALIDATE);
+   } else {
+   intel_uncore_write_fw(ggtt->vm.gt->uncore,
+ GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   }
 }
 
 u64 gen8_ggtt_pte_encode(dma_addr_t addr,
@@ -986,8 +995,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 
ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
 
-   setup_private_pat(ggtt->vm.gt);
-
return ggtt_probe_common(ggtt, size);
 }
 
@@ -1196,7 +1203,14 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct 
intel_gt *gt)
  */
 int i915_ggtt_probe_hw(struct drm_i915_private *i915)
 {
-   int ret;
+   struct intel_gt *gt;
+   int ret, i;
+
+   for_each_gt(gt, i915, i) {
+   ret = intel_gt_assign_ggtt(gt);
+   if (ret)
+   return ret;
+   }
 
ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
if (ret)
@@ -1208,6 +1222,19 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
return 0;
 }
 
+struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915)
+{
+   struct i915_ggtt *ggtt;
+
+   ggtt = drmm_kzalloc(>drm, sizeof(*ggtt), GFP_KERNEL);
+   if (!ggtt)
+   return ERR_PTR(-ENOMEM);
+
+   INIT_LIST_HEAD(>gt_list);
+
+   return ggtt;
+}
+
 int i915_ggtt_enable_hw(struct drm_i915_private *i915)
 {
if (GRAPHICS_VER(i915) < 6)
@@ -1296,9 +1323,11 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
 
 void i915_ggtt_resume(struct i915_ggtt *ggtt)
 {
+   struct intel_gt *gt;
bool flush;
 
-   intel_gt_check_and_clear_faults(ggtt->vm.gt);
+   list_for_each_entry(gt, >gt_list, ggtt_link)
+   intel_gt_check_and_clear_faults(gt);
 
flush = i915_ggtt_resume_vm(>vm);
 
@@ -1307,9 +1336,6 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
if (fl

[Intel-gfx] [PATCH v2] drm/i915/mtl: Media GT and Render GT share common GGTT

2022-11-09 Thread Aravind Iddamsetty
On XE_LPM+ platforms the media engines are carved out into a separate
GT but have a common GGTMMADR address range, which essentially makes
the GGTT address space shared between the media and render GTs. As a
result, any update to the GGTT shall invalidate the TLBs of all GTs
sharing it, and similarly any operation on the GGTT requiring an action
on a GT will have to involve all GTs sharing it. setup_private_pat was
being done on a per-GGTT basis; as it doesn't touch any GGTT structures,
it is moved to a per-GT basis.

BSPEC: 63834

v2:
1. Add details to commit msg
2. includes fix for failure to add item to ggtt->gt_list, as suggested
by Lucas
3. As ggtt_flush() is used only for the GGTT, drop the i915_is_ggtt check
within it.
4. setup_private_pat moved out of intel_gt_tiles_init

Cc: Matt Roper 
Signed-off-by: Aravind Iddamsetty 
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 47 ++---
 drivers/gpu/drm/i915/gt/intel_gt.c| 13 +-
 drivers/gpu/drm/i915/gt/intel_gt_types.h  |  3 ++
 drivers/gpu/drm/i915/gt/intel_gtt.h   |  4 ++
 drivers/gpu/drm/i915/i915_driver.c| 22 +++---
 drivers/gpu/drm/i915/i915_gem_evict.c | 51 +--
 drivers/gpu/drm/i915/i915_vma.c   |  5 ++-
 drivers/gpu/drm/i915/selftests/i915_gem.c |  2 +
 8 files changed, 112 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 2518cebbf931..6ba7e9e8e2ca 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 
@@ -196,10 +197,13 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
 
 void i915_ggtt_suspend(struct i915_ggtt *ggtt)
 {
+   struct intel_gt *gt;
+
i915_ggtt_suspend_vm(>vm);
ggtt->invalidate(ggtt);
 
-   intel_gt_check_and_clear_faults(ggtt->vm.gt);
+   list_for_each_entry(gt, >gt_list, ggtt_link)
+   intel_gt_check_and_clear_faults(gt);
 }
 
 void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
@@ -225,16 +229,21 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
 
 static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 {
-   struct intel_uncore *uncore = ggtt->vm.gt->uncore;
struct drm_i915_private *i915 = ggtt->vm.i915;
 
gen8_ggtt_invalidate(ggtt);
 
-   if (GRAPHICS_VER(i915) >= 12)
-   intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
- GEN12_GUC_TLB_INV_CR_INVALIDATE);
-   else
-   intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   if (GRAPHICS_VER(i915) >= 12) {
+   struct intel_gt *gt;
+
+   list_for_each_entry(gt, >gt_list, ggtt_link)
+   intel_uncore_write_fw(gt->uncore,
+ GEN12_GUC_TLB_INV_CR,
+ GEN12_GUC_TLB_INV_CR_INVALIDATE);
+   } else {
+   intel_uncore_write_fw(ggtt->vm.gt->uncore,
+ GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   }
 }
 
 u64 gen8_ggtt_pte_encode(dma_addr_t addr,
@@ -986,8 +995,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 
ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
 
-   setup_private_pat(ggtt->vm.gt);
-
return ggtt_probe_common(ggtt, size);
 }
 
@@ -1186,7 +1193,7 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct 
intel_gt *gt)
(u64)ggtt->mappable_end >> 20);
drm_dbg(>drm, "DSM size = %lluM\n",
(u64)resource_size(_graphics_stolen_res) >> 20);
-
+   INIT_LIST_HEAD(>gt_list);
return 0;
 }
 
@@ -1208,6 +1215,19 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
return 0;
 }
 
+struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915)
+{
+   struct i915_ggtt *ggtt;
+
+   ggtt = drmm_kzalloc(>drm, sizeof(*ggtt), GFP_KERNEL);
+   if (!ggtt)
+   return ERR_PTR(-ENOMEM);
+
+   INIT_LIST_HEAD(>gt_list);
+
+   return ggtt;
+}
+
 int i915_ggtt_enable_hw(struct drm_i915_private *i915)
 {
if (GRAPHICS_VER(i915) < 6)
@@ -1296,9 +1316,11 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
 
 void i915_ggtt_resume(struct i915_ggtt *ggtt)
 {
+   struct intel_gt *gt;
bool flush;
 
-   intel_gt_check_and_clear_faults(ggtt->vm.gt);
+   list_for_each_entry(gt, >gt_list, ggtt_link)
+   intel_gt_check_and_clear_faults(gt);
 
flush = i915_ggtt_resume_vm(>vm);
 
@@ -1307,9 +1329,6 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
if (flush)
wbinvd_on_all_cpus();
 
-   if (GRAPHICS_VER(ggtt->vm.i915) >= 8)
-   setup_private_pat(ggtt->vm.gt);
-
intel_ggtt_restore_fences(ggtt);
 }
 
diff --git a/drivers/gpu/drm/i915

[Intel-gfx] [PATCH] drm/i915/mtl: Media GT and Render GT share common GGTT

2022-10-31 Thread Aravind Iddamsetty
On XE_LPM+ platforms the media engines are carved out into a separate
GT but have a common GGTMMADR address range, which essentially makes
the GGTT address space shared between the media and render GTs.

BSPEC: 63834

Cc: Matt Roper 
Signed-off-by: Aravind Iddamsetty 
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 49 +++---
 drivers/gpu/drm/i915/gt/intel_gt.c| 15 +-
 drivers/gpu/drm/i915/gt/intel_gt_types.h  |  3 ++
 drivers/gpu/drm/i915/gt/intel_gtt.h   |  3 ++
 drivers/gpu/drm/i915/i915_driver.c| 19 +--
 drivers/gpu/drm/i915/i915_gem_evict.c | 63 +--
 drivers/gpu/drm/i915/i915_vma.c   |  5 +-
 drivers/gpu/drm/i915/selftests/i915_gem.c |  2 +
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  1 +
 9 files changed, 115 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 2518cebbf931..f5c2f3c58627 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -196,10 +196,13 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
 
 void i915_ggtt_suspend(struct i915_ggtt *ggtt)
 {
+   struct intel_gt *gt;
+
i915_ggtt_suspend_vm(>vm);
ggtt->invalidate(ggtt);
 
-   intel_gt_check_and_clear_faults(ggtt->vm.gt);
+   list_for_each_entry(gt, >gt_list, ggtt_link)
+   intel_gt_check_and_clear_faults(gt);
 }
 
 void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
@@ -214,27 +217,36 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
 
 static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
 {
-   struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+   struct intel_uncore *uncore;
+   struct intel_gt *gt;
 
-   /*
-* Note that as an uncached mmio write, this will flush the
-* WCB of the writes into the GGTT before it triggers the invalidate.
-*/
-   intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+   list_for_each_entry(gt, >gt_list, ggtt_link) {
+   uncore = gt->uncore;
+   /*
+* Note that as an uncached mmio write, this will flush the
+* WCB of the writes into the GGTT before it triggers the 
invalidate.
+*/
+   intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, 
GFX_FLSH_CNTL_EN);
+   }
 }
 
 static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 {
-   struct intel_uncore *uncore = ggtt->vm.gt->uncore;
struct drm_i915_private *i915 = ggtt->vm.i915;
 
gen8_ggtt_invalidate(ggtt);
 
-   if (GRAPHICS_VER(i915) >= 12)
-   intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
- GEN12_GUC_TLB_INV_CR_INVALIDATE);
-   else
-   intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   if (GRAPHICS_VER(i915) >= 12) {
+   struct intel_gt *gt;
+
+   list_for_each_entry(gt, >gt_list, ggtt_link)
+   intel_uncore_write_fw(gt->uncore,
+ GEN12_GUC_TLB_INV_CR,
+ GEN12_GUC_TLB_INV_CR_INVALIDATE);
+   } else {
+   intel_uncore_write_fw(ggtt->vm.gt->uncore,
+ GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   }
 }
 
 u64 gen8_ggtt_pte_encode(dma_addr_t addr,
@@ -986,8 +998,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 
ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
 
-   setup_private_pat(ggtt->vm.gt);
-
return ggtt_probe_common(ggtt, size);
 }
 
@@ -1186,7 +1196,7 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct 
intel_gt *gt)
(u64)ggtt->mappable_end >> 20);
drm_dbg(>drm, "DSM size = %lluM\n",
(u64)resource_size(_graphics_stolen_res) >> 20);
-
+   INIT_LIST_HEAD(>gt_list);
return 0;
 }
 
@@ -1296,9 +1306,11 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
 
 void i915_ggtt_resume(struct i915_ggtt *ggtt)
 {
+   struct intel_gt *gt;
bool flush;
 
-   intel_gt_check_and_clear_faults(ggtt->vm.gt);
+   list_for_each_entry(gt, >gt_list, ggtt_link)
+   intel_gt_check_and_clear_faults(gt);
 
flush = i915_ggtt_resume_vm(>vm);
 
@@ -1307,9 +1319,6 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
if (flush)
wbinvd_on_all_cpus();
 
-   if (GRAPHICS_VER(ggtt->vm.i915) >= 8)
-   setup_private_pat(ggtt->vm.gt);
-
intel_ggtt_restore_fences(ggtt);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 2e796ffad911..d72efb74563a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -110,9 +110,17 @@ static int intel_gt_probe_lmem(struct in

[Intel-gfx] [PATCH v5] drm/i915/mtl: enable local stolen memory

2022-09-29 Thread Aravind Iddamsetty
As an integrated GPU, MTL does not have local memory and HAS_LMEM()
returns false.  However the platform's stolen memory is presented via
BAR2 (i.e., the BAR we traditionally consider to be the GMADR on IGFX)
and should be managed by the driver the same way that local memory is
on dgpu platforms (which includes setting the "lmem" bit on page table
entries).  We use the term "local stolen memory" to refer to this
model.

The major difference from the traditional BAR2 (GMADR) is that here the
stolen area is mapped directly via BAR2, whereas traditionally BAR2 is an
aperture into the GTT VA through which accesses are made to the stolen area.

BSPEC: 53098, 63830

v2:
1. dropped is_dsm_invalid, updated valid_stolen_size check from Lucas
(Jani, Lucas)
2. drop lmembar_is_igpu_stolen
3. revert to referring GFXMEM_BAR as GEN12_LMEM_BAR (Lucas)

v3:(Jani)
1. rename get_mtl_gms_size to mtl_get_gms_size
2. define register for MMIO address

v4:(Matt)
1. Use REG_FIELD_GET to read GMS value
2. replace the calculations with SZ_256M/SZ_8M

v5: Include more details to commit message on how it is different from
earlier platforms (Anshuman)

Cc: Matt Roper 
Cc: Lucas De Marchi 
Cc: Jani Nikula 

Signed-off-by: CQ Tang 
Signed-off-by: Aravind Iddamsetty 
Original-author: CQ Tang
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 83 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c   |  2 +-
 drivers/gpu/drm/i915/i915_drv.h|  3 +
 drivers/gpu/drm/i915/i915_reg.h|  4 ++
 4 files changed, 76 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index c5a4035c99cd..910086974454 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -77,9 +77,9 @@ void i915_gem_stolen_remove_node(struct drm_i915_private 
*i915,
mutex_unlock(>mm.stolen_lock);
 }
 
-static bool valid_stolen_size(struct resource *dsm)
+static bool valid_stolen_size(struct drm_i915_private *i915, struct resource 
*dsm)
 {
-   return dsm->start != 0 && dsm->end > dsm->start;
+   return (dsm->start != 0 || HAS_BAR2_SMEM_STOLEN(i915)) && dsm->end > 
dsm->start;
 }
 
 static int adjust_stolen(struct drm_i915_private *i915,
@@ -88,7 +88,7 @@ static int adjust_stolen(struct drm_i915_private *i915,
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 
-   if (!valid_stolen_size(dsm))
+   if (!valid_stolen_size(i915, dsm))
return -EINVAL;
 
/*
@@ -135,7 +135,7 @@ static int adjust_stolen(struct drm_i915_private *i915,
}
}
 
-   if (!valid_stolen_size(dsm))
+   if (!valid_stolen_size(i915, dsm))
return -EINVAL;
 
return 0;
@@ -149,8 +149,11 @@ static int request_smem_stolen(struct drm_i915_private 
*i915,
/*
 * With stolen lmem, we don't need to request system memory for the
 * address range since it's local to the gpu.
+*
+* Starting MTL, in IGFX devices the stolen memory is exposed via
+* BAR2 and shall be considered similar to stolen lmem.
 */
-   if (HAS_LMEM(i915))
+   if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915))
return 0;
 
/*
@@ -385,8 +388,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private 
*i915,
 
drm_dbg(>drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
 
-   *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
-
switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
case GEN8_STOLEN_RESERVED_1M:
*size = 1024 * 1024;
@@ -404,6 +405,12 @@ static void icl_get_stolen_reserved(struct 
drm_i915_private *i915,
*size = 8 * 1024 * 1024;
MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
}
+
+   if (HAS_BAR2_SMEM_STOLEN(i915))
+   /* the base is initialized to stolen top so subtract size to 
get base */
+   *base -= *size;
+   else
+   *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
 }
 
 /*
@@ -833,6 +840,29 @@ static const struct intel_memory_region_ops 
i915_region_stolen_lmem_ops = {
.init_object = _i915_gem_object_stolen_init,
 };
 
+static int mtl_get_gms_size(struct intel_uncore *uncore)
+{
+   u16 ggc, gms;
+
+   ggc = intel_uncore_read16(uncore, GGC);
+
+   /* check GGMS, should be fixed 0x3 (8MB) */
+   if ((ggc & GGMS_MASK) != GGMS_MASK)
+   return -EIO;
+
+   /* return valid GMS value, -EIO if invalid */
+   gms = REG_FIELD_GET(GMS_MASK, ggc);
+   switch (gms) {
+   case 0x0 ... 0x04:
+   return gms * 32;
+   case 0xf0 ... 0xfe:
+   return (gms - 0xf0 + 1) * 4;
+   default:
+   MISSING_CASE(gms);
+   retu

[Intel-gfx] [PATCH v4] drm/i915/mtl: enable local stolen memory

2022-09-28 Thread Aravind Iddamsetty
As an integrated GPU, MTL does not have local memory and
HAS_LMEM() returns false.  However the platform's stolen memory
is presented via BAR2 (i.e., the BAR we traditionally consider
to be the LMEM BAR) and should be managed by the driver the same
way that local memory is on dgpu platforms (which includes
setting the "lmem" bit on page table entries).  We use the term
"local stolen memory" to refer to this model.

BSPEC: 53098, 63830

v2:
1. dropped is_dsm_invalid, updated valid_stolen_size check from Lucas
(Jani, Lucas)
2. drop lmembar_is_igpu_stolen
3. revert to referring GFXMEM_BAR as GEN12_LMEM_BAR (Lucas)

v3:(Jani)
1. rename get_mtl_gms_size to mtl_get_gms_size
2. define register for MMIO address

v4:(Matt)
1. Use REG_FIELD_GET to read GMS value
2. replace the calculations with SZ_256M/SZ_8M

Cc: Matt Roper 
Cc: Lucas De Marchi 
Cc: Jani Nikula 

Signed-off-by: CQ Tang 
Signed-off-by: Aravind Iddamsetty 
Original-author: CQ Tang
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 83 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c   |  2 +-
 drivers/gpu/drm/i915/i915_drv.h|  3 +
 drivers/gpu/drm/i915/i915_reg.h|  4 ++
 4 files changed, 76 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index c5a4035c99cd..2f8530a0ff62 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -77,9 +77,9 @@ void i915_gem_stolen_remove_node(struct drm_i915_private 
*i915,
mutex_unlock(>mm.stolen_lock);
 }
 
-static bool valid_stolen_size(struct resource *dsm)
+static bool valid_stolen_size(struct drm_i915_private *i915, struct resource 
*dsm)
 {
-   return dsm->start != 0 && dsm->end > dsm->start;
+   return (dsm->start != 0 || HAS_BAR2_SMEM_STOLEN(i915)) && dsm->end > 
dsm->start;
 }
 
 static int adjust_stolen(struct drm_i915_private *i915,
@@ -88,7 +88,7 @@ static int adjust_stolen(struct drm_i915_private *i915,
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 
-   if (!valid_stolen_size(dsm))
+   if (!valid_stolen_size(i915, dsm))
return -EINVAL;
 
/*
@@ -135,7 +135,7 @@ static int adjust_stolen(struct drm_i915_private *i915,
}
}
 
-   if (!valid_stolen_size(dsm))
+   if (!valid_stolen_size(i915, dsm))
return -EINVAL;
 
return 0;
@@ -149,8 +149,11 @@ static int request_smem_stolen(struct drm_i915_private 
*i915,
/*
 * With stolen lmem, we don't need to request system memory for the
 * address range since it's local to the gpu.
+*
+* Starting MTL, in IGFX devices the stolen memory is exposed via
+* BAR2 and shall be considered similar to stolen lmem.
 */
-   if (HAS_LMEM(i915))
+   if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915))
return 0;
 
/*
@@ -385,8 +388,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private 
*i915,
 
drm_dbg(>drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
 
-   *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
-
switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
case GEN8_STOLEN_RESERVED_1M:
*size = 1024 * 1024;
@@ -404,6 +405,12 @@ static void icl_get_stolen_reserved(struct 
drm_i915_private *i915,
*size = 8 * 1024 * 1024;
MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
}
+
+   if (HAS_BAR2_SMEM_STOLEN(i915))
+   /* the base is initialized to stolen top so subtract size to 
get base */
+   *base -= *size;
+   else
+   *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
 }
 
 /*
@@ -833,6 +840,29 @@ static const struct intel_memory_region_ops 
i915_region_stolen_lmem_ops = {
.init_object = _i915_gem_object_stolen_init,
 };
 
+static int mtl_get_gms_size(struct intel_uncore *uncore)
+{
+   u16 ggc, gms;
+
+   ggc = intel_uncore_read16(uncore, GGC);
+
+   /* check GGMS, should be fixed 0x3 (8MB) */
+   if ((ggc & GGMS_MASK) != GGMS_MASK)
+   return -EIO;
+
+   /* return valid GMS value, -EIO if invalid */
+   gms = REG_FIELD_GET(GMS_MASK, ggc);
+   switch (gms) {
+   case 0x0 ... 0x04:
+   return gms * 32;
+   case 0xf0 ... 0xfe:
+   return (gms - 0xf0 + 1) * 4;
+   default:
+   MISSING_CASE(gms);
+   return -EIO;
+   }
+}
+
 struct intel_memory_region *
 i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
   u16 instance)
@@ -843,6 +873,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, 
u16 type,
struct intel_memory_regio

[Intel-gfx] [PATCH v3] drm/i915/mtl: enable local stolen memory

2022-09-26 Thread Aravind Iddamsetty
As an integrated GPU, MTL does not have local memory and
HAS_LMEM() returns false.  However the platform's stolen memory
is presented via BAR2 (i.e., the BAR we traditionally consider
to be the LMEM BAR) and should be managed by the driver the same
way that local memory is on dgpu platforms (which includes
setting the "lmem" bit on page table entries).  We use the term
"local stolen memory" to refer to this model.

v2:
1. dropped is_dsm_invalid, updated valid_stolen_size check from Lucas
(Jani, Lucas)
2. drop lmembar_is_igpu_stolen
3. revert to referring GFXMEM_BAR as GEN12_LMEM_BAR (Lucas)

v3:(Jani)
1. rename get_mtl_gms_size to mtl_get_gms_size
2. define register for MMIO address

Cc: Matt Roper 
Cc: Lucas De Marchi 
Cc: Jani Nikula 

Signed-off-by: CQ Tang 
Signed-off-by: Aravind Iddamsetty 
Original-author: CQ Tang
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 88 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c   |  2 +-
 drivers/gpu/drm/i915/i915_drv.h|  3 +
 drivers/gpu/drm/i915/i915_reg.h|  5 ++
 4 files changed, 81 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index c5a4035c99cd..0eb66c55bbf3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -77,9 +77,9 @@ void i915_gem_stolen_remove_node(struct drm_i915_private 
*i915,
mutex_unlock(>mm.stolen_lock);
 }
 
-static bool valid_stolen_size(struct resource *dsm)
+static bool valid_stolen_size(struct drm_i915_private *i915, struct resource 
*dsm)
 {
-   return dsm->start != 0 && dsm->end > dsm->start;
+   return (dsm->start != 0 || HAS_BAR2_SMEM_STOLEN(i915)) && dsm->end > 
dsm->start;
 }
 
 static int adjust_stolen(struct drm_i915_private *i915,
@@ -88,7 +88,7 @@ static int adjust_stolen(struct drm_i915_private *i915,
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 
-   if (!valid_stolen_size(dsm))
+   if (!valid_stolen_size(i915, dsm))
return -EINVAL;
 
/*
@@ -135,7 +135,7 @@ static int adjust_stolen(struct drm_i915_private *i915,
}
}
 
-   if (!valid_stolen_size(dsm))
+   if (!valid_stolen_size(i915, dsm))
return -EINVAL;
 
return 0;
@@ -148,9 +148,10 @@ static int request_smem_stolen(struct drm_i915_private 
*i915,
 
/*
 * With stolen lmem, we don't need to request system memory for the
-* address range since it's local to the gpu.
+* address range since it's local to the gpu and in some IGFX devices
+* BAR2 is exposed as stolen
 */
-   if (HAS_LMEM(i915))
+   if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915))
return 0;
 
/*
@@ -385,8 +386,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private 
*i915,
 
drm_dbg(>drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
 
-   *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
-
switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
case GEN8_STOLEN_RESERVED_1M:
*size = 1024 * 1024;
@@ -404,6 +403,12 @@ static void icl_get_stolen_reserved(struct 
drm_i915_private *i915,
*size = 8 * 1024 * 1024;
MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
}
+
+   if (HAS_BAR2_SMEM_STOLEN(i915))
+   /* the base is initialized to stolen top so subtract size to 
get base */
+   *base -= *size;
+   else
+   *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
 }
 
 /*
@@ -833,6 +838,34 @@ static const struct intel_memory_region_ops 
i915_region_stolen_lmem_ops = {
.init_object = _i915_gem_object_stolen_init,
 };
 
+static int mtl_get_gms_size(struct intel_uncore *uncore)
+{
+   u16 ggc, gms;
+
+   ggc = intel_uncore_read16(uncore, GGC);
+
+   /* check GGMS, should be fixed 0x3 (8MB) */
+   if ((ggc & GGMS_MASK) != GGMS_MASK)
+   return -EIO;
+
+   /* return valid GMS value, -EIO if invalid */
+   gms = (ggc & GMS_MASK) >> GMS_SHIFT;
+   switch (gms) {
+   case 0x0 ... 0x10:
+   return gms * 32;
+   case 0x20:
+   return 1024;
+   case 0x30:
+   return 1536;
+   case 0x40:
+   return 2048;
+   case 0xf0 ... 0xfe:
+   return (gms - 0xf0 + 1) * 4;
+   default:
+   return -EIO;
+   }
+}
+
 struct intel_memory_region *
 i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
   u16 instance)
@@ -843,6 +876,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, 
u16 type,
struct intel_memory_region *mem;
resource_size_t io_start, io_size;
 

[Intel-gfx] [PATCH v3] drm/i915/mtl: enable local stolen memory

2022-09-26 Thread Aravind Iddamsetty
As an integrated GPU, MTL does not have local memory and
HAS_LMEM() returns false.  However the platform's stolen memory
is presented via BAR2 (i.e., the BAR we traditionally consider
to be the LMEM BAR) and should be managed by the driver the same
way that local memory is on dgpu platforms (which includes
setting the "lmem" bit on page table entries).  We use the term
"local stolen memory" to refer to this model.

v2:
1. dropped is_dsm_invalid, updated valid_stolen_size check from Lucas
(Jani, Lucas)
2. drop lmembar_is_igpu_stolen
3. revert to referring GFXMEM_BAR as GEN12_LMEM_BAR (Lucas)

v3:(Jani)
1. rename get_mtl_gms_size to mtl_get_gms_size
2. define register for MMIO address

Cc: Matt Roper 
Cc: Lucas De Marchi 
Cc: Jani Nikula 

Signed-off-by: CQ Tang 
Signed-off-by: Aravind Iddamsetty 
Original-author: CQ Tang
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 88 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c   |  2 +-
 drivers/gpu/drm/i915/i915_drv.h|  3 +
 drivers/gpu/drm/i915/i915_reg.h|  5 ++
 4 files changed, 81 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index c5a4035c99cd..0eb66c55bbf3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -77,9 +77,9 @@ void i915_gem_stolen_remove_node(struct drm_i915_private 
*i915,
mutex_unlock(>mm.stolen_lock);
 }
 
-static bool valid_stolen_size(struct resource *dsm)
+static bool valid_stolen_size(struct drm_i915_private *i915, struct resource 
*dsm)
 {
-   return dsm->start != 0 && dsm->end > dsm->start;
+   return (dsm->start != 0 || HAS_BAR2_SMEM_STOLEN(i915)) && dsm->end > 
dsm->start;
 }
 
 static int adjust_stolen(struct drm_i915_private *i915,
@@ -88,7 +88,7 @@ static int adjust_stolen(struct drm_i915_private *i915,
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 
-   if (!valid_stolen_size(dsm))
+   if (!valid_stolen_size(i915, dsm))
return -EINVAL;
 
/*
@@ -135,7 +135,7 @@ static int adjust_stolen(struct drm_i915_private *i915,
}
}
 
-   if (!valid_stolen_size(dsm))
+   if (!valid_stolen_size(i915, dsm))
return -EINVAL;
 
return 0;
@@ -148,9 +148,10 @@ static int request_smem_stolen(struct drm_i915_private 
*i915,
 
/*
 * With stolen lmem, we don't need to request system memory for the
-* address range since it's local to the gpu.
+* address range since it's local to the gpu and in some IGFX devices
+* BAR2 is exposed as stolen
 */
-   if (HAS_LMEM(i915))
+   if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915))
return 0;
 
/*
@@ -385,8 +386,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private 
*i915,
 
drm_dbg(>drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
 
-   *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
-
switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
case GEN8_STOLEN_RESERVED_1M:
*size = 1024 * 1024;
@@ -404,6 +403,12 @@ static void icl_get_stolen_reserved(struct 
drm_i915_private *i915,
*size = 8 * 1024 * 1024;
MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
}
+
+   if (HAS_BAR2_SMEM_STOLEN(i915))
+   /* the base is initialized to stolen top so subtract size to 
get base */
+   *base -= *size;
+   else
+   *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
 }
 
 /*
@@ -833,6 +838,34 @@ static const struct intel_memory_region_ops 
i915_region_stolen_lmem_ops = {
.init_object = _i915_gem_object_stolen_init,
 };
 
+static int mtl_get_gms_size(struct intel_uncore *uncore)
+{
+   u16 ggc, gms;
+
+   ggc = intel_uncore_read16(uncore, GGC);
+
+   /* check GGMS, should be fixed 0x3 (8MB) */
+   if ((ggc & GGMS_MASK) != GGMS_MASK)
+   return -EIO;
+
+   /* return valid GMS value, -EIO if invalid */
+   gms = (ggc & GMS_MASK) >> GMS_SHIFT;
+   switch (gms) {
+   case 0x0 ... 0x10:
+   return gms * 32;
+   case 0x20:
+   return 1024;
+   case 0x30:
+   return 1536;
+   case 0x40:
+   return 2048;
+   case 0xf0 ... 0xfe:
+   return (gms - 0xf0 + 1) * 4;
+   default:
+   return -EIO;
+   }
+}
+
 struct intel_memory_region *
 i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
   u16 instance)
@@ -843,6 +876,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, 
u16 type,
struct intel_memory_region *mem;
resource_size_t io_start, io_size;
 

[Intel-gfx] [PATCH v2] drm/i915/mtl: enable local stolen memory

2022-09-26 Thread Aravind Iddamsetty
As an integrated GPU, MTL does not have local memory and
HAS_LMEM() returns false.  However the platform's stolen memory
is presented via BAR2 (i.e., the BAR we traditionally consider
to be the LMEM BAR) and should be managed by the driver the same
way that local memory is on dgpu platforms (which includes
setting the "lmem" bit on page table entries).  We use the term
"local stolen memory" to refer to this model.

v2:
1. dropped is_dsm_invalid, updated valid_stolen_size check from Lucas
(Jani, Lucas)
2. drop lmembar_is_igpu_stolen
3. revert to referring GFXMEM_BAR as GEN12_LMEM_BAR (Lucas)

Cc: Matt Roper 
Cc: Lucas De Marchi 

Signed-off-by: CQ Tang 
Signed-off-by: Aravind Iddamsetty 
Original-author: CQ Tang
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 88 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c   |  2 +-
 drivers/gpu/drm/i915/i915_drv.h|  3 +
 3 files changed, 76 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index c5a4035c99cd..582c4d7d2a9a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -77,9 +77,9 @@ void i915_gem_stolen_remove_node(struct drm_i915_private 
*i915,
mutex_unlock(&i915->mm.stolen_lock);
 }
 
-static bool valid_stolen_size(struct resource *dsm)
+static bool valid_stolen_size(struct drm_i915_private *i915, struct resource 
*dsm)
 {
-   return dsm->start != 0 && dsm->end > dsm->start;
+   return (dsm->start != 0 || HAS_BAR2_SMEM_STOLEN(i915)) && dsm->end > 
dsm->start;
 }
 
 static int adjust_stolen(struct drm_i915_private *i915,
@@ -88,7 +88,7 @@ static int adjust_stolen(struct drm_i915_private *i915,
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 
-   if (!valid_stolen_size(dsm))
+   if (!valid_stolen_size(i915, dsm))
return -EINVAL;
 
/*
@@ -135,7 +135,7 @@ static int adjust_stolen(struct drm_i915_private *i915,
}
}
 
-   if (!valid_stolen_size(dsm))
+   if (!valid_stolen_size(i915, dsm))
return -EINVAL;
 
return 0;
@@ -148,9 +148,10 @@ static int request_smem_stolen(struct drm_i915_private 
*i915,
 
/*
 * With stolen lmem, we don't need to request system memory for the
-* address range since it's local to the gpu.
+* address range since it's local to the gpu and in some IGFX devices
+* BAR2 is exposed as stolen
 */
-   if (HAS_LMEM(i915))
+   if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915))
return 0;
 
/*
@@ -385,8 +386,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private 
*i915,
 
drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
 
-   *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
-
switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
case GEN8_STOLEN_RESERVED_1M:
*size = 1024 * 1024;
@@ -404,6 +403,12 @@ static void icl_get_stolen_reserved(struct 
drm_i915_private *i915,
*size = 8 * 1024 * 1024;
MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
}
+
+   if (HAS_BAR2_SMEM_STOLEN(i915))
+   /* the base is initialized to stolen top so subtract size to 
get base */
+   *base -= *size;
+   else
+   *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
 }
 
 /*
@@ -833,6 +838,34 @@ static const struct intel_memory_region_ops 
i915_region_stolen_lmem_ops = {
.init_object = _i915_gem_object_stolen_init,
 };
 
+static int get_mtl_gms_size(struct intel_uncore *uncore)
+{
+   u16 ggc, gms;
+
+   ggc = intel_uncore_read16(uncore, _MMIO(0x108040));
+
+   /* check GGMS, should be fixed 0x3 (8MB) */
+   if ((ggc & 0xc0) != 0xc0)
+   return -EIO;
+
+   /* return valid GMS value, -EIO if invalid */
+   gms = ggc >> 8;
+   switch (gms) {
+   case 0x0 ... 0x10:
+   return gms * 32;
+   case 0x20:
+   return 1024;
+   case 0x30:
+   return 1536;
+   case 0x40:
+   return 2048;
+   case 0xf0 ... 0xfe:
+   return (gms - 0xf0 + 1) * 4;
+   default:
+   return -EIO;
+   }
+}
+
 struct intel_memory_region *
 i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
   u16 instance)
@@ -843,6 +876,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, 
u16 type,
struct intel_memory_region *mem;
resource_size_t io_start, io_size;
resource_size_t min_page_size;
+   int ret;
 
if (WARN_ON_ONCE(instance))
return ERR_PTR(-ENODEV);
@@ -850,12 +884,8 @@ i915_gem_stolen

[Intel-gfx] [PATCH 1/1] drm/i915/mtl: enable local stolen memory

2022-09-20 Thread Aravind Iddamsetty
As an integrated GPU, MTL does not have local memory and
HAS_LMEM() returns false.  However the platform's stolen memory
is presented via BAR2 (i.e., the BAR we traditionally consider
to be the LMEM BAR) and should be managed by the driver the same
way that local memory is on dgpu platforms (which includes
setting the "lmem" bit on page table entries).  We use the term
"local stolen memory" to refer to this model.

Cc: Matt Roper 
Cc: Lucas De Marchi 

Signed-off-by: CQ Tang 
Signed-off-by: Aravind Iddamsetty 
Original-author: CQ Tang
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 113 +
 drivers/gpu/drm/i915/gt/intel_ggtt.c   |   2 +-
 drivers/gpu/drm/i915/i915_drv.h|   3 +
 3 files changed, 100 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index acc561c0f0aa..bad5250fb764 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -77,6 +77,19 @@ void i915_gem_stolen_remove_node(struct drm_i915_private 
*i915,
mutex_unlock(&i915->mm.stolen_lock);
 }
 
+static bool is_dsm_invalid(struct drm_i915_private *i915, struct resource *dsm)
+{
+   if (!HAS_BAR2_SMEM_STOLEN(i915)) {
+   if (dsm->start == 0)
+   return true;
+   }
+
+   if (dsm->end <= dsm->start)
+   return true;
+
+   return false;
+}
+
 static int i915_adjust_stolen(struct drm_i915_private *i915,
  struct resource *dsm)
 {
@@ -84,7 +97,7 @@ static int i915_adjust_stolen(struct drm_i915_private *i915,
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
struct resource *r;
 
-   if (dsm->start == 0 || dsm->end <= dsm->start)
+   if (is_dsm_invalid(i915, dsm))
return -EINVAL;
 
/*
@@ -136,7 +149,7 @@ static int i915_adjust_stolen(struct drm_i915_private *i915,
 * overlaps with the non-stolen system memory range, since lmem is local
 * to the gpu.
 */
-   if (HAS_LMEM(i915))
+   if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915))
return 0;
 
/*
@@ -371,8 +384,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private 
*i915,
 
drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
 
-   *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
-
switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
case GEN8_STOLEN_RESERVED_1M:
*size = 1024 * 1024;
@@ -390,6 +401,12 @@ static void icl_get_stolen_reserved(struct 
drm_i915_private *i915,
*size = 8 * 1024 * 1024;
MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
}
+
+   if ((GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) && !IS_DGFX(i915))
+   /* the base is initialized to stolen top so subtract size to 
get base */
+   *base -= *size;
+   else
+   *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
 }
 
 static int i915_gem_init_stolen(struct intel_memory_region *mem)
@@ -423,8 +440,7 @@ static int i915_gem_init_stolen(struct intel_memory_region 
*mem)
if (i915_adjust_stolen(i915, &i915->dsm))
return 0;
 
-   GEM_BUG_ON(i915->dsm.start == 0);
-   GEM_BUG_ON(i915->dsm.end <= i915->dsm.start);
+   GEM_BUG_ON(is_dsm_invalid(i915, &i915->dsm));
 
stolen_top = i915->dsm.end + 1;
reserved_base = stolen_top;
@@ -796,6 +812,46 @@ static const struct intel_memory_region_ops 
i915_region_stolen_lmem_ops = {
.init_object = _i915_gem_object_stolen_init,
 };
 
+static int get_mtl_gms_size(struct intel_uncore *uncore)
+{
+   u16 ggc, gms;
+
+   ggc = intel_uncore_read16(uncore, _MMIO(0x108040));
+
+   /* check GGMS, should be fixed 0x3 (8MB) */
+   if ((ggc & 0xc0) != 0xc0)
+   return -EIO;
+
+   /* return valid GMS value, -EIO if invalid */
+   gms = ggc >> 8;
+   switch (gms) {
+   case 0x0 ... 0x10:
+   return gms * 32;
+   case 0x20:
+   return 1024;
+   case 0x30:
+   return 1536;
+   case 0x40:
+   return 2048;
+   case 0xf0 ... 0xfe:
+   return (gms - 0xf0 + 1) * 4;
+   default:
+   return -EIO;
+   }
+}
+
+static inline bool lmembar_is_igpu_stolen(struct drm_i915_private *i915)
+{
+   u32 regions = RUNTIME_INFO(i915)->memory_regions;
+
+   if (regions & REGION_LMEM)
+   return false;
+
+   drm_WARN_ON(&i915->drm, (regions & REGION_STOLEN_LMEM) == 0);
+
+   return true;
+}
+
 struct intel_memory_region *
 i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
   u16 instance)
@@ -806,19 +862,16 @@ i915_gem_stolen_lmem_setup(struct drm_