[PATCH v5] drm/i915: stop using swiotlb

2022-07-26 Thread Robert Beckett
Calling swiotlb functions directly is nowadays considered harmful. See
https://lore.kernel.org/intel-gfx/20220711082614.ga29...@lst.de/

Replace swiotlb_max_segment() calls with dma_max_mapping_size().
In i915_gem_object_get_pages_internal() no longer consider max_segment
only if CONFIG_SWIOTLB is enabled. There can be other (iommu related)
causes of specific max segment sizes.

Cc: Christoph Hellwig 
Cc: Tvrtko Ursulin 
Cc: Thomas Hellstrom 
Cc: Matthew Auld 

v2: - restore UINT_MAX clamp in i915_sg_segment_size()
- drop PAGE_SIZE check as it will always be >= PAGE_SIZE
v3: - actually clamp to UINT_MAX in i915_sg_segment_size()
v4: - round down max segment size to PAGE_SIZE
v5: - fix checkpatch whitespace issue

Reviewed-by: Christoph Hellwig 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_internal.c | 19 ---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  |  4 ++--
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c  |  2 +-
 drivers/gpu/drm/i915/i915_scatterlist.h  | 16 
 5 files changed, 12 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c 
b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index c698f95af15f..24f37658f1bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -6,7 +6,6 @@
 
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
-#include <linux/swiotlb.h>
 
 #include "i915_drv.h"
 #include "i915_gem.h"
@@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct 
drm_i915_gem_object *obj)
struct scatterlist *sg;
unsigned int sg_page_sizes;
unsigned int npages;
-   int max_order;
+   int max_order = MAX_ORDER;
+   unsigned int max_segment;
gfp_t gfp;
 
-   max_order = MAX_ORDER;
-#ifdef CONFIG_SWIOTLB
-   if (is_swiotlb_active(obj->base.dev->dev)) {
-   unsigned int max_segment;
-
-   max_segment = swiotlb_max_segment();
-   if (max_segment) {
-   max_segment = max_t(unsigned int, max_segment,
-   PAGE_SIZE) >> PAGE_SHIFT;
-   max_order = min(max_order, ilog2(max_segment));
-   }
-   }
-#endif
+   max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT;
+   max_order = min(max_order, ilog2(max_segment));
 
gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
if (IS_I965GM(i915) || IS_I965G(i915)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 4eed3dd90ba8..34b9c76cd8e6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
struct intel_memory_region *mem = obj->mm.region;
struct address_space *mapping = obj->base.filp->f_mapping;
const unsigned long page_count = obj->base.size / PAGE_SIZE;
-   unsigned int max_segment = i915_sg_segment_size();
+   unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
struct sg_table *st;
struct sgt_iter sgt_iter;
struct page *page;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 5a5cf332d8a5..7a828c9c0f6d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device 
*bdev,
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
-   const unsigned int max_segment = i915_sg_segment_size();
+   const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
struct file *filp = i915_tt->filp;
struct sgt_iter sgt_iter;
@@ -568,7 +568,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct 
ttm_tt *ttm)
ret = sg_alloc_table_from_pages_segment(st,
ttm->pages, ttm->num_pages,
0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
-   i915_sg_segment_size(), GFP_KERNEL);
+   i915_sg_segment_size(i915_tt->dev), GFP_KERNEL);
if (ret) {
st->sgl = NULL;
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 094f06b4ce33..dfc35905dba2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj)
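
[The archive truncates the v5 message here, before the userptr and
i915_scatterlist.h hunks. Going by the changelog above -- clamp to
UINT_MAX (v3) and round down to PAGE_SIZE (v4) -- the reworked helper
plausibly ends up as the sketch below; treat it as a reconstruction from
the changelog, not the verbatim hunk:

	static inline unsigned int i915_sg_segment_size(struct device *dev)
	{
		size_t max = min_t(size_t, UINT_MAX, dma_max_mapping_size(dev));

		return round_down(max, PAGE_SIZE);
	}
]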

Re: [PATCH v4] drm/i915: stop using swiotlb

2022-07-26 Thread Robert Beckett

On 26/07/2022 14:12, Tvrtko Ursulin wrote:


On 25/07/2022 15:18, Robert Beckett wrote:

Calling swiotlb functions directly is nowadays considered harmful. See
https://lore.kernel.org/intel-gfx/20220711082614.ga29...@lst.de/

Replace swiotlb_max_segment() calls with dma_max_mapping_size().
In i915_gem_object_get_pages_internal() no longer consider max_segment
only if CONFIG_SWIOTLB is enabled. There can be other (iommu related)
causes of specific max segment sizes.

Cc: Christoph Hellwig 
Cc: Tvrtko Ursulin 
Cc: Thomas Hellstrom 
Cc: Matthew Auld 

v2: - restore UINT_MAX clamp in i915_sg_segment_size()
 - drop PAGE_SIZE check as it will always be >= PAGE_SIZE
v3: - actually clamp to UINT_MAX in i915_sg_segment_size()
v4: - round down max segment size to PAGE_SIZE

Reviewed-by: Christoph Hellwig 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Robert Beckett 
---
  drivers/gpu/drm/i915/gem/i915_gem_internal.c | 19 ---
  drivers/gpu/drm/i915/gem/i915_gem_shmem.c    |  2 +-
  drivers/gpu/drm/i915/gem/i915_gem_ttm.c  |  4 ++--
  drivers/gpu/drm/i915/gem/i915_gem_userptr.c  |  2 +-
  drivers/gpu/drm/i915/i915_scatterlist.h  | 17 -
  5 files changed, 12 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c 
b/drivers/gpu/drm/i915/gem/i915_gem_internal.c

index c698f95af15f..24f37658f1bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -6,7 +6,6 @@
  #include <linux/scatterlist.h>
  #include <linux/slab.h>
-#include <linux/swiotlb.h>
  #include "i915_drv.h"
  #include "i915_gem.h"
@@ -38,22 +37,12 @@ static int 
i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)

  struct scatterlist *sg;
  unsigned int sg_page_sizes;
  unsigned int npages;
-    int max_order;
+    int max_order = MAX_ORDER;
+    unsigned int max_segment;
  gfp_t gfp;
-    max_order = MAX_ORDER;
-#ifdef CONFIG_SWIOTLB
-    if (is_swiotlb_active(obj->base.dev->dev)) {
-    unsigned int max_segment;
-
-    max_segment = swiotlb_max_segment();
-    if (max_segment) {
-    max_segment = max_t(unsigned int, max_segment,
-    PAGE_SIZE) >> PAGE_SHIFT;
-    max_order = min(max_order, ilog2(max_segment));
-    }
-    }
-#endif
+    max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT;
+    max_order = min(max_order, ilog2(max_segment));
  gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
  if (IS_I965GM(i915) || IS_I965G(i915)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c

index 4eed3dd90ba8..34b9c76cd8e6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -194,7 +194,7 @@ static int shmem_get_pages(struct 
drm_i915_gem_object *obj)

  struct intel_memory_region *mem = obj->mm.region;
  struct address_space *mapping = obj->base.filp->f_mapping;
  const unsigned long page_count = obj->base.size / PAGE_SIZE;
-    unsigned int max_segment = i915_sg_segment_size();
+    unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
  struct sg_table *st;
  struct sgt_iter sgt_iter;
  struct page *page;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c

index 5a5cf332d8a5..7a828c9c0f6d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct 
ttm_device *bdev,
  struct drm_i915_private *i915 = container_of(bdev, 
typeof(*i915), bdev);
  struct intel_memory_region *mr = 
i915->mm.regions[INTEL_MEMORY_SYSTEM];
  struct i915_ttm_tt *i915_tt = container_of(ttm, 
typeof(*i915_tt), ttm);

-    const unsigned int max_segment = i915_sg_segment_size();
+    const unsigned int max_segment = 
i915_sg_segment_size(i915->drm.dev);

  const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
  struct file *filp = i915_tt->filp;
  struct sgt_iter sgt_iter;
@@ -568,7 +568,7 @@ static struct i915_refct_sgt 
*i915_ttm_tt_get_st(struct ttm_tt *ttm)

  ret = sg_alloc_table_from_pages_segment(st,
  ttm->pages, ttm->num_pages,
  0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
-    i915_sg_segment_size(), GFP_KERNEL);
+    i915_sg_segment_size(i915_tt->dev), GFP_KERNEL);
  if (ret) {
  st->sgl = NULL;
  return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c

index 094f06b4ce33..dfc35905dba2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -129,7 +129,7 @@ static void 
i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj)

  static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)

[PATCH v4] drm/i915: stop using swiotlb

2022-07-25 Thread Robert Beckett
Calling swiotlb functions directly is nowadays considered harmful. See
https://lore.kernel.org/intel-gfx/20220711082614.ga29...@lst.de/

Replace swiotlb_max_segment() calls with dma_max_mapping_size().
In i915_gem_object_get_pages_internal() no longer consider max_segment
only if CONFIG_SWIOTLB is enabled. There can be other (iommu related)
causes of specific max segment sizes.

Cc: Christoph Hellwig 
Cc: Tvrtko Ursulin 
Cc: Thomas Hellstrom 
Cc: Matthew Auld 

v2: - restore UINT_MAX clamp in i915_sg_segment_size()
- drop PAGE_SIZE check as it will always be >= PAGE_SIZE
v3: - actually clamp to UINT_MAX in i915_sg_segment_size()
v4: - round down max segment size to PAGE_SIZE

Reviewed-by: Christoph Hellwig 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_internal.c | 19 ---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  |  4 ++--
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c  |  2 +-
 drivers/gpu/drm/i915/i915_scatterlist.h  | 17 -
 5 files changed, 12 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c 
b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index c698f95af15f..24f37658f1bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -6,7 +6,6 @@
 
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
-#include <linux/swiotlb.h>
 
 #include "i915_drv.h"
 #include "i915_gem.h"
@@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct 
drm_i915_gem_object *obj)
struct scatterlist *sg;
unsigned int sg_page_sizes;
unsigned int npages;
-   int max_order;
+   int max_order = MAX_ORDER;
+   unsigned int max_segment;
gfp_t gfp;
 
-   max_order = MAX_ORDER;
-#ifdef CONFIG_SWIOTLB
-   if (is_swiotlb_active(obj->base.dev->dev)) {
-   unsigned int max_segment;
-
-   max_segment = swiotlb_max_segment();
-   if (max_segment) {
-   max_segment = max_t(unsigned int, max_segment,
-   PAGE_SIZE) >> PAGE_SHIFT;
-   max_order = min(max_order, ilog2(max_segment));
-   }
-   }
-#endif
+   max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT;
+   max_order = min(max_order, ilog2(max_segment));
 
gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
if (IS_I965GM(i915) || IS_I965G(i915)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 4eed3dd90ba8..34b9c76cd8e6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
struct intel_memory_region *mem = obj->mm.region;
struct address_space *mapping = obj->base.filp->f_mapping;
const unsigned long page_count = obj->base.size / PAGE_SIZE;
-   unsigned int max_segment = i915_sg_segment_size();
+   unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
struct sg_table *st;
struct sgt_iter sgt_iter;
struct page *page;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 5a5cf332d8a5..7a828c9c0f6d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device 
*bdev,
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
-   const unsigned int max_segment = i915_sg_segment_size();
+   const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
struct file *filp = i915_tt->filp;
struct sgt_iter sgt_iter;
@@ -568,7 +568,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct 
ttm_tt *ttm)
ret = sg_alloc_table_from_pages_segment(st,
ttm->pages, ttm->num_pages,
0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
-   i915_sg_segment_size(), GFP_KERNEL);
+   i915_sg_segment_size(i915_tt->dev), GFP_KERNEL);
if (ret) {
st->sgl = NULL;
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 094f06b4ce33..dfc35905dba2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj)

[PATCH v3] drm/i915: stop using swiotlb

2022-07-22 Thread Robert Beckett
Calling swiotlb functions directly is nowadays considered harmful. See
https://lore.kernel.org/intel-gfx/20220711082614.ga29...@lst.de/

Replace swiotlb_max_segment() calls with dma_max_mapping_size().
In i915_gem_object_get_pages_internal() no longer consider max_segment
only if CONFIG_SWIOTLB is enabled. There can be other (iommu related)
causes of specific max segment sizes.

Cc: Christoph Hellwig 
Cc: Tvrtko Ursulin 
Cc: Thomas Hellstrom 
Cc: Matthew Auld 

v2: - restore UINT_MAX clamp in i915_sg_segment_size()
- drop PAGE_SIZE check as it will always be >= PAGE_SIZE
v3: - actually clamp to UINT_MAX in i915_sg_segment_size()

Reviewed-by: Christoph Hellwig 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_internal.c | 19 ---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  |  4 ++--
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c  |  2 +-
 drivers/gpu/drm/i915/i915_scatterlist.h  | 16 +++-
 5 files changed, 11 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c 
b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index c698f95af15f..24f37658f1bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -6,7 +6,6 @@
 
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
-#include <linux/swiotlb.h>
 
 #include "i915_drv.h"
 #include "i915_gem.h"
@@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct 
drm_i915_gem_object *obj)
struct scatterlist *sg;
unsigned int sg_page_sizes;
unsigned int npages;
-   int max_order;
+   int max_order = MAX_ORDER;
+   unsigned int max_segment;
gfp_t gfp;
 
-   max_order = MAX_ORDER;
-#ifdef CONFIG_SWIOTLB
-   if (is_swiotlb_active(obj->base.dev->dev)) {
-   unsigned int max_segment;
-
-   max_segment = swiotlb_max_segment();
-   if (max_segment) {
-   max_segment = max_t(unsigned int, max_segment,
-   PAGE_SIZE) >> PAGE_SHIFT;
-   max_order = min(max_order, ilog2(max_segment));
-   }
-   }
-#endif
+   max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT;
+   max_order = min(max_order, ilog2(max_segment));
 
gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
if (IS_I965GM(i915) || IS_I965G(i915)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 4eed3dd90ba8..34b9c76cd8e6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
struct intel_memory_region *mem = obj->mm.region;
struct address_space *mapping = obj->base.filp->f_mapping;
const unsigned long page_count = obj->base.size / PAGE_SIZE;
-   unsigned int max_segment = i915_sg_segment_size();
+   unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
struct sg_table *st;
struct sgt_iter sgt_iter;
struct page *page;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 5a5cf332d8a5..7a828c9c0f6d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device 
*bdev,
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
-   const unsigned int max_segment = i915_sg_segment_size();
+   const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
struct file *filp = i915_tt->filp;
struct sgt_iter sgt_iter;
@@ -568,7 +568,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct 
ttm_tt *ttm)
ret = sg_alloc_table_from_pages_segment(st,
ttm->pages, ttm->num_pages,
0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
-   i915_sg_segment_size(), GFP_KERNEL);
+   i915_sg_segment_size(i915_tt->dev), GFP_KERNEL);
if (ret) {
st->sgl = NULL;
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 094f06b4ce33..dfc35905dba2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct 
drm_i915_gem_object *obj)
 static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)

Re: [PATCH v2] drm/i915: stop using swiotlb

2022-07-22 Thread Robert Beckett

On 22/07/2022 16:03, Christoph Hellwig wrote:

+   return max_t(size_t, UINT_MAX, dma_max_mapping_size(dev));


Shouldn't this be a min?


eugh! yes. Stand by for v3
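
[So the v3 fix turns the clamp into a real upper bound. A sketch of the
helper as corrected at that stage, before v4 additionally rounds the
result down to PAGE_SIZE:

	static inline unsigned int i915_sg_segment_size(struct device *dev)
	{
		return min_t(size_t, UINT_MAX, dma_max_mapping_size(dev));
	}
]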


[PATCH v2] drm/i915: stop using swiotlb

2022-07-22 Thread Robert Beckett
Calling swiotlb functions directly is nowadays considered harmful. See
https://lore.kernel.org/intel-gfx/20220711082614.ga29...@lst.de/

Replace swiotlb_max_segment() calls with dma_max_mapping_size().
In i915_gem_object_get_pages_internal() no longer consider max_segment
only if CONFIG_SWIOTLB is enabled. There can be other (iommu related)
causes of specific max segment sizes.

Cc: Christoph Hellwig 
Cc: Tvrtko Ursulin 
Cc: Thomas Hellstrom 
Cc: Matthew Auld 

v2: - restore UINT_MAX clamp in i915_sg_segment_size()
- drop PAGE_SIZE check as it will always be >= PAGE_SIZE

Reviewed-by: Christoph Hellwig 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_internal.c | 19 ---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  |  4 ++--
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c  |  2 +-
 drivers/gpu/drm/i915/i915_scatterlist.h  | 16 +++-
 5 files changed, 11 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c 
b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index c698f95af15f..24f37658f1bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -6,7 +6,6 @@
 
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
-#include <linux/swiotlb.h>
 
 #include "i915_drv.h"
 #include "i915_gem.h"
@@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct 
drm_i915_gem_object *obj)
struct scatterlist *sg;
unsigned int sg_page_sizes;
unsigned int npages;
-   int max_order;
+   int max_order = MAX_ORDER;
+   unsigned int max_segment;
gfp_t gfp;
 
-   max_order = MAX_ORDER;
-#ifdef CONFIG_SWIOTLB
-   if (is_swiotlb_active(obj->base.dev->dev)) {
-   unsigned int max_segment;
-
-   max_segment = swiotlb_max_segment();
-   if (max_segment) {
-   max_segment = max_t(unsigned int, max_segment,
-   PAGE_SIZE) >> PAGE_SHIFT;
-   max_order = min(max_order, ilog2(max_segment));
-   }
-   }
-#endif
+   max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT;
+   max_order = min(max_order, ilog2(max_segment));
 
gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
if (IS_I965GM(i915) || IS_I965G(i915)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 4eed3dd90ba8..34b9c76cd8e6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
struct intel_memory_region *mem = obj->mm.region;
struct address_space *mapping = obj->base.filp->f_mapping;
const unsigned long page_count = obj->base.size / PAGE_SIZE;
-   unsigned int max_segment = i915_sg_segment_size();
+   unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
struct sg_table *st;
struct sgt_iter sgt_iter;
struct page *page;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 5a5cf332d8a5..7a828c9c0f6d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device 
*bdev,
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
-   const unsigned int max_segment = i915_sg_segment_size();
+   const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
struct file *filp = i915_tt->filp;
struct sgt_iter sgt_iter;
@@ -568,7 +568,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct 
ttm_tt *ttm)
ret = sg_alloc_table_from_pages_segment(st,
ttm->pages, ttm->num_pages,
0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
-   i915_sg_segment_size(), GFP_KERNEL);
+   i915_sg_segment_size(i915_tt->dev), GFP_KERNEL);
if (ret) {
st->sgl = NULL;
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 094f06b4ce33..dfc35905dba2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct 
drm_i915_gem_object *obj)
 static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)

[PATCH] drm/i915: stop using swiotlb

2022-07-21 Thread Robert Beckett
Calling swiotlb functions directly is nowadays considered harmful. See
https://lore.kernel.org/intel-gfx/20220711082614.ga29...@lst.de/

Replace swiotlb_max_segment() calls with dma_max_mapping_size().
In i915_gem_object_get_pages_internal() no longer consider max_segment
only if CONFIG_SWIOTLB is enabled. There can be other (iommu related)
causes of specific max segment sizes.

Cc: Christoph Hellwig 
Cc: Tvrtko Ursulin 
Cc: Thomas Hellstrom 
Cc: Matthew Auld 

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_internal.c | 20 +---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  |  4 ++--
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c  |  2 +-
 drivers/gpu/drm/i915/i915_scatterlist.h  | 16 
 5 files changed, 9 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c 
b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index c698f95af15f..e1aca378d90f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -6,7 +6,6 @@
 
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
-#include <linux/swiotlb.h>
 
 #include "i915_drv.h"
 #include "i915_gem.h"
@@ -38,22 +37,13 @@ static int i915_gem_object_get_pages_internal(struct 
drm_i915_gem_object *obj)
struct scatterlist *sg;
unsigned int sg_page_sizes;
unsigned int npages;
-   int max_order;
+   int max_order = MAX_ORDER;
+   size_t max_segment;
gfp_t gfp;
 
-   max_order = MAX_ORDER;
-#ifdef CONFIG_SWIOTLB
-   if (is_swiotlb_active(obj->base.dev->dev)) {
-   unsigned int max_segment;
-
-   max_segment = swiotlb_max_segment();
-   if (max_segment) {
-   max_segment = max_t(unsigned int, max_segment,
-   PAGE_SIZE) >> PAGE_SHIFT;
-   max_order = min(max_order, ilog2(max_segment));
-   }
-   }
-#endif
+   max_segment = dma_max_mapping_size(i915->drm.dev);
+   max_segment = max_t(size_t, max_segment, PAGE_SIZE) >> PAGE_SHIFT;
+   max_order = min(max_order, ilog2(max_segment));
 
gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
if (IS_I965GM(i915) || IS_I965G(i915)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 4eed3dd90ba8..b0ec65b7c1da 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
struct intel_memory_region *mem = obj->mm.region;
struct address_space *mapping = obj->base.filp->f_mapping;
const unsigned long page_count = obj->base.size / PAGE_SIZE;
-   unsigned int max_segment = i915_sg_segment_size();
+   unsigned int max_segment = dma_max_mapping_size(i915->drm.dev);
struct sg_table *st;
struct sgt_iter sgt_iter;
struct page *page;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 5a5cf332d8a5..882f046f4d18 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device 
*bdev,
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
-   const unsigned int max_segment = i915_sg_segment_size();
+   const unsigned int max_segment = dma_max_mapping_size(i915->drm.dev);
const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
struct file *filp = i915_tt->filp;
struct sgt_iter sgt_iter;
@@ -568,7 +568,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct 
ttm_tt *ttm)
ret = sg_alloc_table_from_pages_segment(st,
ttm->pages, ttm->num_pages,
0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
-   i915_sg_segment_size(), GFP_KERNEL);
+   dma_max_mapping_size(i915_tt->dev), GFP_KERNEL);
if (ret) {
st->sgl = NULL;
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 094f06b4ce33..8a62a71859e6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct 
drm_i915_gem_object *obj)
 static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 {
const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
-   unsigned int max_segment = i915_sg_segment_size();

Re: sunsetting the remaining swiotlb coupling in DRM

2022-07-21 Thread Robert Beckett

On 18/07/2022 12:36, Tvrtko Ursulin wrote:


Hi,

On 12/07/2022 06:00, Christoph Hellwig wrote:

On Mon, Jul 11, 2022 at 04:31:49PM -0400, Rodrigo Vivi wrote:

On Mon, Jul 11, 2022 at 10:26:14AM +0200, Christoph Hellwig wrote:

Hi i915 and nouveau maintainers,

any chance I could get some help to remove the remaining direct
driver calls into swiotlb, namely swiotlb_max_segment and
is_swiotlb_active.  Either should not matter to a driver as they
should be written to the DMA API.


Hi Christoph,

while we take a look here, could you please share the reasons
behind sunsetting this calls?


Because they are a completely broken layering violation.  A driver has
absolutely no business knowing the dma-mapping implementation.  The DMA
API reports what we think are all the useful constraints (e.g.
dma_max_mapping_size()), and provides useful APIs (e.g.
dma_alloc_noncoherent or dma_alloc_noncontiguous) to allocate pages
that can be mapped without bounce buffering, and drivers should use
the proper API instead of poking into one particular implementation
and restricting it from changing.

swiotlb_max_segment in particular returns a value that isn't actually
correct (a driver can't just use all of swiotlb) AND actually doesn't
work as is in various scenarios that are becoming more common, most
notably hosts with memory encryption schemes that always require bounce
buffering.


All these are either in the internal backend or in the old shmem 
backend. I understand both are soon to be retired or deprecated. I think.


+ Matt & Thomas, and Bob actually as well, as I think the authorities on 
the shmem, TTM and internal backends at the moment. Could you guys please 
have a look at if and how the TTM backend needs to handle this, and what 
the timeline of retirement is, if relevant?


Regards,

Tvrtko


So currently these are used directly in the internal backend and 
indirectly via i915_sg_segment_size() in shmem, ttm and userptr backends.


internal and userptr are being refactored currently (internal is ready 
but lacking review), but the refactoring would just make them use the 
ttm backend which still uses these.


It seems to me like a simple solution would be to just replace 
swiotlb_max_segment() calls with dma_max_mapping_size() as a drop-in 
replacement. This follows the same logic as drm_prime_pages_to_sg(); see 
the sketch below.
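
[For reference, the drm_prime_pages_to_sg() logic referred to above is:
ask the DMA API for the device's mapping limit and feed it to the
scatterlist builder. A minimal self-contained sketch, assuming a
struct device *dev and an already-pinned page array; pages_to_sgt() is a
hypothetical name, while the two library calls are real:

	/* build an sg_table whose segments respect the DMA layer's
	 * mapping limit, mirroring drm_prime_pages_to_sg() */
	static struct sg_table *pages_to_sgt(struct device *dev,
					     struct page **pages,
					     unsigned int nr_pages)
	{
		unsigned int max_segment = UINT_MAX;
		struct sg_table *sgt;
		int err;

		if (dev)
			max_segment = min_t(size_t, UINT_MAX,
					    dma_max_mapping_size(dev));

		sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
		if (!sgt)
			return ERR_PTR(-ENOMEM);

		err = sg_alloc_table_from_pages_segment(sgt, pages, nr_pages, 0,
							(unsigned long)nr_pages << PAGE_SHIFT,
							max_segment, GFP_KERNEL);
		if (err) {
			kfree(sgt);
			return ERR_PTR(err);
		}

		return sgt;
	}
]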


[PATCH v11 05/10] drm/i915: sanitize mem_flags for stolen buffers

2022-07-13 Thread Robert Beckett
Stolen regions are not page-backed and are not considered iomem.
Prevent flags from indicating otherwise.
This correctly prevents the driver from attempting to directly map
stolen buffers.

See i915_gem_object_has_struct_page() and i915_gem_object_has_iomem()
usage for where it would break otherwise.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 2cc2c08bd50f..18d574ac167f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -14,6 +14,7 @@
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_ttm.h"
 #include "gem/i915_gem_ttm_move.h"
+#include "gem/i915_gem_stolen.h"
 
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
@@ -130,8 +131,9 @@ void i915_ttm_adjust_gem_after_move(struct 
drm_i915_gem_object *obj)
 
obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
 
-   obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
-   I915_BO_FLAG_STRUCT_PAGE;
+   if (!i915_gem_object_is_stolen(obj))
+   obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
+   I915_BO_FLAG_STRUCT_PAGE;
 
if (!obj->ttm.cache_level_override) {
cache_level = i915_ttm_cache_level(to_i915(bo->base.dev),
-- 
2.25.1



[PATCH v11 08/10] drm/i915/selftest: don't attempt engine reset of guc submission engines

2022-07-13 Thread Robert Beckett
igt_reset_engines_stolen tries to reset engines without checking if it
is possible.
Engines using GuC submission cannot be reset from the host.

In this scenario, the reset exits early, then on the next iteration of
the per-engine loop, the async teardown of the spinner request
context's ring occurs while the next engine is under test.

This is seen as stolen memory corruption, as the ring buffer was busy
initially but free during the confirmation check, and had been poisoned
during cleanup.

Fix this by not testing engines that use GuC submission.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gt/selftest_reset.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c 
b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 37c38bdd5f47..55f3b34e5f6e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -194,6 +194,8 @@ static int igt_reset_engines_stolen(void *arg)
return 0;
 
for_each_engine(engine, gt, id) {
+   if (intel_engine_uses_guc(engine))
+   continue;
err = __igt_reset_stolen(gt, engine->mask, engine->name);
if (err)
return err;
-- 
2.25.1



[PATCH v11 07/10] drm/i915/ttm: add buffer pin on alloc flag

2022-07-13 Thread Robert Beckett
For situations where allocations need to fail on alloc instead of
delayed get_pages, add a new alloc flag to pin the ttm bo.
This makes sure that the resource has been allocated during buffer
creation, allowing it to fail with an error if the placement is
exhausted.
This allows existing fallback options for stolen backend allocation like
create_ring_vma to work as expected.

Signed-off-by: Robert Beckett 
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 13 ++
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   | 25 ++-
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 14937cf1daaa..283a4b84971a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -325,17 +325,20 @@ struct drm_i915_gem_object {
  * dealing with userspace objects the CPU fault handler is free to ignore this.
  */
 #define I915_BO_ALLOC_GPU_ONLY   BIT(6)
+/* object should be pinned in destination region from allocation */
+#define I915_BO_ALLOC_PINNED BIT(7)
 #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
 I915_BO_ALLOC_VOLATILE | \
 I915_BO_ALLOC_CPU_CLEAR | \
 I915_BO_ALLOC_USER | \
 I915_BO_ALLOC_PM_VOLATILE | \
 I915_BO_ALLOC_PM_EARLY | \
-I915_BO_ALLOC_GPU_ONLY)
-#define I915_BO_READONLY  BIT(7)
-#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */
-#define I915_BO_PROTECTED BIT(9)
-#define I915_BO_WAS_BOUND_BIT 10
+I915_BO_ALLOC_GPU_ONLY | \
+I915_BO_ALLOC_PINNED)
+#define I915_BO_READONLY  BIT(8)
+#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */
+#define I915_BO_PROTECTED BIT(10)
+#define I915_BO_WAS_BOUND_BIT 11
/**
 * @mem_flags - Mutable placement-related flags
 *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index b6c3fc25d9d1..d34ebe9fcff8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1011,6 +1011,13 @@ static void i915_ttm_delayed_free(struct 
drm_i915_gem_object *obj)
 {
GEM_BUG_ON(!obj->ttm.created);
 
+   /* stolen objects are pinned for lifetime. Unpin before putting */
+   if (obj->flags & I915_BO_ALLOC_PINNED) {
+   ttm_bo_reserve(i915_gem_to_ttm(obj), true, false, NULL);
+   ttm_bo_unpin(i915_gem_to_ttm(obj));
+   ttm_bo_unreserve(i915_gem_to_ttm(obj));
+   }
+
ttm_bo_put(i915_gem_to_ttm(obj));
 }
 
@@ -1206,6 +1213,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
.no_wait_gpu = false,
};
enum ttm_bo_type bo_type;
+   struct ttm_place _place;
+   struct ttm_placement _placement;
+   struct ttm_placement *placement;
int ret;
 
 drm_gem_private_object_init(&i915->drm, &obj->base, size);
@@ -1235,6 +1245,17 @@ int __i915_gem_ttm_object_init(struct 
intel_memory_region *mem,
 */
i915_gem_object_make_unshrinkable(obj);
 
+   if (obj->flags & I915_BO_ALLOC_PINNED) {
+   i915_ttm_place_from_region(mem, &_place, obj->bo_offset,
+  obj->base.size, obj->flags);
+   _placement.num_placement = 1;
+   _placement.placement = &_place;
+   _placement.num_busy_placement = 0;
+   _placement.busy_placement = NULL;
+   placement = &_placement;
+   } else {
+   placement = &i915_sys_placement;
+   }
/*
 * If this function fails, it will call the destructor, but
 * our caller still owns the object. So no freeing in the
@@ -1243,7 +1264,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
 * until successful initialization.
 */
 ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), bo_type,
-  &i915_sys_placement, page_size >> PAGE_SHIFT,
+  placement, page_size >> PAGE_SHIFT,
   &ctx, NULL, NULL, i915_ttm_bo_destroy);
if (ret)
return i915_ttm_err_to_gem(ret);
@@ -1254,6 +1275,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
i915_ttm_adjust_domains_after_move(obj);
i915_ttm_adjust_gem_after_move(obj);
obj->ttm.cache_level_override = false;
+   if (obj->flags & I915_BO_ALLOC_PINNED)
+   ttm_bo_pin(i915_gem_to_ttm(obj));
i915_gem_object_unlock(obj);
 
return 0;
-- 
2.25.1

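[A usage sketch for the new flag: the caller below is hypothetical, but
i915_gem_object_create_region() and i915->mm.stolen_region come from
elsewhere in this series. With I915_BO_ALLOC_PINNED the placement is
made, and can fail, at creation time:

	/* hypothetical caller: force placement at creation so stolen
	 * exhaustion is reported here rather than at first get_pages */
	obj = i915_gem_object_create_region(i915->mm.stolen_region, size, 0,
					    I915_BO_ALLOC_CONTIGUOUS |
					    I915_BO_ALLOC_PINNED);
	if (IS_ERR(obj))
		return PTR_ERR(obj); /* legacy fallback paths can run here */
]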


[PATCH v11 09/10] drm/i915/selftest: maintain context ref during reset test

2022-07-13 Thread Robert Beckett
Commit "bcb9aa45d5a0 Revert "drm/i915: Hold reference to intel_context over 
life of i915_request""
Stopped requests from maintaining a ref on the context.
This caused the contexts to be freed, releasing stolen memory while
under test, leading to false positive detection of stolen corruption.
Fix this by maintaining a ref on the contexts until testing is complete.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gt/selftest_reset.c | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c 
b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 55f3b34e5f6e..ba536e8a2e32 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -6,6 +6,7 @@
 #include <linux/crc32.h>
 
 #include "gem/i915_gem_stolen.h"
+#include "gt/intel_gt.h"
 
 #include "i915_memcpy.h"
 #include "i915_selftest.h"
@@ -26,6 +27,7 @@ __igt_reset_stolen(struct intel_gt *gt,
intel_wakeref_t wakeref;
enum intel_engine_id id;
struct igt_spinner spin;
+   struct intel_context *contexts[I915_NUM_ENGINES] = {0};
long max, count;
void *tmp;
u32 *crc;
@@ -71,12 +73,12 @@ __igt_reset_stolen(struct intel_gt *gt,
goto err_spin;
}
 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
-   intel_context_put(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_spin;
}
i915_request_add(rq);
+   contexts[id] = ce;
}
 
for (page = 0; page < num_pages; page++) {
@@ -165,8 +167,21 @@ __igt_reset_stolen(struct intel_gt *gt,
err = -EINVAL;
}
 
+   err = intel_gt_wait_for_idle(gt, HZ);
+   if (err < 0) {
+   pr_err("%s failed to wait for gt idle: %d\n", msg, err);
+   goto err_spin;
+   }
+
+   err = 0;
+
 err_spin:
 igt_spinner_fini(&spin);
+   for (id = 0; id < I915_NUM_ENGINES; id++) {
+   if (!contexts[id])
+   continue;
+   intel_context_put(contexts[id]);
+   }
 
 err_lock:
intel_runtime_pm_put(gt->uncore->rpm, wakeref);
-- 
2.25.1



[PATCH v11 10/10] drm/i915: stolen memory use ttm backend

2022-07-13 Thread Robert Beckett
Refactor the stolen memory region to use ttm.
This necessitates using ttm resources to track reserved stolen regions
instead of drm_mm_nodes.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/display/intel_fbc.c  |  78 ++--
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 425 ++
 drivers/gpu/drm/i915/gem/i915_gem_stolen.h|  21 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |   3 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h   |   7 +
 drivers/gpu/drm/i915/gt/intel_rc6.c   |   4 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  16 +-
 drivers/gpu/drm/i915/i915_debugfs.c   |   7 +-
 drivers/gpu/drm/i915/i915_drv.h   |   6 +-
 drivers/gpu/drm/i915/intel_region_ttm.c   |  42 +-
 drivers/gpu/drm/i915/intel_region_ttm.h   |   8 +-
 drivers/gpu/drm/i915/selftests/mock_region.c  |  12 +-
 13 files changed, 280 insertions(+), 351 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c 
b/drivers/gpu/drm/i915/display/intel_fbc.c
index 16537830ccf0..7dd42b11f4b8 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -38,6 +38,7 @@
  * forcibly disable it to allow proper screen updates.
  */
 
+#include "gem/i915_gem_stolen.h"
 #include 
 
 #include 
@@ -52,6 +53,7 @@
 #include "intel_display_types.h"
 #include "intel_fbc.h"
 #include "intel_frontbuffer.h"
+#include "gem/i915_gem_region.h"
 
 #define for_each_fbc_id(__dev_priv, __fbc_id) \
for ((__fbc_id) = INTEL_FBC_A; (__fbc_id) < I915_MAX_FBCS; 
(__fbc_id)++) \
@@ -93,8 +95,8 @@ struct intel_fbc {
struct mutex lock;
unsigned int busy_bits;
 
-   struct drm_mm_node compressed_fb;
-   struct drm_mm_node compressed_llb;
+   struct ttm_resource *compressed_fb;
+   struct ttm_resource *compressed_llb;
 
enum intel_fbc_id id;
 
@@ -332,16 +334,20 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc)
 static void i8xx_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
+   u64 llb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_llb);
 
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   GEM_BUG_ON(llb_offset == I915_BO_INVALID_OFFSET);
GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start,
-fbc->compressed_fb.start, U32_MAX));
+fb_offset, U32_MAX));
GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start,
-fbc->compressed_llb.start, U32_MAX));
+llb_offset, U32_MAX));
 
intel_de_write(i915, FBC_CFB_BASE,
-  i915->dsm.start + fbc->compressed_fb.start);
+  i915->dsm.start + fb_offset);
intel_de_write(i915, FBC_LL_BASE,
-  i915->dsm.start + fbc->compressed_llb.start);
+  i915->dsm.start + llb_offset);
 }
 
 static const struct intel_fbc_funcs i8xx_fbc_funcs = {
@@ -449,8 +455,10 @@ static bool g4x_fbc_is_compressing(struct intel_fbc *fbc)
 static void g4x_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
 
-   intel_de_write(i915, DPFC_CB_BASE, fbc->compressed_fb.start);
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   intel_de_write(i915, DPFC_CB_BASE, fb_offset);
 }
 
 static const struct intel_fbc_funcs g4x_fbc_funcs = {
@@ -500,8 +508,10 @@ static bool ilk_fbc_is_compressing(struct intel_fbc *fbc)
 static void ilk_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
 
-   intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), 
fbc->compressed_fb.start);
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fb_offset);
 }
 
 static const struct intel_fbc_funcs ilk_fbc_funcs = {
@@ -745,21 +755,24 @@ static int find_compression_limit(struct intel_fbc *fbc,
 {
struct drm_i915_private *i915 = fbc->i915;
u64 end = intel_fbc_stolen_end(i915);
-   int ret, limit = min_limit;
+   int limit = min_limit;
+   struct ttm_resource *res;
 
size /= limit;
 
/* Try to over-allocate to reduce reallocations and fragmentation. */
-   ret = i915_gem_stolen_insert_node_in_range(i915, &fbc->compressed_fb,
-  size <<= 1, 4096, 0, end);
-   if (ret == 0)
+   res = i915_gem_stolen_reserve_range(i915, size <<= 1, 0, end)
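
[The archive cuts the message off here. From the fbc hunks above one can
infer the rough shape of the new stolen reservation helpers; the sketch
below is hypothetical -- the exact signature and the page-to-byte
conversion are assumptions, not the verbatim patch:

	/* assumed shape of the offset helper used in the fbc hunks */
	u64 i915_gem_stolen_reserve_offset(struct ttm_resource *res)
	{
		if (!res)
			return I915_BO_INVALID_OFFSET;
		/* the ttm range manager tracks resource starts in pages */
		return PFN_PHYS(res->start);
	}
]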

[PATCH v11 04/10] drm/i915: instantiate ttm range manager for stolen memory

2022-07-13 Thread Robert Beckett
Prepare for the ttm-based stolen region by using the ttm range manager
as the resource manager for the stolen region.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c |  6 ++--
 drivers/gpu/drm/i915/intel_region_ttm.c  | 31 +++-
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index a949594237d9..2cc2c08bd50f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -66,11 +66,13 @@ i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
 
/* There's some room for optimization here... */
-   GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
-  ttm_mem_type < I915_PL_LMEM0);
+   GEM_BUG_ON(ttm_mem_type == I915_PL_GGTT);
+
if (ttm_mem_type == I915_PL_SYSTEM)
return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
  0);
+   if (ttm_mem_type == I915_PL_STOLEN)
+   return i915->mm.stolen_region;
 
return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
  ttm_mem_type - I915_PL_LMEM0);
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index 642cd1587976..caac110a0a2c 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -54,7 +54,7 @@ void intel_region_ttm_device_fini(struct drm_i915_private 
*dev_priv)
 
 /*
  * Map the i915 memory regions to TTM memory types. We use the
- * driver-private types for now, reserving TTM_PL_VRAM for stolen
+ * driver-private types for now, reserving I915_PL_STOLEN for stolen
  * memory and TTM_PL_TT for GGTT use if decided to implement this.
  */
 int intel_region_to_ttm_type(const struct intel_memory_region *mem)
@@ -63,11 +63,17 @@ int intel_region_to_ttm_type(const struct 
intel_memory_region *mem)
 
GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL &&
   mem->type != INTEL_MEMORY_MOCK &&
-  mem->type != INTEL_MEMORY_SYSTEM);
+  mem->type != INTEL_MEMORY_SYSTEM &&
+  mem->type != INTEL_MEMORY_STOLEN_SYSTEM &&
+  mem->type != INTEL_MEMORY_STOLEN_LOCAL);
 
if (mem->type == INTEL_MEMORY_SYSTEM)
return TTM_PL_SYSTEM;
 
+   if (mem->type == INTEL_MEMORY_STOLEN_SYSTEM ||
+   mem->type == INTEL_MEMORY_STOLEN_LOCAL)
+   return I915_PL_STOLEN;
+
type = mem->instance + TTM_PL_PRIV;
GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES);
 
@@ -91,10 +97,16 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
int mem_type = intel_region_to_ttm_type(mem);
int ret;
 
-   ret = i915_ttm_buddy_man_init(bdev, mem_type, false,
- resource_size(&mem->region),
- mem->io_size,
- mem->min_page_size, PAGE_SIZE);
+   if (mem_type == I915_PL_STOLEN) {
+   ret = ttm_range_man_init(bdev, mem_type, false,
+resource_size(&mem->region) >> PAGE_SHIFT);
+   mem->is_range_manager = true;
+   } else {
+   ret = i915_ttm_buddy_man_init(bdev, mem_type, false,
+ resource_size(&mem->region),
+ mem->io_size,
+ mem->min_page_size, PAGE_SIZE);
+   }
if (ret)
return ret;
 
@@ -114,6 +126,7 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
 int intel_region_ttm_fini(struct intel_memory_region *mem)
 {
struct ttm_resource_manager *man = mem->region_private;
+   int mem_type = intel_region_to_ttm_type(mem);
int ret = -EBUSY;
int count;
 
@@ -144,8 +157,10 @@ int intel_region_ttm_fini(struct intel_memory_region *mem)
if (ret || !man)
return ret;
 
-   ret = i915_ttm_buddy_man_fini(&mem->i915->bdev,
- intel_region_to_ttm_type(mem));
+   if (mem_type == I915_PL_STOLEN)
+   ret = ttm_range_man_fini(&mem->i915->bdev, mem_type);
+   else
+   ret = i915_ttm_buddy_man_fini(&mem->i915->bdev, mem_type);
GEM_WARN_ON(ret);
mem->region_private = NULL;
 
-- 
2.25.1



[PATCH v11 03/10] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level

2022-07-13 Thread Robert Beckett
By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP.
This is divergent from existing backends code which only considers
HAS_LLC.
Testing shows that trusting snooping on gen5-, and on bsw via ggtt
mappings, is unreliable, so limit it to DGFX for now and maintain the
previous behaviour.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 042c2237e287..a949594237d9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -52,7 +52,9 @@ static enum i915_cache_level
 i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
 struct ttm_tt *ttm)
 {
-   return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
+   bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915);
+
+   return ((HAS_LLC(i915) || can_snoop) &&
!i915_ttm_gtt_binds_lmem(res) &&
ttm->caching == ttm_cached) ? I915_CACHE_LLC :
I915_CACHE_NONE;
-- 
2.25.1



[PATCH v11 06/10] drm/i915: ttm move/clear logic fix

2022-07-13 Thread Robert Beckett
TTM-managed buffers start off with system resource definitions and ttm_tt
tracking structures allocated (though unpopulated).
Currently this prevents clearing of buffers on their first move to the
desired placements.

The desired behaviour is to clear only user-allocated buffers and any
kernel buffers that specifically request it.
Make the logic match the desired behaviour.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 22 +++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 18d574ac167f..6671345b2abe 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -3,6 +3,7 @@
  * Copyright © 2021 Intel Corporation
  */
 
+#include "drm/ttm/ttm_tt.h"
 #include 
 
 #include "i915_deps.h"
@@ -546,6 +547,25 @@ __i915_ttm_move(struct ttm_buffer_object *bo,
return fence;
 }
 
+static bool
+allow_clear(struct drm_i915_gem_object *obj, struct ttm_tt *ttm, struct 
ttm_resource *dst_mem)
+{
+   /* never clear stolen */
+   if (dst_mem->mem_type == I915_PL_STOLEN)
+   return false;
+   /*
+* we want to clear user buffers and any kernel buffers
+* that specifically request clearing.
+*/
+   if (obj->flags & I915_BO_ALLOC_USER)
+   return true;
+
+   if (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)
+   return true;
+
+   return false;
+}
+
 /**
  * i915_ttm_move - The TTM move callback used by i915.
  * @bo: The buffer object.
@@ -596,7 +616,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
return PTR_ERR(dst_rsgt);
 
clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || 
!ttm_tt_is_populated(ttm));
-   if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
+   if (!clear || allow_clear(obj, ttm, dst_mem)) {
struct i915_deps deps;
 
 i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
-- 
2.25.1



[PATCH v11 02/10] drm/i915: limit ttm to dma32 for i965G[M]

2022-07-13 Thread Robert Beckett
i965G[M] cannot relocate objects above 4GiB.
Ensure ttm uses dma32 on these systems.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index 6873808a7015..642cd1587976 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -32,10 +32,15 @@
 int intel_region_ttm_device_init(struct drm_i915_private *dev_priv)
 {
 struct drm_device *drm = &dev_priv->drm;
+   bool use_dma32 = false;
+
+   /* i965g[m] cannot relocate objects above 4GiB. */
+   if (IS_I965GM(dev_priv) || IS_I965G(dev_priv))
+   use_dma32 = true;
 
 return ttm_device_init(&dev_priv->bdev, i915_ttm_driver(),
   drm->dev, drm->anon_inode->i_mapping,
-  drm->vma_offset_manager, false, false);
+  drm->vma_offset_manager, false, use_dma32);
 }
 
 /**
-- 
2.25.1



[PATCH v11 01/10] drm/i915/ttm: dont trample cache_level overrides during ttm move

2022-07-13 Thread Robert Beckett
Various places within the driver override the default chosen cache_level.
Before ttm, these overrides were permanent until explicitly changed again
or for the lifetime of the buffer.

TTM movement code came along and decided that it could make that
decision at move time, which is usually well after object creation, so
it overrode the cache_level decision and reverted it back to the
default.

Add logic to indicate whether the caching mode has been set by anything
other than the move logic. If so, assume that the code that overrode the
defaults knows best and keep it.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c   | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 9 ++---
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index ccec4055fde3..966ac2d778d5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct 
drm_i915_gem_object *obj,
struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
obj->cache_level = cache_level;
+   obj->ttm.cache_level_override = true;
 
if (cache_level != I915_CACHE_NONE)
obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 5cf36a130061..14937cf1daaa 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -623,6 +623,7 @@ struct drm_i915_gem_object {
struct i915_gem_object_page_iter get_io_page;
struct drm_i915_gem_object *backup;
bool created:1;
+   bool cache_level_override:1;
} ttm;
 
/*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 053b0022ddd0..b6c3fc25d9d1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1253,6 +1253,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
i915_gem_object_init_memory_region(obj, mem);
i915_ttm_adjust_domains_after_move(obj);
i915_ttm_adjust_gem_after_move(obj);
+   obj->ttm.cache_level_override = false;
i915_gem_object_unlock(obj);
 
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 9a7e50534b84..042c2237e287 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -129,9 +129,12 @@ void i915_ttm_adjust_gem_after_move(struct 
drm_i915_gem_object *obj)
obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
I915_BO_FLAG_STRUCT_PAGE;
 
-   cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
-  bo->ttm);
-   i915_gem_object_set_cache_coherency(obj, cache_level);
+   if (!obj->ttm.cache_level_override) {
+   cache_level = i915_ttm_cache_level(to_i915(bo->base.dev),
+  bo->resource, bo->ttm);
+   i915_gem_object_set_cache_coherency(obj, cache_level);
+   obj->ttm.cache_level_override = false;
+   }
 }
 
 /**
-- 
2.25.1



[PATCH v11 00/10] drm/i915: ttm for stolen

2022-07-13 Thread Robert Beckett
This series refactors i915's stolen memory region to use ttm.

v2: handle disabled stolen similar to legacy version.
relying on ttm to fail allocs works fine, but it is noisy in dmesg and
causes dmesg-warning regressions in testing.

v3: rebase to latest drm-tip.
fix v2 code refactor which could leave a buffer pinned.
locally passes fftl again now.

v4: - Allow memory region creators to do allocation. Allows stolen region
  to track its own reservations.
- Pre-reserve first page of stolen mem (add back 
WaSkipStolenMemoryFirstPage:bdw+)
- Improve commit description for "drm/i915: sanitize mem_flags for
  stolen buffers"
- replace i915_gem_object_pin_pages_unlocked() call with manual locking 
and pinning.
  this avoids ww ctx class reuse during context creation -> ring vma 
obj alloc.

v5: - detect both types of stolen as stolen buffers in
  "drm/i915: sanitize mem_flags for stolen buffers"
- in stolen_object_init limit page size to mem region minimum.
  The range allocator expects the page_size to define the
  alignment

v6: - Share first 4 patches from ttm for internal series as generic
  i915 ttm fixes
- Drop patch 4 from v5. We don't need separate object ops just
  to satisfy test interfaces. The tests have now been fixed via
  checking whether the memory region is private to decide
  whether to mmap
- Add new buffer pin alloc flag to allow creation of buffers in
  their final ttm placement instead of deferring until
  get_pages. This fixes legacy fallback paths for buffer
  allocations during stolen memory pressure.

v7: - fix mock_region_get_pages() to correctly handle I915_BO_INVALID_OFFSET

v8: - Reserve I915_GEM_STOLEN_BIAS area from stolen

v9: - drop patch 8 "drm/i915: allow memory region creators to alloc and 
free the region"
  store bias reservation in drm_i915_private instead.
- Restrict reset selftest to only test !GuC engines.
  Resetting individual GuC engines from host is not supported
- Wait for outstanding requests in reset selftest
  This prevents previous engine test context cleanup appearing
  as false positive stolen corruption check

v10:- Fix waiting on requests in the early error path during reset selftest
  If a single request fails to complete, the others would not be
  put, resulting in leaks. Make sure all requests are put before
  test exit.

v11:- rebased to latest drm-tip
- commit "bcb9aa45d5a0 Revert "drm/i915: Hold reference to 
intel_context over life of i915_request""
  broke the selftest@live@reset test, causing the context ringbuffer to
  be freed during testing. Fixed via maintaining a context ref
  during testing.
- drop patch 4 "drm/i915/gem: selftest should not attempt mmap of 
private regions"
  it is no longer needed.

Robert Beckett (10):
  drm/i915/ttm: dont trample cache_level overrides during ttm move
  drm/i915: limit ttm to dma32 for i965G[M]
  drm/i915/ttm: only trust snooping for dgfx when deciding default
cache_level
  drm/i915: instantiate ttm range manager for stolen memory
  drm/i915: sanitize mem_flags for stolen buffers
  drm/i915: ttm move/clear logic fix
  drm/i915/ttm: add buffer pin on alloc flag
  drm/i915/selftest: don't attempt engine reset of guc submission
engines
  drm/i915/selftest: maintain context ref during reset test
  drm/i915: stolen memory use ttm backend

 drivers/gpu/drm/i915/display/intel_fbc.c  |  78 ++--
 drivers/gpu/drm/i915/gem/i915_gem_object.c|   1 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  16 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 425 ++
 drivers/gpu/drm/i915/gem/i915_gem_stolen.h|  21 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |  29 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h   |   7 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  47 +-
 drivers/gpu/drm/i915/gt/intel_rc6.c   |   4 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  35 +-
 drivers/gpu/drm/i915/i915_debugfs.c   |   7 +-
 drivers/gpu/drm/i915/i915_drv.h   |   6 +-
 drivers/gpu/drm/i915/intel_region_ttm.c   |  80 +++-
 drivers/gpu/drm/i915/intel_region_ttm.h   |   8 +-
 drivers/gpu/drm/i915/selftests/mock_region.c  |  12 +-
 15 files changed, 400 insertions(+), 376 deletions(-)

-- 
2.25.1



[PATCH v4 8/8] drm/i915: internal buffers use ttm backend

2022-07-13 Thread Robert Beckett
Create a kernel-only internal memory region that uses the ttm pool
allocator to allocate volatile system pages.
Refactor the internal buffer backend to simply allocate from this new
region.
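
The archived diff below is truncated before the refactored allocator
appears. As a rough sketch of the shape described above (the
INTEL_REGION_INTERNAL id and the flag choice are assumptions inferred
from this description, not taken from the patch itself):

/* Illustrative sketch only; region id and flags are assumed. */
struct drm_i915_gem_object *
i915_gem_object_create_internal(struct drm_i915_private *i915,
				phys_addr_t size)
{
	struct intel_memory_region *mr = i915->mm.regions[INTEL_REGION_INTERNAL];

	/* volatile: backing pages may be reaped whenever the object is unpinned */
	return i915_gem_object_create_region(mr, size, 0, I915_BO_ALLOC_VOLATILE);
}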

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_internal.c  | 187 +-
 drivers/gpu/drm/i915/gem/i915_gem_internal.h  |   5 -
 drivers/gpu/drm/i915/i915_pci.c   |   4 +-
 drivers/gpu/drm/i915/intel_memory_region.c|   8 +-
 drivers/gpu/drm/i915/intel_memory_region.h|   2 +
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   2 +-
 6 files changed, 17 insertions(+), 191 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c 
b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index c698f95af15f..a83751867ac7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -4,188 +4,9 @@
  * Copyright © 2014-2016 Intel Corporation
  */
 
-#include <linux/scatterlist.h>
-#include <linux/slab.h>
-#include <linux/swiotlb.h>
-
+#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_region.h"
 #include "i915_drv.h"
-#include "i915_gem.h"
-#include "i915_gem_internal.h"
-#include "i915_gem_object.h"
-#include "i915_scatterlist.h"
-#include "i915_utils.h"
-
-#define QUIET (__GFP_NORETRY | __GFP_NOWARN)
-#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)
-
-static void internal_free_pages(struct sg_table *st)
-{
-   struct scatterlist *sg;
-
-   for (sg = st->sgl; sg; sg = __sg_next(sg)) {
-   if (sg_page(sg))
-   __free_pages(sg_page(sg), get_order(sg->length));
-   }
-
-   sg_free_table(st);
-   kfree(st);
-}
-
-static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
-{
-   struct drm_i915_private *i915 = to_i915(obj->base.dev);
-   struct sg_table *st;
-   struct scatterlist *sg;
-   unsigned int sg_page_sizes;
-   unsigned int npages;
-   int max_order;
-   gfp_t gfp;
-
-   max_order = MAX_ORDER;
-#ifdef CONFIG_SWIOTLB
-   if (is_swiotlb_active(obj->base.dev->dev)) {
-   unsigned int max_segment;
-
-   max_segment = swiotlb_max_segment();
-   if (max_segment) {
-   max_segment = max_t(unsigned int, max_segment,
-   PAGE_SIZE) >> PAGE_SHIFT;
-   max_order = min(max_order, ilog2(max_segment));
-   }
-   }
-#endif
-
-   gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
-   if (IS_I965GM(i915) || IS_I965G(i915)) {
-   /* 965gm cannot relocate objects above 4GiB. */
-   gfp &= ~__GFP_HIGHMEM;
-   gfp |= __GFP_DMA32;
-   }
-
-create_st:
-   st = kmalloc(sizeof(*st), GFP_KERNEL);
-   if (!st)
-   return -ENOMEM;
-
-   npages = obj->base.size / PAGE_SIZE;
-   if (sg_alloc_table(st, npages, GFP_KERNEL)) {
-   kfree(st);
-   return -ENOMEM;
-   }
-
-   sg = st->sgl;
-   st->nents = 0;
-   sg_page_sizes = 0;
-
-   do {
-   int order = min(fls(npages) - 1, max_order);
-   struct page *page;
-
-   do {
-   page = alloc_pages(gfp | (order ? QUIET : MAYFAIL),
-  order);
-   if (page)
-   break;
-   if (!order--)
-   goto err;
-
-   /* Limit subsequent allocations as well */
-   max_order = order;
-   } while (1);
-
-   sg_set_page(sg, page, PAGE_SIZE << order, 0);
-   sg_page_sizes |= PAGE_SIZE << order;
-   st->nents++;
-
-   npages -= 1 << order;
-   if (!npages) {
-   sg_mark_end(sg);
-   break;
-   }
-
-   sg = __sg_next(sg);
-   } while (1);
-
-   if (i915_gem_gtt_prepare_pages(obj, st)) {
-   /* Failed to dma-map try again with single page sg segments */
-   if (get_order(st->sgl->length)) {
-   internal_free_pages(st);
-   max_order = 0;
-   goto create_st;
-   }
-   goto err;
-   }
-
-   __i915_gem_object_set_pages(obj, st, sg_page_sizes);
-
-   return 0;
-
-err:
-   sg_set_page(sg, NULL, 0, 0);
-   sg_mark_end(sg);
-   internal_free_pages(st);
-
-   return -ENOMEM;
-}
-
-static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
-  struct sg_table *pages)
-{
-   i915_gem_gtt_finish_pages(obj, pages);
-   internal_free_pages(pages);
-
-   obj->mm.dirty = false;
-
-   __start_cpu_write(obj

[PATCH v4 5/8] drm/i915: setup ggtt scratch page after memory regions

2022-07-13 Thread Robert Beckett
Reorder scratch page allocation so that memory regions are available
to allocate the buffers

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 20 ++--
 drivers/gpu/drm/i915/gt/intel_gtt.h  |  1 +
 drivers/gpu/drm/i915/i915_driver.c   | 16 ++--
 3 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 15a915bb4088..c4ad03e53236 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -866,8 +866,6 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 
size)
struct drm_i915_private *i915 = ggtt->vm.i915;
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
phys_addr_t phys_addr;
-   u32 pte_flags;
-   int ret;
 
GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915));
phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915);
@@ -889,6 +887,24 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 
size)
}
 
kref_init(&ggtt->vm.resv_ref);
+
+   return 0;
+}
+
+/**
+ * i915_ggtt_setup_scratch_page - setup ggtt scratch page
+ * @i915: i915 device
+ */
+int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915)
+{
+   struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+   u32 pte_flags;
+   int ret;
+
+   /* gen5- scratch setup currently happens in @intel_gtt_init */
+   if (GRAPHICS_VER(i915) <= 5)
+   return 0;
+
ret = setup_scratch_page(&ggtt->vm);
if (ret) {
drm_err(&i915->drm, "Scratch setup failed\n");
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h 
b/drivers/gpu/drm/i915/gt/intel_gtt.h
index e639434e97fd..4ebdf70b5273 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -579,6 +579,7 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm,
   struct i915_vma_resource *vma_res);
 
 int i915_ggtt_probe_hw(struct drm_i915_private *i915);
+int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915);
 int i915_ggtt_init_hw(struct drm_i915_private *i915);
 int i915_ggtt_enable_hw(struct drm_i915_private *i915);
 void i915_ggtt_enable_guc(struct i915_ggtt *ggtt);
diff --git a/drivers/gpu/drm/i915/i915_driver.c 
b/drivers/gpu/drm/i915/i915_driver.c
index deb8a8b76965..fa0956840fcc 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -69,6 +69,7 @@
 #include "gem/i915_gem_mman.h"
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gtt.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_rc6.h"
 
@@ -609,12 +610,16 @@ static int i915_driver_hw_probe(struct drm_i915_private 
*dev_priv)
 
ret = intel_gt_tiles_init(dev_priv);
if (ret)
-   goto err_mem_regions;
+   goto err_ggtt;
+
+   ret = i915_ggtt_setup_scratch_page(dev_priv);
+   if (ret)
+   goto err_ggtt;
 
ret = i915_ggtt_enable_hw(dev_priv);
if (ret) {
drm_err(&dev_priv->drm, "failed to enable GGTT\n");
-   goto err_mem_regions;
+   goto err_ggtt;
}
 
pci_set_master(pdev);
@@ -675,11 +680,10 @@ static int i915_driver_hw_probe(struct drm_i915_private 
*dev_priv)
 err_msi:
if (pdev->msi_enabled)
pci_disable_msi(pdev);
-err_mem_regions:
-   intel_memory_regions_driver_release(dev_priv);
 err_ggtt:
i915_ggtt_driver_release(dev_priv);
i915_gem_drain_freed_objects(dev_priv);
+   intel_memory_regions_driver_release(dev_priv);
i915_ggtt_driver_late_release(dev_priv);
 err_perf:
i915_perf_fini(dev_priv);
@@ -928,9 +932,9 @@ int i915_driver_probe(struct pci_dev *pdev, const struct 
pci_device_id *ent)
intel_modeset_driver_remove_nogem(i915);
 out_cleanup_hw:
i915_driver_hw_remove(i915);
-   intel_memory_regions_driver_release(i915);
i915_ggtt_driver_release(i915);
i915_gem_drain_freed_objects(i915);
+   intel_memory_regions_driver_release(i915);
i915_ggtt_driver_late_release(i915);
 out_cleanup_mmio:
i915_driver_mmio_release(i915);
@@ -987,9 +991,9 @@ static void i915_driver_release(struct drm_device *dev)
 
i915_gem_driver_release(dev_priv);
 
-   intel_memory_regions_driver_release(dev_priv);
i915_ggtt_driver_release(dev_priv);
i915_gem_drain_freed_objects(dev_priv);
+   intel_memory_regions_driver_release(dev_priv);
i915_ggtt_driver_late_release(dev_priv);
 
i915_driver_mmio_release(dev_priv);
-- 
2.25.1



[PATCH v4 4/8] drm/i915: add gen6 ppgtt dummy creation function

2022-07-13 Thread Robert Beckett
Internal gem objects will soon just be volatile system memory region
objects.
To enable this, create a separate dummy object creation function
for gen6 ppgtt. The object only exists as a fake object pointing to ggtt
and gains no benefit in going via the internal backend.
Instead, create a dummy gem object and avoid having to maintain a custom
ops api in the internal backend, which makes later refactoring easier.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 43 ++--
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index 1bb766c79dcb..f3b660cfeb7f 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -372,6 +372,45 @@ static const struct drm_i915_gem_object_ops 
pd_dummy_obj_ops = {
.put_pages = pd_dummy_obj_put_pages,
 };
 
+static struct drm_i915_gem_object *
+i915_gem_object_create_dummy(struct drm_i915_private *i915, phys_addr_t size)
+{
+   static struct lock_class_key lock_class;
+   struct drm_i915_gem_object *obj;
+   unsigned int cache_level;
+
+   GEM_BUG_ON(!size);
+   GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+
+   if (overflows_type(size, obj->base.size))
+   return ERR_PTR(-E2BIG);
+
+   obj = i915_gem_object_alloc();
+   if (!obj)
+   return ERR_PTR(-ENOMEM);
+
+   drm_gem_private_object_init(&i915->drm, &obj->base, size);
+   i915_gem_object_init(obj, &pd_dummy_obj_ops, &lock_class, 0);
+   obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
+
+   /*
+* Mark the object as volatile, such that the pages are marked as
+* dontneed whilst they are still pinned. As soon as they are unpinned
+* they are allowed to be reaped by the shrinker, and the caller is
+* expected to repopulate - the contents of this object are only valid
+* whilst active and pinned.
+*/
+   i915_gem_object_set_volatile(obj);
+
+   obj->read_domains = I915_GEM_DOMAIN_CPU;
+   obj->write_domain = I915_GEM_DOMAIN_CPU;
+
+   cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+   i915_gem_object_set_cache_coherency(obj, cache_level);
+
+   return obj;
+}
+
 static struct i915_page_directory *
 gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt)
 {
@@ -383,9 +422,7 @@ gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt)
if (unlikely(!pd))
return ERR_PTR(-ENOMEM);
 
-   pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915,
-   &pd_dummy_obj_ops,
-   I915_PDES * SZ_4K);
+   pd->pt.base = i915_gem_object_create_dummy(ppgtt->base.vm.gt->i915, I915_PDES * SZ_4K);
if (IS_ERR(pd->pt.base)) {
err = PTR_ERR(pd->pt.base);
pd->pt.base = NULL;
-- 
2.25.1



[PATCH v4 7/8] drm/i915/gem: further fix mman selftest

2022-07-13 Thread Robert Beckett
In commit 450cede7f380 ("drm/i915/gem: Fix the mman selftest") we fixed up
the mman selftest to allocate user buffers via smem only if we have lmem,
otherwise it uses internal buffers.

As the commit message asserts, we should only be using buffers that
userland should be able to create.
Internal buffers are not intended to be used by userland.

Instead, fix the code to always create buffers from smem.
In the case of integrated, this will create them from the shmem non-ttm
backend, which is fine.

This also fixes up the code to allow conversion of internal backend to
ttm without breaking this test.

Signed-off-by: Robert Beckett 
---
 .../gpu/drm/i915/gem/selftests/i915_gem_mman.c  | 17 ++---
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 3ced9948a331..e529eb8461ff 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -596,17 +596,12 @@ static enum i915_mmap_type default_mapping(struct 
drm_i915_private *i915)
 }
 
 static struct drm_i915_gem_object *
-create_sys_or_internal(struct drm_i915_private *i915,
-  unsigned long size)
+create_sys(struct drm_i915_private *i915, unsigned long size)
 {
-   if (HAS_LMEM(i915)) {
-   struct intel_memory_region *sys_region =
-   i915->mm.regions[INTEL_REGION_SMEM];
+   struct intel_memory_region *sys_region =
+   i915->mm.regions[INTEL_REGION_SMEM];
 
-   return __i915_gem_object_create_user(i915, size, &sys_region, 1);
-   }
-
-   return i915_gem_object_create_internal(i915, size);
+   return __i915_gem_object_create_user(i915, size, &sys_region, 1);
 }
 
 static bool assert_mmap_offset(struct drm_i915_private *i915,
@@ -617,7 +612,7 @@ static bool assert_mmap_offset(struct drm_i915_private 
*i915,
u64 offset;
int ret;
 
-   obj = create_sys_or_internal(i915, size);
+   obj = create_sys(i915, size);
if (IS_ERR(obj))
return expected && expected == PTR_ERR(obj);
 
@@ -719,7 +714,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
}
 
/* Fill the hole, further allocation attempts should then fail */
-   obj = create_sys_or_internal(i915, PAGE_SIZE);
+   obj = create_sys(i915, PAGE_SIZE);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
pr_err("Unable to create object for reclaimed hole\n");
-- 
2.25.1



[PATCH v4 6/8] drm/i915: allow volatile buffers to use ttm pool allocator

2022-07-13 Thread Robert Beckett
Internal/volatile buffers should not be shmem backed.
If a volatile buffer is requested, allow ttm to use the pool allocator
to provide volatile pages as backing.
Fix i915_ttm_shrink to handle !is_shmem volatile buffers by purging.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index b6c3fc25d9d1..599ed2713359 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -291,7 +291,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct 
ttm_buffer_object *bo,
page_flags |= TTM_TT_FLAG_ZERO_ALLOC;
 
caching = i915_ttm_select_tt_caching(obj);
-   if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) {
+   if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached &&
+   !i915_gem_object_is_volatile(obj)) {
page_flags |= TTM_TT_FLAG_EXTERNAL |
  TTM_TT_FLAG_EXTERNAL_MAPPABLE;
i915_tt->is_shmem = true;
@@ -513,9 +514,9 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, 
unsigned int flags)
if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM)
return 0;
 
-   GEM_BUG_ON(!i915_tt->is_shmem);
+   GEM_BUG_ON(!i915_tt->is_shmem && obj->mm.madv != I915_MADV_DONTNEED);
 
-   if (!i915_tt->filp)
+   if (i915_tt->is_shmem && !i915_tt->filp)
return 0;
 
ret = ttm_bo_wait_ctx(bo, &ctx);
-- 
2.25.1



[PATCH v4 3/8] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level

2022-07-13 Thread Robert Beckett
By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP.
This is divergent from existing backends code which only considers
HAS_LLC.
Testing shows that trusting snooping on gen5- and on bsw via ggtt
mappings is unreliable, so limit it to DGFX for now and maintain
previous behaviour.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 042c2237e287..a949594237d9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -52,7 +52,9 @@ static enum i915_cache_level
 i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
 struct ttm_tt *ttm)
 {
-   return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
+   bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915);
+
+   return ((HAS_LLC(i915) || can_snoop) &&
!i915_ttm_gtt_binds_lmem(res) &&
ttm->caching == ttm_cached) ? I915_CACHE_LLC :
I915_CACHE_NONE;
-- 
2.25.1



[PATCH v4 0/8] drm/i915: ttm for internal

2022-07-13 Thread Robert Beckett
This series refactors i915's internal buffer backend to use ttm.
It uses ttm's pool allocator to allocate volatile pages in place of the
old code which rolled its own via alloc_pages.
This is continuing progress to align all backends on using ttm.

v2: - commit message improvements to add detail
- fix i915_ttm_shrink to purge !is_shmem volatile buffers
- limit ttm pool allocator to using dma32 on i965G[M]
- fix mman selftest to always use smem buffers
- create new internal memory region
- make internal backend allocate from internal region
- Fixed various issues with tests and i915 ttm usage as a result
  of supporting regions other than lmem via ttm.

v3: - limit i915 ttm default cache_level selection to only trust
  HAS_SNOOP on DGFX.

v4: - rebase to drm-tip and handle conflicts

Robert Beckett (8):
  drm/i915/ttm: dont trample cache_level overrides during ttm move
  drm/i915: limit ttm to dma32 for i965G[M]
  drm/i915/ttm: only trust snooping for dgfx when deciding default
cache_level
  drm/i915: add gen6 ppgtt dummy creation function
  drm/i915: setup ggtt scratch page after memory regions
  drm/i915: allow volatile buffers to use ttm pool allocator
  drm/i915/gem: further fix mman selftest
  drm/i915: internal buffers use ttm backend

 drivers/gpu/drm/i915/gem/i915_gem_internal.c  | 187 +-
 drivers/gpu/drm/i915/gem/i915_gem_internal.h  |   5 -
 drivers/gpu/drm/i915/gem/i915_gem_object.c|   1 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |   8 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  13 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  17 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  |  43 +++-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  20 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   |   1 +
 drivers/gpu/drm/i915/i915_driver.c|  16 +-
 drivers/gpu/drm/i915/i915_pci.c   |   4 +-
 drivers/gpu/drm/i915/intel_memory_region.c|   8 +-
 drivers/gpu/drm/i915/intel_memory_region.h|   2 +
 drivers/gpu/drm/i915/intel_region_ttm.c   |   7 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   2 +-
 16 files changed, 114 insertions(+), 221 deletions(-)

-- 
2.25.1



[PATCH v4 1/8] drm/i915/ttm: dont trample cache_level overrides during ttm move

2022-07-13 Thread Robert Beckett
Various places within the driver override the default chosen cache_level.
Before ttm, these overrides were permanent until explicitly changed again
or for the lifetime of the buffer.

TTM movement code came along and decided that it could make that
decision at that time, which is usually well after object creation, so
overrode the cache_level decision and reverted it back to its default
decision.

Add logic to indicate whether the caching mode has been set by anything
other than the move logic. If so, assume that the code that overrode the
defaults knows best and keep it.
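
As a toy model of the protocol this adds (plain C, names shortened;
illustrative only, not the driver code): explicit callers mark the
object, and the move path recomputes the cache level only for unmarked
objects, clearing the flag again because its own choice is just a default:

#include <stdbool.h>
#include <stdio.h>

struct obj {
	int cache_level;
	bool cache_level_override;
};

static void set_cache_coherency(struct obj *o, int level)
{
	o->cache_level = level;
	o->cache_level_override = true;	/* explicit callers mark the object */
}

static void adjust_gem_after_move(struct obj *o, int move_default)
{
	if (o->cache_level_override)
		return;		/* an earlier explicit override wins */
	set_cache_coherency(o, move_default);
	o->cache_level_override = false; /* the move's default is not an override */
}

int main(void)
{
	struct obj o = { 0, false };

	set_cache_coherency(&o, 2);	/* e.g. a ring buffer forcing LLC */
	adjust_gem_after_move(&o, 0);	/* a later move must not trample it */
	printf("cache_level=%d\n", o.cache_level);	/* prints cache_level=2 */
	return 0;
}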

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c   | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 9 ++---
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index ccec4055fde3..966ac2d778d5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct 
drm_i915_gem_object *obj,
struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
obj->cache_level = cache_level;
+   obj->ttm.cache_level_override = true;
 
if (cache_level != I915_CACHE_NONE)
obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 5cf36a130061..14937cf1daaa 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -623,6 +623,7 @@ struct drm_i915_gem_object {
struct i915_gem_object_page_iter get_io_page;
struct drm_i915_gem_object *backup;
bool created:1;
+   bool cache_level_override:1;
} ttm;
 
/*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 053b0022ddd0..b6c3fc25d9d1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1253,6 +1253,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
i915_gem_object_init_memory_region(obj, mem);
i915_ttm_adjust_domains_after_move(obj);
i915_ttm_adjust_gem_after_move(obj);
+   obj->ttm.cache_level_override = false;
i915_gem_object_unlock(obj);
 
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 9a7e50534b84..042c2237e287 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -129,9 +129,12 @@ void i915_ttm_adjust_gem_after_move(struct 
drm_i915_gem_object *obj)
obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
I915_BO_FLAG_STRUCT_PAGE;
 
-   cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
-  bo->ttm);
-   i915_gem_object_set_cache_coherency(obj, cache_level);
+   if (!obj->ttm.cache_level_override) {
+   cache_level = i915_ttm_cache_level(to_i915(bo->base.dev),
+  bo->resource, bo->ttm);
+   i915_gem_object_set_cache_coherency(obj, cache_level);
+   obj->ttm.cache_level_override = false;
+   }
 }
 
 /**
-- 
2.25.1



[PATCH v4 2/8] drm/i915: limit ttm to dma32 for i965G[M]

2022-07-13 Thread Robert Beckett
i965G[M] cannot relocate objects above 4GiB.
Ensure ttm uses dma32 on these systems.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index 6873808a7015..642cd1587976 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -32,10 +32,15 @@
 int intel_region_ttm_device_init(struct drm_i915_private *dev_priv)
 {
struct drm_device *drm = &dev_priv->drm;
+   bool use_dma32 = false;
+
+   /* i965g[m] cannot relocate objects above 4GiB. */
+   if (IS_I965GM(dev_priv) || IS_I965G(dev_priv))
+   use_dma32 = true;
 
return ttm_device_init(&dev_priv->bdev, i915_ttm_driver(),
   drm->dev, drm->anon_inode->i_mapping,
-  drm->vma_offset_manager, false, false);
+  drm->vma_offset_manager, false, use_dma32);
 }
 
 /**
-- 
2.25.1



Re: [PATCH v10 04/11] drm/i915/gem: selftest should not attempt mmap of private regions

2022-07-08 Thread Robert Beckett




On 08/07/2022 14:27, Matthew Auld wrote:

On 08/07/2022 14:22, Robert Beckett wrote:



On 08/07/2022 08:53, Matthew Auld wrote:

On 07/07/2022 21:02, Robert Beckett wrote:

During testing make can_mmap consider whether the region is private.


Do we still need this with: 938d2fd17d17 ("drm/i915/selftests: skip 
the mman tests for stolen") ?


huh, I guess not. That wasn't in my tree. I guess I should rebase.

Looking at it, my patch would have been preferable initially I think. 
Each location of the additional checks in that patch first calls 
can_mmap(), which I think is the most appropriate place to make the 
decision.


It fails at the object_create() I think (on small-BAR I mean), which is 
before we can call can_mmap(), passing in the object.


ah, okay. That makes sense to keep as is then.
I'll drop this patch.
Thanks.





I could do a replacement patch that reverts that one if preferred, or 
we can leave it as is and I will drop this patch.







Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
  drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c

index 5bc93a1ce3e3..76181e28c75e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -869,6 +869,9 @@ static bool can_mmap(struct drm_i915_gem_object 
*obj, enum i915_mmap_type type)

  struct drm_i915_private *i915 = to_i915(obj->base.dev);
  bool no_map;
+    if (obj->mm.region && obj->mm.region->private)
+    return false;
+
  if (obj->ops->mmap_offset)
  return type == I915_MMAP_TYPE_FIXED;
  else if (type == I915_MMAP_TYPE_FIXED)


Re: [PATCH v10 04/11] drm/i915/gem: selftest should not attempt mmap of private regions

2022-07-08 Thread Robert Beckett




On 08/07/2022 08:53, Matthew Auld wrote:

On 07/07/2022 21:02, Robert Beckett wrote:

During testing make can_mmap consider whether the region is private.


Do we still need this with: 938d2fd17d17 ("drm/i915/selftests: skip the 
mman tests for stolen") ?


huh, I guess not. That wasn't in my tree. I guess I should rebase.

Looking at it, my patch would have been preferable initially I think. 
Each location of the additional checks in that patch first calls 
can_mmap(), which I think is the most appropriate place to make the 
decision.


I could do a replacement patch that reverts that one if preferred, or we 
can leave it as is and I will drop this patch.







Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
  drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c

index 5bc93a1ce3e3..76181e28c75e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -869,6 +869,9 @@ static bool can_mmap(struct drm_i915_gem_object 
*obj, enum i915_mmap_type type)

  struct drm_i915_private *i915 = to_i915(obj->base.dev);
  bool no_map;
+    if (obj->mm.region && obj->mm.region->private)
+    return false;
+
  if (obj->ops->mmap_offset)
  return type == I915_MMAP_TYPE_FIXED;
  else if (type == I915_MMAP_TYPE_FIXED)


[PATCH v10 10/11] drm/i915/selftest: wait for requests during engine reset selftest

2022-07-07 Thread Robert Beckett
While looping around each engine and testing for corrupted stolen memory
during engine reset, the old requests from the previous engine may not
yet have retired.
To prevent false positive corruption tests, wait for the outstanding
requests at the end of the test.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gt/selftest_reset.c | 35 ++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c 
b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 55f3b34e5f6e..a2558bc31408 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -6,6 +6,7 @@
 #include <linux/crc32.h>
 
 #include "gem/i915_gem_stolen.h"
+#include "gt/intel_gt.h"
 
 #include "i915_memcpy.h"
 #include "i915_selftest.h"
@@ -26,6 +27,7 @@ __igt_reset_stolen(struct intel_gt *gt,
intel_wakeref_t wakeref;
enum intel_engine_id id;
struct igt_spinner spin;
+   struct i915_request *requests[I915_NUM_ENGINES] = {0};
long max, count;
void *tmp;
u32 *crc;
@@ -68,15 +70,16 @@ __igt_reset_stolen(struct intel_gt *gt,
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
-   goto err_spin;
+   goto err_requests;
}
rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
intel_context_put(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
-   goto err_spin;
+   goto err_requests;
}
i915_request_add(rq);
+   requests[id] = i915_request_get(rq);
}
 
for (page = 0; page < num_pages; page++) {
@@ -165,6 +168,34 @@ __igt_reset_stolen(struct intel_gt *gt,
err = -EINVAL;
}
 
+   /* wait for requests and idle, otherwise cleanup can happen on next 
loop */
+   for (id = 0; id < I915_NUM_ENGINES; id++) {
+   if (!requests[id])
+   continue;
+   err = i915_request_wait(requests[id], I915_WAIT_INTERRUPTIBLE, 
HZ);
+   if (err < 0) {
+   pr_err("%s failed to wait for rq: %d\n", msg, err);
+   goto err_requests;
+   }
+
+   i915_request_put(requests[id]);
+   requests[id] = NULL;
+   }
+
+   err = intel_gt_wait_for_idle(gt, HZ);
+   if (err < 0) {
+   pr_err("%s failed to wait for gt idle: %d\n", msg, err);
+   goto err_spin;
+   }
+
+   err = 0;
+
+err_requests:
+   for (id = 0; id < I915_NUM_ENGINES; id++) {
+   if (!requests[id])
+   continue;
+   i915_request_put(requests[id]);
+   }
 err_spin:
igt_spinner_fini(&spin);
 
-- 
2.25.1



[PATCH v10 09/11] drm/i915/selftest: don't attempt engine reset of guc submission engines

2022-07-07 Thread Robert Beckett
igt_reset_engines_stolen tries to reset engines without checking if it
is possible.
Engines using GuC submission are not able to be reset from the host.

In this scenario, the reset exits early, then on the next iteration of
the for_each_engine loop, the async teardown of the spinner request
context's ring occurs while the next engine is under test.

This is seen as a stolen memory corruption as the ring buffer was busy
initially, but free during the confirmation check and had been poisoned
during cleanup.

Fix this by not testing engines that use GuC submission.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gt/selftest_reset.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c 
b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 37c38bdd5f47..55f3b34e5f6e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -194,6 +194,8 @@ static int igt_reset_engines_stolen(void *arg)
return 0;
 
for_each_engine(engine, gt, id) {
+   if (intel_engine_uses_guc(engine))
+   continue;
err = __igt_reset_stolen(gt, engine->mask, engine->name);
if (err)
return err;
-- 
2.25.1



[PATCH v10 11/11] drm/i915: stolen memory use ttm backend

2022-07-07 Thread Robert Beckett
refactor stolen memory region to use ttm.
this necessitates using ttm resources to track reserved stolen regions
instead of drm_mm_nodes.
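
For consumers this means holding an opaque struct ttm_resource pointer
and querying its offset, instead of reading a drm_mm_node's .start
directly. A minimal usage sketch, with signatures inferred from the FBC
call sites in the (truncated) diff below rather than taken from a header:

	struct ttm_resource *res;
	u64 offset;

	res = i915_gem_stolen_reserve_range(i915, size, 0, end);
	if (IS_ERR(res))
		return PTR_ERR(res);

	offset = i915_gem_stolen_reserve_offset(res);	/* replaces node.start */
	if (offset == I915_BO_INVALID_OFFSET)
		return -ENXIO;	/* illustrative error handling only */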

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/display/intel_fbc.c  |  78 ++--
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 425 ++
 drivers/gpu/drm/i915/gem/i915_gem_stolen.h|  21 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |   3 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h   |   7 +
 drivers/gpu/drm/i915/gt/intel_rc6.c   |   4 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  16 +-
 drivers/gpu/drm/i915/i915_debugfs.c   |   7 +-
 drivers/gpu/drm/i915/i915_drv.h   |   6 +-
 drivers/gpu/drm/i915/intel_region_ttm.c   |  42 +-
 drivers/gpu/drm/i915/intel_region_ttm.h   |   8 +-
 drivers/gpu/drm/i915/selftests/mock_region.c  |  12 +-
 13 files changed, 280 insertions(+), 351 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c 
b/drivers/gpu/drm/i915/display/intel_fbc.c
index 8b807284cde1..6f3afac5e8c9 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -38,6 +38,7 @@
  * forcibly disable it to allow proper screen updates.
  */
 
+#include "gem/i915_gem_stolen.h"
 #include <linux/string_helpers.h>
 
 #include <drm/drm_fourcc.h>
@@ -51,6 +52,7 @@
 #include "intel_display_types.h"
 #include "intel_fbc.h"
 #include "intel_frontbuffer.h"
+#include "gem/i915_gem_region.h"
 
 #define for_each_fbc_id(__dev_priv, __fbc_id) \
for ((__fbc_id) = INTEL_FBC_A; (__fbc_id) < I915_MAX_FBCS; 
(__fbc_id)++) \
@@ -92,8 +94,8 @@ struct intel_fbc {
struct mutex lock;
unsigned int busy_bits;
 
-   struct drm_mm_node compressed_fb;
-   struct drm_mm_node compressed_llb;
+   struct ttm_resource *compressed_fb;
+   struct ttm_resource *compressed_llb;
 
enum intel_fbc_id id;
 
@@ -331,16 +333,20 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc)
 static void i8xx_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
+   u64 llb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_llb);
 
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   GEM_BUG_ON(llb_offset == I915_BO_INVALID_OFFSET);
GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start,
-fbc->compressed_fb.start, U32_MAX));
+fb_offset, U32_MAX));
GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start,
-fbc->compressed_llb.start, U32_MAX));
+llb_offset, U32_MAX));
 
intel_de_write(i915, FBC_CFB_BASE,
-  i915->dsm.start + fbc->compressed_fb.start);
+  i915->dsm.start + fb_offset);
intel_de_write(i915, FBC_LL_BASE,
-  i915->dsm.start + fbc->compressed_llb.start);
+  i915->dsm.start + llb_offset);
 }
 
 static const struct intel_fbc_funcs i8xx_fbc_funcs = {
@@ -448,8 +454,10 @@ static bool g4x_fbc_is_compressing(struct intel_fbc *fbc)
 static void g4x_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
 
-   intel_de_write(i915, DPFC_CB_BASE, fbc->compressed_fb.start);
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   intel_de_write(i915, DPFC_CB_BASE, fb_offset);
 }
 
 static const struct intel_fbc_funcs g4x_fbc_funcs = {
@@ -499,8 +507,10 @@ static bool ilk_fbc_is_compressing(struct intel_fbc *fbc)
 static void ilk_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
 
-   intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), 
fbc->compressed_fb.start);
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fb_offset);
 }
 
 static const struct intel_fbc_funcs ilk_fbc_funcs = {
@@ -744,21 +754,24 @@ static int find_compression_limit(struct intel_fbc *fbc,
 {
struct drm_i915_private *i915 = fbc->i915;
u64 end = intel_fbc_stolen_end(i915);
-   int ret, limit = min_limit;
+   int limit = min_limit;
+   struct ttm_resource *res;
 
size /= limit;
 
/* Try to over-allocate to reduce reallocations and fragmentation. */
-   ret = i915_gem_stolen_insert_node_in_range(i915, &fbc->compressed_fb,
-  size <<= 1, 4096, 0, end);
-   if (ret == 0)
+   res = i915_gem_stolen_reserve_range(i915, size <<= 1, 0, end)

[PATCH v10 05/11] drm/i915: instantiate ttm range manager for stolen memory

2022-07-07 Thread Robert Beckett
prepare for ttm based stolen region by using ttm range manager
as the resource manager for stolen region.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c |  6 ++--
 drivers/gpu/drm/i915/intel_region_ttm.c  | 31 +++-
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 40249fa28a7a..675e9ab30396 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -60,11 +60,13 @@ i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
 
/* There's some room for optimization here... */
-   GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
-  ttm_mem_type < I915_PL_LMEM0);
+   GEM_BUG_ON(ttm_mem_type == I915_PL_GGTT);
+
if (ttm_mem_type == I915_PL_SYSTEM)
return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
  0);
+   if (ttm_mem_type == I915_PL_STOLEN)
+   return i915->mm.stolen_region;
 
return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
  ttm_mem_type - I915_PL_LMEM0);
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index fd2ecfdd8fa1..694e9acb69e2 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -54,7 +54,7 @@ void intel_region_ttm_device_fini(struct drm_i915_private 
*dev_priv)
 
 /*
  * Map the i915 memory regions to TTM memory types. We use the
- * driver-private types for now, reserving TTM_PL_VRAM for stolen
+ * driver-private types for now, reserving I915_PL_STOLEN for stolen
  * memory and TTM_PL_TT for GGTT use if decided to implement this.
  */
 int intel_region_to_ttm_type(const struct intel_memory_region *mem)
@@ -63,11 +63,17 @@ int intel_region_to_ttm_type(const struct 
intel_memory_region *mem)
 
GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL &&
   mem->type != INTEL_MEMORY_MOCK &&
-  mem->type != INTEL_MEMORY_SYSTEM);
+  mem->type != INTEL_MEMORY_SYSTEM &&
+  mem->type != INTEL_MEMORY_STOLEN_SYSTEM &&
+  mem->type != INTEL_MEMORY_STOLEN_LOCAL);
 
if (mem->type == INTEL_MEMORY_SYSTEM)
return TTM_PL_SYSTEM;
 
+   if (mem->type == INTEL_MEMORY_STOLEN_SYSTEM ||
+   mem->type == INTEL_MEMORY_STOLEN_LOCAL)
+   return I915_PL_STOLEN;
+
type = mem->instance + TTM_PL_PRIV;
GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES);
 
@@ -91,10 +97,16 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
int mem_type = intel_region_to_ttm_type(mem);
int ret;
 
-   ret = i915_ttm_buddy_man_init(bdev, mem_type, false,
- resource_size(&mem->region),
- mem->io_size,
- mem->min_page_size, PAGE_SIZE);
+   if (mem_type == I915_PL_STOLEN) {
+   ret = ttm_range_man_init(bdev, mem_type, false,
+resource_size(&mem->region) >> PAGE_SHIFT);
+   mem->is_range_manager = true;
+   } else {
+   ret = i915_ttm_buddy_man_init(bdev, mem_type, false,
+ resource_size(&mem->region),
+ mem->io_size,
+ mem->min_page_size, PAGE_SIZE);
+   }
if (ret)
return ret;
 
@@ -114,6 +126,7 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
 int intel_region_ttm_fini(struct intel_memory_region *mem)
 {
struct ttm_resource_manager *man = mem->region_private;
+   int mem_type = intel_region_to_ttm_type(mem);
int ret = -EBUSY;
int count;
 
@@ -144,8 +157,10 @@ int intel_region_ttm_fini(struct intel_memory_region *mem)
if (ret || !man)
return ret;
 
-   ret = i915_ttm_buddy_man_fini(&mem->i915->bdev,
- intel_region_to_ttm_type(mem));
+   if (mem_type == I915_PL_STOLEN)
+   ret = ttm_range_man_fini(&mem->i915->bdev, mem_type);
+   else
+   ret = i915_ttm_buddy_man_fini(&mem->i915->bdev, mem_type);
GEM_WARN_ON(ret);
mem->region_private = NULL;
 
-- 
2.25.1



[PATCH v10 04/11] drm/i915/gem: selftest should not attempt mmap of private regions

2022-07-07 Thread Robert Beckett
During testing make can_mmap consider whether the region is private.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 5bc93a1ce3e3..76181e28c75e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -869,6 +869,9 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum 
i915_mmap_type type)
struct drm_i915_private *i915 = to_i915(obj->base.dev);
bool no_map;
 
+   if (obj->mm.region && obj->mm.region->private)
+   return false;
+
if (obj->ops->mmap_offset)
return type == I915_MMAP_TYPE_FIXED;
else if (type == I915_MMAP_TYPE_FIXED)
-- 
2.25.1



[PATCH v10 07/11] drm/i915: ttm move/clear logic fix

2022-07-07 Thread Robert Beckett
ttm managed buffers start off with system resource definitions and ttm_tt
tracking structures allocated (though unpopulated).
Currently this prevents clearing of buffers on their first move to the
desired placements.

The desired behaviour is to clear user allocated buffers and only those
kernel buffers that specifically request it.
Make the logic match the desired behaviour.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 22 +++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 81c67ca9edda..a3f8fc056dbc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -3,6 +3,7 @@
  * Copyright © 2021 Intel Corporation
  */
 
+#include "drm/ttm/ttm_tt.h"
 #include <drm/ttm/ttm_bo_driver.h>
 
 #include "i915_deps.h"
@@ -476,6 +477,25 @@ __i915_ttm_move(struct ttm_buffer_object *bo,
return fence;
 }
 
+static bool
+allow_clear(struct drm_i915_gem_object *obj, struct ttm_tt *ttm, struct 
ttm_resource *dst_mem)
+{
+   /* never clear stolen */
+   if (dst_mem->mem_type == I915_PL_STOLEN)
+   return false;
+   /*
+* we want to clear user buffers and any kernel buffers
+* that specifically request clearing.
+*/
+   if (obj->flags & I915_BO_ALLOC_USER)
+   return true;
+
+   if (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)
+   return true;
+
+   return false;
+}
+
 /**
  * i915_ttm_move - The TTM move callback used by i915.
  * @bo: The buffer object.
@@ -526,7 +546,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
return PTR_ERR(dst_rsgt);
 
clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || 
!ttm_tt_is_populated(ttm));
-   if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
+   if (!clear || allow_clear(obj, ttm, dst_mem)) {
struct i915_deps deps;
 
i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
-- 
2.25.1



[PATCH v10 08/11] drm/i915/ttm: add buffer pin on alloc flag

2022-07-07 Thread Robert Beckett
For situations where allocations need to fail at alloc time instead of
during delayed get_pages, add a new alloc flag to pin the ttm bo.
This makes sure that the resource has been allocated during buffer
creation, allowing it to fail with an error if the placement is
exhausted.
This allows existing fallback options for stolen backend allocation like
create_ring_vma to work as expected.

Signed-off-by: Robert Beckett 
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 13 ++
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   | 25 ++-
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 6632ed52e919..07bc11247a3e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -325,17 +325,20 @@ struct drm_i915_gem_object {
  * dealing with userspace objects the CPU fault handler is free to ignore this.
  */
 #define I915_BO_ALLOC_GPU_ONLY   BIT(6)
+/* object should be pinned in destination region from allocation */
+#define I915_BO_ALLOC_PINNED BIT(7)
 #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
 I915_BO_ALLOC_VOLATILE | \
 I915_BO_ALLOC_CPU_CLEAR | \
 I915_BO_ALLOC_USER | \
 I915_BO_ALLOC_PM_VOLATILE | \
 I915_BO_ALLOC_PM_EARLY | \
-I915_BO_ALLOC_GPU_ONLY)
-#define I915_BO_READONLY  BIT(7)
-#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */
-#define I915_BO_PROTECTED BIT(9)
-#define I915_BO_WAS_BOUND_BIT 10
+I915_BO_ALLOC_GPU_ONLY | \
+I915_BO_ALLOC_PINNED)
+#define I915_BO_READONLY  BIT(8)
+#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */
+#define I915_BO_PROTECTED BIT(10)
+#define I915_BO_WAS_BOUND_BIT 11
/**
 * @mem_flags - Mutable placement-related flags
 *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 27d59639177f..bb988608296d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -998,6 +998,13 @@ static void i915_ttm_delayed_free(struct 
drm_i915_gem_object *obj)
 {
GEM_BUG_ON(!obj->ttm.created);
 
+   /* stolen objects are pinned for lifetime. Unpin before putting */
+   if (obj->flags & I915_BO_ALLOC_PINNED) {
+   ttm_bo_reserve(i915_gem_to_ttm(obj), true, false, NULL);
+   ttm_bo_unpin(i915_gem_to_ttm(obj));
+   ttm_bo_unreserve(i915_gem_to_ttm(obj));
+   }
+
ttm_bo_put(i915_gem_to_ttm(obj));
 }
 
@@ -1193,6 +1200,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
.no_wait_gpu = false,
};
enum ttm_bo_type bo_type;
+   struct ttm_place _place;
+   struct ttm_placement _placement;
+   struct ttm_placement *placement;
int ret;
 
drm_gem_private_object_init(&i915->drm, &obj->base, size);
@@ -1222,6 +1232,17 @@ int __i915_gem_ttm_object_init(struct 
intel_memory_region *mem,
 */
i915_gem_object_make_unshrinkable(obj);
 
+   if (obj->flags & I915_BO_ALLOC_PINNED) {
+   i915_ttm_place_from_region(mem, &_place, obj->bo_offset,
+  obj->base.size, obj->flags);
+   _placement.num_placement = 1;
+   _placement.placement = &_place;
+   _placement.num_busy_placement = 0;
+   _placement.busy_placement = NULL;
+   placement = &_placement;
+   } else {
+   placement = &i915_sys_placement;
+   }
/*
 * If this function fails, it will call the destructor, but
 * our caller still owns the object. So no freeing in the
@@ -1230,7 +1251,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
 * until successful initialization.
 */
ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size,
-  bo_type, &i915_sys_placement,
+  bo_type, placement,
   page_size >> PAGE_SHIFT,
   &ctx, NULL, NULL, i915_ttm_bo_destroy);
if (ret)
@@ -1242,6 +1263,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
i915_ttm_adjust_domains_after_move(obj);
i915_ttm_adjust_gem_after_move(obj);
obj->ttm.cache_level_override = false;
+   if (obj->flags & I915_BO_ALLOC_PINNED)
+   ttm_bo_pin(i915_gem_to_ttm(obj));
i915_gem_object_unlock(obj);
 
return 0;
-- 
2.25.1



[PATCH v10 01/11] drm/i915/ttm: dont trample cache_level overrides during ttm move

2022-07-07 Thread Robert Beckett
Various places within the driver override the default chosen cache_level.
Before ttm, these overrides were permanent until explicitly changed again
or for the lifetime of the buffer.

TTM movement code came along and decided that it could make that
decision at that time, which is usually well after object creation, so
overrode the cache_level decision and reverted it back to its default
decision.

Add logic to indicate whether the caching mode has been set by anything
other than the move logic. If so, assume that the code that overrode the
defaults knows best and keep it.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c   | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 9 ++---
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 06b1b188ce5a..519887769c08 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct 
drm_i915_gem_object *obj,
struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
obj->cache_level = cache_level;
+   obj->ttm.cache_level_override = true;
 
if (cache_level != I915_CACHE_NONE)
obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 2c88bdb8ff7c..6632ed52e919 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -605,6 +605,7 @@ struct drm_i915_gem_object {
struct i915_gem_object_page_iter get_io_page;
struct drm_i915_gem_object *backup;
bool created:1;
+   bool cache_level_override:1;
} ttm;
 
/*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 4c25d9b2f138..27d59639177f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1241,6 +1241,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
i915_gem_object_init_memory_region(obj, mem);
i915_ttm_adjust_domains_after_move(obj);
i915_ttm_adjust_gem_after_move(obj);
+   obj->ttm.cache_level_override = false;
i915_gem_object_unlock(obj);
 
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index a10716f4e717..4c1de0b4a10f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -123,9 +123,12 @@ void i915_ttm_adjust_gem_after_move(struct 
drm_i915_gem_object *obj)
obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
I915_BO_FLAG_STRUCT_PAGE;
 
-   cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
-  bo->ttm);
-   i915_gem_object_set_cache_coherency(obj, cache_level);
+   if (!obj->ttm.cache_level_override) {
+   cache_level = i915_ttm_cache_level(to_i915(bo->base.dev),
+  bo->resource, bo->ttm);
+   i915_gem_object_set_cache_coherency(obj, cache_level);
+   obj->ttm.cache_level_override = false;
+   }
 }
 
 /**
-- 
2.25.1



[PATCH v10 06/11] drm/i915: sanitize mem_flags for stolen buffers

2022-07-07 Thread Robert Beckett
Stolen regions are not page backed or considered iomem.
Prevent flags indicating such.
This correctly prevents stolen buffers from attempting to directly map
them.

See i915_gem_object_has_struct_page() and i915_gem_object_has_iomem()
usage for where it would break otherwise.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 675e9ab30396..81c67ca9edda 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -14,6 +14,7 @@
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_ttm.h"
 #include "gem/i915_gem_ttm_move.h"
+#include "gem/i915_gem_stolen.h"
 
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
@@ -124,8 +125,9 @@ void i915_ttm_adjust_gem_after_move(struct 
drm_i915_gem_object *obj)
 
obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
 
-   obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
-   I915_BO_FLAG_STRUCT_PAGE;
+   if (!i915_gem_object_is_stolen(obj))
+   obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
+   I915_BO_FLAG_STRUCT_PAGE;
 
if (!obj->ttm.cache_level_override) {
cache_level = i915_ttm_cache_level(to_i915(bo->base.dev),
-- 
2.25.1



[PATCH v10 02/11] drm/i915: limit ttm to dma32 for i965G[M]

2022-07-07 Thread Robert Beckett
i965G[M] cannot relocate objects above 4GiB.
Ensure ttm uses dma32 on these systems.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index 62ff77445b01..fd2ecfdd8fa1 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -32,10 +32,15 @@
 int intel_region_ttm_device_init(struct drm_i915_private *dev_priv)
 {
struct drm_device *drm = &dev_priv->drm;
+   bool use_dma32 = false;
+
+   /* i965g[m] cannot relocate objects above 4GiB. */
+   if (IS_I965GM(dev_priv) || IS_I965G(dev_priv))
+   use_dma32 = true;
 
return ttm_device_init(&dev_priv->bdev, i915_ttm_driver(),
   drm->dev, drm->anon_inode->i_mapping,
-  drm->vma_offset_manager, false, false);
+  drm->vma_offset_manager, false, use_dma32);
 }
 
 /**
-- 
2.25.1



[PATCH v10 03/11] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level

2022-07-07 Thread Robert Beckett
By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP.
This is divergent from existing backends code which only considers
HAS_LLC.
Testing shows that trusting snooping on gen5- and on bsw via ggtt
mappings is unreliable, so limit it to DGFX for now and maintain
previous behaviour.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 4c1de0b4a10f..40249fa28a7a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -46,7 +46,9 @@ static enum i915_cache_level
 i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
 struct ttm_tt *ttm)
 {
-   return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
+   bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915);
+
+   return ((HAS_LLC(i915) || can_snoop) &&
!i915_ttm_gtt_binds_lmem(res) &&
ttm->caching == ttm_cached) ? I915_CACHE_LLC :
I915_CACHE_NONE;
-- 
2.25.1



[PATCH v10 00/11] drm/i915: ttm for stolen

2022-07-07 Thread Robert Beckett
This series refactors i915's stolen memory region to use ttm.

v2: handle disabled stolen similar to legacy version.
relying on ttm to fail allocs works fine, but is dmesg noisy and causes
dmesg warning regressions in testing.

v3: rebase to latest drm-tip.
fix v2 code refactor which could leave a buffer pinned.
locally passes fftl again now.

v4: - Allow memory region creators to do allocation. Allows stolen region
  to track its own reservations.
- Pre-reserve first page of stolen mem (add back 
WaSkipStolenMemoryFirstPage:bdw+)
- Improve commit description for "drm/i915: sanitize mem_flags for 
stolen buffers"
- replace i915_gem_object_pin_pages_unlocked() call with manual locking 
and pinning.
  this avoids ww ctx class reuse during context creation -> ring vma 
obj alloc.

v5: - detect both types of stolen as stolen buffers in
  "drm/i915: sanitize mem_flags for stolen buffers"
- in stolen_object_init limit page size to mem region minimum.
  The range allocator expects the page_size to define the
  alignment

v6: - Share first 4 patches from ttm for internal series as generic
  i915 ttm fixes
- Drop patch 4 from v5. We don't need separate object ops just
  to satisfy test interfaces. The tests have now been fixed via
  checking whether the memory region is private to decide
  whether to mmap
- Add new buffer pin alloc flag to allow creation of buffers in
  their final ttm placement instead of deferring until
  get_pages. This fixes legacy fallback paths for buffer
  allocations during stolen memory pressure.

v7: - fix mock_region_get_pages() to correctly handle I915_BO_INVALID_OFFSET

v8: - Reserve I915_GEM_STOLEN_BIAS area from stolen

v9: - drop patch 8 "drm/i915: allow memory region creators to alloc and 
free the region"
  store bias reservation in drm_i915_private instead.
- Restrict reset selftest to only test !GuC engines.
  Resetting individual GuC engines from host is not supported
- Wait for outstanding requests in reset selftest
  This prevents the previous engine test's context cleanup appearing
  as a false positive in the stolen corruption check

v10:- Fix waiting on requests in the early error path during reset selftest.
  If a single request fails to complete, the others would not be
  put, resulting in leaks. Make sure all requests are put before
  test exit (see the sketch below).
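
For illustration, a minimal sketch of this fix, assuming the requests[]
array and err variable from the v9 patch later in this archive (the exact
v10 code is not shown here):

	/* Wait on every request, but always drop every reference,
	 * remembering only the first failure. */
	for (id = 0; id < I915_NUM_ENGINES; id++) {
		long timeout;

		if (!requests[id])
			continue;

		timeout = i915_request_wait(requests[id],
					    I915_WAIT_INTERRUPTIBLE, HZ);
		if (timeout < 0 && !err)
			err = timeout; /* keep putting the rest regardless */

		i915_request_put(requests[id]);
	}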

Robert Beckett (11):
  drm/i915/ttm: dont trample cache_level overrides during ttm move
  drm/i915: limit ttm to dma32 for i965G[M]
  drm/i915/ttm: only trust snooping for dgfx when deciding default
cache_level
  drm/i915/gem: selftest should not attempt mmap of private regions
  drm/i915: instantiate ttm range manager for stolen memory
  drm/i915: sanitize mem_flags for stolen buffers
  drm/i915: ttm move/clear logic fix
  drm/i915/ttm: add buffer pin on alloc flag
  drm/i915/selftest: don't attempt engine reset of guc submission
engines
  drm/i915/selftest: wait for requests during engine reset selftest
  drm/i915: stolen memory use ttm backend

 drivers/gpu/drm/i915/display/intel_fbc.c  |  78 ++--
 drivers/gpu/drm/i915/gem/i915_gem_object.c|   1 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  16 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 425 ++
 drivers/gpu/drm/i915/gem/i915_gem_stolen.h|  21 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |  29 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h   |   7 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  47 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|   3 +
 drivers/gpu/drm/i915/gt/intel_rc6.c   |   4 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  53 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   |   7 +-
 drivers/gpu/drm/i915/i915_drv.h   |   6 +-
 drivers/gpu/drm/i915/intel_region_ttm.c   |  80 +++-
 drivers/gpu/drm/i915/intel_region_ttm.h   |   8 +-
 drivers/gpu/drm/i915/selftests/mock_region.c  |  12 +-
 16 files changed, 420 insertions(+), 377 deletions(-)

-- 
2.25.1



[PATCH v9 11/11] drm/i915: stolen memory use ttm backend

2022-07-07 Thread Robert Beckett
Refactor the stolen memory region to use ttm.
This necessitates using ttm resources to track reserved stolen regions
instead of drm_mm_nodes.
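
The i915_gem_stolen_reserve_range()/i915_gem_stolen_reserve_offset()
helpers used below replace the old drm_mm_node based reservation API.
Their bodies are not visible in this truncated mail; as an assumption,
the offset helper likely amounts to something like:

/* Sketch (assumed, not quoted from the patch): a range-manager
 * ttm_resource records its placement in ->start, in pages. */
static u64 stolen_reserve_offset_sketch(const struct ttm_resource *res)
{
	if (!res)
		return I915_BO_INVALID_OFFSET;

	return (u64)res->start << PAGE_SHIFT;
}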

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/display/intel_fbc.c  |  78 ++--
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 425 ++
 drivers/gpu/drm/i915/gem/i915_gem_stolen.h|  21 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |   3 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h   |   7 +
 drivers/gpu/drm/i915/gt/intel_rc6.c   |   4 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  16 +-
 drivers/gpu/drm/i915/i915_debugfs.c   |   7 +-
 drivers/gpu/drm/i915/i915_drv.h   |   6 +-
 drivers/gpu/drm/i915/intel_region_ttm.c   |  42 +-
 drivers/gpu/drm/i915/intel_region_ttm.h   |   8 +-
 drivers/gpu/drm/i915/selftests/mock_region.c  |  12 +-
 13 files changed, 280 insertions(+), 351 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c 
b/drivers/gpu/drm/i915/display/intel_fbc.c
index 8b807284cde1..6f3afac5e8c9 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -38,6 +38,7 @@
  * forcibly disable it to allow proper screen updates.
  */
 
+#include "gem/i915_gem_stolen.h"
 #include 
 
 #include 
@@ -51,6 +52,7 @@
 #include "intel_display_types.h"
 #include "intel_fbc.h"
 #include "intel_frontbuffer.h"
+#include "gem/i915_gem_region.h"
 
 #define for_each_fbc_id(__dev_priv, __fbc_id) \
for ((__fbc_id) = INTEL_FBC_A; (__fbc_id) < I915_MAX_FBCS; 
(__fbc_id)++) \
@@ -92,8 +94,8 @@ struct intel_fbc {
struct mutex lock;
unsigned int busy_bits;
 
-   struct drm_mm_node compressed_fb;
-   struct drm_mm_node compressed_llb;
+   struct ttm_resource *compressed_fb;
+   struct ttm_resource *compressed_llb;
 
enum intel_fbc_id id;
 
@@ -331,16 +333,20 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc)
 static void i8xx_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
+   u64 llb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_llb);
 
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   GEM_BUG_ON(llb_offset == I915_BO_INVALID_OFFSET);
GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start,
-fbc->compressed_fb.start, U32_MAX));
+fb_offset, U32_MAX));
GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start,
-fbc->compressed_llb.start, U32_MAX));
+llb_offset, U32_MAX));
 
intel_de_write(i915, FBC_CFB_BASE,
-  i915->dsm.start + fbc->compressed_fb.start);
+  i915->dsm.start + fb_offset);
intel_de_write(i915, FBC_LL_BASE,
-  i915->dsm.start + fbc->compressed_llb.start);
+  i915->dsm.start + llb_offset);
 }
 
 static const struct intel_fbc_funcs i8xx_fbc_funcs = {
@@ -448,8 +454,10 @@ static bool g4x_fbc_is_compressing(struct intel_fbc *fbc)
 static void g4x_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
 
-   intel_de_write(i915, DPFC_CB_BASE, fbc->compressed_fb.start);
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   intel_de_write(i915, DPFC_CB_BASE, fb_offset);
 }
 
 static const struct intel_fbc_funcs g4x_fbc_funcs = {
@@ -499,8 +507,10 @@ static bool ilk_fbc_is_compressing(struct intel_fbc *fbc)
 static void ilk_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
 
-   intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), 
fbc->compressed_fb.start);
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fb_offset);
 }
 
 static const struct intel_fbc_funcs ilk_fbc_funcs = {
@@ -744,21 +754,24 @@ static int find_compression_limit(struct intel_fbc *fbc,
 {
struct drm_i915_private *i915 = fbc->i915;
u64 end = intel_fbc_stolen_end(i915);
-   int ret, limit = min_limit;
+   int limit = min_limit;
+   struct ttm_resource *res;
 
size /= limit;
 
/* Try to over-allocate to reduce reallocations and fragmentation. */
-	ret = i915_gem_stolen_insert_node_in_range(i915, &fbc->compressed_fb,
-  size <<= 1, 4096, 0, end);
-   if (ret == 0)
+   res = i915_gem_stolen_reserve_range(i915, size <<= 1, 0, end)

[PATCH v9 09/11] drm/i915/selftest: don't attempt engine reset of guc submission engines

2022-07-07 Thread Robert Beckett
igt_reset_engines_stolen tries to reset engines without checking if it
is possible.
Engines using GuC submission are not able to be reset from the host.

In this scenario, the reset exits early, then on the next iteration of
the each engine loop, the async teardown of the spinner request
context's ring occurs while the next engine is under test.

This is seen as a stolen memory corruption as the ring buffer was busy
initially, but free during the confirmation check and had been poisoned
during cleanup.

Fix this by not testing GuC submission using engines.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gt/selftest_reset.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c 
b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 37c38bdd5f47..55f3b34e5f6e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -194,6 +194,8 @@ static int igt_reset_engines_stolen(void *arg)
return 0;
 
for_each_engine(engine, gt, id) {
+   if (intel_engine_uses_guc(engine))
+   continue;
err = __igt_reset_stolen(gt, engine->mask, engine->name);
if (err)
return err;
-- 
2.25.1



[PATCH v9 10/11] drm/i915/selftest: wait for requests during engine reset selftest

2022-07-07 Thread Robert Beckett
While looping over each engine and testing for corrupted stolen memory
during engine reset, old requests from the previous engine can still
be yet to retire.
To prevent false positive corruption reports, wait for the outstanding
requests at the end of the test.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gt/selftest_reset.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c 
b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 55f3b34e5f6e..52acef647396 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -6,6 +6,7 @@
 #include 
 
 #include "gem/i915_gem_stolen.h"
+#include "gt/intel_gt.h"
 
 #include "i915_memcpy.h"
 #include "i915_selftest.h"
@@ -26,6 +27,7 @@ __igt_reset_stolen(struct intel_gt *gt,
intel_wakeref_t wakeref;
enum intel_engine_id id;
struct igt_spinner spin;
+   struct i915_request *requests[I915_NUM_ENGINES] = {0};
long max, count;
void *tmp;
u32 *crc;
@@ -77,6 +79,7 @@ __igt_reset_stolen(struct intel_gt *gt,
goto err_spin;
}
i915_request_add(rq);
+   requests[id] = i915_request_get(rq);
}
 
for (page = 0; page < num_pages; page++) {
@@ -165,6 +168,27 @@ __igt_reset_stolen(struct intel_gt *gt,
err = -EINVAL;
}
 
+   /* wait for requests and idle, otherwise cleanup can happen on next 
loop */
+   for (id = 0; id < I915_NUM_ENGINES; id++) {
+   if (!requests[id])
+   continue;
+   err = i915_request_wait(requests[id], I915_WAIT_INTERRUPTIBLE, 
HZ);
+   if (err < 0) {
+   pr_err("%s failed to wait for rq: %d\n", msg, err);
+   goto err_spin;
+   }
+
+   i915_request_put(requests[id]);
+   }
+
+   err = intel_gt_wait_for_idle(gt, HZ);
+   if (err < 0) {
+   pr_err("%s failed to wait for gt idle: %d\n", msg, err);
+   goto err_spin;
+   }
+
+   err = 0;
+
 err_spin:
	igt_spinner_fini(&spin);
 
-- 
2.25.1



[PATCH v9 08/11] drm/i915/ttm: add buffer pin on alloc flag

2022-07-07 Thread Robert Beckett
For situations where allocations need to fail on alloc instead of
delayed get_pages, add a new alloc flag to pin the ttm bo.
This makes sure that the resource has been allocated during buffer
creation, allowing it to fail with an error if the placement is
exhausted.
This allows existing fallback options for stolen backend allocation like
create_ring_vma to work as expected.
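
As a usage illustration (a hypothetical caller, not part of the patch),
a stolen allocation that must fail at creation time rather than at
get_pages time could look like:

	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_region(i915->mm.stolen_region, size, 0,
					    I915_BO_ALLOC_PINNED);
	if (IS_ERR(obj))
		return PTR_ERR(obj); /* placement exhausted: take the fallback */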

Signed-off-by: Robert Beckett 
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 13 ++
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   | 25 ++-
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 6632ed52e919..07bc11247a3e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -325,17 +325,20 @@ struct drm_i915_gem_object {
  * dealing with userspace objects the CPU fault handler is free to ignore this.
  */
 #define I915_BO_ALLOC_GPU_ONLY   BIT(6)
+/* object should be pinned in destination region from allocation */
+#define I915_BO_ALLOC_PINNED BIT(7)
 #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
 I915_BO_ALLOC_VOLATILE | \
 I915_BO_ALLOC_CPU_CLEAR | \
 I915_BO_ALLOC_USER | \
 I915_BO_ALLOC_PM_VOLATILE | \
 I915_BO_ALLOC_PM_EARLY | \
-I915_BO_ALLOC_GPU_ONLY)
-#define I915_BO_READONLY  BIT(7)
-#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */
-#define I915_BO_PROTECTED BIT(9)
-#define I915_BO_WAS_BOUND_BIT 10
+I915_BO_ALLOC_GPU_ONLY | \
+I915_BO_ALLOC_PINNED)
+#define I915_BO_READONLY  BIT(8)
+#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */
+#define I915_BO_PROTECTED BIT(10)
+#define I915_BO_WAS_BOUND_BIT 11
/**
 * @mem_flags - Mutable placement-related flags
 *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 27d59639177f..bb988608296d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -998,6 +998,13 @@ static void i915_ttm_delayed_free(struct 
drm_i915_gem_object *obj)
 {
GEM_BUG_ON(!obj->ttm.created);
 
+   /* stolen objects are pinned for lifetime. Unpin before putting */
+   if (obj->flags & I915_BO_ALLOC_PINNED) {
+   ttm_bo_reserve(i915_gem_to_ttm(obj), true, false, NULL);
+   ttm_bo_unpin(i915_gem_to_ttm(obj));
+   ttm_bo_unreserve(i915_gem_to_ttm(obj));
+   }
+
ttm_bo_put(i915_gem_to_ttm(obj));
 }
 
@@ -1193,6 +1200,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
.no_wait_gpu = false,
};
enum ttm_bo_type bo_type;
+   struct ttm_place _place;
+   struct ttm_placement _placement;
+   struct ttm_placement *placement;
int ret;
 
	drm_gem_private_object_init(&i915->drm, &obj->base, size);
@@ -1222,6 +1232,17 @@ int __i915_gem_ttm_object_init(struct 
intel_memory_region *mem,
 */
i915_gem_object_make_unshrinkable(obj);
 
+   if (obj->flags & I915_BO_ALLOC_PINNED) {
+   i915_ttm_place_from_region(mem, &_place, obj->bo_offset,
+  obj->base.size, obj->flags);
+   _placement.num_placement = 1;
+   _placement.placement = &_place;
+   _placement.num_busy_placement = 0;
+   _placement.busy_placement = NULL;
+   placement = &_placement;
+   } else {
+   placement = _sys_placement;
+   }
/*
 * If this function fails, it will call the destructor, but
 * our caller still owns the object. So no freeing in the
@@ -1230,7 +1251,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
 * until successful initialization.
 */
	ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size,
-				   bo_type, &i915_sys_placement,
+				   bo_type, placement,
				   page_size >> PAGE_SHIFT,
				   &ctx, NULL, NULL, i915_ttm_bo_destroy);
if (ret)
@@ -1242,6 +1263,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
i915_ttm_adjust_domains_after_move(obj);
i915_ttm_adjust_gem_after_move(obj);
obj->ttm.cache_level_override = false;
+   if (obj->flags & I915_BO_ALLOC_PINNED)
+   ttm_bo_pin(i915_gem_to_ttm(obj));
i915_gem_object_unlock(obj);
 
return 0;
-- 
2.25.1



[PATCH v9 07/11] drm/i915: ttm move/clear logic fix

2022-07-07 Thread Robert Beckett
ttm managed buffers start off with system resource definitions and ttm_tt
tracking structures allocated (though unpopulated).
Currently this prevents clearing of buffers on their first move to the
desired placements.

The desired behaviour is to clear user allocated buffers, and only those
kernel buffers that specifically request it.
Make the logic match the desired behaviour.
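
For quick reference, the decision the patch below implements can be
summarized as follows (my reading, not code taken from the patch):

/*
 * destination placement  buffer flags                  clear on move?
 * ---------------------  ----------------------------  --------------
 * I915_PL_STOLEN         any                           no
 * anything else          I915_BO_ALLOC_USER            yes
 * anything else          ttm page_flags ZERO_ALLOC     yes
 * anything else          otherwise                     no
 */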

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 22 +++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 81c67ca9edda..a3f8fc056dbc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -3,6 +3,7 @@
  * Copyright © 2021 Intel Corporation
  */
 
+#include "drm/ttm/ttm_tt.h"
 #include 
 
 #include "i915_deps.h"
@@ -476,6 +477,25 @@ __i915_ttm_move(struct ttm_buffer_object *bo,
return fence;
 }
 
+static bool
+allow_clear(struct drm_i915_gem_object *obj, struct ttm_tt *ttm, struct 
ttm_resource *dst_mem)
+{
+   /* never clear stolen */
+   if (dst_mem->mem_type == I915_PL_STOLEN)
+   return false;
+   /*
+* we want to clear user buffers and any kernel buffers
+* that specifically request clearing.
+*/
+   if (obj->flags & I915_BO_ALLOC_USER)
+   return true;
+
+   if (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)
+   return true;
+
+   return false;
+}
+
 /**
  * i915_ttm_move - The TTM move callback used by i915.
  * @bo: The buffer object.
@@ -526,7 +546,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
return PTR_ERR(dst_rsgt);
 
clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || 
!ttm_tt_is_populated(ttm));
-   if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
+   if (!clear || allow_clear(obj, ttm, dst_mem)) {
struct i915_deps deps;
 
		i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
-- 
2.25.1



[PATCH v9 05/11] drm/i915: instantiate ttm range manager for stolen memory

2022-07-07 Thread Robert Beckett
Prepare for the ttm based stolen region by using the ttm range manager
as the resource manager for the stolen region.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c |  6 ++--
 drivers/gpu/drm/i915/intel_region_ttm.c  | 31 +++-
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 40249fa28a7a..675e9ab30396 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -60,11 +60,13 @@ i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
 
/* There's some room for optimization here... */
-   GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
-  ttm_mem_type < I915_PL_LMEM0);
+   GEM_BUG_ON(ttm_mem_type == I915_PL_GGTT);
+
if (ttm_mem_type == I915_PL_SYSTEM)
return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
  0);
+   if (ttm_mem_type == I915_PL_STOLEN)
+   return i915->mm.stolen_region;
 
return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
  ttm_mem_type - I915_PL_LMEM0);
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index fd2ecfdd8fa1..694e9acb69e2 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -54,7 +54,7 @@ void intel_region_ttm_device_fini(struct drm_i915_private 
*dev_priv)
 
 /*
  * Map the i915 memory regions to TTM memory types. We use the
- * driver-private types for now, reserving TTM_PL_VRAM for stolen
+ * driver-private types for now, reserving I915_PL_STOLEN for stolen
  * memory and TTM_PL_TT for GGTT use if decided to implement this.
  */
 int intel_region_to_ttm_type(const struct intel_memory_region *mem)
@@ -63,11 +63,17 @@ int intel_region_to_ttm_type(const struct 
intel_memory_region *mem)
 
GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL &&
   mem->type != INTEL_MEMORY_MOCK &&
-  mem->type != INTEL_MEMORY_SYSTEM);
+  mem->type != INTEL_MEMORY_SYSTEM &&
+  mem->type != INTEL_MEMORY_STOLEN_SYSTEM &&
+  mem->type != INTEL_MEMORY_STOLEN_LOCAL);
 
if (mem->type == INTEL_MEMORY_SYSTEM)
return TTM_PL_SYSTEM;
 
+   if (mem->type == INTEL_MEMORY_STOLEN_SYSTEM ||
+   mem->type == INTEL_MEMORY_STOLEN_LOCAL)
+   return I915_PL_STOLEN;
+
type = mem->instance + TTM_PL_PRIV;
GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES);
 
@@ -91,10 +97,16 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
int mem_type = intel_region_to_ttm_type(mem);
int ret;
 
-   ret = i915_ttm_buddy_man_init(bdev, mem_type, false,
-				      resource_size(&mem->region),
- mem->io_size,
- mem->min_page_size, PAGE_SIZE);
+   if (mem_type == I915_PL_STOLEN) {
+   ret = ttm_range_man_init(bdev, mem_type, false,
+					 resource_size(&mem->region) >> PAGE_SHIFT);
+   mem->is_range_manager = true;
+   } else {
+   ret = i915_ttm_buddy_man_init(bdev, mem_type, false,
+					      resource_size(&mem->region),
+ mem->io_size,
+ mem->min_page_size, PAGE_SIZE);
+   }
if (ret)
return ret;
 
@@ -114,6 +126,7 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
 int intel_region_ttm_fini(struct intel_memory_region *mem)
 {
struct ttm_resource_manager *man = mem->region_private;
+   int mem_type = intel_region_to_ttm_type(mem);
int ret = -EBUSY;
int count;
 
@@ -144,8 +157,10 @@ int intel_region_ttm_fini(struct intel_memory_region *mem)
if (ret || !man)
return ret;
 
-	ret = i915_ttm_buddy_man_fini(&mem->i915->bdev,
- intel_region_to_ttm_type(mem));
+   if (mem_type == I915_PL_STOLEN)
+		ret = ttm_range_man_fini(&mem->i915->bdev, mem_type);
+   else
+		ret = i915_ttm_buddy_man_fini(&mem->i915->bdev, mem_type);
GEM_WARN_ON(ret);
mem->region_private = NULL;
 
-- 
2.25.1



[PATCH v9 03/11] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level

2022-07-07 Thread Robert Beckett
By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP.
This is divergent from existing backends code which only considers
HAS_LLC.
Testing shows that trusting snooping is unreliable on gen5- and on bsw via
ggtt mappings, so limit it to DGFX for now, maintaining previous behaviour.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 4c1de0b4a10f..40249fa28a7a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -46,7 +46,9 @@ static enum i915_cache_level
 i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
 struct ttm_tt *ttm)
 {
-   return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
+   bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915);
+
+   return ((HAS_LLC(i915) || can_snoop) &&
!i915_ttm_gtt_binds_lmem(res) &&
ttm->caching == ttm_cached) ? I915_CACHE_LLC :
I915_CACHE_NONE;
-- 
2.25.1



[PATCH v9 06/11] drm/i915: sanitize mem_flags for stolen buffers

2022-07-07 Thread Robert Beckett
Stolen regions are not page backed or considered iomem.
Prevent flags indicating such.
This correctly prevents attempts to directly map stolen buffers.

See i915_gem_object_has_struct_page() and i915_gem_object_has_iomem()
usage for where it would break otherwise.
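
For context, those guards reduce to checks on obj->mem_flags; roughly
(paraphrased, so treat the exact form as an assumption):

static bool has_struct_page(const struct drm_i915_gem_object *obj)
{
	return obj->mem_flags & I915_BO_FLAG_STRUCT_PAGE;
}

static bool has_iomem(const struct drm_i915_gem_object *obj)
{
	return obj->mem_flags & I915_BO_FLAG_IOMEM;
}

With this patch, stolen objects report false for both, so direct-mapping
paths skip them.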

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 675e9ab30396..81c67ca9edda 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -14,6 +14,7 @@
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_ttm.h"
 #include "gem/i915_gem_ttm_move.h"
+#include "gem/i915_gem_stolen.h"
 
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
@@ -124,8 +125,9 @@ void i915_ttm_adjust_gem_after_move(struct 
drm_i915_gem_object *obj)
 
obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
 
-   obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
-   I915_BO_FLAG_STRUCT_PAGE;
+   if (!i915_gem_object_is_stolen(obj))
+   obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
+   I915_BO_FLAG_STRUCT_PAGE;
 
if (!obj->ttm.cache_level_override) {
cache_level = i915_ttm_cache_level(to_i915(bo->base.dev),
-- 
2.25.1



[PATCH v9 04/11] drm/i915/gem: selftest should not attempt mmap of private regions

2022-07-07 Thread Robert Beckett
During testing, make can_mmap consider whether the region is private.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 5bc93a1ce3e3..76181e28c75e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -869,6 +869,9 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum 
i915_mmap_type type)
struct drm_i915_private *i915 = to_i915(obj->base.dev);
bool no_map;
 
+   if (obj->mm.region && obj->mm.region->private)
+   return false;
+
if (obj->ops->mmap_offset)
return type == I915_MMAP_TYPE_FIXED;
else if (type == I915_MMAP_TYPE_FIXED)
-- 
2.25.1



[PATCH v9 01/11] drm/i915/ttm: dont trample cache_level overrides during ttm move

2022-07-07 Thread Robert Beckett
Various places within the driver override the default chosen cache_level.
Before ttm, these overrides persisted until explicitly changed again, or
else for the lifetime of the buffer.

TTM movement code came along and decided that it could make that
decision at move time, usually well after object creation, so it
overrode the cache_level decision and reverted it back to its default.

Add logic to indicate whether the caching mode has been set by anything
other than the move logic. If so, assume that the code that overrode the
defaults knows best and keep it.
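
To illustrate the intended semantics (a hypothetical sequence, not taken
from the patch):

	/* creation computed a default; some caller then overrides it */
	i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
	/*
	 * obj->ttm.cache_level_override is now true, so a later
	 * i915_ttm_move() keeps I915_CACHE_NONE instead of recomputing
	 * the default via i915_ttm_cache_level().
	 */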

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c   | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 9 ++---
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 06b1b188ce5a..519887769c08 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct 
drm_i915_gem_object *obj,
struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
obj->cache_level = cache_level;
+   obj->ttm.cache_level_override = true;
 
if (cache_level != I915_CACHE_NONE)
obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 2c88bdb8ff7c..6632ed52e919 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -605,6 +605,7 @@ struct drm_i915_gem_object {
struct i915_gem_object_page_iter get_io_page;
struct drm_i915_gem_object *backup;
bool created:1;
+   bool cache_level_override:1;
} ttm;
 
/*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 4c25d9b2f138..27d59639177f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1241,6 +1241,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
i915_gem_object_init_memory_region(obj, mem);
i915_ttm_adjust_domains_after_move(obj);
i915_ttm_adjust_gem_after_move(obj);
+   obj->ttm.cache_level_override = false;
i915_gem_object_unlock(obj);
 
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index a10716f4e717..4c1de0b4a10f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -123,9 +123,12 @@ void i915_ttm_adjust_gem_after_move(struct 
drm_i915_gem_object *obj)
obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
I915_BO_FLAG_STRUCT_PAGE;
 
-   cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
-  bo->ttm);
-   i915_gem_object_set_cache_coherency(obj, cache_level);
+   if (!obj->ttm.cache_level_override) {
+   cache_level = i915_ttm_cache_level(to_i915(bo->base.dev),
+  bo->resource, bo->ttm);
+   i915_gem_object_set_cache_coherency(obj, cache_level);
+   obj->ttm.cache_level_override = false;
+   }
 }
 
 /**
-- 
2.25.1



[PATCH v9 02/11] drm/i915: limit ttm to dma32 for i965G[M]

2022-07-07 Thread Robert Beckett
i965G[M] cannot relocate objects above 4GiB.
Ensure ttm uses dma32 on these systems.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index 62ff77445b01..fd2ecfdd8fa1 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -32,10 +32,15 @@
 int intel_region_ttm_device_init(struct drm_i915_private *dev_priv)
 {
struct drm_device *drm = _priv->drm;
+   bool use_dma32 = false;
+
+   /* i965g[m] cannot relocate objects above 4GiB. */
+   if (IS_I965GM(dev_priv) || IS_I965G(dev_priv))
+   use_dma32 = true;
 
return ttm_device_init(_priv->bdev, i915_ttm_driver(),
   drm->dev, drm->anon_inode->i_mapping,
-  drm->vma_offset_manager, false, false);
+  drm->vma_offset_manager, false, use_dma32);
 }
 
 /**
-- 
2.25.1



[PATCH v9 00/11] drm/i915: ttm for stolen

2022-07-07 Thread Robert Beckett
This series refactors i915's stolen memory region to use ttm.

v2: handle disabled stolen similar to legacy version.
relying on ttm to fail allocs works fine, but is dmesg noisy and causes
dmesg warning regressions in testing.

v3: rebase to latest drm-tip.
fix v2 code refactor which could leave a buffer pinned.
locally passes fftl again now.

v4: - Allow memory region creators to do allocation. Allows the stolen
  region to track its own reservations.
- Pre-reserve first page of stolen mem (add back 
WaSkipStolenMemoryFirstPage:bdw+)
- Improve commit description for "drm/i915: sanitize mem_flags for
  stolen buffers"
- replace i915_gem_object_pin_pages_unlocked() call with manual locking 
and pinning.
  this avoids ww ctx class reuse during context creation -> ring vma 
obj alloc.

v5: - detect both types of stolen as stolen buffers in
  "drm/i915: sanitize mem_flags for stolen buffers"
- in stolen_object_init limit page size to mem region minimum.
  The range allocator expects the page_size to define the
  alignment

v6: - Share first 4 patches from ttm for internal series as generic
  i915 ttm fixes
- Drop patch 4 from v5. We don't need separate object ops just
  to satisfy test interfaces. The tests have now been fixed via
  checking whether the memory region is private to decide
  whether to mmap
- Add new buffer pin alloc flag to allow creation of buffers in
  their final ttm placement instead of deferring until
  get_pages. This fixes legacy fallback paths for buffer
  allocations during stolen memory pressure.

v7: - fix mock_region_get_pages() to correctly handle I915_BO_INVALID_OFFSET

v8: - Reserve I915_GEM_STOLEN_BIAS area from stolen

v9: - drop patch 8 "drm/i915: allow memory region creators to alloc and 
free the region"
  store bias reservation in drm_i915_private instead.
- Restrict reset selftest to only test !GuC engines.
  Resetting individual GuC engines from host is not supported
- Wait for outstanding requests in reset selftest
  This prevents the previous engine test's context cleanup appearing
  as a false positive in the stolen corruption check

Robert Beckett (11):
  drm/i915/ttm: dont trample cache_level overrides during ttm move
  drm/i915: limit ttm to dma32 for i965G[M]
  drm/i915/ttm: only trust snooping for dgfx when deciding default
cache_level
  drm/i915/gem: selftest should not attempt mmap of private regions
  drm/i915: instantiate ttm range manager for stolen memory
  drm/i915: sanitize mem_flags for stolen buffers
  drm/i915: ttm move/clear logic fix
  drm/i915/ttm: add buffer pin on alloc flag
  drm/i915/selftest: don't attempt engine reset of guc submission
engines
  drm/i915/selftest: wait for requests during engine reset selftest
  drm/i915: stolen memory use ttm backend

 drivers/gpu/drm/i915/display/intel_fbc.c  |  78 ++--
 drivers/gpu/drm/i915/gem/i915_gem_object.c|   1 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  16 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 425 ++
 drivers/gpu/drm/i915/gem/i915_gem_stolen.h|  21 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |  29 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h   |   7 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  47 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|   3 +
 drivers/gpu/drm/i915/gt/intel_rc6.c   |   4 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  42 +-
 drivers/gpu/drm/i915/i915_debugfs.c   |   7 +-
 drivers/gpu/drm/i915/i915_drv.h   |   6 +-
 drivers/gpu/drm/i915/intel_region_ttm.c   |  80 +++-
 drivers/gpu/drm/i915/intel_region_ttm.h   |   8 +-
 drivers/gpu/drm/i915/selftests/mock_region.c  |  12 +-
 16 files changed, 411 insertions(+), 375 deletions(-)

-- 
2.25.1



[PATCH v8 10/10] drm/i915: stolen memory use ttm backend

2022-06-21 Thread Robert Beckett
Refactor the stolen memory region to use ttm.
This necessitates using ttm resources to track reserved stolen regions
instead of drm_mm_nodes.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/display/intel_fbc.c  |  78 ++--
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 441 +++---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.h|  21 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |   3 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h   |   7 +
 drivers/gpu/drm/i915/gt/intel_rc6.c   |   4 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  16 +-
 drivers/gpu/drm/i915/i915_debugfs.c   |   7 +-
 drivers/gpu/drm/i915/i915_drv.h   |   5 -
 drivers/gpu/drm/i915/intel_region_ttm.c   |  42 +-
 drivers/gpu/drm/i915/intel_region_ttm.h   |   8 +-
 drivers/gpu/drm/i915/selftests/mock_region.c  |  12 +-
 13 files changed, 304 insertions(+), 342 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c 
b/drivers/gpu/drm/i915/display/intel_fbc.c
index 8b807284cde1..6f3afac5e8c9 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -38,6 +38,7 @@
  * forcibly disable it to allow proper screen updates.
  */
 
+#include "gem/i915_gem_stolen.h"
 #include 
 
 #include 
@@ -51,6 +52,7 @@
 #include "intel_display_types.h"
 #include "intel_fbc.h"
 #include "intel_frontbuffer.h"
+#include "gem/i915_gem_region.h"
 
 #define for_each_fbc_id(__dev_priv, __fbc_id) \
for ((__fbc_id) = INTEL_FBC_A; (__fbc_id) < I915_MAX_FBCS; 
(__fbc_id)++) \
@@ -92,8 +94,8 @@ struct intel_fbc {
struct mutex lock;
unsigned int busy_bits;
 
-   struct drm_mm_node compressed_fb;
-   struct drm_mm_node compressed_llb;
+   struct ttm_resource *compressed_fb;
+   struct ttm_resource *compressed_llb;
 
enum intel_fbc_id id;
 
@@ -331,16 +333,20 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc)
 static void i8xx_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
+   u64 llb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_llb);
 
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   GEM_BUG_ON(llb_offset == I915_BO_INVALID_OFFSET);
GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start,
-fbc->compressed_fb.start, U32_MAX));
+fb_offset, U32_MAX));
GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start,
-fbc->compressed_llb.start, U32_MAX));
+llb_offset, U32_MAX));
 
intel_de_write(i915, FBC_CFB_BASE,
-  i915->dsm.start + fbc->compressed_fb.start);
+  i915->dsm.start + fb_offset);
intel_de_write(i915, FBC_LL_BASE,
-  i915->dsm.start + fbc->compressed_llb.start);
+  i915->dsm.start + llb_offset);
 }
 
 static const struct intel_fbc_funcs i8xx_fbc_funcs = {
@@ -448,8 +454,10 @@ static bool g4x_fbc_is_compressing(struct intel_fbc *fbc)
 static void g4x_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
 
-   intel_de_write(i915, DPFC_CB_BASE, fbc->compressed_fb.start);
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   intel_de_write(i915, DPFC_CB_BASE, fb_offset);
 }
 
 static const struct intel_fbc_funcs g4x_fbc_funcs = {
@@ -499,8 +507,10 @@ static bool ilk_fbc_is_compressing(struct intel_fbc *fbc)
 static void ilk_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
 
-   intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), 
fbc->compressed_fb.start);
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fb_offset);
 }
 
 static const struct intel_fbc_funcs ilk_fbc_funcs = {
@@ -744,21 +754,24 @@ static int find_compression_limit(struct intel_fbc *fbc,
 {
struct drm_i915_private *i915 = fbc->i915;
u64 end = intel_fbc_stolen_end(i915);
-   int ret, limit = min_limit;
+   int limit = min_limit;
+   struct ttm_resource *res;
 
size /= limit;
 
/* Try to over-allocate to reduce reallocations and fragmentation. */
-	ret = i915_gem_stolen_insert_node_in_range(i915, &fbc->compressed_fb,
-  size <<= 1, 4096, 0, end);
-   if (ret == 0)
+   res = i915_gem_stolen_reserve_range(i915, size <<= 1, 0, end)

[PATCH v8 09/10] drm/i915/ttm: add buffer pin on alloc flag

2022-06-21 Thread Robert Beckett
For situations where allocations need to fail on alloc instead of
delayed get_pages, add a new alloc flag to pin the ttm bo.
This makes sure that the resource has been allocated during buffer
creation, allowing it to fail with an error if the placement is
exhausted.
This allows existing fallback options for stolen backend allocation like
create_ring_vma to work as expected.

Signed-off-by: Robert Beckett 
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 13 ++
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   | 25 ++-
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 6632ed52e919..07bc11247a3e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -325,17 +325,20 @@ struct drm_i915_gem_object {
  * dealing with userspace objects the CPU fault handler is free to ignore this.
  */
 #define I915_BO_ALLOC_GPU_ONLY   BIT(6)
+/* object should be pinned in destination region from allocation */
+#define I915_BO_ALLOC_PINNED BIT(7)
 #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
 I915_BO_ALLOC_VOLATILE | \
 I915_BO_ALLOC_CPU_CLEAR | \
 I915_BO_ALLOC_USER | \
 I915_BO_ALLOC_PM_VOLATILE | \
 I915_BO_ALLOC_PM_EARLY | \
-I915_BO_ALLOC_GPU_ONLY)
-#define I915_BO_READONLY  BIT(7)
-#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */
-#define I915_BO_PROTECTED BIT(9)
-#define I915_BO_WAS_BOUND_BIT 10
+I915_BO_ALLOC_GPU_ONLY | \
+I915_BO_ALLOC_PINNED)
+#define I915_BO_READONLY  BIT(8)
+#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */
+#define I915_BO_PROTECTED BIT(10)
+#define I915_BO_WAS_BOUND_BIT 11
/**
 * @mem_flags - Mutable placement-related flags
 *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 27d59639177f..bb988608296d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -998,6 +998,13 @@ static void i915_ttm_delayed_free(struct 
drm_i915_gem_object *obj)
 {
GEM_BUG_ON(!obj->ttm.created);
 
+   /* stolen objects are pinned for lifetime. Unpin before putting */
+   if (obj->flags & I915_BO_ALLOC_PINNED) {
+   ttm_bo_reserve(i915_gem_to_ttm(obj), true, false, NULL);
+   ttm_bo_unpin(i915_gem_to_ttm(obj));
+   ttm_bo_unreserve(i915_gem_to_ttm(obj));
+   }
+
ttm_bo_put(i915_gem_to_ttm(obj));
 }
 
@@ -1193,6 +1200,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
.no_wait_gpu = false,
};
enum ttm_bo_type bo_type;
+   struct ttm_place _place;
+   struct ttm_placement _placement;
+   struct ttm_placement *placement;
int ret;
 
	drm_gem_private_object_init(&i915->drm, &obj->base, size);
@@ -1222,6 +1232,17 @@ int __i915_gem_ttm_object_init(struct 
intel_memory_region *mem,
 */
i915_gem_object_make_unshrinkable(obj);
 
+   if (obj->flags & I915_BO_ALLOC_PINNED) {
+   i915_ttm_place_from_region(mem, &_place, obj->bo_offset,
+  obj->base.size, obj->flags);
+   _placement.num_placement = 1;
+   _placement.placement = &_place;
+   _placement.num_busy_placement = 0;
+   _placement.busy_placement = NULL;
+   placement = &_placement;
+   } else {
+   placement = _sys_placement;
+   }
/*
 * If this function fails, it will call the destructor, but
 * our caller still owns the object. So no freeing in the
@@ -1230,7 +1251,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
 * until successful initialization.
 */
	ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size,
-				   bo_type, &i915_sys_placement,
+				   bo_type, placement,
				   page_size >> PAGE_SHIFT,
				   &ctx, NULL, NULL, i915_ttm_bo_destroy);
if (ret)
@@ -1242,6 +1263,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
i915_ttm_adjust_domains_after_move(obj);
i915_ttm_adjust_gem_after_move(obj);
obj->ttm.cache_level_override = false;
+   if (obj->flags & I915_BO_ALLOC_PINNED)
+   ttm_bo_pin(i915_gem_to_ttm(obj));
i915_gem_object_unlock(obj);
 
return 0;
-- 
2.25.1



[PATCH v8 06/10] drm/i915: sanitize mem_flags for stolen buffers

2022-06-21 Thread Robert Beckett
Stolen regions are not page backed or considered iomem.
Prevent flags indicating such.
This correctly prevents attempts to directly map stolen buffers.

See i915_gem_object_has_struct_page() and i915_gem_object_has_iomem()
usage for where it would break otherwise.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 675e9ab30396..81c67ca9edda 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -14,6 +14,7 @@
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_ttm.h"
 #include "gem/i915_gem_ttm_move.h"
+#include "gem/i915_gem_stolen.h"
 
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
@@ -124,8 +125,9 @@ void i915_ttm_adjust_gem_after_move(struct 
drm_i915_gem_object *obj)
 
obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
 
-   obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
-   I915_BO_FLAG_STRUCT_PAGE;
+   if (!i915_gem_object_is_stolen(obj))
+   obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
+   I915_BO_FLAG_STRUCT_PAGE;
 
if (!obj->ttm.cache_level_override) {
cache_level = i915_ttm_cache_level(to_i915(bo->base.dev),
-- 
2.25.1



[PATCH v8 07/10] drm/i915: ttm move/clear logic fix

2022-06-21 Thread Robert Beckett
ttm managed buffers start off with system resource definitions and ttm_tt
tracking structures allocated (though unpopulated).
Currently this prevents clearing of buffers on their first move to the
desired placements.

The desired behaviour is to clear user allocated buffers, and only those
kernel buffers that specifically request it.
Make the logic match the desired behaviour.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 22 +++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 81c67ca9edda..a3f8fc056dbc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -3,6 +3,7 @@
  * Copyright © 2021 Intel Corporation
  */
 
+#include "drm/ttm/ttm_tt.h"
 #include 
 
 #include "i915_deps.h"
@@ -476,6 +477,25 @@ __i915_ttm_move(struct ttm_buffer_object *bo,
return fence;
 }
 
+static bool
+allow_clear(struct drm_i915_gem_object *obj, struct ttm_tt *ttm, struct 
ttm_resource *dst_mem)
+{
+   /* never clear stolen */
+   if (dst_mem->mem_type == I915_PL_STOLEN)
+   return false;
+   /*
+* we want to clear user buffers and any kernel buffers
+* that specifically request clearing.
+*/
+   if (obj->flags & I915_BO_ALLOC_USER)
+   return true;
+
+   if (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)
+   return true;
+
+   return false;
+}
+
 /**
  * i915_ttm_move - The TTM move callback used by i915.
  * @bo: The buffer object.
@@ -526,7 +546,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
return PTR_ERR(dst_rsgt);
 
clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || 
!ttm_tt_is_populated(ttm));
-   if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
+   if (!clear || allow_clear(obj, ttm, dst_mem)) {
struct i915_deps deps;
 
		i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
-- 
2.25.1



[PATCH v8 08/10] drm/i915: allow memory region creators to alloc and free the region

2022-06-21 Thread Robert Beckett
Add callbacks for alloc and free.
This allows region creators to allocate any extra storage they may
require.
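
For illustration, a creator embedding the region in a larger private
struct via these callbacks might look like this (hypothetical type and
names):

struct my_region {
	struct intel_memory_region region; /* must be the first member */
	void *extra_storage;
};

static struct intel_memory_region *my_region_alloc(void)
{
	struct my_region *r = kzalloc(sizeof(*r), GFP_KERNEL);

	return r ? &r->region : NULL;
}

static void my_region_free(struct intel_memory_region *mem)
{
	kfree(container_of(mem, struct my_region, region));
}

static const struct intel_memory_region_ops my_region_ops = {
	/* .init, .release, .init_object as usual, plus: */
	.alloc = my_region_alloc,
	.free = my_region_free,
};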

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/intel_memory_region.c | 16 +---
 drivers/gpu/drm/i915/intel_memory_region.h |  2 ++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_memory_region.c 
b/drivers/gpu/drm/i915/intel_memory_region.c
index e38d2db1c3e3..3da07a712f90 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -231,7 +231,10 @@ intel_memory_region_create(struct drm_i915_private *i915,
struct intel_memory_region *mem;
int err;
 
-   mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+   if (ops->alloc)
+   mem = ops->alloc();
+   else
+   mem = kzalloc(sizeof(*mem), GFP_KERNEL);
if (!mem)
return ERR_PTR(-ENOMEM);
 
@@ -265,7 +268,10 @@ intel_memory_region_create(struct drm_i915_private *i915,
if (mem->ops->release)
mem->ops->release(mem);
 err_free:
-   kfree(mem);
+   if (mem->ops->free)
+   mem->ops->free(mem);
+   else
+   kfree(mem);
return ERR_PTR(err);
 }
 
@@ -288,7 +294,11 @@ void intel_memory_region_destroy(struct 
intel_memory_region *mem)
 
	GEM_WARN_ON(!list_empty_careful(&mem->objects.list));
	mutex_destroy(&mem->objects.lock);
-   if (!ret)
+   if (ret)
+   return;
+   if (mem->ops->free)
+   mem->ops->free(mem);
+   else
kfree(mem);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h 
b/drivers/gpu/drm/i915/intel_memory_region.h
index 3d8378c1b447..048955b5429f 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.h
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -61,6 +61,8 @@ struct intel_memory_region_ops {
   resource_size_t size,
   resource_size_t page_size,
   unsigned int flags);
+   struct intel_memory_region *(*alloc)(void);
+   void (*free)(struct intel_memory_region *mem);
 };
 
 struct intel_memory_region {
-- 
2.25.1



[PATCH v8 03/10] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level

2022-06-21 Thread Robert Beckett
By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP.
This is divergent from existing backends code which only considers
HAS_LLC.
Testing shows that trusting snooping is unreliable on gen5- and on bsw via
ggtt mappings, so limit it to DGFX for now, maintaining previous behaviour.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 4c1de0b4a10f..40249fa28a7a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -46,7 +46,9 @@ static enum i915_cache_level
 i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
 struct ttm_tt *ttm)
 {
-   return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
+   bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915);
+
+   return ((HAS_LLC(i915) || can_snoop) &&
!i915_ttm_gtt_binds_lmem(res) &&
ttm->caching == ttm_cached) ? I915_CACHE_LLC :
I915_CACHE_NONE;
-- 
2.25.1



[PATCH v8 05/10] drm/i915: instantiate ttm range manager for stolen memory

2022-06-21 Thread Robert Beckett
Prepare for the ttm based stolen region by using the ttm range manager
as the resource manager for the stolen region.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c |  6 ++--
 drivers/gpu/drm/i915/intel_region_ttm.c  | 31 +++-
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 40249fa28a7a..675e9ab30396 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -60,11 +60,13 @@ i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
 
/* There's some room for optimization here... */
-   GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
-  ttm_mem_type < I915_PL_LMEM0);
+   GEM_BUG_ON(ttm_mem_type == I915_PL_GGTT);
+
if (ttm_mem_type == I915_PL_SYSTEM)
return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
  0);
+   if (ttm_mem_type == I915_PL_STOLEN)
+   return i915->mm.stolen_region;
 
return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
  ttm_mem_type - I915_PL_LMEM0);
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index fd2ecfdd8fa1..694e9acb69e2 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -54,7 +54,7 @@ void intel_region_ttm_device_fini(struct drm_i915_private 
*dev_priv)
 
 /*
  * Map the i915 memory regions to TTM memory types. We use the
- * driver-private types for now, reserving TTM_PL_VRAM for stolen
+ * driver-private types for now, reserving I915_PL_STOLEN for stolen
  * memory and TTM_PL_TT for GGTT use if decided to implement this.
  */
 int intel_region_to_ttm_type(const struct intel_memory_region *mem)
@@ -63,11 +63,17 @@ int intel_region_to_ttm_type(const struct 
intel_memory_region *mem)
 
GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL &&
   mem->type != INTEL_MEMORY_MOCK &&
-  mem->type != INTEL_MEMORY_SYSTEM);
+  mem->type != INTEL_MEMORY_SYSTEM &&
+  mem->type != INTEL_MEMORY_STOLEN_SYSTEM &&
+  mem->type != INTEL_MEMORY_STOLEN_LOCAL);
 
if (mem->type == INTEL_MEMORY_SYSTEM)
return TTM_PL_SYSTEM;
 
+   if (mem->type == INTEL_MEMORY_STOLEN_SYSTEM ||
+   mem->type == INTEL_MEMORY_STOLEN_LOCAL)
+   return I915_PL_STOLEN;
+
type = mem->instance + TTM_PL_PRIV;
GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES);
 
@@ -91,10 +97,16 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
int mem_type = intel_region_to_ttm_type(mem);
int ret;
 
-   ret = i915_ttm_buddy_man_init(bdev, mem_type, false,
-				      resource_size(&mem->region),
- mem->io_size,
- mem->min_page_size, PAGE_SIZE);
+   if (mem_type == I915_PL_STOLEN) {
+   ret = ttm_range_man_init(bdev, mem_type, false,
+					 resource_size(&mem->region) >> PAGE_SHIFT);
+   mem->is_range_manager = true;
+   } else {
+   ret = i915_ttm_buddy_man_init(bdev, mem_type, false,
+					      resource_size(&mem->region),
+ mem->io_size,
+ mem->min_page_size, PAGE_SIZE);
+   }
if (ret)
return ret;
 
@@ -114,6 +126,7 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
 int intel_region_ttm_fini(struct intel_memory_region *mem)
 {
struct ttm_resource_manager *man = mem->region_private;
+   int mem_type = intel_region_to_ttm_type(mem);
int ret = -EBUSY;
int count;
 
@@ -144,8 +157,10 @@ int intel_region_ttm_fini(struct intel_memory_region *mem)
if (ret || !man)
return ret;
 
-	ret = i915_ttm_buddy_man_fini(&mem->i915->bdev,
- intel_region_to_ttm_type(mem));
+   if (mem_type == I915_PL_STOLEN)
+		ret = ttm_range_man_fini(&mem->i915->bdev, mem_type);
+   else
+		ret = i915_ttm_buddy_man_fini(&mem->i915->bdev, mem_type);
GEM_WARN_ON(ret);
mem->region_private = NULL;
 
-- 
2.25.1



[PATCH v8 04/10] drm/i915/gem: selftest should not attempt mmap of private regions

2022-06-21 Thread Robert Beckett
During testing, make can_mmap consider whether the region is private.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 5bc93a1ce3e3..76181e28c75e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -869,6 +869,9 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum 
i915_mmap_type type)
struct drm_i915_private *i915 = to_i915(obj->base.dev);
bool no_map;
 
+   if (obj->mm.region && obj->mm.region->private)
+   return false;
+
if (obj->ops->mmap_offset)
return type == I915_MMAP_TYPE_FIXED;
else if (type == I915_MMAP_TYPE_FIXED)
-- 
2.25.1



[PATCH v8 02/10] drm/i915: limit ttm to dma32 for i965G[M]

2022-06-21 Thread Robert Beckett
i965G[M] cannot relocate objects above 4GiB.
Ensure ttm uses dma32 on these systems.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index 62ff77445b01..fd2ecfdd8fa1 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -32,10 +32,15 @@
 int intel_region_ttm_device_init(struct drm_i915_private *dev_priv)
 {
	struct drm_device *drm = &dev_priv->drm;
+   bool use_dma32 = false;
+
+   /* i965g[m] cannot relocate objects above 4GiB. */
+   if (IS_I965GM(dev_priv) || IS_I965G(dev_priv))
+   use_dma32 = true;
 
	return ttm_device_init(&dev_priv->bdev, i915_ttm_driver(),
   drm->dev, drm->anon_inode->i_mapping,
-  drm->vma_offset_manager, false, false);
+  drm->vma_offset_manager, false, use_dma32);
 }
 
 /**
-- 
2.25.1



[PATCH v8 01/10] drm/i915/ttm: dont trample cache_level overrides during ttm move

2022-06-21 Thread Robert Beckett
Various places within the driver override the default chosen cache_level.
Before ttm, these overrides persisted until explicitly changed again, or
else for the lifetime of the buffer.

TTM movement code came along and decided that it could make that
decision at move time, usually well after object creation, so it
overrode the cache_level decision and reverted it back to its default.

Add logic to indicate whether the caching mode has been set by anything
other than the move logic. If so, assume that the code that overrode the
defaults knows best and keep it.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c   | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 9 ++---
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 06b1b188ce5a..519887769c08 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct 
drm_i915_gem_object *obj,
struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
obj->cache_level = cache_level;
+   obj->ttm.cache_level_override = true;
 
if (cache_level != I915_CACHE_NONE)
obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 2c88bdb8ff7c..6632ed52e919 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -605,6 +605,7 @@ struct drm_i915_gem_object {
struct i915_gem_object_page_iter get_io_page;
struct drm_i915_gem_object *backup;
bool created:1;
+   bool cache_level_override:1;
} ttm;
 
/*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 4c25d9b2f138..27d59639177f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1241,6 +1241,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
i915_gem_object_init_memory_region(obj, mem);
i915_ttm_adjust_domains_after_move(obj);
i915_ttm_adjust_gem_after_move(obj);
+   obj->ttm.cache_level_override = false;
i915_gem_object_unlock(obj);
 
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index a10716f4e717..4c1de0b4a10f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -123,9 +123,12 @@ void i915_ttm_adjust_gem_after_move(struct 
drm_i915_gem_object *obj)
obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
I915_BO_FLAG_STRUCT_PAGE;
 
-   cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
-  bo->ttm);
-   i915_gem_object_set_cache_coherency(obj, cache_level);
+   if (!obj->ttm.cache_level_override) {
+   cache_level = i915_ttm_cache_level(to_i915(bo->base.dev),
+  bo->resource, bo->ttm);
+   i915_gem_object_set_cache_coherency(obj, cache_level);
+   obj->ttm.cache_level_override = false;
+   }
 }
 
 /**
-- 
2.25.1



[PATCH v8 00/10] drm/i915: ttm for stolen

2022-06-21 Thread Robert Beckett
This series refactors i915's stolen memory region to use ttm.

v2: handle disabled stolen similar to legacy version.
relying on ttm to fail allocs works fine, but is dmesg noisy and causes
dmesg warning regressions in testing.

v3: rebase to latest drm-tip.
fix v2 code refactor which could leave a buffer pinned.
locally passes fftl again now.

v4: - Allow memory region creators to do allocation. Allows the stolen
  region to track its own reservations.
- Pre-reserve first page of stolen mem (add back 
WaSkipStolenMemoryFirstPage:bdw+)
- Improve commit descritpion for "drm/i915: sanitize mem_flags for 
stolen buffers"
- replace i915_gem_object_pin_pages_unlocked() call with manual locking 
and pinning.
  this avoids ww ctx class reuse during context creation -> ring vma 
obj alloc.

v5: - detect both types of stolen as stolen buffers in
  "drm/i915: sanitize mem_flags for stolen buffers"
- in stolen_object_init limit page size to mem region minimum.
  The range allocator expects the page_size to define the
  alignment

v6: - Share first 4 patches from ttm for internal series as generic
  i915 ttm fixes
- Drop patch 4 from v5. We don't need separate object ops just
  to satisfy test interfaces. The tests have now been fixed via
  checking whether the memory region is private to decide
  whether to mmap
- Add new buffer pin alloc flag to allow creation of buffers in
  their final ttm placement instead of deferring until
  get_pages. This fixes legacy fallback paths for buffer
  allocations during stolen memory pressure.

v7: - fix mock_region_get_pages() to correctly handle I915_BO_INVALID_OFFSET

v8: - Reserve I915_GEM_STOLEN_BIAS area from stolen

Robert Beckett (10):
  drm/i915/ttm: don't trample cache_level overrides during ttm move
  drm/i915: limit ttm to dma32 for i965G[M]
  drm/i915/ttm: only trust snooping for dgfx when deciding default
cache_level
  drm/i915/gem: selftest should not attempt mmap of private regions
  drm/i915: instantiate ttm range manager for stolen memory
  drm/i915: sanitize mem_flags for stolen buffers
  drm/i915: ttm move/clear logic fix
  drm/i915: allow memory region creators to alloc and free the region
  drm/i915/ttm: add buffer pin on alloc flag
  drm/i915: stolen memory use ttm backend

 drivers/gpu/drm/i915/display/intel_fbc.c  |  78 ++--
 drivers/gpu/drm/i915/gem/i915_gem_object.c|   1 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  16 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 441 +++---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.h|  21 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |  29 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h   |   7 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  47 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|   3 +
 drivers/gpu/drm/i915/gt/intel_rc6.c   |   4 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  16 +-
 drivers/gpu/drm/i915/i915_debugfs.c   |   7 +-
 drivers/gpu/drm/i915/i915_drv.h   |   5 -
 drivers/gpu/drm/i915/intel_memory_region.c|  16 +-
 drivers/gpu/drm/i915/intel_memory_region.h|   2 +
 drivers/gpu/drm/i915/intel_region_ttm.c   |  80 +++-
 drivers/gpu/drm/i915/intel_region_ttm.h   |   8 +-
 drivers/gpu/drm/i915/selftests/mock_region.c  |  12 +-
 18 files changed, 424 insertions(+), 369 deletions(-)

-- 
2.25.1



[PATCH v7 10/10] drm/i915: stolen memory use ttm backend

2022-06-20 Thread Robert Beckett
refactor stolen memory region to use ttm.
this necessitates using ttm resources to track reserved stolen regions
instead of drm_mm_nodes.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/display/intel_fbc.c  |  78 ++--
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 440 +++---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.h|  21 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |   3 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h   |   7 +
 drivers/gpu/drm/i915/gt/intel_rc6.c   |   4 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  16 +-
 drivers/gpu/drm/i915/i915_debugfs.c   |   7 +-
 drivers/gpu/drm/i915/i915_drv.h   |   5 -
 drivers/gpu/drm/i915/intel_region_ttm.c   |  42 +-
 drivers/gpu/drm/i915/intel_region_ttm.h   |   8 +-
 drivers/gpu/drm/i915/selftests/mock_region.c  |  12 +-
 13 files changed, 303 insertions(+), 342 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c 
b/drivers/gpu/drm/i915/display/intel_fbc.c
index 8b807284cde1..6f3afac5e8c9 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -38,6 +38,7 @@
  * forcibly disable it to allow proper screen updates.
  */
 
+#include "gem/i915_gem_stolen.h"
 #include 
 
 #include 
@@ -51,6 +52,7 @@
 #include "intel_display_types.h"
 #include "intel_fbc.h"
 #include "intel_frontbuffer.h"
+#include "gem/i915_gem_region.h"
 
 #define for_each_fbc_id(__dev_priv, __fbc_id) \
for ((__fbc_id) = INTEL_FBC_A; (__fbc_id) < I915_MAX_FBCS; 
(__fbc_id)++) \
@@ -92,8 +94,8 @@ struct intel_fbc {
struct mutex lock;
unsigned int busy_bits;
 
-   struct drm_mm_node compressed_fb;
-   struct drm_mm_node compressed_llb;
+   struct ttm_resource *compressed_fb;
+   struct ttm_resource *compressed_llb;
 
enum intel_fbc_id id;
 
@@ -331,16 +333,20 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc)
 static void i8xx_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
+   u64 llb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_llb);
 
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   GEM_BUG_ON(llb_offset == I915_BO_INVALID_OFFSET);
GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start,
-fbc->compressed_fb.start, U32_MAX));
+fb_offset, U32_MAX));
GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start,
-fbc->compressed_llb.start, U32_MAX));
+llb_offset, U32_MAX));
 
intel_de_write(i915, FBC_CFB_BASE,
-  i915->dsm.start + fbc->compressed_fb.start);
+  i915->dsm.start + fb_offset);
intel_de_write(i915, FBC_LL_BASE,
-  i915->dsm.start + fbc->compressed_llb.start);
+  i915->dsm.start + llb_offset);
 }
 
 static const struct intel_fbc_funcs i8xx_fbc_funcs = {
@@ -448,8 +454,10 @@ static bool g4x_fbc_is_compressing(struct intel_fbc *fbc)
 static void g4x_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
 
-   intel_de_write(i915, DPFC_CB_BASE, fbc->compressed_fb.start);
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   intel_de_write(i915, DPFC_CB_BASE, fb_offset);
 }
 
 static const struct intel_fbc_funcs g4x_fbc_funcs = {
@@ -499,8 +507,10 @@ static bool ilk_fbc_is_compressing(struct intel_fbc *fbc)
 static void ilk_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
 
-   intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), 
fbc->compressed_fb.start);
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fb_offset);
 }
 
 static const struct intel_fbc_funcs ilk_fbc_funcs = {
@@ -744,21 +754,24 @@ static int find_compression_limit(struct intel_fbc *fbc,
 {
struct drm_i915_private *i915 = fbc->i915;
u64 end = intel_fbc_stolen_end(i915);
-   int ret, limit = min_limit;
+   int limit = min_limit;
+   struct ttm_resource *res;
 
size /= limit;
 
/* Try to over-allocate to reduce reallocations and fragmentation. */
-   ret = i915_gem_stolen_insert_node_in_range(i915, &fbc->compressed_fb,
-  size <<= 1, 4096, 0, end);
-   if (ret == 0)
+   res = i915_gem_stolen_reserve_range(i915, size <<= 1, 0, end)

[PATCH v7 05/10] drm/i915: instantiate ttm range manager for stolen memory

2022-06-20 Thread Robert Beckett
prepare for ttm based stolen region by using ttm range manager
as the resource manager for stolen region.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c |  6 ++--
 drivers/gpu/drm/i915/intel_region_ttm.c  | 31 +++-
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 40249fa28a7a..675e9ab30396 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -60,11 +60,13 @@ i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
 
/* There's some room for optimization here... */
-   GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
-  ttm_mem_type < I915_PL_LMEM0);
+   GEM_BUG_ON(ttm_mem_type == I915_PL_GGTT);
+
if (ttm_mem_type == I915_PL_SYSTEM)
return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
  0);
+   if (ttm_mem_type == I915_PL_STOLEN)
+   return i915->mm.stolen_region;
 
return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
  ttm_mem_type - I915_PL_LMEM0);
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index fd2ecfdd8fa1..694e9acb69e2 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -54,7 +54,7 @@ void intel_region_ttm_device_fini(struct drm_i915_private 
*dev_priv)
 
 /*
  * Map the i915 memory regions to TTM memory types. We use the
- * driver-private types for now, reserving TTM_PL_VRAM for stolen
+ * driver-private types for now, reserving I915_PL_STOLEN for stolen
  * memory and TTM_PL_TT for GGTT use if decided to implement this.
  */
 int intel_region_to_ttm_type(const struct intel_memory_region *mem)
@@ -63,11 +63,17 @@ int intel_region_to_ttm_type(const struct 
intel_memory_region *mem)
 
GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL &&
   mem->type != INTEL_MEMORY_MOCK &&
-  mem->type != INTEL_MEMORY_SYSTEM);
+  mem->type != INTEL_MEMORY_SYSTEM &&
+  mem->type != INTEL_MEMORY_STOLEN_SYSTEM &&
+  mem->type != INTEL_MEMORY_STOLEN_LOCAL);
 
if (mem->type == INTEL_MEMORY_SYSTEM)
return TTM_PL_SYSTEM;
 
+   if (mem->type == INTEL_MEMORY_STOLEN_SYSTEM ||
+   mem->type == INTEL_MEMORY_STOLEN_LOCAL)
+   return I915_PL_STOLEN;
+
type = mem->instance + TTM_PL_PRIV;
GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES);
 
@@ -91,10 +97,16 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
int mem_type = intel_region_to_ttm_type(mem);
int ret;
 
-   ret = i915_ttm_buddy_man_init(bdev, mem_type, false,
- resource_size(&mem->region),
- mem->io_size,
- mem->min_page_size, PAGE_SIZE);
+   if (mem_type == I915_PL_STOLEN) {
+   ret = ttm_range_man_init(bdev, mem_type, false,
+resource_size(&mem->region) >> 
PAGE_SHIFT);
+   mem->is_range_manager = true;
+   } else {
+   ret = i915_ttm_buddy_man_init(bdev, mem_type, false,
+ resource_size(&mem->region),
+ mem->io_size,
+ mem->min_page_size, PAGE_SIZE);
+   }
if (ret)
return ret;
 
@@ -114,6 +126,7 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
 int intel_region_ttm_fini(struct intel_memory_region *mem)
 {
struct ttm_resource_manager *man = mem->region_private;
+   int mem_type = intel_region_to_ttm_type(mem);
int ret = -EBUSY;
int count;
 
@@ -144,8 +157,10 @@ int intel_region_ttm_fini(struct intel_memory_region *mem)
if (ret || !man)
return ret;
 
-   ret = i915_ttm_buddy_man_fini(&mem->i915->bdev,
- intel_region_to_ttm_type(mem));
+   if (mem_type == I915_PL_STOLEN)
+   ret = ttm_range_man_fini(&mem->i915->bdev, mem_type);
+   else
+   ret = i915_ttm_buddy_man_fini(&mem->i915->bdev, mem_type);
GEM_WARN_ON(ret);
mem->region_private = NULL;
 
-- 
2.25.1



[PATCH v7 07/10] drm/i915: ttm move/clear logic fix

2022-06-20 Thread Robert Beckett
ttm managed buffers start off with system resource definitions and ttm_tt
tracking structures allocated (though unpopulated).
currently this prevents clearing of buffers on their first move to the
desired placement.

The desired behaviour is to clear only user allocated buffers and any
kernel buffers that specifically request it.
Make the logic match the desired behaviour.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 22 +++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 81c67ca9edda..a3f8fc056dbc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -3,6 +3,7 @@
  * Copyright © 2021 Intel Corporation
  */
 
+#include "drm/ttm/ttm_tt.h"
 #include 
 
 #include "i915_deps.h"
@@ -476,6 +477,25 @@ __i915_ttm_move(struct ttm_buffer_object *bo,
return fence;
 }
 
+static bool
+allow_clear(struct drm_i915_gem_object *obj, struct ttm_tt *ttm, struct 
ttm_resource *dst_mem)
+{
+   /* never clear stolen */
+   if (dst_mem->mem_type == I915_PL_STOLEN)
+   return false;
+   /*
+* we want to clear user buffers and any kernel buffers
+* that specifically request clearing.
+*/
+   if (obj->flags & I915_BO_ALLOC_USER)
+   return true;
+
+   if (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)
+   return true;
+
+   return false;
+}
+
 /**
  * i915_ttm_move - The TTM move callback used by i915.
  * @bo: The buffer object.
@@ -526,7 +546,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
return PTR_ERR(dst_rsgt);
 
clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || 
!ttm_tt_is_populated(ttm));
-   if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
+   if (!clear || allow_clear(obj, ttm, dst_mem)) {
struct i915_deps deps;
 
i915_deps_init(, GFP_KERNEL | __GFP_NORETRY | 
__GFP_NOWARN);
-- 
2.25.1
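
The resulting policy, restated as a decision table (this only summarises
allow_clear() above; no new behaviour):

	/*
	 * dst_mem->mem_type == I915_PL_STOLEN       -> never clear
	 * obj->flags & I915_BO_ALLOC_USER           -> clear
	 * ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC  -> clear
	 * anything else                             -> don't clear
	 */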



[PATCH v7 09/10] drm/i915/ttm: add buffer pin on alloc flag

2022-06-20 Thread Robert Beckett
For situations where allocations need to fail at alloc time instead of
at delayed get_pages, add a new alloc flag to pin the ttm bo.
This makes sure that the resource has been allocated during buffer
creation, allowing it to fail with an error if the placement is
exhausted.
This allows existing fallback options for stolen backend allocation like
create_ring_vma to work as expected.

Signed-off-by: Robert Beckett 
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 13 ++
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   | 25 ++-
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 6632ed52e919..07bc11247a3e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -325,17 +325,20 @@ struct drm_i915_gem_object {
  * dealing with userspace objects the CPU fault handler is free to ignore this.
  */
 #define I915_BO_ALLOC_GPU_ONLY   BIT(6)
+/* object should be pinned in destination region from allocation */
+#define I915_BO_ALLOC_PINNED BIT(7)
 #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
 I915_BO_ALLOC_VOLATILE | \
 I915_BO_ALLOC_CPU_CLEAR | \
 I915_BO_ALLOC_USER | \
 I915_BO_ALLOC_PM_VOLATILE | \
 I915_BO_ALLOC_PM_EARLY | \
-I915_BO_ALLOC_GPU_ONLY)
-#define I915_BO_READONLY  BIT(7)
-#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */
-#define I915_BO_PROTECTED BIT(9)
-#define I915_BO_WAS_BOUND_BIT 10
+I915_BO_ALLOC_GPU_ONLY | \
+I915_BO_ALLOC_PINNED)
+#define I915_BO_READONLY  BIT(8)
+#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */
+#define I915_BO_PROTECTED BIT(10)
+#define I915_BO_WAS_BOUND_BIT 11
/**
 * @mem_flags - Mutable placement-related flags
 *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 27d59639177f..bb988608296d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -998,6 +998,13 @@ static void i915_ttm_delayed_free(struct 
drm_i915_gem_object *obj)
 {
GEM_BUG_ON(!obj->ttm.created);
 
+   /* stolen objects are pinned for lifetime. Unpin before putting */
+   if (obj->flags & I915_BO_ALLOC_PINNED) {
+   ttm_bo_reserve(i915_gem_to_ttm(obj), true, false, NULL);
+   ttm_bo_unpin(i915_gem_to_ttm(obj));
+   ttm_bo_unreserve(i915_gem_to_ttm(obj));
+   }
+
ttm_bo_put(i915_gem_to_ttm(obj));
 }
 
@@ -1193,6 +1200,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
.no_wait_gpu = false,
};
enum ttm_bo_type bo_type;
+   struct ttm_place _place;
+   struct ttm_placement _placement;
+   struct ttm_placement *placement;
int ret;
 
drm_gem_private_object_init(&i915->drm, &obj->base, size);
@@ -1222,6 +1232,17 @@ int __i915_gem_ttm_object_init(struct 
intel_memory_region *mem,
 */
i915_gem_object_make_unshrinkable(obj);
 
+   if (obj->flags & I915_BO_ALLOC_PINNED) {
+   i915_ttm_place_from_region(mem, &_place, obj->bo_offset,
+  obj->base.size, obj->flags);
+   _placement.num_placement = 1;
+   _placement.placement = &_place;
+   _placement.num_busy_placement = 0;
+   _placement.busy_placement = NULL;
+   placement = &_placement;
+   } else {
+   placement = &i915_sys_placement;
+   }
/*
 * If this function fails, it will call the destructor, but
 * our caller still owns the object. So no freeing in the
@@ -1230,7 +1251,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
 * until successful initialization.
 */
ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size,
-  bo_type, &i915_sys_placement,
+  bo_type, placement,
   page_size >> PAGE_SHIFT,
   &ctx, NULL, NULL, i915_ttm_bo_destroy);
if (ret)
@@ -1242,6 +1263,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
i915_ttm_adjust_domains_after_move(obj);
i915_ttm_adjust_gem_after_move(obj);
obj->ttm.cache_level_override = false;
+   if (obj->flags & I915_BO_ALLOC_PINNED)
+   ttm_bo_pin(i915_gem_to_ttm(obj));
i915_gem_object_unlock(obj);
 
return 0;
-- 
2.25.1
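
A sketch of the intended caller pattern. i915_gem_object_create_region() and
i915_gem_object_create_internal() are existing helpers, but this particular
fallback site is illustrative rather than taken from the series:

	/* With I915_BO_ALLOC_PINNED the placement is allocated (and the bo
	 * pinned) at creation, so stolen exhaustion fails here rather than
	 * at first get_pages. */
	obj = i915_gem_object_create_region(i915->mm.stolen_region, size, 0,
					    I915_BO_ALLOC_PINNED);
	if (IS_ERR(obj)) {
		/* legacy fallback, e.g. ring vma allocation dropping back
		 * to system memory */
		obj = i915_gem_object_create_internal(i915, size);
	}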



[PATCH v7 04/10] drm/i915/gem: selftest should not attempt mmap of private regions

2022-06-20 Thread Robert Beckett
During testing, make can_mmap() consider whether the region is private.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 5bc93a1ce3e3..76181e28c75e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -869,6 +869,9 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum 
i915_mmap_type type)
struct drm_i915_private *i915 = to_i915(obj->base.dev);
bool no_map;
 
+   if (obj->mm.region && obj->mm.region->private)
+   return false;
+
if (obj->ops->mmap_offset)
return type == I915_MMAP_TYPE_FIXED;
else if (type == I915_MMAP_TYPE_FIXED)
-- 
2.25.1
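
For reference, a region opts out of these mmap tests simply by being marked
private at creation. A sketch, with the assignment site being illustrative:

	/* in the region creator, before the region is used: */
	mem->private = true;	/* can_mmap() now skips this region */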



[PATCH v7 08/10] drm/i915: allow memory region creators to alloc and free the region

2022-06-20 Thread Robert Beckett
add callbacks for alloc and free.
this allows region creators to allocate any extra storage they may
require.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/intel_memory_region.c | 16 +---
 drivers/gpu/drm/i915/intel_memory_region.h |  2 ++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_memory_region.c 
b/drivers/gpu/drm/i915/intel_memory_region.c
index e38d2db1c3e3..3da07a712f90 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -231,7 +231,10 @@ intel_memory_region_create(struct drm_i915_private *i915,
struct intel_memory_region *mem;
int err;
 
-   mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+   if (ops->alloc)
+   mem = ops->alloc();
+   else
+   mem = kzalloc(sizeof(*mem), GFP_KERNEL);
if (!mem)
return ERR_PTR(-ENOMEM);
 
@@ -265,7 +268,10 @@ intel_memory_region_create(struct drm_i915_private *i915,
if (mem->ops->release)
mem->ops->release(mem);
 err_free:
-   kfree(mem);
+   if (mem->ops->free)
+   mem->ops->free(mem);
+   else
+   kfree(mem);
return ERR_PTR(err);
 }
 
@@ -288,7 +294,11 @@ void intel_memory_region_destroy(struct 
intel_memory_region *mem)
 
	GEM_WARN_ON(!list_empty_careful(&mem->objects.list));
	mutex_destroy(&mem->objects.lock);
-   if (!ret)
+   if (ret)
+   return;
+   if (mem->ops->free)
+   mem->ops->free(mem);
+   else
kfree(mem);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h 
b/drivers/gpu/drm/i915/intel_memory_region.h
index 3d8378c1b447..048955b5429f 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.h
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -61,6 +61,8 @@ struct intel_memory_region_ops {
   resource_size_t size,
   resource_size_t page_size,
   unsigned int flags);
+   struct intel_memory_region *(*alloc)(void);
+   void (*free)(struct intel_memory_region *mem);
 };
 
 struct intel_memory_region {
-- 
2.25.1
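
A minimal sketch of how a region creator might use the new hooks to embed
extra storage alongside the region. The wrapper struct and names are
hypothetical; only the ops signatures come from this patch:

	struct stolen_region {
		struct intel_memory_region region;	/* recovered via container_of() */
		struct drm_mm reserved;			/* creator-private extra state */
	};

	static struct intel_memory_region *stolen_region_alloc(void)
	{
		struct stolen_region *s = kzalloc(sizeof(*s), GFP_KERNEL);

		return s ? &s->region : NULL;	/* NULL maps to -ENOMEM in create() */
	}

	static void stolen_region_free(struct intel_memory_region *mem)
	{
		kfree(container_of(mem, struct stolen_region, region));
	}

	static const struct intel_memory_region_ops stolen_region_ops = {
		/* ...init/release as before... */
		.alloc = stolen_region_alloc,
		.free = stolen_region_free,
	};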



[PATCH v7 06/10] drm/i915: sanitize mem_flags for stolen buffers

2022-06-20 Thread Robert Beckett
Stolen regions are not page backed or considered iomem.
Prevent flags indicating such.
This correctly prevents stolen buffers from attempting to directly map
them.

See i915_gem_object_has_struct_page() and i915_gem_object_has_iomem()
usage for where it would break otherwise.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 675e9ab30396..81c67ca9edda 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -14,6 +14,7 @@
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_ttm.h"
 #include "gem/i915_gem_ttm_move.h"
+#include "gem/i915_gem_stolen.h"
 
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
@@ -124,8 +125,9 @@ void i915_ttm_adjust_gem_after_move(struct 
drm_i915_gem_object *obj)
 
obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
 
-   obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
-   I915_BO_FLAG_STRUCT_PAGE;
+   if (!i915_gem_object_is_stolen(obj))
+   obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
+   I915_BO_FLAG_STRUCT_PAGE;
 
if (!obj->ttm.cache_level_override) {
cache_level = i915_ttm_cache_level(to_i915(bo->base.dev),
-- 
2.25.1



[PATCH v7 02/10] drm/i915: limit ttm to dma32 for i965G[M]

2022-06-20 Thread Robert Beckett
i965G[M] cannot relocate objects above 4GiB.
Ensure ttm uses dma32 on these systems.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index 62ff77445b01..fd2ecfdd8fa1 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -32,10 +32,15 @@
 int intel_region_ttm_device_init(struct drm_i915_private *dev_priv)
 {
	struct drm_device *drm = &dev_priv->drm;
+   bool use_dma32 = false;
+
+   /* i965g[m] cannot relocate objects above 4GiB. */
+   if (IS_I965GM(dev_priv) || IS_I965G(dev_priv))
+   use_dma32 = true;
 
	return ttm_device_init(&dev_priv->bdev, i915_ttm_driver(),
   drm->dev, drm->anon_inode->i_mapping,
-  drm->vma_offset_manager, false, false);
+  drm->vma_offset_manager, false, use_dma32);
 }
 
 /**
-- 
2.25.1



[PATCH v7 03/10] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level

2022-06-20 Thread Robert Beckett
By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP.
This is divergent from the existing backend code, which only considers
HAS_LLC.
Testing shows that trusting snooping is unreliable on gen5- and on bsw
via ggtt mappings, so limit it to DGFX for now and maintain the previous
behaviour.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 4c1de0b4a10f..40249fa28a7a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -46,7 +46,9 @@ static enum i915_cache_level
 i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
 struct ttm_tt *ttm)
 {
-   return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
+   bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915);
+
+   return ((HAS_LLC(i915) || can_snoop) &&
!i915_ttm_gtt_binds_lmem(res) &&
ttm->caching == ttm_cached) ? I915_CACHE_LLC :
I915_CACHE_NONE;
-- 
2.25.1



[PATCH v7 01/10] drm/i915/ttm: don't trample cache_level overrides during ttm move

2022-06-20 Thread Robert Beckett
Various places within the driver override the default chosen cache_level.
Before ttm, these overrides were permanent until explicitly changed again
or for the lifetime of the buffer.

TTM movement code came along and decided that it could make that
decision at that time, which is usually well after object creation, so
it overrode the cache_level decision and reverted it to the default.

Add logic to indicate whether the caching mode has been set by anything
other than the move logic. If so, assume that the code that overrode the
defaults knows best and keep it.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c   | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 9 ++---
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 06b1b188ce5a..519887769c08 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct 
drm_i915_gem_object *obj,
struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
obj->cache_level = cache_level;
+   obj->ttm.cache_level_override = true;
 
if (cache_level != I915_CACHE_NONE)
obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 2c88bdb8ff7c..6632ed52e919 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -605,6 +605,7 @@ struct drm_i915_gem_object {
struct i915_gem_object_page_iter get_io_page;
struct drm_i915_gem_object *backup;
bool created:1;
+   bool cache_level_override:1;
} ttm;
 
/*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 4c25d9b2f138..27d59639177f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1241,6 +1241,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
i915_gem_object_init_memory_region(obj, mem);
i915_ttm_adjust_domains_after_move(obj);
i915_ttm_adjust_gem_after_move(obj);
+   obj->ttm.cache_level_override = false;
i915_gem_object_unlock(obj);
 
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index a10716f4e717..4c1de0b4a10f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -123,9 +123,12 @@ void i915_ttm_adjust_gem_after_move(struct 
drm_i915_gem_object *obj)
obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
I915_BO_FLAG_STRUCT_PAGE;
 
-   cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
-  bo->ttm);
-   i915_gem_object_set_cache_coherency(obj, cache_level);
+   if (!obj->ttm.cache_level_override) {
+   cache_level = i915_ttm_cache_level(to_i915(bo->base.dev),
+  bo->resource, bo->ttm);
+   i915_gem_object_set_cache_coherency(obj, cache_level);
+   obj->ttm.cache_level_override = false;
+   }
 }
 
 /**
-- 
2.25.1



[PATCH v7 00/10] drm/i915: ttm for stolen

2022-06-20 Thread Robert Beckett
This series refactors i915's stolen memory region to use ttm.

v2: handle disabled stolen similar to the legacy version.
relying on ttm to fail allocs works fine, but is dmesg noisy and causes
dmesg warning regressions in testing.

v3: rebase to latest drm-tip.
fix v2 code refactor which could leave a buffer pinned.
locally passes fftl again now.

v4: - Allow memory region creators to do allocation. Allows the stolen
  region to track its own reservations.
- Pre-reserve first page of stolen mem (add back
  WaSkipStolenMemoryFirstPage:bdw+)
- Improve commit description for "drm/i915: sanitize mem_flags for
  stolen buffers"
- Replace i915_gem_object_pin_pages_unlocked() call with manual locking
  and pinning.
  This avoids ww ctx class reuse during context creation -> ring vma
  obj alloc.

v5: - detect both types of stolen as stolen buffers in
  "drm/i915: sanitize mem_flags for stolen buffers"
- in stolen_object_init limit page size to mem region minimum.
  The range allocator expects the page_size to define the
  alignment

v6: - Share first 4 patches from ttm for internal series as generic
  i915 ttm fixes
- Drop patch 4 from v5. We don't need separate object ops just
  to satisfy test interfaces. The tests have now been fixed via
  checking whether the memory region is private to decide
  whether to mmap
- Add new buffer pin alloc flag to allow creation of buffers in
  their final ttm placement instead of deferring until
  get_pages. This fixes legacy fallback paths for buffer
  allocations during stolen memory pressure.

v7: - fix mock_region_get_pages() to correctly handle I915_BO_INVALID_OFFSET

Robert Beckett (10):
  drm/i915/ttm: don't trample cache_level overrides during ttm move
  drm/i915: limit ttm to dma32 for i965G[M]
  drm/i915/ttm: only trust snooping for dgfx when deciding default
cache_level
  drm/i915/gem: selftest should not attempt mmap of private regions
  drm/i915: instantiate ttm range manager for stolen memory
  drm/i915: sanitize mem_flags for stolen buffers
  drm/i915: ttm move/clear logic fix
  drm/i915: allow memory region creators to alloc and free the region
  drm/i915/ttm: add buffer pin on alloc flag
  drm/i915: stolen memory use ttm backend

 drivers/gpu/drm/i915/display/intel_fbc.c  |  78 ++--
 drivers/gpu/drm/i915/gem/i915_gem_object.c|   1 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  16 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 440 +++---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.h|  21 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |  29 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h   |   7 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  47 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|   3 +
 drivers/gpu/drm/i915/gt/intel_rc6.c   |   4 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  16 +-
 drivers/gpu/drm/i915/i915_debugfs.c   |   7 +-
 drivers/gpu/drm/i915/i915_drv.h   |   5 -
 drivers/gpu/drm/i915/intel_memory_region.c|  16 +-
 drivers/gpu/drm/i915/intel_memory_region.h|   2 +
 drivers/gpu/drm/i915/intel_region_ttm.c   |  80 +++-
 drivers/gpu/drm/i915/intel_region_ttm.h   |   8 +-
 drivers/gpu/drm/i915/selftests/mock_region.c  |  12 +-
 18 files changed, 423 insertions(+), 369 deletions(-)

-- 
2.25.1



[PATCH v6 08/10] drm/i915: allow memory region creators to alloc and free the region

2022-06-17 Thread Robert Beckett
add callbacks for alloc and free.
this allows region creators to allocate any extra storage they may
require.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/intel_memory_region.c | 16 +---
 drivers/gpu/drm/i915/intel_memory_region.h |  2 ++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_memory_region.c 
b/drivers/gpu/drm/i915/intel_memory_region.c
index e38d2db1c3e3..3da07a712f90 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -231,7 +231,10 @@ intel_memory_region_create(struct drm_i915_private *i915,
struct intel_memory_region *mem;
int err;
 
-   mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+   if (ops->alloc)
+   mem = ops->alloc();
+   else
+   mem = kzalloc(sizeof(*mem), GFP_KERNEL);
if (!mem)
return ERR_PTR(-ENOMEM);
 
@@ -265,7 +268,10 @@ intel_memory_region_create(struct drm_i915_private *i915,
if (mem->ops->release)
mem->ops->release(mem);
 err_free:
-   kfree(mem);
+   if (mem->ops->free)
+   mem->ops->free(mem);
+   else
+   kfree(mem);
return ERR_PTR(err);
 }
 
@@ -288,7 +294,11 @@ void intel_memory_region_destroy(struct 
intel_memory_region *mem)
 
	GEM_WARN_ON(!list_empty_careful(&mem->objects.list));
	mutex_destroy(&mem->objects.lock);
-   if (!ret)
+   if (ret)
+   return;
+   if (mem->ops->free)
+   mem->ops->free(mem);
+   else
kfree(mem);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h 
b/drivers/gpu/drm/i915/intel_memory_region.h
index 3d8378c1b447..048955b5429f 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.h
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -61,6 +61,8 @@ struct intel_memory_region_ops {
   resource_size_t size,
   resource_size_t page_size,
   unsigned int flags);
+   struct intel_memory_region *(*alloc)(void);
+   void (*free)(struct intel_memory_region *mem);
 };
 
 struct intel_memory_region {
-- 
2.25.1



[PATCH v6 06/10] drm/i915: sanitize mem_flags for stolen buffers

2022-06-17 Thread Robert Beckett
Stolen regions are not page backed or considered iomem.
Prevent flags indicating such.
This correctly prevents stolen buffers from attempting to directly map
them.

See i915_gem_object_has_struct_page() and i915_gem_object_has_iomem()
usage for where it would break otherwise.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 675e9ab30396..81c67ca9edda 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -14,6 +14,7 @@
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_ttm.h"
 #include "gem/i915_gem_ttm_move.h"
+#include "gem/i915_gem_stolen.h"
 
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
@@ -124,8 +125,9 @@ void i915_ttm_adjust_gem_after_move(struct 
drm_i915_gem_object *obj)
 
obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
 
-   obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
-   I915_BO_FLAG_STRUCT_PAGE;
+   if (!i915_gem_object_is_stolen(obj))
+   obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
+   I915_BO_FLAG_STRUCT_PAGE;
 
if (!obj->ttm.cache_level_override) {
cache_level = i915_ttm_cache_level(to_i915(bo->base.dev),
-- 
2.25.1



[PATCH v6 07/10] drm/i915: ttm move/clear logic fix

2022-06-17 Thread Robert Beckett
ttm managed buffers start off with system resource definitions and ttm_tt
tracking structures allocated (though unpopulated).
currently this prevents clearing of buffers on their first move to the
desired placement.

The desired behaviour is to clear only user allocated buffers and any
kernel buffers that specifically request it.
Make the logic match the desired behaviour.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 22 +++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 81c67ca9edda..a3f8fc056dbc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -3,6 +3,7 @@
  * Copyright © 2021 Intel Corporation
  */
 
+#include "drm/ttm/ttm_tt.h"
 #include 
 
 #include "i915_deps.h"
@@ -476,6 +477,25 @@ __i915_ttm_move(struct ttm_buffer_object *bo,
return fence;
 }
 
+static bool
+allow_clear(struct drm_i915_gem_object *obj, struct ttm_tt *ttm, struct 
ttm_resource *dst_mem)
+{
+   /* never clear stolen */
+   if (dst_mem->mem_type == I915_PL_STOLEN)
+   return false;
+   /*
+* we want to clear user buffers and any kernel buffers
+* that specifically request clearing.
+*/
+   if (obj->flags & I915_BO_ALLOC_USER)
+   return true;
+
+   if (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)
+   return true;
+
+   return false;
+}
+
 /**
  * i915_ttm_move - The TTM move callback used by i915.
  * @bo: The buffer object.
@@ -526,7 +546,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
return PTR_ERR(dst_rsgt);
 
clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || 
!ttm_tt_is_populated(ttm));
-   if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
+   if (!clear || allow_clear(obj, ttm, dst_mem)) {
struct i915_deps deps;
 
i915_deps_init(, GFP_KERNEL | __GFP_NORETRY | 
__GFP_NOWARN);
-- 
2.25.1



[PATCH v6 09/10] drm/i915/ttm: add buffer pin on alloc flag

2022-06-17 Thread Robert Beckett
For situations where allocations need to fail at alloc time instead of
at delayed get_pages, add a new alloc flag to pin the ttm bo.
This makes sure that the resource has been allocated during buffer
creation, allowing it to fail with an error if the placement is
exhausted.
This allows existing fallback options for stolen backend allocation like
create_ring_vma to work as expected.

Signed-off-by: Robert Beckett 
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 13 ++
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   | 25 ++-
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 6632ed52e919..07bc11247a3e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -325,17 +325,20 @@ struct drm_i915_gem_object {
  * dealing with userspace objects the CPU fault handler is free to ignore this.
  */
 #define I915_BO_ALLOC_GPU_ONLY   BIT(6)
+/* object should be pinned in destination region from allocation */
+#define I915_BO_ALLOC_PINNED BIT(7)
 #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
 I915_BO_ALLOC_VOLATILE | \
 I915_BO_ALLOC_CPU_CLEAR | \
 I915_BO_ALLOC_USER | \
 I915_BO_ALLOC_PM_VOLATILE | \
 I915_BO_ALLOC_PM_EARLY | \
-I915_BO_ALLOC_GPU_ONLY)
-#define I915_BO_READONLY  BIT(7)
-#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */
-#define I915_BO_PROTECTED BIT(9)
-#define I915_BO_WAS_BOUND_BIT 10
+I915_BO_ALLOC_GPU_ONLY | \
+I915_BO_ALLOC_PINNED)
+#define I915_BO_READONLY  BIT(8)
+#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */
+#define I915_BO_PROTECTED BIT(10)
+#define I915_BO_WAS_BOUND_BIT 11
/**
 * @mem_flags - Mutable placement-related flags
 *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 27d59639177f..bb988608296d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -998,6 +998,13 @@ static void i915_ttm_delayed_free(struct 
drm_i915_gem_object *obj)
 {
GEM_BUG_ON(!obj->ttm.created);
 
+   /* stolen objects are pinned for lifetime. Unpin before putting */
+   if (obj->flags & I915_BO_ALLOC_PINNED) {
+   ttm_bo_reserve(i915_gem_to_ttm(obj), true, false, NULL);
+   ttm_bo_unpin(i915_gem_to_ttm(obj));
+   ttm_bo_unreserve(i915_gem_to_ttm(obj));
+   }
+
ttm_bo_put(i915_gem_to_ttm(obj));
 }
 
@@ -1193,6 +1200,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
.no_wait_gpu = false,
};
enum ttm_bo_type bo_type;
+   struct ttm_place _place;
+   struct ttm_placement _placement;
+   struct ttm_placement *placement;
int ret;
 
drm_gem_private_object_init(&i915->drm, &obj->base, size);
@@ -1222,6 +1232,17 @@ int __i915_gem_ttm_object_init(struct 
intel_memory_region *mem,
 */
i915_gem_object_make_unshrinkable(obj);
 
+   if (obj->flags & I915_BO_ALLOC_PINNED) {
+   i915_ttm_place_from_region(mem, &_place, obj->bo_offset,
+  obj->base.size, obj->flags);
+   _placement.num_placement = 1;
+   _placement.placement = &_place;
+   _placement.num_busy_placement = 0;
+   _placement.busy_placement = NULL;
+   placement = &_placement;
+   } else {
+   placement = &i915_sys_placement;
+   }
/*
 * If this function fails, it will call the destructor, but
 * our caller still owns the object. So no freeing in the
@@ -1230,7 +1251,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
 * until successful initialization.
 */
ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size,
-  bo_type, &i915_sys_placement,
+  bo_type, placement,
   page_size >> PAGE_SHIFT,
   &ctx, NULL, NULL, i915_ttm_bo_destroy);
if (ret)
@@ -1242,6 +1263,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
i915_ttm_adjust_domains_after_move(obj);
i915_ttm_adjust_gem_after_move(obj);
obj->ttm.cache_level_override = false;
+   if (obj->flags & I915_BO_ALLOC_PINNED)
+   ttm_bo_pin(i915_gem_to_ttm(obj));
i915_gem_object_unlock(obj);
 
return 0;
-- 
2.25.1



[PATCH v6 10/10] drm/i915: stolen memory use ttm backend

2022-06-17 Thread Robert Beckett
refactor stolen memory region to use ttm.
this necessitates using ttm resources to track reserved stolen regions
instead of drm_mm_nodes.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/display/intel_fbc.c  |  78 ++--
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 440 +++---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.h|  21 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |   3 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h   |   7 +
 drivers/gpu/drm/i915/gt/intel_rc6.c   |   4 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  16 +-
 drivers/gpu/drm/i915/i915_debugfs.c   |   7 +-
 drivers/gpu/drm/i915/i915_drv.h   |   5 -
 drivers/gpu/drm/i915/intel_region_ttm.c   |  42 +-
 drivers/gpu/drm/i915/intel_region_ttm.h   |   8 +-
 drivers/gpu/drm/i915/selftests/mock_region.c  |   3 +-
 13 files changed, 294 insertions(+), 342 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c 
b/drivers/gpu/drm/i915/display/intel_fbc.c
index 8b807284cde1..6f3afac5e8c9 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -38,6 +38,7 @@
  * forcibly disable it to allow proper screen updates.
  */
 
+#include "gem/i915_gem_stolen.h"
 #include 
 
 #include 
@@ -51,6 +52,7 @@
 #include "intel_display_types.h"
 #include "intel_fbc.h"
 #include "intel_frontbuffer.h"
+#include "gem/i915_gem_region.h"
 
 #define for_each_fbc_id(__dev_priv, __fbc_id) \
for ((__fbc_id) = INTEL_FBC_A; (__fbc_id) < I915_MAX_FBCS; 
(__fbc_id)++) \
@@ -92,8 +94,8 @@ struct intel_fbc {
struct mutex lock;
unsigned int busy_bits;
 
-   struct drm_mm_node compressed_fb;
-   struct drm_mm_node compressed_llb;
+   struct ttm_resource *compressed_fb;
+   struct ttm_resource *compressed_llb;
 
enum intel_fbc_id id;
 
@@ -331,16 +333,20 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc)
 static void i8xx_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
+   u64 llb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_llb);
 
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   GEM_BUG_ON(llb_offset == I915_BO_INVALID_OFFSET);
GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start,
-fbc->compressed_fb.start, U32_MAX));
+fb_offset, U32_MAX));
GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start,
-fbc->compressed_llb.start, U32_MAX));
+llb_offset, U32_MAX));
 
intel_de_write(i915, FBC_CFB_BASE,
-  i915->dsm.start + fbc->compressed_fb.start);
+  i915->dsm.start + fb_offset);
intel_de_write(i915, FBC_LL_BASE,
-  i915->dsm.start + fbc->compressed_llb.start);
+  i915->dsm.start + llb_offset);
 }
 
 static const struct intel_fbc_funcs i8xx_fbc_funcs = {
@@ -448,8 +454,10 @@ static bool g4x_fbc_is_compressing(struct intel_fbc *fbc)
 static void g4x_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
 
-   intel_de_write(i915, DPFC_CB_BASE, fbc->compressed_fb.start);
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   intel_de_write(i915, DPFC_CB_BASE, fb_offset);
 }
 
 static const struct intel_fbc_funcs g4x_fbc_funcs = {
@@ -499,8 +507,10 @@ static bool ilk_fbc_is_compressing(struct intel_fbc *fbc)
 static void ilk_fbc_program_cfb(struct intel_fbc *fbc)
 {
struct drm_i915_private *i915 = fbc->i915;
+   u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb);
 
-   intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), 
fbc->compressed_fb.start);
+   GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET);
+   intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fb_offset);
 }
 
 static const struct intel_fbc_funcs ilk_fbc_funcs = {
@@ -744,21 +754,24 @@ static int find_compression_limit(struct intel_fbc *fbc,
 {
struct drm_i915_private *i915 = fbc->i915;
u64 end = intel_fbc_stolen_end(i915);
-   int ret, limit = min_limit;
+   int limit = min_limit;
+   struct ttm_resource *res;
 
size /= limit;
 
/* Try to over-allocate to reduce reallocations and fragmentation. */
-   ret = i915_gem_stolen_insert_node_in_range(i915, &fbc->compressed_fb,
-  size <<= 1, 4096, 0, end);
-   if (ret == 0)
+   res = i915_gem_stolen_reserve_range(i915, size <<= 1, 0, end)

[PATCH v6 05/10] drm/i915: instantiate ttm range manager for stolen memory

2022-06-17 Thread Robert Beckett
prepare for ttm based stolen region by using ttm range manager
as the resource manager for stolen region.

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c |  6 ++--
 drivers/gpu/drm/i915/intel_region_ttm.c  | 31 +++-
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 40249fa28a7a..675e9ab30396 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -60,11 +60,13 @@ i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
 
/* There's some room for optimization here... */
-   GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
-  ttm_mem_type < I915_PL_LMEM0);
+   GEM_BUG_ON(ttm_mem_type == I915_PL_GGTT);
+
if (ttm_mem_type == I915_PL_SYSTEM)
return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
  0);
+   if (ttm_mem_type == I915_PL_STOLEN)
+   return i915->mm.stolen_region;
 
return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
  ttm_mem_type - I915_PL_LMEM0);
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index fd2ecfdd8fa1..694e9acb69e2 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -54,7 +54,7 @@ void intel_region_ttm_device_fini(struct drm_i915_private 
*dev_priv)
 
 /*
  * Map the i915 memory regions to TTM memory types. We use the
- * driver-private types for now, reserving TTM_PL_VRAM for stolen
+ * driver-private types for now, reserving I915_PL_STOLEN for stolen
  * memory and TTM_PL_TT for GGTT use if decided to implement this.
  */
 int intel_region_to_ttm_type(const struct intel_memory_region *mem)
@@ -63,11 +63,17 @@ int intel_region_to_ttm_type(const struct 
intel_memory_region *mem)
 
GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL &&
   mem->type != INTEL_MEMORY_MOCK &&
-  mem->type != INTEL_MEMORY_SYSTEM);
+  mem->type != INTEL_MEMORY_SYSTEM &&
+  mem->type != INTEL_MEMORY_STOLEN_SYSTEM &&
+  mem->type != INTEL_MEMORY_STOLEN_LOCAL);
 
if (mem->type == INTEL_MEMORY_SYSTEM)
return TTM_PL_SYSTEM;
 
+   if (mem->type == INTEL_MEMORY_STOLEN_SYSTEM ||
+   mem->type == INTEL_MEMORY_STOLEN_LOCAL)
+   return I915_PL_STOLEN;
+
type = mem->instance + TTM_PL_PRIV;
GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES);
 
@@ -91,10 +97,16 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
int mem_type = intel_region_to_ttm_type(mem);
int ret;
 
-   ret = i915_ttm_buddy_man_init(bdev, mem_type, false,
- resource_size(&mem->region),
- mem->io_size,
- mem->min_page_size, PAGE_SIZE);
+   if (mem_type == I915_PL_STOLEN) {
+   ret = ttm_range_man_init(bdev, mem_type, false,
+resource_size(&mem->region) >> 
PAGE_SHIFT);
+   mem->is_range_manager = true;
+   } else {
+   ret = i915_ttm_buddy_man_init(bdev, mem_type, false,
+ resource_size(&mem->region),
+ mem->io_size,
+ mem->min_page_size, PAGE_SIZE);
+   }
if (ret)
return ret;
 
@@ -114,6 +126,7 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
 int intel_region_ttm_fini(struct intel_memory_region *mem)
 {
struct ttm_resource_manager *man = mem->region_private;
+   int mem_type = intel_region_to_ttm_type(mem);
int ret = -EBUSY;
int count;
 
@@ -144,8 +157,10 @@ int intel_region_ttm_fini(struct intel_memory_region *mem)
if (ret || !man)
return ret;
 
-   ret = i915_ttm_buddy_man_fini(&mem->i915->bdev,
- intel_region_to_ttm_type(mem));
+   if (mem_type == I915_PL_STOLEN)
+   ret = ttm_range_man_fini(&mem->i915->bdev, mem_type);
+   else
+   ret = i915_ttm_buddy_man_fini(&mem->i915->bdev, mem_type);
GEM_WARN_ON(ret);
mem->region_private = NULL;
 
-- 
2.25.1



[PATCH v6 02/10] drm/i915: limit ttm to dma32 for i965G[M]

2022-06-17 Thread Robert Beckett
i965G[M] cannot relocate objects above 4GiB.
Ensure ttm uses dma32 on these systems.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index 62ff77445b01..fd2ecfdd8fa1 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -32,10 +32,15 @@
 int intel_region_ttm_device_init(struct drm_i915_private *dev_priv)
 {
	struct drm_device *drm = &dev_priv->drm;
+   bool use_dma32 = false;
+
+   /* i965g[m] cannot relocate objects above 4GiB. */
+   if (IS_I965GM(dev_priv) || IS_I965G(dev_priv))
+   use_dma32 = true;
 
	return ttm_device_init(&dev_priv->bdev, i915_ttm_driver(),
   drm->dev, drm->anon_inode->i_mapping,
-  drm->vma_offset_manager, false, false);
+  drm->vma_offset_manager, false, use_dma32);
 }
 
 /**
-- 
2.25.1



[PATCH v6 04/10] drm/i915/gem: selftest should not attempt mmap of private regions

2022-06-17 Thread Robert Beckett
During testing, make can_mmap() consider whether the region is private.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 5bc93a1ce3e3..76181e28c75e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -869,6 +869,9 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum 
i915_mmap_type type)
struct drm_i915_private *i915 = to_i915(obj->base.dev);
bool no_map;
 
+   if (obj->mm.region && obj->mm.region->private)
+   return false;
+
if (obj->ops->mmap_offset)
return type == I915_MMAP_TYPE_FIXED;
else if (type == I915_MMAP_TYPE_FIXED)
-- 
2.25.1



[PATCH v6 03/10] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level

2022-06-17 Thread Robert Beckett
By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP.
This is divergent from the existing backend code, which only considers
HAS_LLC.
Testing shows that trusting snooping is unreliable on gen5- and on bsw
via ggtt mappings, so limit it to DGFX for now and maintain the previous
behaviour.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 4c1de0b4a10f..40249fa28a7a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -46,7 +46,9 @@ static enum i915_cache_level
 i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
 struct ttm_tt *ttm)
 {
-   return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
+   bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915);
+
+   return ((HAS_LLC(i915) || can_snoop) &&
!i915_ttm_gtt_binds_lmem(res) &&
ttm->caching == ttm_cached) ? I915_CACHE_LLC :
I915_CACHE_NONE;
-- 
2.25.1



[PATCH v6 01/10] drm/i915/ttm: don't trample cache_level overrides during ttm move

2022-06-17 Thread Robert Beckett
Various places within the driver override the default chosen cache_level.
Before ttm, these overrides were permanent until explicitly changed again
or for the lifetime of the buffer.

TTM movement code came along and decided that it could make that
decision at that time, which is usually well after object creation, so
it overrode the cache_level decision and reverted it to the default.

Add logic to indicate whether the caching mode has been set by anything
other than the move logic. If so, assume that the code that overrode the
defaults knows best and keep it.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c   | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 9 ++---
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 06b1b188ce5a..519887769c08 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct 
drm_i915_gem_object *obj,
struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
obj->cache_level = cache_level;
+   obj->ttm.cache_level_override = true;
 
if (cache_level != I915_CACHE_NONE)
obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 2c88bdb8ff7c..6632ed52e919 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -605,6 +605,7 @@ struct drm_i915_gem_object {
struct i915_gem_object_page_iter get_io_page;
struct drm_i915_gem_object *backup;
bool created:1;
+   bool cache_level_override:1;
} ttm;
 
/*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 4c25d9b2f138..27d59639177f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1241,6 +1241,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
i915_gem_object_init_memory_region(obj, mem);
i915_ttm_adjust_domains_after_move(obj);
i915_ttm_adjust_gem_after_move(obj);
+   obj->ttm.cache_level_override = false;
i915_gem_object_unlock(obj);
 
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index a10716f4e717..4c1de0b4a10f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -123,9 +123,12 @@ void i915_ttm_adjust_gem_after_move(struct 
drm_i915_gem_object *obj)
obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
I915_BO_FLAG_STRUCT_PAGE;
 
-   cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
-  bo->ttm);
-   i915_gem_object_set_cache_coherency(obj, cache_level);
+   if (!obj->ttm.cache_level_override) {
+   cache_level = i915_ttm_cache_level(to_i915(bo->base.dev),
+  bo->resource, bo->ttm);
+   i915_gem_object_set_cache_coherency(obj, cache_level);
+   obj->ttm.cache_level_override = false;
+   }
 }
 
 /**
-- 
2.25.1



[PATCH v6 00/10] drm/i915: ttm for stolen

2022-06-17 Thread Robert Beckett
This series refactors i915's stolen memory region to use ttm.

v2: handle disabled stolen similar to the legacy version.
relying on ttm to fail allocs works fine, but is dmesg noisy and causes
dmesg warning regressions in testing.

v3: rebase to latest drm-tip.
fix v2 code refactor which could leave a buffer pinned.
locally passes fftl again now.

v4: - Allow memory region creators to do allocation. Allows the stolen
  region to track its own reservations.
- Pre-reserve first page of stolen mem (add back
  WaSkipStolenMemoryFirstPage:bdw+)
- Improve commit description for "drm/i915: sanitize mem_flags for
  stolen buffers"
- Replace i915_gem_object_pin_pages_unlocked() call with manual locking
  and pinning.
  This avoids ww ctx class reuse during context creation -> ring vma
  obj alloc.

v5: - detect both types of stolen as stolen buffers in
  "drm/i915: sanitize mem_flags for stolen buffers"
- in stolen_object_init limit page size to mem region minimum.
  The range allocator expects the page_size to define the
  alignment

v6: - Share first 4 patches from ttm for internal series as generic
  i915 ttm fixes
- Drop patch 4 from v5. We don't need separate object ops just
  to satisfy test interfaces. The tests have now been fixed via
  checking whether the memory region is private to decide
  whether to mmap
- Add new buffer pin alloc flag to allow creation of buffers in
  their final ttm placement instead of deferring until
  get_pages. This fixes legacy fallback paths for buffer
  allocations during stolen memory pressure.
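
For illustration, a minimal sketch of what the pin-on-alloc flag enables.
The flag name and call shape follow the series description and are
assumptions, not verbatim from the patches:

    /* allocate a stolen buffer in its final ttm placement at create time,
     * instead of deferring the allocation until get_pages */
    struct drm_i915_gem_object *obj;

    obj = i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_STOLEN_SMEM],
                                        size, 0, I915_BO_ALLOC_PINNED);
    if (IS_ERR(obj))
        return PTR_ERR(obj);
    /* pages are already allocated and pinned: no legacy fallback path is
     * needed later under stolen memory pressure */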

Robert Beckett (10):
  drm/i915/ttm: dont trample cache_level overrides during ttm move
  drm/i915: limit ttm to dma32 for i965G[M]
  drm/i915/ttm: only trust snooping for dgfx when deciding default
cache_level
  drm/i915/gem: selftest should not attempt mmap of private regions
  drm/i915: instantiate ttm ranger manager for stolen memory
  drm/i915: sanitize mem_flags for stolen buffers
  drm/i915: ttm move/clear logic fix
  drm/i915: allow memory region creators to alloc and free the region
  drm/i915/ttm: add buffer pin on alloc flag
  drm/i915: stolen memory use ttm backend

 drivers/gpu/drm/i915/display/intel_fbc.c  |  78 ++--
 drivers/gpu/drm/i915/gem/i915_gem_object.c|   1 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  16 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 440 +++---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.h|  21 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |  29 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h   |   7 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  47 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|   3 +
 drivers/gpu/drm/i915/gt/intel_rc6.c   |   4 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  16 +-
 drivers/gpu/drm/i915/i915_debugfs.c   |   7 +-
 drivers/gpu/drm/i915/i915_drv.h   |   5 -
 drivers/gpu/drm/i915/intel_memory_region.c|  16 +-
 drivers/gpu/drm/i915/intel_memory_region.h|   2 +
 drivers/gpu/drm/i915/intel_region_ttm.c   |  80 +++-
 drivers/gpu/drm/i915/intel_region_ttm.h   |   8 +-
 drivers/gpu/drm/i915/selftests/mock_region.c  |   3 +-
 18 files changed, 414 insertions(+), 369 deletions(-)

-- 
2.25.1



[PATCH v3 3/8] drm/i915: setup ggtt scratch page after memory regions

2022-06-09 Thread Robert Beckett
Reorder scratch page allocation so that memory regions are available
to allocate the buffers
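
For reference, the probe flow after this patch, heavily simplified (error
handling elided; the exact position of the memory region probe is an
assumption of this sketch):

    intel_memory_regions_hw_probe(dev_priv);  /* regions come up first */
    intel_gt_tiles_init(dev_priv);            /* ggtt probe, no scratch page yet */
    i915_ggtt_setup_scratch_page(dev_priv);   /* new: can now allocate from a region */
    i915_ggtt_enable_hw(dev_priv);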

Signed-off-by: Robert Beckett 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gt/intel_gt_gmch.c | 20 ++--
 drivers/gpu/drm/i915/gt/intel_gt_gmch.h |  6 ++
 drivers/gpu/drm/i915/i915_driver.c  | 16 ++--
 3 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c 
b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c
index 18e488672d1b..5411df1734ac 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c
@@ -440,8 +440,6 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 
size)
struct drm_i915_private *i915 = ggtt->vm.i915;
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
phys_addr_t phys_addr;
-   u32 pte_flags;
-   int ret;
 
GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915));
phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915);
@@ -463,6 +461,24 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 
size)
}
 
kref_init(&ggtt->vm.resv_ref);
+
+   return 0;
+}
+
+/**
+ * i915_ggtt_setup_scratch_page - setup ggtt scratch page
+ * @i915: i915 device
+ */
+int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915)
+{
+   struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+   u32 pte_flags;
+   int ret;
+
+   /* gen5- scratch setup currently happens in @intel_gtt_init */
+   if (GRAPHICS_VER(i915) <= 5)
+   return 0;
+
ret = setup_scratch_page(&ggtt->vm);
if (ret) {
drm_err(&i915->drm, "Scratch setup failed\n");
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h 
b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h
index 75ed55c1f30a..c6b79cb78637 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h
@@ -15,6 +15,7 @@ int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt);
 int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt);
 int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt);
 int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915);
+int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915);
 
 /* Stubs for non-x86 platforms */
 #else
@@ -41,6 +42,11 @@ static inline int intel_gt_gmch_gen5_enable_hw(struct 
drm_i915_private *i915)
/* No HW should be enabled for this case yet, return fail */
return -ENODEV;
 }
+
+static inline int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915)
+{
+   return 0;
+}
 #endif
 
 #endif /* __INTEL_GT_GMCH_H__ */
diff --git a/drivers/gpu/drm/i915/i915_driver.c 
b/drivers/gpu/drm/i915/i915_driver.c
index d26dcca7e654..4e8a92ffbfe9 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -69,6 +69,7 @@
 #include "gem/i915_gem_mman.h"
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_gmch.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_rc6.h"
 
@@ -605,12 +606,16 @@ static int i915_driver_hw_probe(struct drm_i915_private 
*dev_priv)
 
ret = intel_gt_tiles_init(dev_priv);
if (ret)
-   goto err_mem_regions;
+   goto err_ggtt;
+
+   ret = i915_ggtt_setup_scratch_page(dev_priv);
+   if (ret)
+   goto err_ggtt;
 
ret = i915_ggtt_enable_hw(dev_priv);
if (ret) {
drm_err(&dev_priv->drm, "failed to enable GGTT\n");
-   goto err_mem_regions;
+   goto err_ggtt;
}
 
pci_set_master(pdev);
@@ -662,11 +667,10 @@ static int i915_driver_hw_probe(struct drm_i915_private 
*dev_priv)
 err_msi:
if (pdev->msi_enabled)
pci_disable_msi(pdev);
-err_mem_regions:
-   intel_memory_regions_driver_release(dev_priv);
 err_ggtt:
i915_ggtt_driver_release(dev_priv);
i915_gem_drain_freed_objects(dev_priv);
+   intel_memory_regions_driver_release(dev_priv);
i915_ggtt_driver_late_release(dev_priv);
 err_perf:
i915_perf_fini(dev_priv);
@@ -912,9 +916,9 @@ int i915_driver_probe(struct pci_dev *pdev, const struct 
pci_device_id *ent)
intel_modeset_driver_remove_nogem(i915);
 out_cleanup_hw:
i915_driver_hw_remove(i915);
-   intel_memory_regions_driver_release(i915);
i915_ggtt_driver_release(i915);
i915_gem_drain_freed_objects(i915);
+   intel_memory_regions_driver_release(i915);
i915_ggtt_driver_late_release(i915);
 out_cleanup_mmio:
i915_driver_mmio_release(i915);
@@ -971,9 +975,9 @@ static void i915_driver_release(struct drm_device *dev)
 
i915_gem_driver_release(dev_priv);
 
-   intel_memory_regions_driver_release(dev_priv);
i915_ggtt_driver_release(dev_priv);
i915_gem_drain_freed_objects(dev_priv);

[PATCH v3 5/8] drm/i915: limit ttm to dma32 for i965G[M]

2022-06-09 Thread Robert Beckett
i965G[M] cannot relocate objects above 4GiB.
Ensure ttm uses dma32 on these systems.
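
The final ttm_device_init() argument requests a DMA32 page pool. A
simplified model of its effect, not the actual TTM pool internals:

    /* with use_dma32 set, pool pages come from ZONE_DMA32, i.e. below 4 GiB */
    gfp_t gfp = GFP_USER;

    if (use_dma32)
        gfp |= __GFP_DMA32;
    page = alloc_pages(gfp, order);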

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index 62ff77445b01..fd2ecfdd8fa1 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -32,10 +32,15 @@
 int intel_region_ttm_device_init(struct drm_i915_private *dev_priv)
 {
struct drm_device *drm = &dev_priv->drm;
+   bool use_dma32 = false;
+
+   /* i965g[m] cannot relocate objects above 4GiB. */
+   if (IS_I965GM(dev_priv) || IS_I965G(dev_priv))
+   use_dma32 = true;
 
return ttm_device_init(&dev_priv->bdev, i915_ttm_driver(),
   drm->dev, drm->anon_inode->i_mapping,
-  drm->vma_offset_manager, false, false);
+  drm->vma_offset_manager, false, use_dma32);
 }
 
 /**
-- 
2.25.1



[PATCH v3 6/8] drm/i915/gem: further fix mman selftest

2022-06-09 Thread Robert Beckett
In commit 450cede7f380 ("drm/i915/gem: Fix the mman selftest") we fixed up
the mman selftest to allocate user buffers via smem only if we have lmem,
otherwise it uses internal buffers.

As the commit message asserts, we should only be using buffers that
userland should be able to create.
Internal buffers are not intended to be used by userland.

Instead, fix the code to always create buffers from smem.
In the case of integrated, this will create them from the shmem non-ttm
backend, which is fine.

This also fixes up the code to allow conversion of internal backend to
ttm without breaking this test.

Signed-off-by: Robert Beckett 
---
 .../gpu/drm/i915/gem/selftests/i915_gem_mman.c  | 17 ++---
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 5bc93a1ce3e3..ee2ad1281f97 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -594,17 +594,12 @@ static enum i915_mmap_type default_mapping(struct 
drm_i915_private *i915)
 }
 
 static struct drm_i915_gem_object *
-create_sys_or_internal(struct drm_i915_private *i915,
-  unsigned long size)
+create_sys(struct drm_i915_private *i915, unsigned long size)
 {
-   if (HAS_LMEM(i915)) {
-   struct intel_memory_region *sys_region =
-   i915->mm.regions[INTEL_REGION_SMEM];
+   struct intel_memory_region *sys_region =
+   i915->mm.regions[INTEL_REGION_SMEM];
 
-   return __i915_gem_object_create_user(i915, size, &sys_region, 1);
-   }
-
-   return i915_gem_object_create_internal(i915, size);
+   return __i915_gem_object_create_user(i915, size, &sys_region, 1);
 }
 
 static bool assert_mmap_offset(struct drm_i915_private *i915,
@@ -615,7 +610,7 @@ static bool assert_mmap_offset(struct drm_i915_private 
*i915,
u64 offset;
int ret;
 
-   obj = create_sys_or_internal(i915, size);
+   obj = create_sys(i915, size);
if (IS_ERR(obj))
return expected && expected == PTR_ERR(obj);
 
@@ -717,7 +712,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
}
 
/* Fill the hole, further allocation attempts should then fail */
-   obj = create_sys_or_internal(i915, PAGE_SIZE);
+   obj = create_sys(i915, PAGE_SIZE);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
pr_err("Unable to create object for reclaimed hole\n");
-- 
2.25.1



[PATCH v3 8/8] drm/i915: internal buffers use ttm backend

2022-06-09 Thread Robert Beckett
Create a kernel-only internal memory region that uses the ttm pool
allocator to allocate volatile system pages.
Refactor the internal buffer backend to simply allocate from this new
region.
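
Callers keep the existing helper; conceptually it now reduces to a region
allocation. A hedged sketch, where the region index name is an assumption:

    /* caller-facing API is unchanged */
    obj = i915_gem_object_create_internal(i915, SZ_64K);

    /* which, after this patch, is roughly equivalent to: */
    obj = i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_INTERNAL],
                                        SZ_64K, 0, I915_BO_ALLOC_VOLATILE);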

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_internal.c  | 187 +-
 drivers/gpu/drm/i915/gem/i915_gem_internal.h  |   5 -
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h   |   1 +
 .../drm/i915/gem/selftests/i915_gem_mman.c|   3 +
 drivers/gpu/drm/i915/i915_pci.c   |   4 +-
 drivers/gpu/drm/i915/intel_memory_region.c|   8 +-
 drivers/gpu/drm/i915/intel_memory_region.h|   2 +
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   2 +-
 8 files changed, 21 insertions(+), 191 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c 
b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index c698f95af15f..a83751867ac7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -4,188 +4,9 @@
  * Copyright © 2014-2016 Intel Corporation
  */
 
-#include 
-#include 
-#include 
-
+#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_region.h"
 #include "i915_drv.h"
-#include "i915_gem.h"
-#include "i915_gem_internal.h"
-#include "i915_gem_object.h"
-#include "i915_scatterlist.h"
-#include "i915_utils.h"
-
-#define QUIET (__GFP_NORETRY | __GFP_NOWARN)
-#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)
-
-static void internal_free_pages(struct sg_table *st)
-{
-   struct scatterlist *sg;
-
-   for (sg = st->sgl; sg; sg = __sg_next(sg)) {
-   if (sg_page(sg))
-   __free_pages(sg_page(sg), get_order(sg->length));
-   }
-
-   sg_free_table(st);
-   kfree(st);
-}
-
-static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
-{
-   struct drm_i915_private *i915 = to_i915(obj->base.dev);
-   struct sg_table *st;
-   struct scatterlist *sg;
-   unsigned int sg_page_sizes;
-   unsigned int npages;
-   int max_order;
-   gfp_t gfp;
-
-   max_order = MAX_ORDER;
-#ifdef CONFIG_SWIOTLB
-   if (is_swiotlb_active(obj->base.dev->dev)) {
-   unsigned int max_segment;
-
-   max_segment = swiotlb_max_segment();
-   if (max_segment) {
-   max_segment = max_t(unsigned int, max_segment,
-   PAGE_SIZE) >> PAGE_SHIFT;
-   max_order = min(max_order, ilog2(max_segment));
-   }
-   }
-#endif
-
-   gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
-   if (IS_I965GM(i915) || IS_I965G(i915)) {
-   /* 965gm cannot relocate objects above 4GiB. */
-   gfp &= ~__GFP_HIGHMEM;
-   gfp |= __GFP_DMA32;
-   }
-
-create_st:
-   st = kmalloc(sizeof(*st), GFP_KERNEL);
-   if (!st)
-   return -ENOMEM;
-
-   npages = obj->base.size / PAGE_SIZE;
-   if (sg_alloc_table(st, npages, GFP_KERNEL)) {
-   kfree(st);
-   return -ENOMEM;
-   }
-
-   sg = st->sgl;
-   st->nents = 0;
-   sg_page_sizes = 0;
-
-   do {
-   int order = min(fls(npages) - 1, max_order);
-   struct page *page;
-
-   do {
-   page = alloc_pages(gfp | (order ? QUIET : MAYFAIL),
-  order);
-   if (page)
-   break;
-   if (!order--)
-   goto err;
-
-   /* Limit subsequent allocations as well */
-   max_order = order;
-   } while (1);
-
-   sg_set_page(sg, page, PAGE_SIZE << order, 0);
-   sg_page_sizes |= PAGE_SIZE << order;
-   st->nents++;
-
-   npages -= 1 << order;
-   if (!npages) {
-   sg_mark_end(sg);
-   break;
-   }
-
-   sg = __sg_next(sg);
-   } while (1);
-
-   if (i915_gem_gtt_prepare_pages(obj, st)) {
-   /* Failed to dma-map try again with single page sg segments */
-   if (get_order(st->sgl->length)) {
-   internal_free_pages(st);
-   max_order = 0;
-   goto create_st;
-   }
-   goto err;
-   }
-
-   __i915_gem_object_set_pages(obj, st, sg_page_sizes);
-
-   return 0;
-
-err:
-   sg_set_page(sg, NULL, 0, 0);
-   sg_mark_end(sg);
-   internal_free_pages(st);
-
-   return -ENOMEM;
-}
-
-static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
-  struct sg_table *pages)
-{
-   i915_gem_gtt_finish_pages(obj, pages);

[PATCH v3 7/8] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level

2022-06-09 Thread Robert Beckett
By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP.
This is divergent from existing backends code which only considers
HAS_LLC.
Testing shows that trusting snooping is unreliable on gen5- and on bsw via
ggtt mappings, so limit the snoop check to DGFX for now and maintain the
previous behaviour elsewhere.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 4c1de0b4a10f..40249fa28a7a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -46,7 +46,9 @@ static enum i915_cache_level
 i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
 struct ttm_tt *ttm)
 {
-   return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
+   bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915);
+
+   return ((HAS_LLC(i915) || can_snoop) &&
!i915_ttm_gtt_binds_lmem(res) &&
ttm->caching == ttm_cached) ? I915_CACHE_LLC :
I915_CACHE_NONE;
-- 
2.25.1



[PATCH v3 4/8] drm/i915: allow volatile buffers to use ttm pool allocator

2022-06-09 Thread Robert Beckett
Internal/volatile buffers should not be shmem backed.
If a volatile buffer is requested, allow ttm to use the pool allocator
to provide volatile pages as backing.
Fix i915_ttm_shrink to handle !is_shmem volatile buffers by purging.
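
A sketch of the resulting shrink behaviour; purge() and swap_out() are
placeholders for the existing shrink paths, not real helpers:

    /* i915_ttm_shrink(), simplified: */
    if (!i915_tt->is_shmem) {
        /* only legal for volatile (I915_MADV_DONTNEED) objects */
        purge(obj);        /* drop the pool-allocated pages outright */
        return 0;
    }
    if (i915_tt->filp)
        swap_out(obj);     /* shmem-backed: write back as before */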

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 27d59639177f..8edce04a0509 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -309,7 +309,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct 
ttm_buffer_object *bo,
page_flags |= TTM_TT_FLAG_ZERO_ALLOC;
 
caching = i915_ttm_select_tt_caching(obj);
-   if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) {
+   if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached &&
+   !i915_gem_object_is_volatile(obj)) {
page_flags |= TTM_TT_FLAG_EXTERNAL |
  TTM_TT_FLAG_EXTERNAL_MAPPABLE;
i915_tt->is_shmem = true;
@@ -531,9 +532,9 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, 
unsigned int flags)
if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM)
return 0;
 
-   GEM_BUG_ON(!i915_tt->is_shmem);
+   GEM_BUG_ON(!i915_tt->is_shmem && obj->mm.madv != I915_MADV_DONTNEED);
 
-   if (!i915_tt->filp)
+   if (i915_tt->is_shmem && !i915_tt->filp)
return 0;
 
ret = ttm_bo_wait_ctx(bo, &ctx);
-- 
2.25.1



[PATCH v3 2/8] drm/i915: add gen6 ppgtt dummy creation function

2022-06-09 Thread Robert Beckett
Internal gem objects will soon just be volatile system memory region
objects.
To enable this, create a separate dummy object creation function
for gen6 ppgtt. The object only exists as a fake object pointing to ggtt
and gains no benefit in going via the internal backend.
Instead, create a dummy gem object and avoid having to maintain a custom
ops api in the internal backend, which makes later refactoring easier.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 43 ++--
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index 1bb766c79dcb..f3b660cfeb7f 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -372,6 +372,45 @@ static const struct drm_i915_gem_object_ops 
pd_dummy_obj_ops = {
.put_pages = pd_dummy_obj_put_pages,
 };
 
+static struct drm_i915_gem_object *
+i915_gem_object_create_dummy(struct drm_i915_private *i915, phys_addr_t size)
+{
+   static struct lock_class_key lock_class;
+   struct drm_i915_gem_object *obj;
+   unsigned int cache_level;
+
+   GEM_BUG_ON(!size);
+   GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+
+   if (overflows_type(size, obj->base.size))
+   return ERR_PTR(-E2BIG);
+
+   obj = i915_gem_object_alloc();
+   if (!obj)
+   return ERR_PTR(-ENOMEM);
+
+   drm_gem_private_object_init(&i915->drm, &obj->base, size);
+   i915_gem_object_init(obj, &pd_dummy_obj_ops, &lock_class, 0);
+   obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
+
+   /*
+* Mark the object as volatile, such that the pages are marked as
+* dontneed whilst they are still pinned. As soon as they are unpinned
+* they are allowed to be reaped by the shrinker, and the caller is
+* expected to repopulate - the contents of this object are only valid
+* whilst active and pinned.
+*/
+   i915_gem_object_set_volatile(obj);
+
+   obj->read_domains = I915_GEM_DOMAIN_CPU;
+   obj->write_domain = I915_GEM_DOMAIN_CPU;
+
+   cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+   i915_gem_object_set_cache_coherency(obj, cache_level);
+
+   return obj;
+}
+
 static struct i915_page_directory *
 gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt)
 {
@@ -383,9 +422,7 @@ gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt)
if (unlikely(!pd))
return ERR_PTR(-ENOMEM);
 
-   pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915,
-   &pd_dummy_obj_ops,
-   I915_PDES * SZ_4K);
+   pd->pt.base = i915_gem_object_create_dummy(ppgtt->base.vm.gt->i915, I915_PDES * SZ_4K);
if (IS_ERR(pd->pt.base)) {
err = PTR_ERR(pd->pt.base);
pd->pt.base = NULL;
-- 
2.25.1



[PATCH v3 1/8] drm/i915/ttm: dont trample cache_level overrides during ttm move

2022-06-09 Thread Robert Beckett
Various places within the driver override the default chosen cache_level.
Before ttm, these overrides were permanent until explicitly changed again
or for the lifetime of the buffer.

TTM movement code came along and decided that it could make that
decision at that time, which is usually well after object creation, so
overrode the cache_level decision and reverted it back to its default
decision.

Add logic to indicate whether the caching mode has been set by anything
other than the move logic. If so, assume that the code that overrode the
defaults knows best and keep it.
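
The pattern, as a self-contained model; field and helper names mirror the
diff below, but this is a sketch rather than the driver code:

    #include <stdbool.h>

    struct gem_obj {
        int cache_level;
        bool cache_level_override;
    };

    static void set_cache_coherency(struct gem_obj *obj, int level)
    {
        obj->cache_level = level;
        obj->cache_level_override = true;  /* any explicit set is an override */
    }

    static void adjust_gem_after_move(struct gem_obj *obj, int default_level)
    {
        if (!obj->cache_level_override) {  /* only recompute the default */
            set_cache_coherency(obj, default_level);
            obj->cache_level_override = false;  /* stay open to later overrides */
        }
    }

Object init clears the flag once after the first adjust, so the initial
default still lands before any caller override is recorded.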

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c   | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  | 1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 9 ++---
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 06b1b188ce5a..519887769c08 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct 
drm_i915_gem_object *obj,
struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
obj->cache_level = cache_level;
+   obj->ttm.cache_level_override = true;
 
if (cache_level != I915_CACHE_NONE)
obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 2c88bdb8ff7c..6632ed52e919 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -605,6 +605,7 @@ struct drm_i915_gem_object {
struct i915_gem_object_page_iter get_io_page;
struct drm_i915_gem_object *backup;
bool created:1;
+   bool cache_level_override:1;
} ttm;
 
/*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 4c25d9b2f138..27d59639177f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1241,6 +1241,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region 
*mem,
i915_gem_object_init_memory_region(obj, mem);
i915_ttm_adjust_domains_after_move(obj);
i915_ttm_adjust_gem_after_move(obj);
+   obj->ttm.cache_level_override = false;
i915_gem_object_unlock(obj);
 
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index a10716f4e717..4c1de0b4a10f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -123,9 +123,12 @@ void i915_ttm_adjust_gem_after_move(struct 
drm_i915_gem_object *obj)
obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM :
I915_BO_FLAG_STRUCT_PAGE;
 
-   cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
-  bo->ttm);
-   i915_gem_object_set_cache_coherency(obj, cache_level);
+   if (!obj->ttm.cache_level_override) {
+   cache_level = i915_ttm_cache_level(to_i915(bo->base.dev),
+  bo->resource, bo->ttm);
+   i915_gem_object_set_cache_coherency(obj, cache_level);
+   obj->ttm.cache_level_override = false;
+   }
 }
 
 /**
-- 
2.25.1



[PATCH v3 0/8] drm/i915: ttm for internal

2022-06-09 Thread Robert Beckett
This series refactors i915's internal buffer backend to use ttm.
It uses ttm's pool allocator to allocate volatile pages in place of the
old code which rolled its own via alloc_pages.
This is continuing progress to align all backends on using ttm.

v2: - commit message improvements to add detail
- fix i915_ttm_shrink to purge !is_shmem volatile buffers
- limit ttm pool allocator to using dma32 on i965G[M]
- fix mman selftest to always use smem buffers
- create new internal memory region
- make internal backend allocate from internal region
- Fixed various issues with tests and i915 ttm usage as a result
  of supporting regions other than lmem via ttm.

v4: - limit i915 ttm default cache_level selection to only trust
  HAS_SNOOP on DGFX.

Robert Beckett (8):
  drm/i915/ttm: dont trample cache_level overrides during ttm move
  drm/i915: add gen6 ppgtt dummy creation function
  drm/i915: setup ggtt scratch page after memory regions
  drm/i915: allow volatile buffers to use ttm pool allocator
  drm/i915: limit ttm to dma32 for i965G[M]
  drm/i915/gem: further fix mman selftest
  drm/i915/ttm: only trust snooping for dgfx when deciding default
cache_level
  drm/i915: internal buffers use ttm backend

 drivers/gpu/drm/i915/gem/i915_gem_internal.c  | 187 +-
 drivers/gpu/drm/i915/gem/i915_gem_internal.h  |   5 -
 drivers/gpu/drm/i915/gem/i915_gem_object.c|   1 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   |   8 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h   |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  13 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  20 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  |  43 +++-
 drivers/gpu/drm/i915/gt/intel_gt_gmch.c   |  20 +-
 drivers/gpu/drm/i915/gt/intel_gt_gmch.h   |   6 +
 drivers/gpu/drm/i915/i915_driver.c|  16 +-
 drivers/gpu/drm/i915/i915_pci.c   |   4 +-
 drivers/gpu/drm/i915/intel_memory_region.c|   8 +-
 drivers/gpu/drm/i915/intel_memory_region.h|   2 +
 drivers/gpu/drm/i915/intel_region_ttm.c   |   7 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   2 +-
 17 files changed, 123 insertions(+), 221 deletions(-)

-- 
2.25.1



[PATCH v2 4/8] drm/i915: allow volatile buffers to use ttm pool allocator

2022-06-08 Thread Robert Beckett
Internal/volatile buffers should not be shmem backed.
If a volatile buffer is requested, allow ttm to use the pool allocator
to provide volatile pages as backing.
Fix i915_ttm_shrink to handle !is_shmem volatile buffers by purging.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 27d59639177f..8edce04a0509 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -309,7 +309,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct 
ttm_buffer_object *bo,
page_flags |= TTM_TT_FLAG_ZERO_ALLOC;
 
caching = i915_ttm_select_tt_caching(obj);
-   if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) {
+   if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached &&
+   !i915_gem_object_is_volatile(obj)) {
page_flags |= TTM_TT_FLAG_EXTERNAL |
  TTM_TT_FLAG_EXTERNAL_MAPPABLE;
i915_tt->is_shmem = true;
@@ -531,9 +532,9 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, 
unsigned int flags)
if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM)
return 0;
 
-   GEM_BUG_ON(!i915_tt->is_shmem);
+   GEM_BUG_ON(!i915_tt->is_shmem && obj->mm.madv != I915_MADV_DONTNEED);
 
-   if (!i915_tt->filp)
+   if (i915_tt->is_shmem && !i915_tt->filp)
return 0;
 
ret = ttm_bo_wait_ctx(bo, &ctx);
-- 
2.25.1



[PATCH v2 8/8] drm/i915: internal buffers use ttm backend

2022-06-08 Thread Robert Beckett
Create a kernel-only internal memory region that uses the ttm pool
allocator to allocate volatile system pages.
Refactor the internal buffer backend to simply allocate from this new
region.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/gem/i915_gem_internal.c  | 187 +-
 drivers/gpu/drm/i915/gem/i915_gem_internal.h  |   5 -
 drivers/gpu/drm/i915/gem/i915_gem_ttm.h   |   1 +
 .../drm/i915/gem/selftests/i915_gem_mman.c|   3 +
 drivers/gpu/drm/i915/i915_pci.c   |   4 +-
 drivers/gpu/drm/i915/intel_memory_region.c|   8 +-
 drivers/gpu/drm/i915/intel_memory_region.h|   2 +
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   2 +-
 8 files changed, 21 insertions(+), 191 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c 
b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index c698f95af15f..a83751867ac7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -4,188 +4,9 @@
  * Copyright © 2014-2016 Intel Corporation
  */
 
-#include 
-#include 
-#include 
-
+#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_region.h"
 #include "i915_drv.h"
-#include "i915_gem.h"
-#include "i915_gem_internal.h"
-#include "i915_gem_object.h"
-#include "i915_scatterlist.h"
-#include "i915_utils.h"
-
-#define QUIET (__GFP_NORETRY | __GFP_NOWARN)
-#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)
-
-static void internal_free_pages(struct sg_table *st)
-{
-   struct scatterlist *sg;
-
-   for (sg = st->sgl; sg; sg = __sg_next(sg)) {
-   if (sg_page(sg))
-   __free_pages(sg_page(sg), get_order(sg->length));
-   }
-
-   sg_free_table(st);
-   kfree(st);
-}
-
-static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
-{
-   struct drm_i915_private *i915 = to_i915(obj->base.dev);
-   struct sg_table *st;
-   struct scatterlist *sg;
-   unsigned int sg_page_sizes;
-   unsigned int npages;
-   int max_order;
-   gfp_t gfp;
-
-   max_order = MAX_ORDER;
-#ifdef CONFIG_SWIOTLB
-   if (is_swiotlb_active(obj->base.dev->dev)) {
-   unsigned int max_segment;
-
-   max_segment = swiotlb_max_segment();
-   if (max_segment) {
-   max_segment = max_t(unsigned int, max_segment,
-   PAGE_SIZE) >> PAGE_SHIFT;
-   max_order = min(max_order, ilog2(max_segment));
-   }
-   }
-#endif
-
-   gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
-   if (IS_I965GM(i915) || IS_I965G(i915)) {
-   /* 965gm cannot relocate objects above 4GiB. */
-   gfp &= ~__GFP_HIGHMEM;
-   gfp |= __GFP_DMA32;
-   }
-
-create_st:
-   st = kmalloc(sizeof(*st), GFP_KERNEL);
-   if (!st)
-   return -ENOMEM;
-
-   npages = obj->base.size / PAGE_SIZE;
-   if (sg_alloc_table(st, npages, GFP_KERNEL)) {
-   kfree(st);
-   return -ENOMEM;
-   }
-
-   sg = st->sgl;
-   st->nents = 0;
-   sg_page_sizes = 0;
-
-   do {
-   int order = min(fls(npages) - 1, max_order);
-   struct page *page;
-
-   do {
-   page = alloc_pages(gfp | (order ? QUIET : MAYFAIL),
-  order);
-   if (page)
-   break;
-   if (!order--)
-   goto err;
-
-   /* Limit subsequent allocations as well */
-   max_order = order;
-   } while (1);
-
-   sg_set_page(sg, page, PAGE_SIZE << order, 0);
-   sg_page_sizes |= PAGE_SIZE << order;
-   st->nents++;
-
-   npages -= 1 << order;
-   if (!npages) {
-   sg_mark_end(sg);
-   break;
-   }
-
-   sg = __sg_next(sg);
-   } while (1);
-
-   if (i915_gem_gtt_prepare_pages(obj, st)) {
-   /* Failed to dma-map try again with single page sg segments */
-   if (get_order(st->sgl->length)) {
-   internal_free_pages(st);
-   max_order = 0;
-   goto create_st;
-   }
-   goto err;
-   }
-
-   __i915_gem_object_set_pages(obj, st, sg_page_sizes);
-
-   return 0;
-
-err:
-   sg_set_page(sg, NULL, 0, 0);
-   sg_mark_end(sg);
-   internal_free_pages(st);
-
-   return -ENOMEM;
-}
-
-static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
-  struct sg_table *pages)
-{
-   i915_gem_gtt_finish_pages(obj, pages);

[PATCH v2 6/8] drm/i915/gem: further fix mman selftest

2022-06-08 Thread Robert Beckett
In commit 450cede7f380 ("drm/i915/gem: Fix the mman selftest") we fixed up
the mman selftest to allocate user buffers via smem only if we have lmem,
otherwise it uses internal buffers.

As the commit message asserts, we should only be using buffers that
userland should be able to create.
Internal buffers are not intended to be used by userland.

Instead, fix the code to always create buffers from smem.
In the case of integrated, this will create them from the shmem non-ttm
backend, which is fine.

This also fixes up the code to allow conversion of internal backend to
ttm without breaking this test.

Signed-off-by: Robert Beckett 
---
 .../gpu/drm/i915/gem/selftests/i915_gem_mman.c  | 17 ++---
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 5bc93a1ce3e3..ee2ad1281f97 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -594,17 +594,12 @@ static enum i915_mmap_type default_mapping(struct 
drm_i915_private *i915)
 }
 
 static struct drm_i915_gem_object *
-create_sys_or_internal(struct drm_i915_private *i915,
-  unsigned long size)
+create_sys(struct drm_i915_private *i915, unsigned long size)
 {
-   if (HAS_LMEM(i915)) {
-   struct intel_memory_region *sys_region =
-   i915->mm.regions[INTEL_REGION_SMEM];
+   struct intel_memory_region *sys_region =
+   i915->mm.regions[INTEL_REGION_SMEM];
 
-   return __i915_gem_object_create_user(i915, size, &sys_region, 1);
-   }
-
-   return i915_gem_object_create_internal(i915, size);
+   return __i915_gem_object_create_user(i915, size, &sys_region, 1);
 }
 
 static bool assert_mmap_offset(struct drm_i915_private *i915,
@@ -615,7 +610,7 @@ static bool assert_mmap_offset(struct drm_i915_private 
*i915,
u64 offset;
int ret;
 
-   obj = create_sys_or_internal(i915, size);
+   obj = create_sys(i915, size);
if (IS_ERR(obj))
return expected && expected == PTR_ERR(obj);
 
@@ -717,7 +712,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
}
 
/* Fill the hole, further allocation attempts should then fail */
-   obj = create_sys_or_internal(i915, PAGE_SIZE);
+   obj = create_sys(i915, PAGE_SIZE);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
pr_err("Unable to create object for reclaimed hole\n");
-- 
2.25.1



[PATCH v2 5/8] drm/i915: limit ttm to dma32 for i965G[M]

2022-06-08 Thread Robert Beckett
i965G[M] cannot relocate objects above 4GiB.
Ensure ttm uses dma32 on these systems.

Signed-off-by: Robert Beckett 
---
 drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c
index 62ff77445b01..fd2ecfdd8fa1 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -32,10 +32,15 @@
 int intel_region_ttm_device_init(struct drm_i915_private *dev_priv)
 {
struct drm_device *drm = &dev_priv->drm;
+   bool use_dma32 = false;
+
+   /* i965g[m] cannot relocate objects above 4GiB. */
+   if (IS_I965GM(dev_priv) || IS_I965G(dev_priv))
+   use_dma32 = true;
 
return ttm_device_init(&dev_priv->bdev, i915_ttm_driver(),
   drm->dev, drm->anon_inode->i_mapping,
-  drm->vma_offset_manager, false, false);
+  drm->vma_offset_manager, false, use_dma32);
 }
 
 /**
-- 
2.25.1


