[PATCH v5] drm/i915: stop using swiotlb
Calling swiotlb functions directly is nowadays considered harmful. See https://lore.kernel.org/intel-gfx/20220711082614.ga29...@lst.de/ Replace swiotlb_max_segment() calls with dma_max_mapping_size(). In i915_gem_object_get_pages_internal() no longer consider max_segment only if CONFIG_SWIOTLB is enabled. There can be other (iommu related) causes of specific max segment sizes. Cc: Christoph Hellwig Cc: Tvrtko Ursulin Cc: Thomas Hellstrom Cc: Matthew Auld v2: - restore UINT_MAX clamp in i915_sg_segment_size() - drop PAGE_SIZE check as it will always be >= PAGE_SIZE v3: - actually clamp to UINT_MAX in i915_sg_segment_size() v4: - round down max segment size to PAGE_SIZE v5: - fix checkpatch whitespace issue Reviewed-by: Christoph Hellwig Reviewed-by: Tvrtko Ursulin Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 19 --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c| 2 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 4 ++-- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- drivers/gpu/drm/i915/i915_scatterlist.h | 16 5 files changed, 12 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..24f37658f1bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -6,7 +6,6 @@ #include #include -#include #include "i915_drv.h" #include "i915_gem.h" @@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct scatterlist *sg; unsigned int sg_page_sizes; unsigned int npages; - int max_order; + int max_order = MAX_ORDER; + unsigned int max_segment; gfp_t gfp; - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } - } -#endif + max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT; + max_order = min(max_order, ilog2(max_segment)); gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; if (IS_I965GM(i915) || IS_I965G(i915)) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 4eed3dd90ba8..34b9c76cd8e6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) struct intel_memory_region *mem = obj->mm.region; struct address_space *mapping = obj->base.filp->f_mapping; const unsigned long page_count = obj->base.size / PAGE_SIZE; - unsigned int max_segment = i915_sg_segment_size(); + unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); struct sg_table *st; struct sgt_iter sgt_iter; struct page *page; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 5a5cf332d8a5..7a828c9c0f6d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev, struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM]; struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); - const unsigned int max_segment = i915_sg_segment_size(); + const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT; struct file *filp = i915_tt->filp; struct sgt_iter sgt_iter; @@ -568,7 +568,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm) ret = sg_alloc_table_from_pages_segment(st, ttm->pages, ttm->num_pages, 0, (unsigned long)ttm->num_pages << PAGE_SHIFT, - i915_sg_segment_size(), GFP_KERNEL); + i915_sg_segment_size(i915_tt->dev), GFP_KERNEL); if (ret) { st->sgl = NULL; return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 094f06b4ce33..dfc35905dba2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -129,7 +1
Re: [PATCH v4] drm/i915: stop using swiotlb
On 26/07/2022 14:12, Tvrtko Ursulin wrote: On 25/07/2022 15:18, Robert Beckett wrote: Calling swiotlb functions directly is nowadays considered harmful. See https://lore.kernel.org/intel-gfx/20220711082614.ga29...@lst.de/ Replace swiotlb_max_segment() calls with dma_max_mapping_size(). In i915_gem_object_get_pages_internal() no longer consider max_segment only if CONFIG_SWIOTLB is enabled. There can be other (iommu related) causes of specific max segment sizes. Cc: Christoph Hellwig Cc: Tvrtko Ursulin Cc: Thomas Hellstrom Cc: Matthew Auld v2: - restore UINT_MAX clamp in i915_sg_segment_size() - drop PAGE_SIZE check as it will always be >= PAGE_SIZE v3: - actually clamp to UINT_MAX in i915_sg_segment_size() v4: - round down max segment size to PAGE_SIZE Reviewed-by: Christoph Hellwig Reviewed-by: Tvrtko Ursulin Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 19 --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 4 ++-- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- drivers/gpu/drm/i915/i915_scatterlist.h | 17 - 5 files changed, 12 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..24f37658f1bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -6,7 +6,6 @@ #include #include -#include #include "i915_drv.h" #include "i915_gem.h" @@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct scatterlist *sg; unsigned int sg_page_sizes; unsigned int npages; - int max_order; + int max_order = MAX_ORDER; + unsigned int max_segment; gfp_t gfp; - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } - } -#endif + max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT; + max_order = min(max_order, ilog2(max_segment)); gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; if (IS_I965GM(i915) || IS_I965G(i915)) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 4eed3dd90ba8..34b9c76cd8e6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) struct intel_memory_region *mem = obj->mm.region; struct address_space *mapping = obj->base.filp->f_mapping; const unsigned long page_count = obj->base.size / PAGE_SIZE; - unsigned int max_segment = i915_sg_segment_size(); + unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); struct sg_table *st; struct sgt_iter sgt_iter; struct page *page; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 5a5cf332d8a5..7a828c9c0f6d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev, struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM]; struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); - const unsigned int max_segment = i915_sg_segment_size(); + const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT; struct file *filp = i915_tt->filp; struct sgt_iter sgt_iter; @@ -568,7 +568,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm) ret = sg_alloc_table_from_pages_segment(st, ttm->pages, ttm->num_pages, 0, (unsigned long)ttm->num_pages << PAGE_SHIFT, - i915_sg_segment_size(), GFP_KERNEL); + i915_sg_segment_size(i915_tt->dev), GFP_KERNEL); if (ret) { st->sgl = NULL; return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 094f06b4ce33..dfc35905dba2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) static int i915_gem_userptr_get_pages(struc
[PATCH v4] drm/i915: stop using swiotlb
Calling swiotlb functions directly is nowadays considered harmful. See https://lore.kernel.org/intel-gfx/20220711082614.ga29...@lst.de/ Replace swiotlb_max_segment() calls with dma_max_mapping_size(). In i915_gem_object_get_pages_internal() no longer consider max_segment only if CONFIG_SWIOTLB is enabled. There can be other (iommu related) causes of specific max segment sizes. Cc: Christoph Hellwig Cc: Tvrtko Ursulin Cc: Thomas Hellstrom Cc: Matthew Auld v2: - restore UINT_MAX clamp in i915_sg_segment_size() - drop PAGE_SIZE check as it will always be >= PAGE_SIZE v3: - actually clamp to UINT_MAX in i915_sg_segment_size() v4: - round down max segment size to PAGE_SIZE Reviewed-by: Christoph Hellwig Reviewed-by: Tvrtko Ursulin Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 19 --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c| 2 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 4 ++-- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- drivers/gpu/drm/i915/i915_scatterlist.h | 17 - 5 files changed, 12 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..24f37658f1bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -6,7 +6,6 @@ #include #include -#include #include "i915_drv.h" #include "i915_gem.h" @@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct scatterlist *sg; unsigned int sg_page_sizes; unsigned int npages; - int max_order; + int max_order = MAX_ORDER; + unsigned int max_segment; gfp_t gfp; - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } - } -#endif + max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT; + max_order = min(max_order, ilog2(max_segment)); gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; if (IS_I965GM(i915) || IS_I965G(i915)) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 4eed3dd90ba8..34b9c76cd8e6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) struct intel_memory_region *mem = obj->mm.region; struct address_space *mapping = obj->base.filp->f_mapping; const unsigned long page_count = obj->base.size / PAGE_SIZE; - unsigned int max_segment = i915_sg_segment_size(); + unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); struct sg_table *st; struct sgt_iter sgt_iter; struct page *page; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 5a5cf332d8a5..7a828c9c0f6d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev, struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM]; struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); - const unsigned int max_segment = i915_sg_segment_size(); + const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT; struct file *filp = i915_tt->filp; struct sgt_iter sgt_iter; @@ -568,7 +568,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm) ret = sg_alloc_table_from_pages_segment(st, ttm->pages, ttm->num_pages, 0, (unsigned long)ttm->num_pages << PAGE_SHIFT, - i915_sg_segment_size(), GFP_KERNEL); + i915_sg_segment_size(i915_tt->dev), GFP_KERNEL); if (ret) { st->sgl = NULL; return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 094f06b4ce33..dfc35905dba2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(s
[PATCH v3] drm/i915: stop using swiotlb
Calling swiotlb functions directly is nowadays considered harmful. See https://lore.kernel.org/intel-gfx/20220711082614.ga29...@lst.de/ Replace swiotlb_max_segment() calls with dma_max_mapping_size(). In i915_gem_object_get_pages_internal() no longer consider max_segment only if CONFIG_SWIOTLB is enabled. There can be other (iommu related) causes of specific max segment sizes. Cc: Christoph Hellwig Cc: Tvrtko Ursulin Cc: Thomas Hellstrom Cc: Matthew Auld v2: - restore UINT_MAX clamp in i915_sg_segment_size() - drop PAGE_SIZE check as it will always be >= PAGE_SIZE v3: - actually clamp to UINT_MAX in i915_sg_segment_size() Reviewed-by: Christoph Hellwig Reviewed-by: Tvrtko Ursulin Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 19 --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c| 2 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 4 ++-- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- drivers/gpu/drm/i915/i915_scatterlist.h | 16 +++- 5 files changed, 11 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..24f37658f1bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -6,7 +6,6 @@ #include #include -#include #include "i915_drv.h" #include "i915_gem.h" @@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct scatterlist *sg; unsigned int sg_page_sizes; unsigned int npages; - int max_order; + int max_order = MAX_ORDER; + unsigned int max_segment; gfp_t gfp; - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } - } -#endif + max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT; + max_order = min(max_order, ilog2(max_segment)); gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; if (IS_I965GM(i915) || IS_I965G(i915)) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 4eed3dd90ba8..34b9c76cd8e6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) struct intel_memory_region *mem = obj->mm.region; struct address_space *mapping = obj->base.filp->f_mapping; const unsigned long page_count = obj->base.size / PAGE_SIZE; - unsigned int max_segment = i915_sg_segment_size(); + unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); struct sg_table *st; struct sgt_iter sgt_iter; struct page *page; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 5a5cf332d8a5..7a828c9c0f6d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev, struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM]; struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); - const unsigned int max_segment = i915_sg_segment_size(); + const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT; struct file *filp = i915_tt->filp; struct sgt_iter sgt_iter; @@ -568,7 +568,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm) ret = sg_alloc_table_from_pages_segment(st, ttm->pages, ttm->num_pages, 0, (unsigned long)ttm->num_pages << PAGE_SHIFT, - i915_sg_segment_size(), GFP_KERNEL); + i915_sg_segment_size(i915_tt->dev), GFP_KERNEL); if (ret) { st->sgl = NULL; return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 094f06b4ce33..dfc35905dba2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) static int i9
Re: [PATCH v2] drm/i915: stop using swiotlb
On 22/07/2022 16:03, Christoph Hellwig wrote: + return max_t(size_t, UINT_MAX, dma_max_mapping_size(dev)); Shouldn't this be a min? eugh! yes. Stand by for v3
[PATCH v2] drm/i915: stop using swiotlb
Calling swiotlb functions directly is nowadays considered harmful. See https://lore.kernel.org/intel-gfx/20220711082614.ga29...@lst.de/ Replace swiotlb_max_segment() calls with dma_max_mapping_size(). In i915_gem_object_get_pages_internal() no longer consider max_segment only if CONFIG_SWIOTLB is enabled. There can be other (iommu related) causes of specific max segment sizes. Cc: Christoph Hellwig Cc: Tvrtko Ursulin Cc: Thomas Hellstrom Cc: Matthew Auld v2: - restore UINT_MAX clamp in i915_sg_segment_size() - drop PAGE_SIZE check as it will always be >= PAGE_SIZE Reviewed-by: Christoph Hellwig Reviewed-by: Tvrtko Ursulin Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 19 --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c| 2 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 4 ++-- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- drivers/gpu/drm/i915/i915_scatterlist.h | 16 +++- 5 files changed, 11 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..24f37658f1bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -6,7 +6,6 @@ #include #include -#include #include "i915_drv.h" #include "i915_gem.h" @@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct scatterlist *sg; unsigned int sg_page_sizes; unsigned int npages; - int max_order; + int max_order = MAX_ORDER; + unsigned int max_segment; gfp_t gfp; - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } - } -#endif + max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT; + max_order = min(max_order, ilog2(max_segment)); gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; if (IS_I965GM(i915) || IS_I965G(i915)) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 4eed3dd90ba8..34b9c76cd8e6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) struct intel_memory_region *mem = obj->mm.region; struct address_space *mapping = obj->base.filp->f_mapping; const unsigned long page_count = obj->base.size / PAGE_SIZE; - unsigned int max_segment = i915_sg_segment_size(); + unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); struct sg_table *st; struct sgt_iter sgt_iter; struct page *page; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 5a5cf332d8a5..7a828c9c0f6d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev, struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM]; struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); - const unsigned int max_segment = i915_sg_segment_size(); + const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT; struct file *filp = i915_tt->filp; struct sgt_iter sgt_iter; @@ -568,7 +568,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm) ret = sg_alloc_table_from_pages_segment(st, ttm->pages, ttm->num_pages, 0, (unsigned long)ttm->num_pages << PAGE_SHIFT, - i915_sg_segment_size(), GFP_KERNEL); + i915_sg_segment_size(i915_tt->dev), GFP_KERNEL); if (ret) { st->sgl = NULL; return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 094f06b4ce33..dfc35905dba2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *
[PATCH] drm/i915: stop using swiotlb
Calling swiotlb functions directly is nowadays considered harmful. See https://lore.kernel.org/intel-gfx/20220711082614.ga29...@lst.de/ Replace swiotlb_max_segment() calls with dma_max_mapping_size(). In i915_gem_object_get_pages_internal() no longer consider max_segment only if CONFIG_SWIOTLB is enabled. There can be other (iommu related) causes of specific max segment sizes. Cc: Christoph Hellwig Cc: Tvrtko Ursulin Cc: Thomas Hellstrom Cc: Matthew Auld Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 20 +--- drivers/gpu/drm/i915/gem/i915_gem_shmem.c| 2 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 4 ++-- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- drivers/gpu/drm/i915/i915_scatterlist.h | 16 5 files changed, 9 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..e1aca378d90f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -6,7 +6,6 @@ #include #include -#include #include "i915_drv.h" #include "i915_gem.h" @@ -38,22 +37,13 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct scatterlist *sg; unsigned int sg_page_sizes; unsigned int npages; - int max_order; + int max_order = MAX_ORDER; + size_t max_segment; gfp_t gfp; - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } - } -#endif + max_segment = dma_max_mapping_size(i915->drm.dev); + max_segment = max_t(size_t, max_segment, PAGE_SIZE) >> PAGE_SHIFT; + max_order = min(max_order, ilog2(max_segment)); gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; if (IS_I965GM(i915) || IS_I965G(i915)) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 4eed3dd90ba8..b0ec65b7c1da 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) struct intel_memory_region *mem = obj->mm.region; struct address_space *mapping = obj->base.filp->f_mapping; const unsigned long page_count = obj->base.size / PAGE_SIZE; - unsigned int max_segment = i915_sg_segment_size(); + unsigned int max_segment = dma_max_mapping_size(i915->drm.dev); struct sg_table *st; struct sgt_iter sgt_iter; struct page *page; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 5a5cf332d8a5..882f046f4d18 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev, struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM]; struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); - const unsigned int max_segment = i915_sg_segment_size(); + const unsigned int max_segment = dma_max_mapping_size(i915->drm.dev); const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT; struct file *filp = i915_tt->filp; struct sgt_iter sgt_iter; @@ -568,7 +568,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm) ret = sg_alloc_table_from_pages_segment(st, ttm->pages, ttm->num_pages, 0, (unsigned long)ttm->num_pages << PAGE_SHIFT, - i915_sg_segment_size(), GFP_KERNEL); + dma_max_mapping_size(i915_tt->dev), GFP_KERNEL); if (ret) { st->sgl = NULL; return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 094f06b4ce33..8a62a71859e6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; - unsigned int max_segment = i915_sg_seg
Re: susetting the remaining swioltb couplin in DRM
On 18/07/2022 12:36, Tvrtko Ursulin wrote: Hi, On 12/07/2022 06:00, Christoph Hellwig wrote: On Mon, Jul 11, 2022 at 04:31:49PM -0400, Rodrigo Vivi wrote: On Mon, Jul 11, 2022 at 10:26:14AM +0200, Christoph Hellwig wrote: Hi i915 and nouveau maintainers, any chance I could get some help to remove the remaining direct driver calls into swiotlb, namely swiotlb_max_segment and is_swiotlb_active. Either should not matter to a driver as they should be written to the DMA API. Hi Christoph, while we take a look here, could you please share the reasons behind sunsetting this calls? Because they are a completely broken layering violation. A driver has absolutely no business knowing the dma-mapping violation. The DMA API reports what we think is all useful constraints (e.g. dma_max_mapping_size()), and provides useful APIs to (e.g. dma_alloc_noncoherent or dma_alloc_noncontiguous) to allocate pages that can be mapped without bounce buffering and drivers should use the proper API instead of poking into one particular implementation and restrict it from changing. swiotlb_max_segment in particular returns a value that isn't actually correct (a driver can't just use all of swiotlb) AND actually doesn't work as is in various scenarious that are becoming more common, most notably host with memory encryption schemes that always require bounce buffering. All these are either in the internal backend or in the old shmem backend. I understand both are soon to be retired or deprecated. I think. + Matt & Thomas, and Bob actually as well, as I think authorities in the shmem, TTM and internal backend at the moment. Could you guys please have look if and how the TTM backend needs to handle this and what is the timeline of retirement if relevant? Regards, Tvrtko So currently these are used directly in the internal backend and indirectly via i915_sg_segment_size() in shmem, ttm and userptr backends. internal and userptr are being refactored currently (internal is ready but lacking review), but the refactoring would just make them use the ttm backend which still uses these. It seems to me like a simple solution would be to just replace swiotlb_max_segment() calls with dma_max_mapping_size() as a drop in replacement. This follows the same logic as drm_prime_pages_to_sg().
[PATCH v11 05/10] drm/i915: sanitize mem_flags for stolen buffers
Stolen regions are not page backed or considered iomem. Prevent flags indicating such. This correctly prevents stolen buffers from attempting to directly map them. See i915_gem_object_has_struct_page() and i915_gem_object_has_iomem() usage for where it would break otherwise. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 2cc2c08bd50f..18d574ac167f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -14,6 +14,7 @@ #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" #include "gem/i915_gem_ttm_move.h" +#include "gem/i915_gem_stolen.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" @@ -130,8 +131,9 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); - obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : - I915_BO_FLAG_STRUCT_PAGE; + if (!i915_gem_object_is_stolen(obj)) + obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : + I915_BO_FLAG_STRUCT_PAGE; if (!obj->ttm.cache_level_override) { cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), -- 2.25.1
[PATCH v11 08/10] drm/i915/selftest: don't attempt engine reset of guc submission engines
igt_reset_engines_stolen tries to reset engines without checking if it is possible. Engines using GuC submission are not able to be reset from the host. In this scenario, the reset exits early, then on the next iteration of the each engine loop, the async teardown of the spinner request context's ring occurs while the next engine is under test. This is seen as a stolen memory corruption as the ring buffer was busy initially, but free during the confirmation check and had been poisoned during cleanup. Fix this by not testing GuC submission using engines. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gt/selftest_reset.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 37c38bdd5f47..55f3b34e5f6e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -194,6 +194,8 @@ static int igt_reset_engines_stolen(void *arg) return 0; for_each_engine(engine, gt, id) { + if (intel_engine_uses_guc(engine)) + continue; err = __igt_reset_stolen(gt, engine->mask, engine->name); if (err) return err; -- 2.25.1
[PATCH v11 07/10] drm/i915/ttm: add buffer pin on alloc flag
For situations where allocations need to fail on alloc instead of delayed get_pages, add a new alloc flag to pin the ttm bo. This makes sure that the resource has been allocated during buffer creation, allowing it to fail with an error if the placement is exhausted. This allows existing fallback options for stolen backend allocation like create_ring_vma to work as expected. Signed-off-by: Robert Beckett --- .../gpu/drm/i915/gem/i915_gem_object_types.h | 13 ++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 25 ++- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 14937cf1daaa..283a4b84971a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -325,17 +325,20 @@ struct drm_i915_gem_object { * dealing with userspace objects the CPU fault handler is free to ignore this. */ #define I915_BO_ALLOC_GPU_ONLY BIT(6) +/* object should be pinned in destination region from allocation */ +#define I915_BO_ALLOC_PINNED BIT(7) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ I915_BO_ALLOC_CPU_CLEAR | \ I915_BO_ALLOC_USER | \ I915_BO_ALLOC_PM_VOLATILE | \ I915_BO_ALLOC_PM_EARLY | \ -I915_BO_ALLOC_GPU_ONLY) -#define I915_BO_READONLY BIT(7) -#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */ -#define I915_BO_PROTECTED BIT(9) -#define I915_BO_WAS_BOUND_BIT 10 +I915_BO_ALLOC_GPU_ONLY | \ +I915_BO_ALLOC_PINNED) +#define I915_BO_READONLY BIT(8) +#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */ +#define I915_BO_PROTECTED BIT(10) +#define I915_BO_WAS_BOUND_BIT 11 /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index b6c3fc25d9d1..d34ebe9fcff8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1011,6 +1011,13 @@ static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) { GEM_BUG_ON(!obj->ttm.created); + /* stolen objects are pinned for lifetime. Unpin before putting */ + if (obj->flags & I915_BO_ALLOC_PINNED) { + ttm_bo_reserve(i915_gem_to_ttm(obj), true, false, NULL); + ttm_bo_unpin(i915_gem_to_ttm(obj)); + ttm_bo_unreserve(i915_gem_to_ttm(obj)); + } + ttm_bo_put(i915_gem_to_ttm(obj)); } @@ -1206,6 +1213,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, .no_wait_gpu = false, }; enum ttm_bo_type bo_type; + struct ttm_place _place; + struct ttm_placement _placement; + struct ttm_placement *placement; int ret; drm_gem_private_object_init(>drm, >base, size); @@ -1235,6 +1245,17 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, */ i915_gem_object_make_unshrinkable(obj); + if (obj->flags & I915_BO_ALLOC_PINNED) { + i915_ttm_place_from_region(mem, &_place, obj->bo_offset, + obj->base.size, obj->flags); + _placement.num_placement = 1; + _placement.placement = &_place; + _placement.num_busy_placement = 0; + _placement.busy_placement = NULL; + placement = &_placement; + } else { + placement = _sys_placement; + } /* * If this function fails, it will call the destructor, but * our caller still owns the object. So no freeing in the @@ -1243,7 +1264,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, * until successful initialization. */ ret = ttm_bo_init_reserved(>bdev, i915_gem_to_ttm(obj), bo_type, - _sys_placement, page_size >> PAGE_SHIFT, + placement, page_size >> PAGE_SHIFT, , NULL, NULL, i915_ttm_bo_destroy); if (ret) return i915_ttm_err_to_gem(ret); @@ -1254,6 +1275,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_ttm_adjust_domains_after_move(obj); i915_ttm_adjust_gem_after_move(obj); obj->ttm.cache_level_override = false; + if (obj->flags & I915_BO_ALLOC_PINNED) + ttm_bo_pin(i915_gem_to_ttm(obj)); i915_gem_object_unlock(obj); return 0; -- 2.25.1
[PATCH v11 09/10] drm/i915/selftest: maintain context ref during reset test
Commit "bcb9aa45d5a0 Revert "drm/i915: Hold reference to intel_context over life of i915_request"" Stopped requests from maintaining a ref on the context. This caused the contexts to be freed, releasing stolen memory while under test, leading to false positive detection of stolen corruption. Fix this by maintaining a ref on the contexts until testing is complete. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gt/selftest_reset.c | 17 - 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 55f3b34e5f6e..ba536e8a2e32 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -6,6 +6,7 @@ #include #include "gem/i915_gem_stolen.h" +#include "gt/intel_gt.h" #include "i915_memcpy.h" #include "i915_selftest.h" @@ -26,6 +27,7 @@ __igt_reset_stolen(struct intel_gt *gt, intel_wakeref_t wakeref; enum intel_engine_id id; struct igt_spinner spin; + struct intel_context *contexts[I915_NUM_ENGINES] = {0}; long max, count; void *tmp; u32 *crc; @@ -71,12 +73,12 @@ __igt_reset_stolen(struct intel_gt *gt, goto err_spin; } rq = igt_spinner_create_request(, ce, MI_ARB_CHECK); - intel_context_put(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_spin; } i915_request_add(rq); + contexts[id] = ce; } for (page = 0; page < num_pages; page++) { @@ -165,8 +167,21 @@ __igt_reset_stolen(struct intel_gt *gt, err = -EINVAL; } + err = intel_gt_wait_for_idle(gt, HZ); + if (err < 0) { + pr_err("%s failed to wait for gt idle: %d\n", msg, err); + goto err_spin; + } + + err = 0; + err_spin: igt_spinner_fini(); + for (id = 0; id < I915_NUM_ENGINES; id++) { + if (!contexts[id]) + continue; + intel_context_put(contexts[id]); + } err_lock: intel_runtime_pm_put(gt->uncore->rpm, wakeref); -- 2.25.1
[PATCH v11 10/10] drm/i915: stolen memory use ttm backend
refactor stolen memory region to use ttm. this necessitates using ttm resources to track reserved stolen regions instead of drm_mm_nodes. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/display/intel_fbc.c | 78 ++-- .../gpu/drm/i915/gem/i915_gem_object_types.h | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 425 ++ drivers/gpu/drm/i915/gem/i915_gem_stolen.h| 21 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 7 + drivers/gpu/drm/i915/gt/intel_rc6.c | 4 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 16 +- drivers/gpu/drm/i915/i915_debugfs.c | 7 +- drivers/gpu/drm/i915/i915_drv.h | 6 +- drivers/gpu/drm/i915/intel_region_ttm.c | 42 +- drivers/gpu/drm/i915/intel_region_ttm.h | 8 +- drivers/gpu/drm/i915/selftests/mock_region.c | 12 +- 13 files changed, 280 insertions(+), 351 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 16537830ccf0..7dd42b11f4b8 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -38,6 +38,7 @@ * forcibly disable it to allow proper screen updates. */ +#include "gem/i915_gem_stolen.h" #include #include @@ -52,6 +53,7 @@ #include "intel_display_types.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" +#include "gem/i915_gem_region.h" #define for_each_fbc_id(__dev_priv, __fbc_id) \ for ((__fbc_id) = INTEL_FBC_A; (__fbc_id) < I915_MAX_FBCS; (__fbc_id)++) \ @@ -93,8 +95,8 @@ struct intel_fbc { struct mutex lock; unsigned int busy_bits; - struct drm_mm_node compressed_fb; - struct drm_mm_node compressed_llb; + struct ttm_resource *compressed_fb; + struct ttm_resource *compressed_llb; enum intel_fbc_id id; @@ -332,16 +334,20 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc) static void i8xx_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); + u64 llb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_llb); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + GEM_BUG_ON(llb_offset == I915_BO_INVALID_OFFSET); GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start, -fbc->compressed_fb.start, U32_MAX)); +fb_offset, U32_MAX)); GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start, -fbc->compressed_llb.start, U32_MAX)); +llb_offset, U32_MAX)); intel_de_write(i915, FBC_CFB_BASE, - i915->dsm.start + fbc->compressed_fb.start); + i915->dsm.start + fb_offset); intel_de_write(i915, FBC_LL_BASE, - i915->dsm.start + fbc->compressed_llb.start); + i915->dsm.start + llb_offset); } static const struct intel_fbc_funcs i8xx_fbc_funcs = { @@ -449,8 +455,10 @@ static bool g4x_fbc_is_compressing(struct intel_fbc *fbc) static void g4x_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); - intel_de_write(i915, DPFC_CB_BASE, fbc->compressed_fb.start); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + intel_de_write(i915, DPFC_CB_BASE, fb_offset); } static const struct intel_fbc_funcs g4x_fbc_funcs = { @@ -500,8 +508,10 @@ static bool ilk_fbc_is_compressing(struct intel_fbc *fbc) static void ilk_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); - intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fbc->compressed_fb.start); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fb_offset); } static const struct intel_fbc_funcs ilk_fbc_funcs = { @@ -745,21 +755,24 @@ static int find_compression_limit(struct intel_fbc *fbc, { struct drm_i915_private *i915 = fbc->i915; u64 end = intel_fbc_stolen_end(i915); - int ret, limit = min_limit; + int limit = min_limit; + struct ttm_resource *res; size /= limit; /* Try to over-allocate to reduce reallocations and fragmentation. */ - ret = i915_gem_stolen_insert_node_in_range(i915, >compressed_fb, - size <<= 1, 4096, 0, end); - if (ret == 0) + res = i915_gem_stolen_reserve_range(i915, size <<= 1, 0, end)
[PATCH v11 04/10] drm/i915: instantiate ttm ranger manager for stolen memory
prepare for ttm based stolen region by using ttm range manager as the resource manager for stolen region. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 ++-- drivers/gpu/drm/i915/intel_region_ttm.c | 31 +++- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index a949594237d9..2cc2c08bd50f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -66,11 +66,13 @@ i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type) struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); /* There's some room for optimization here... */ - GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM && - ttm_mem_type < I915_PL_LMEM0); + GEM_BUG_ON(ttm_mem_type == I915_PL_GGTT); + if (ttm_mem_type == I915_PL_SYSTEM) return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM, 0); + if (ttm_mem_type == I915_PL_STOLEN) + return i915->mm.stolen_region; return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL, ttm_mem_type - I915_PL_LMEM0); diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 642cd1587976..caac110a0a2c 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -54,7 +54,7 @@ void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv) /* * Map the i915 memory regions to TTM memory types. We use the - * driver-private types for now, reserving TTM_PL_VRAM for stolen + * driver-private types for now, reserving I915_PL_STOLEN for stolen * memory and TTM_PL_TT for GGTT use if decided to implement this. */ int intel_region_to_ttm_type(const struct intel_memory_region *mem) @@ -63,11 +63,17 @@ int intel_region_to_ttm_type(const struct intel_memory_region *mem) GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL && mem->type != INTEL_MEMORY_MOCK && - mem->type != INTEL_MEMORY_SYSTEM); + mem->type != INTEL_MEMORY_SYSTEM && + mem->type != INTEL_MEMORY_STOLEN_SYSTEM && + mem->type != INTEL_MEMORY_STOLEN_LOCAL); if (mem->type == INTEL_MEMORY_SYSTEM) return TTM_PL_SYSTEM; + if (mem->type == INTEL_MEMORY_STOLEN_SYSTEM || + mem->type == INTEL_MEMORY_STOLEN_LOCAL) + return I915_PL_STOLEN; + type = mem->instance + TTM_PL_PRIV; GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES); @@ -91,10 +97,16 @@ int intel_region_ttm_init(struct intel_memory_region *mem) int mem_type = intel_region_to_ttm_type(mem); int ret; - ret = i915_ttm_buddy_man_init(bdev, mem_type, false, - resource_size(>region), - mem->io_size, - mem->min_page_size, PAGE_SIZE); + if (mem_type == I915_PL_STOLEN) { + ret = ttm_range_man_init(bdev, mem_type, false, +resource_size(>region) >> PAGE_SHIFT); + mem->is_range_manager = true; + } else { + ret = i915_ttm_buddy_man_init(bdev, mem_type, false, + resource_size(>region), + mem->io_size, + mem->min_page_size, PAGE_SIZE); + } if (ret) return ret; @@ -114,6 +126,7 @@ int intel_region_ttm_init(struct intel_memory_region *mem) int intel_region_ttm_fini(struct intel_memory_region *mem) { struct ttm_resource_manager *man = mem->region_private; + int mem_type = intel_region_to_ttm_type(mem); int ret = -EBUSY; int count; @@ -144,8 +157,10 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) if (ret || !man) return ret; - ret = i915_ttm_buddy_man_fini(>i915->bdev, - intel_region_to_ttm_type(mem)); + if (mem_type == I915_PL_STOLEN) + ret = ttm_range_man_fini(>i915->bdev, mem_type); + else + ret = i915_ttm_buddy_man_fini(>i915->bdev, mem_type); GEM_WARN_ON(ret); mem->region_private = NULL; -- 2.25.1
[PATCH v11 03/10] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level
By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP. This is divergent from existing backends code which only considers HAS_LLC. Testing shows that trusting snooping on gen5- is unreliable and bsw via ggtt mappings, so limit DGFX for now and maintain previous behaviour. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 042c2237e287..a949594237d9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -52,7 +52,9 @@ static enum i915_cache_level i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, struct ttm_tt *ttm) { - return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && + bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915); + + return ((HAS_LLC(i915) || can_snoop) && !i915_ttm_gtt_binds_lmem(res) && ttm->caching == ttm_cached) ? I915_CACHE_LLC : I915_CACHE_NONE; -- 2.25.1
[PATCH v11 06/10] drm/i915: ttm move/clear logic fix
ttm managed buffers start off with system resource definitions and ttm_tt tracking structures allocated (though unpopulated). currently this prevents clearing of buffers on first move to desired placements. The desired behaviour is to clear user allocated buffers and any kernel buffers that specifically requests it only. Make the logic match the desired behaviour. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 22 +++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 18d574ac167f..6671345b2abe 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -3,6 +3,7 @@ * Copyright © 2021 Intel Corporation */ +#include "drm/ttm/ttm_tt.h" #include #include "i915_deps.h" @@ -546,6 +547,25 @@ __i915_ttm_move(struct ttm_buffer_object *bo, return fence; } +static bool +allow_clear(struct drm_i915_gem_object *obj, struct ttm_tt *ttm, struct ttm_resource *dst_mem) +{ + /* never clear stolen */ + if (dst_mem->mem_type == I915_PL_STOLEN) + return false; + /* +* we want to clear user buffers and any kernel buffers +* that specifically request clearing. +*/ + if (obj->flags & I915_BO_ALLOC_USER) + return true; + + if (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) + return true; + + return false; +} + /** * i915_ttm_move - The TTM move callback used by i915. * @bo: The buffer object. @@ -596,7 +616,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, return PTR_ERR(dst_rsgt); clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); - if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) { + if (!clear || allow_clear(obj, ttm, dst_mem)) { struct i915_deps deps; i915_deps_init(, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); -- 2.25.1
[PATCH v11 02/10] drm/i915: limit ttm to dma32 for i965G[M]
i965G[M] cannot relocate objects above 4GiB. Ensure ttm uses dma32 on these systems. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 6873808a7015..642cd1587976 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -32,10 +32,15 @@ int intel_region_ttm_device_init(struct drm_i915_private *dev_priv) { struct drm_device *drm = _priv->drm; + bool use_dma32 = false; + + /* i965g[m] cannot relocate objects above 4GiB. */ + if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) + use_dma32 = true; return ttm_device_init(_priv->bdev, i915_ttm_driver(), drm->dev, drm->anon_inode->i_mapping, - drm->vma_offset_manager, false, false); + drm->vma_offset_manager, false, use_dma32); } /** -- 2.25.1
[PATCH v11 01/10] drm/i915/ttm: dont trample cache_level overrides during ttm move
Various places within the driver override the default chosen cache_level. Before ttm, these overrides were permanent until explicitly changed again or for the lifetime of the buffer. TTM movement code came along and decided that it could make that decision at that time, which is usually well after object creation, so overrode the cache_level decision and reverted it back to its default decision. Add logic to indicate whether the caching mode has been set by anything other than the move logic. If so, assume that the code that overrode the defaults knows best and keep it. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 9 ++--- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index ccec4055fde3..966ac2d778d5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, struct drm_i915_private *i915 = to_i915(obj->base.dev); obj->cache_level = cache_level; + obj->ttm.cache_level_override = true; if (cache_level != I915_CACHE_NONE) obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ | diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 5cf36a130061..14937cf1daaa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -623,6 +623,7 @@ struct drm_i915_gem_object { struct i915_gem_object_page_iter get_io_page; struct drm_i915_gem_object *backup; bool created:1; + bool cache_level_override:1; } ttm; /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 053b0022ddd0..b6c3fc25d9d1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1253,6 +1253,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_init_memory_region(obj, mem); i915_ttm_adjust_domains_after_move(obj); i915_ttm_adjust_gem_after_move(obj); + obj->ttm.cache_level_override = false; i915_gem_object_unlock(obj); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 9a7e50534b84..042c2237e287 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -129,9 +129,12 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : I915_BO_FLAG_STRUCT_PAGE; - cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, - bo->ttm); - i915_gem_object_set_cache_coherency(obj, cache_level); + if (!obj->ttm.cache_level_override) { + cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), + bo->resource, bo->ttm); + i915_gem_object_set_cache_coherency(obj, cache_level); + obj->ttm.cache_level_override = false; + } } /** -- 2.25.1
[PATCH v11 00/10] drm/i915: ttm for stolen
This series refactors i915's stolen memory region to use ttm. v2: handle disabled stolen similar to legacy version. relying on ttm to fail allocs works fine, but is dmesg noisy and causes testing dmesg warning regressions. v3: rebase to latest drm-tip. fix v2 code refactor which could leave a buffer pinned. locally passes fftl again now. v4: - Allow memory regions creators to do allocation. Allows stolen region to track it's own reservations. - Pre-reserve first page of stolen mem (add back WaSkipStolenMemoryFirstPage:bdw+) - Improve commit descritpion for "drm/i915: sanitize mem_flags for stolen buffers" - replace i915_gem_object_pin_pages_unlocked() call with manual locking and pinning. this avoids ww ctx class reuse during context creation -> ring vma obj alloc. v5: - detect both types of stolen as stolen buffers in "drm/i915: sanitize mem_flags for stolen buffers" - in stolen_object_init limit page size to mem region minimum. The range allocator expects the page_size to define the alignment v6: - Share first 4 patches from ttm for internal series as generic i915 ttm fixes - Drop patch 4 from v5. We don't need separate object ops just to satisfy test interfaces. The tests have now been fixed via checking whether the memory region is private to decide whether to mmap - Add new buffer pin alloc flag to allow creation of buffers in their final ttm placement instead of deferring until get_pages. This fixes legacy fallback paths for buffer allocations during stolen memory pressure. v7: - fix mock_region_get_pages() to correctly handle I915_BO_INVALID_OFFSET v8: - Reserve I915_GEM_STOLEN_BIAS area from stolen v9: - drop patch 8 "drm/i915: allow memory region creators to alloc and free the region" store bias reservation in drm_i915_private instead. - Restrict reset selftest to only test !GuC engines. Resetting individual GuC engines from host is not supported - Wait for outstanding requests in reset selftest This prevents previous engine test context cleanup appearing as false positive stolen corruption check v10:- Fix wiating on requests early error path during reset selftest If a single request fails to complete, the others would not be put, resulting in leaks. Make sure all requests are put before test exit. v11:- rebased to latest drm-tip - commit "bcb9aa45d5a0 Revert "drm/i915: Hold reference to intel_context over life of i915_request"" broke the selftest@live@reset test, causing the context ringbuffer to be freed during testing. Fixed via maintinaing context ref during testing. - drop patch 4 "drm/i915/gem: selftest should not attempt mmap of private regions" it is no longer needed. Robert Beckett (10): drm/i915/ttm: dont trample cache_level overrides during ttm move drm/i915: limit ttm to dma32 for i965G[M] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level drm/i915: instantiate ttm ranger manager for stolen memory drm/i915: sanitize mem_flags for stolen buffers drm/i915: ttm move/clear logic fix drm/i915/ttm: add buffer pin on alloc flag drm/i915/selftest: don't attempt engine reset of guc submission engines drm/i915/selftest: maintain context ref during reset test drm/i915: stolen memory use ttm backend drivers/gpu/drm/i915/display/intel_fbc.c | 78 ++-- drivers/gpu/drm/i915/gem/i915_gem_object.c| 1 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 16 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 425 ++ drivers/gpu/drm/i915/gem/i915_gem_stolen.h| 21 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 29 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 7 + drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 47 +- drivers/gpu/drm/i915/gt/intel_rc6.c | 4 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 35 +- drivers/gpu/drm/i915/i915_debugfs.c | 7 +- drivers/gpu/drm/i915/i915_drv.h | 6 +- drivers/gpu/drm/i915/intel_region_ttm.c | 80 +++- drivers/gpu/drm/i915/intel_region_ttm.h | 8 +- drivers/gpu/drm/i915/selftests/mock_region.c | 12 +- 15 files changed, 400 insertions(+), 376 deletions(-) -- 2.25.1
[PATCH v4 8/8] drm/i915: internal buffers use ttm backend
Create a kernel only internal memory region that uses ttm pool allocator to allocate volatile system pages. Refactor internal buffer backend to simply allocate from this new region. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 187 +- drivers/gpu/drm/i915/gem/i915_gem_internal.h | 5 - drivers/gpu/drm/i915/i915_pci.c | 4 +- drivers/gpu/drm/i915/intel_memory_region.c| 8 +- drivers/gpu/drm/i915/intel_memory_region.h| 2 + .../gpu/drm/i915/selftests/mock_gem_device.c | 2 +- 6 files changed, 17 insertions(+), 191 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..a83751867ac7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -4,188 +4,9 @@ * Copyright © 2014-2016 Intel Corporation */ -#include -#include -#include - +#include "gem/i915_gem_internal.h" +#include "gem/i915_gem_region.h" #include "i915_drv.h" -#include "i915_gem.h" -#include "i915_gem_internal.h" -#include "i915_gem_object.h" -#include "i915_scatterlist.h" -#include "i915_utils.h" - -#define QUIET (__GFP_NORETRY | __GFP_NOWARN) -#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN) - -static void internal_free_pages(struct sg_table *st) -{ - struct scatterlist *sg; - - for (sg = st->sgl; sg; sg = __sg_next(sg)) { - if (sg_page(sg)) - __free_pages(sg_page(sg), get_order(sg->length)); - } - - sg_free_table(st); - kfree(st); -} - -static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *st; - struct scatterlist *sg; - unsigned int sg_page_sizes; - unsigned int npages; - int max_order; - gfp_t gfp; - - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } - } -#endif - - gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; - if (IS_I965GM(i915) || IS_I965G(i915)) { - /* 965gm cannot relocate objects above 4GiB. */ - gfp &= ~__GFP_HIGHMEM; - gfp |= __GFP_DMA32; - } - -create_st: - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return -ENOMEM; - - npages = obj->base.size / PAGE_SIZE; - if (sg_alloc_table(st, npages, GFP_KERNEL)) { - kfree(st); - return -ENOMEM; - } - - sg = st->sgl; - st->nents = 0; - sg_page_sizes = 0; - - do { - int order = min(fls(npages) - 1, max_order); - struct page *page; - - do { - page = alloc_pages(gfp | (order ? QUIET : MAYFAIL), - order); - if (page) - break; - if (!order--) - goto err; - - /* Limit subsequent allocations as well */ - max_order = order; - } while (1); - - sg_set_page(sg, page, PAGE_SIZE << order, 0); - sg_page_sizes |= PAGE_SIZE << order; - st->nents++; - - npages -= 1 << order; - if (!npages) { - sg_mark_end(sg); - break; - } - - sg = __sg_next(sg); - } while (1); - - if (i915_gem_gtt_prepare_pages(obj, st)) { - /* Failed to dma-map try again with single page sg segments */ - if (get_order(st->sgl->length)) { - internal_free_pages(st); - max_order = 0; - goto create_st; - } - goto err; - } - - __i915_gem_object_set_pages(obj, st, sg_page_sizes); - - return 0; - -err: - sg_set_page(sg, NULL, 0, 0); - sg_mark_end(sg); - internal_free_pages(st); - - return -ENOMEM; -} - -static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - i915_gem_gtt_finish_pages(obj, pages); - internal_free_pages(pages); - - obj->mm.dirty = false; - - __start_cpu_write(obj
[PATCH v4 5/8] drm/i915: setup ggtt scratch page after memory regions
Reorder scratch page allocation so that memory regions are available to allocate the buffers Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 20 ++-- drivers/gpu/drm/i915/gt/intel_gtt.h | 1 + drivers/gpu/drm/i915/i915_driver.c | 16 ++-- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 15a915bb4088..c4ad03e53236 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -866,8 +866,6 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) struct drm_i915_private *i915 = ggtt->vm.i915; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); phys_addr_t phys_addr; - u32 pte_flags; - int ret; GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915)); phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915); @@ -889,6 +887,24 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) } kref_init(>vm.resv_ref); + + return 0; +} + +/** + * i915_ggtt_setup_scratch_page - setup ggtt scratch page + * @i915: i915 device + */ +int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + u32 pte_flags; + int ret; + + /* gen5- scratch setup currently happens in @intel_gtt_init */ + if (GRAPHICS_VER(i915) <= 5) + return 0; + ret = setup_scratch_page(>vm); if (ret) { drm_err(>drm, "Scratch setup failed\n"); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index e639434e97fd..4ebdf70b5273 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -579,6 +579,7 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma_resource *vma_res); int i915_ggtt_probe_hw(struct drm_i915_private *i915); +int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915); int i915_ggtt_init_hw(struct drm_i915_private *i915); int i915_ggtt_enable_hw(struct drm_i915_private *i915); void i915_ggtt_enable_guc(struct i915_ggtt *ggtt); diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index deb8a8b76965..fa0956840fcc 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -69,6 +69,7 @@ #include "gem/i915_gem_mman.h" #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" +#include "gt/intel_gtt.h" #include "gt/intel_gt_pm.h" #include "gt/intel_rc6.h" @@ -609,12 +610,16 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) ret = intel_gt_tiles_init(dev_priv); if (ret) - goto err_mem_regions; + goto err_ggtt; + + ret = i915_ggtt_setup_scratch_page(dev_priv); + if (ret) + goto err_ggtt; ret = i915_ggtt_enable_hw(dev_priv); if (ret) { drm_err(_priv->drm, "failed to enable GGTT\n"); - goto err_mem_regions; + goto err_ggtt; } pci_set_master(pdev); @@ -675,11 +680,10 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) err_msi: if (pdev->msi_enabled) pci_disable_msi(pdev); -err_mem_regions: - intel_memory_regions_driver_release(dev_priv); err_ggtt: i915_ggtt_driver_release(dev_priv); i915_gem_drain_freed_objects(dev_priv); + intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_late_release(dev_priv); err_perf: i915_perf_fini(dev_priv); @@ -928,9 +932,9 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) intel_modeset_driver_remove_nogem(i915); out_cleanup_hw: i915_driver_hw_remove(i915); - intel_memory_regions_driver_release(i915); i915_ggtt_driver_release(i915); i915_gem_drain_freed_objects(i915); + intel_memory_regions_driver_release(i915); i915_ggtt_driver_late_release(i915); out_cleanup_mmio: i915_driver_mmio_release(i915); @@ -987,9 +991,9 @@ static void i915_driver_release(struct drm_device *dev) i915_gem_driver_release(dev_priv); - intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_release(dev_priv); i915_gem_drain_freed_objects(dev_priv); + intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_late_release(dev_priv); i915_driver_mmio_release(dev_priv); -- 2.25.1
[PATCH v4 4/8] drm/i915: add gen6 ppgtt dummy creation function
Internal gem objects will soon just be volatile system memory region objects. To enable this, create a separate dummy object creation function for gen6 ppgtt. The object only exists as a fake object pointing to ggtt and gains no benefit in going via the internal backend. Instead, create a dummy gem object and avoid having to maintain a custom ops api in the internal backend, which makes later refactoring easier. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 43 ++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 1bb766c79dcb..f3b660cfeb7f 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -372,6 +372,45 @@ static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = { .put_pages = pd_dummy_obj_put_pages, }; +static struct drm_i915_gem_object * +i915_gem_object_create_dummy(struct drm_i915_private *i915, phys_addr_t size) +{ + static struct lock_class_key lock_class; + struct drm_i915_gem_object *obj; + unsigned int cache_level; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(>drm, >base, size); + i915_gem_object_init(obj, _dummy_obj_ops, _class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; + + /* +* Mark the object as volatile, such that the pages are marked as +* dontneed whilst they are still pinned. As soon as they are unpinned +* they are allowed to be reaped by the shrinker, and the caller is +* expected to repopulate - the contents of this object are only valid +* whilst active and pinned. +*/ + i915_gem_object_set_volatile(obj); + + obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU; + + cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; + i915_gem_object_set_cache_coherency(obj, cache_level); + + return obj; +} + static struct i915_page_directory * gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) { @@ -383,9 +422,7 @@ gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) if (unlikely(!pd)) return ERR_PTR(-ENOMEM); - pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915, - _dummy_obj_ops, - I915_PDES * SZ_4K); + pd->pt.base = i915_gem_object_create_dummy(ppgtt->base.vm.gt->i915, I915_PDES * SZ_4K); if (IS_ERR(pd->pt.base)) { err = PTR_ERR(pd->pt.base); pd->pt.base = NULL; -- 2.25.1
[PATCH v4 7/8] drm/i915/gem: further fix mman selftest
In commit 450cede7f380 ("drm/i915/gem: Fix the mman selftest") we fixed up the mman selftest to allocate user buffers via smem only if we have lmem, otherwise it uses internal buffers. As the commit message asserts, we should only be using buffers that userland should be able to create. Internal buffers are not intended to be used by userland. Instead, fix the code to always create buffers from smem. In the case of integrated, this will create them from the shmem non-ttm backend, which is fine. This also fixes up the code to allow conversion of internal backend to ttm without breaking this test. Signed-off-by: Robert Beckett --- .../gpu/drm/i915/gem/selftests/i915_gem_mman.c | 17 ++--- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 3ced9948a331..e529eb8461ff 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -596,17 +596,12 @@ static enum i915_mmap_type default_mapping(struct drm_i915_private *i915) } static struct drm_i915_gem_object * -create_sys_or_internal(struct drm_i915_private *i915, - unsigned long size) +create_sys(struct drm_i915_private *i915, unsigned long size) { - if (HAS_LMEM(i915)) { - struct intel_memory_region *sys_region = - i915->mm.regions[INTEL_REGION_SMEM]; + struct intel_memory_region *sys_region = + i915->mm.regions[INTEL_REGION_SMEM]; - return __i915_gem_object_create_user(i915, size, _region, 1); - } - - return i915_gem_object_create_internal(i915, size); + return __i915_gem_object_create_user(i915, size, _region, 1); } static bool assert_mmap_offset(struct drm_i915_private *i915, @@ -617,7 +612,7 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, u64 offset; int ret; - obj = create_sys_or_internal(i915, size); + obj = create_sys(i915, size); if (IS_ERR(obj)) return expected && expected == PTR_ERR(obj); @@ -719,7 +714,7 @@ static int igt_mmap_offset_exhaustion(void *arg) } /* Fill the hole, further allocation attempts should then fail */ - obj = create_sys_or_internal(i915, PAGE_SIZE); + obj = create_sys(i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); pr_err("Unable to create object for reclaimed hole\n"); -- 2.25.1
[PATCH v4 6/8] drm/i915: allow volatile buffers to use ttm pool allocator
Internal/volatile buffers should not be shmem backed. If a volatile buffer is requested, allow ttm to use the pool allocator to provide volatile pages as backing. Fix i915_ttm_shrink to handle !is_shmem volatile buffers by purging. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index b6c3fc25d9d1..599ed2713359 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -291,7 +291,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, page_flags |= TTM_TT_FLAG_ZERO_ALLOC; caching = i915_ttm_select_tt_caching(obj); - if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) { + if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached && + !i915_gem_object_is_volatile(obj)) { page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE; i915_tt->is_shmem = true; @@ -513,9 +514,9 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags) if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM) return 0; - GEM_BUG_ON(!i915_tt->is_shmem); + GEM_BUG_ON(!i915_tt->is_shmem && obj->mm.madv != I915_MADV_DONTNEED); - if (!i915_tt->filp) + if (i915_tt->is_shmem && !i915_tt->filp) return 0; ret = ttm_bo_wait_ctx(bo, ); -- 2.25.1
[PATCH v4 3/8] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level
By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP. This is divergent from existing backends code which only considers HAS_LLC. Testing shows that trusting snooping on gen5- is unreliable and bsw via ggtt mappings, so limit DGFX for now and maintain previous behaviour. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 042c2237e287..a949594237d9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -52,7 +52,9 @@ static enum i915_cache_level i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, struct ttm_tt *ttm) { - return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && + bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915); + + return ((HAS_LLC(i915) || can_snoop) && !i915_ttm_gtt_binds_lmem(res) && ttm->caching == ttm_cached) ? I915_CACHE_LLC : I915_CACHE_NONE; -- 2.25.1
[PATCH v4 0/8] drm/i915: ttm for internal
This series refactors i915's internal buffer backend to use ttm. It uses ttm's pool allocator to allocate volatile pages in place of the old code which rolled its own via alloc_pages. This is continuing progress to align all backends on using ttm. v2: - commit message improvements to add detail - fix i915_ttm_shrink to purge !is_shmem volatile buffers - limit ttm pool allocator to using dma32 on i965G[M] - fix mman selftest to always use smem buffers - create new internal memory region - make internal backend allocate from internal region - Fixed various issues with tests and i915 ttm usage as a result of supporting regions other than lmem via ttm. v3: - limit i915 ttm default cache_level selection to only trust HAS_SNOOP on DGFX. v4: - rebase to drm-tip and handle conflicts Robert Beckett (8): drm/i915/ttm: dont trample cache_level overrides during ttm move drm/i915: limit ttm to dma32 for i965G[M] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level drm/i915: add gen6 ppgtt dummy creation function drm/i915: setup ggtt scratch page after memory regions drm/i915: allow volatile buffers to use ttm pool allocator drm/i915/gem: further fix mman selftest drm/i915: internal buffers use ttm backend drivers/gpu/drm/i915/gem/i915_gem_internal.c | 187 +- drivers/gpu/drm/i915/gem/i915_gem_internal.h | 5 - drivers/gpu/drm/i915/gem/i915_gem_object.c| 1 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 8 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 13 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 17 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 43 +++- drivers/gpu/drm/i915/gt/intel_ggtt.c | 20 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 1 + drivers/gpu/drm/i915/i915_driver.c| 16 +- drivers/gpu/drm/i915/i915_pci.c | 4 +- drivers/gpu/drm/i915/intel_memory_region.c| 8 +- drivers/gpu/drm/i915/intel_memory_region.h| 2 + drivers/gpu/drm/i915/intel_region_ttm.c | 7 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 2 +- 16 files changed, 114 insertions(+), 221 deletions(-) -- 2.25.1
[PATCH v4 1/8] drm/i915/ttm: dont trample cache_level overrides during ttm move
Various places within the driver override the default chosen cache_level. Before ttm, these overrides were permanent until explicitly changed again or for the lifetime of the buffer. TTM movement code came along and decided that it could make that decision at that time, which is usually well after object creation, so overrode the cache_level decision and reverted it back to its default decision. Add logic to indicate whether the caching mode has been set by anything other than the move logic. If so, assume that the code that overrode the defaults knows best and keep it. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 9 ++--- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index ccec4055fde3..966ac2d778d5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, struct drm_i915_private *i915 = to_i915(obj->base.dev); obj->cache_level = cache_level; + obj->ttm.cache_level_override = true; if (cache_level != I915_CACHE_NONE) obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ | diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 5cf36a130061..14937cf1daaa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -623,6 +623,7 @@ struct drm_i915_gem_object { struct i915_gem_object_page_iter get_io_page; struct drm_i915_gem_object *backup; bool created:1; + bool cache_level_override:1; } ttm; /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 053b0022ddd0..b6c3fc25d9d1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1253,6 +1253,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_init_memory_region(obj, mem); i915_ttm_adjust_domains_after_move(obj); i915_ttm_adjust_gem_after_move(obj); + obj->ttm.cache_level_override = false; i915_gem_object_unlock(obj); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 9a7e50534b84..042c2237e287 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -129,9 +129,12 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : I915_BO_FLAG_STRUCT_PAGE; - cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, - bo->ttm); - i915_gem_object_set_cache_coherency(obj, cache_level); + if (!obj->ttm.cache_level_override) { + cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), + bo->resource, bo->ttm); + i915_gem_object_set_cache_coherency(obj, cache_level); + obj->ttm.cache_level_override = false; + } } /** -- 2.25.1
[PATCH v4 2/8] drm/i915: limit ttm to dma32 for i965G[M]
i965G[M] cannot relocate objects above 4GiB. Ensure ttm uses dma32 on these systems. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 6873808a7015..642cd1587976 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -32,10 +32,15 @@ int intel_region_ttm_device_init(struct drm_i915_private *dev_priv) { struct drm_device *drm = _priv->drm; + bool use_dma32 = false; + + /* i965g[m] cannot relocate objects above 4GiB. */ + if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) + use_dma32 = true; return ttm_device_init(_priv->bdev, i915_ttm_driver(), drm->dev, drm->anon_inode->i_mapping, - drm->vma_offset_manager, false, false); + drm->vma_offset_manager, false, use_dma32); } /** -- 2.25.1
Re: [PATCH v10 04/11] drm/i915/gem: selftest should not attempt mmap of private regions
On 08/07/2022 14:27, Matthew Auld wrote: On 08/07/2022 14:22, Robert Beckett wrote: On 08/07/2022 08:53, Matthew Auld wrote: On 07/07/2022 21:02, Robert Beckett wrote: During testing make can_mmap consider whether the region is private. Do we still need this with: 938d2fd17d17 ("drm/i915/selftests: skip the mman tests for stolen") ? huh, I guess not. That wasn't in my tree. I guess I should rebase. Looking at it, my patch would have been preferable initially I think. Each location of the additional checks in that patch first call cam_mmap(), which I think is the most appropriate place to make the decision. It fails at the object_create() I think (on small-BAR I mean), which is before we can call can_mmap(), passing in the object. ah, okay. That makes sense to keep as is then. I'll drop this patch. Thanks. I could do a replacement patch that reverts that one if preferred, or we can leave it as is and I will drop this patch. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5bc93a1ce3e3..76181e28c75e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -869,6 +869,9 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) struct drm_i915_private *i915 = to_i915(obj->base.dev); bool no_map; + if (obj->mm.region && obj->mm.region->private) + return false; + if (obj->ops->mmap_offset) return type == I915_MMAP_TYPE_FIXED; else if (type == I915_MMAP_TYPE_FIXED)
Re: [PATCH v10 04/11] drm/i915/gem: selftest should not attempt mmap of private regions
On 08/07/2022 08:53, Matthew Auld wrote: On 07/07/2022 21:02, Robert Beckett wrote: During testing make can_mmap consider whether the region is private. Do we still need this with: 938d2fd17d17 ("drm/i915/selftests: skip the mman tests for stolen") ? huh, I guess not. That wasn't in my tree. I guess I should rebase. Looking at it, my patch would have been preferable initially I think. Each location of the additional checks in that patch first call cam_mmap(), which I think is the most appropriate place to make the decision. I could do a replacement patch that reverts that one if preferred, or we can leave it as is and I will drop this patch. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5bc93a1ce3e3..76181e28c75e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -869,6 +869,9 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) struct drm_i915_private *i915 = to_i915(obj->base.dev); bool no_map; + if (obj->mm.region && obj->mm.region->private) + return false; + if (obj->ops->mmap_offset) return type == I915_MMAP_TYPE_FIXED; else if (type == I915_MMAP_TYPE_FIXED)
[PATCH v10 10/11] drm/i915/selftest: wait for requests during engine reset selftest
While looping around each engine and testing for corrupted solen memory during engine reset, the old requests from the previous engine can still be yet to retire. To prevent false positive corruption tests, wait for the outstanding requests at the end of the test Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gt/selftest_reset.c | 35 ++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 55f3b34e5f6e..a2558bc31408 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -6,6 +6,7 @@ #include #include "gem/i915_gem_stolen.h" +#include "gt/intel_gt.h" #include "i915_memcpy.h" #include "i915_selftest.h" @@ -26,6 +27,7 @@ __igt_reset_stolen(struct intel_gt *gt, intel_wakeref_t wakeref; enum intel_engine_id id; struct igt_spinner spin; + struct i915_request *requests[I915_NUM_ENGINES] = {0}; long max, count; void *tmp; u32 *crc; @@ -68,15 +70,16 @@ __igt_reset_stolen(struct intel_gt *gt, ce = intel_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); - goto err_spin; + goto err_requests; } rq = igt_spinner_create_request(, ce, MI_ARB_CHECK); intel_context_put(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto err_spin; + goto err_requests; } i915_request_add(rq); + requests[id] = i915_request_get(rq); } for (page = 0; page < num_pages; page++) { @@ -165,6 +168,34 @@ __igt_reset_stolen(struct intel_gt *gt, err = -EINVAL; } + /* wait for requests and idle, otherwise cleanup can happen on next loop */ + for (id = 0; id < I915_NUM_ENGINES; id++) { + if (!requests[id]) + continue; + err = i915_request_wait(requests[id], I915_WAIT_INTERRUPTIBLE, HZ); + if (err < 0) { + pr_err("%s failed to wait for rq: %d\n", msg, err); + goto err_requests; + } + + i915_request_put(requests[id]); + requests[id] = NULL; + } + + err = intel_gt_wait_for_idle(gt, HZ); + if (err < 0) { + pr_err("%s failed to wait for gt idle: %d\n", msg, err); + goto err_spin; + } + + err = 0; + +err_requests: + for (id = 0; id < I915_NUM_ENGINES; id++) { + if (!requests[id]) + continue; + i915_request_put(requests[id]); + } err_spin: igt_spinner_fini(); -- 2.25.1
[PATCH v10 09/11] drm/i915/selftest: don't attempt engine reset of guc submission engines
igt_reset_engines_stolen tries to reset engines without checking if it is possible. Engines using GuC submission are not able to be reset from the host. In this scenario, the reset exits early, then on the next iteration of the each engine loop, the async teardown of the spinner request context's ring occurs while the next engine is under test. This is seen as a stolen memory corruption as the ring buffer was busy initially, but free during the confirmation check and had been poisoned during cleanup. Fix this by not testing GuC submission using engines. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gt/selftest_reset.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 37c38bdd5f47..55f3b34e5f6e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -194,6 +194,8 @@ static int igt_reset_engines_stolen(void *arg) return 0; for_each_engine(engine, gt, id) { + if (intel_engine_uses_guc(engine)) + continue; err = __igt_reset_stolen(gt, engine->mask, engine->name); if (err) return err; -- 2.25.1
[PATCH v10 11/11] drm/i915: stolen memory use ttm backend
refactor stolen memory region to use ttm. this necessitates using ttm resources to track reserved stolen regions instead of drm_mm_nodes. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/display/intel_fbc.c | 78 ++-- .../gpu/drm/i915/gem/i915_gem_object_types.h | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 425 ++ drivers/gpu/drm/i915/gem/i915_gem_stolen.h| 21 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 7 + drivers/gpu/drm/i915/gt/intel_rc6.c | 4 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 16 +- drivers/gpu/drm/i915/i915_debugfs.c | 7 +- drivers/gpu/drm/i915/i915_drv.h | 6 +- drivers/gpu/drm/i915/intel_region_ttm.c | 42 +- drivers/gpu/drm/i915/intel_region_ttm.h | 8 +- drivers/gpu/drm/i915/selftests/mock_region.c | 12 +- 13 files changed, 280 insertions(+), 351 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 8b807284cde1..6f3afac5e8c9 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -38,6 +38,7 @@ * forcibly disable it to allow proper screen updates. */ +#include "gem/i915_gem_stolen.h" #include #include @@ -51,6 +52,7 @@ #include "intel_display_types.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" +#include "gem/i915_gem_region.h" #define for_each_fbc_id(__dev_priv, __fbc_id) \ for ((__fbc_id) = INTEL_FBC_A; (__fbc_id) < I915_MAX_FBCS; (__fbc_id)++) \ @@ -92,8 +94,8 @@ struct intel_fbc { struct mutex lock; unsigned int busy_bits; - struct drm_mm_node compressed_fb; - struct drm_mm_node compressed_llb; + struct ttm_resource *compressed_fb; + struct ttm_resource *compressed_llb; enum intel_fbc_id id; @@ -331,16 +333,20 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc) static void i8xx_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); + u64 llb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_llb); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + GEM_BUG_ON(llb_offset == I915_BO_INVALID_OFFSET); GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start, -fbc->compressed_fb.start, U32_MAX)); +fb_offset, U32_MAX)); GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start, -fbc->compressed_llb.start, U32_MAX)); +llb_offset, U32_MAX)); intel_de_write(i915, FBC_CFB_BASE, - i915->dsm.start + fbc->compressed_fb.start); + i915->dsm.start + fb_offset); intel_de_write(i915, FBC_LL_BASE, - i915->dsm.start + fbc->compressed_llb.start); + i915->dsm.start + llb_offset); } static const struct intel_fbc_funcs i8xx_fbc_funcs = { @@ -448,8 +454,10 @@ static bool g4x_fbc_is_compressing(struct intel_fbc *fbc) static void g4x_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); - intel_de_write(i915, DPFC_CB_BASE, fbc->compressed_fb.start); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + intel_de_write(i915, DPFC_CB_BASE, fb_offset); } static const struct intel_fbc_funcs g4x_fbc_funcs = { @@ -499,8 +507,10 @@ static bool ilk_fbc_is_compressing(struct intel_fbc *fbc) static void ilk_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); - intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fbc->compressed_fb.start); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fb_offset); } static const struct intel_fbc_funcs ilk_fbc_funcs = { @@ -744,21 +754,24 @@ static int find_compression_limit(struct intel_fbc *fbc, { struct drm_i915_private *i915 = fbc->i915; u64 end = intel_fbc_stolen_end(i915); - int ret, limit = min_limit; + int limit = min_limit; + struct ttm_resource *res; size /= limit; /* Try to over-allocate to reduce reallocations and fragmentation. */ - ret = i915_gem_stolen_insert_node_in_range(i915, >compressed_fb, - size <<= 1, 4096, 0, end); - if (ret == 0) + res = i915_gem_stolen_reserve_range(i915, size <<= 1, 0, end)
[PATCH v10 05/11] drm/i915: instantiate ttm ranger manager for stolen memory
prepare for ttm based stolen region by using ttm range manager as the resource manager for stolen region. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 ++-- drivers/gpu/drm/i915/intel_region_ttm.c | 31 +++- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 40249fa28a7a..675e9ab30396 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -60,11 +60,13 @@ i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type) struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); /* There's some room for optimization here... */ - GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM && - ttm_mem_type < I915_PL_LMEM0); + GEM_BUG_ON(ttm_mem_type == I915_PL_GGTT); + if (ttm_mem_type == I915_PL_SYSTEM) return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM, 0); + if (ttm_mem_type == I915_PL_STOLEN) + return i915->mm.stolen_region; return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL, ttm_mem_type - I915_PL_LMEM0); diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index fd2ecfdd8fa1..694e9acb69e2 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -54,7 +54,7 @@ void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv) /* * Map the i915 memory regions to TTM memory types. We use the - * driver-private types for now, reserving TTM_PL_VRAM for stolen + * driver-private types for now, reserving I915_PL_STOLEN for stolen * memory and TTM_PL_TT for GGTT use if decided to implement this. */ int intel_region_to_ttm_type(const struct intel_memory_region *mem) @@ -63,11 +63,17 @@ int intel_region_to_ttm_type(const struct intel_memory_region *mem) GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL && mem->type != INTEL_MEMORY_MOCK && - mem->type != INTEL_MEMORY_SYSTEM); + mem->type != INTEL_MEMORY_SYSTEM && + mem->type != INTEL_MEMORY_STOLEN_SYSTEM && + mem->type != INTEL_MEMORY_STOLEN_LOCAL); if (mem->type == INTEL_MEMORY_SYSTEM) return TTM_PL_SYSTEM; + if (mem->type == INTEL_MEMORY_STOLEN_SYSTEM || + mem->type == INTEL_MEMORY_STOLEN_LOCAL) + return I915_PL_STOLEN; + type = mem->instance + TTM_PL_PRIV; GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES); @@ -91,10 +97,16 @@ int intel_region_ttm_init(struct intel_memory_region *mem) int mem_type = intel_region_to_ttm_type(mem); int ret; - ret = i915_ttm_buddy_man_init(bdev, mem_type, false, - resource_size(>region), - mem->io_size, - mem->min_page_size, PAGE_SIZE); + if (mem_type == I915_PL_STOLEN) { + ret = ttm_range_man_init(bdev, mem_type, false, +resource_size(>region) >> PAGE_SHIFT); + mem->is_range_manager = true; + } else { + ret = i915_ttm_buddy_man_init(bdev, mem_type, false, + resource_size(>region), + mem->io_size, + mem->min_page_size, PAGE_SIZE); + } if (ret) return ret; @@ -114,6 +126,7 @@ int intel_region_ttm_init(struct intel_memory_region *mem) int intel_region_ttm_fini(struct intel_memory_region *mem) { struct ttm_resource_manager *man = mem->region_private; + int mem_type = intel_region_to_ttm_type(mem); int ret = -EBUSY; int count; @@ -144,8 +157,10 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) if (ret || !man) return ret; - ret = i915_ttm_buddy_man_fini(>i915->bdev, - intel_region_to_ttm_type(mem)); + if (mem_type == I915_PL_STOLEN) + ret = ttm_range_man_fini(>i915->bdev, mem_type); + else + ret = i915_ttm_buddy_man_fini(>i915->bdev, mem_type); GEM_WARN_ON(ret); mem->region_private = NULL; -- 2.25.1
[PATCH v10 04/11] drm/i915/gem: selftest should not attempt mmap of private regions
During testing make can_mmap consider whether the region is private. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5bc93a1ce3e3..76181e28c75e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -869,6 +869,9 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) struct drm_i915_private *i915 = to_i915(obj->base.dev); bool no_map; + if (obj->mm.region && obj->mm.region->private) + return false; + if (obj->ops->mmap_offset) return type == I915_MMAP_TYPE_FIXED; else if (type == I915_MMAP_TYPE_FIXED) -- 2.25.1
[PATCH v10 07/11] drm/i915: ttm move/clear logic fix
ttm managed buffers start off with system resource definitions and ttm_tt tracking structures allocated (though unpopulated). currently this prevents clearing of buffers on first move to desired placements. The desired behaviour is to clear user allocated buffers and any kernel buffers that specifically requests it only. Make the logic match the desired behaviour. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 22 +++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 81c67ca9edda..a3f8fc056dbc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -3,6 +3,7 @@ * Copyright © 2021 Intel Corporation */ +#include "drm/ttm/ttm_tt.h" #include #include "i915_deps.h" @@ -476,6 +477,25 @@ __i915_ttm_move(struct ttm_buffer_object *bo, return fence; } +static bool +allow_clear(struct drm_i915_gem_object *obj, struct ttm_tt *ttm, struct ttm_resource *dst_mem) +{ + /* never clear stolen */ + if (dst_mem->mem_type == I915_PL_STOLEN) + return false; + /* +* we want to clear user buffers and any kernel buffers +* that specifically request clearing. +*/ + if (obj->flags & I915_BO_ALLOC_USER) + return true; + + if (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) + return true; + + return false; +} + /** * i915_ttm_move - The TTM move callback used by i915. * @bo: The buffer object. @@ -526,7 +546,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, return PTR_ERR(dst_rsgt); clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); - if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) { + if (!clear || allow_clear(obj, ttm, dst_mem)) { struct i915_deps deps; i915_deps_init(, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); -- 2.25.1
[PATCH v10 08/11] drm/i915/ttm: add buffer pin on alloc flag
For situations where allocations need to fail on alloc instead of delayed get_pages, add a new alloc flag to pin the ttm bo. This makes sure that the resource has been allocated during buffer creation, allowing it to fail with an error if the placement is exhausted. This allows existing fallback options for stolen backend allocation like create_ring_vma to work as expected. Signed-off-by: Robert Beckett --- .../gpu/drm/i915/gem/i915_gem_object_types.h | 13 ++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 25 ++- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 6632ed52e919..07bc11247a3e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -325,17 +325,20 @@ struct drm_i915_gem_object { * dealing with userspace objects the CPU fault handler is free to ignore this. */ #define I915_BO_ALLOC_GPU_ONLY BIT(6) +/* object should be pinned in destination region from allocation */ +#define I915_BO_ALLOC_PINNED BIT(7) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ I915_BO_ALLOC_CPU_CLEAR | \ I915_BO_ALLOC_USER | \ I915_BO_ALLOC_PM_VOLATILE | \ I915_BO_ALLOC_PM_EARLY | \ -I915_BO_ALLOC_GPU_ONLY) -#define I915_BO_READONLY BIT(7) -#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */ -#define I915_BO_PROTECTED BIT(9) -#define I915_BO_WAS_BOUND_BIT 10 +I915_BO_ALLOC_GPU_ONLY | \ +I915_BO_ALLOC_PINNED) +#define I915_BO_READONLY BIT(8) +#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */ +#define I915_BO_PROTECTED BIT(10) +#define I915_BO_WAS_BOUND_BIT 11 /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 27d59639177f..bb988608296d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -998,6 +998,13 @@ static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) { GEM_BUG_ON(!obj->ttm.created); + /* stolen objects are pinned for lifetime. Unpin before putting */ + if (obj->flags & I915_BO_ALLOC_PINNED) { + ttm_bo_reserve(i915_gem_to_ttm(obj), true, false, NULL); + ttm_bo_unpin(i915_gem_to_ttm(obj)); + ttm_bo_unreserve(i915_gem_to_ttm(obj)); + } + ttm_bo_put(i915_gem_to_ttm(obj)); } @@ -1193,6 +1200,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, .no_wait_gpu = false, }; enum ttm_bo_type bo_type; + struct ttm_place _place; + struct ttm_placement _placement; + struct ttm_placement *placement; int ret; drm_gem_private_object_init(>drm, >base, size); @@ -1222,6 +1232,17 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, */ i915_gem_object_make_unshrinkable(obj); + if (obj->flags & I915_BO_ALLOC_PINNED) { + i915_ttm_place_from_region(mem, &_place, obj->bo_offset, + obj->base.size, obj->flags); + _placement.num_placement = 1; + _placement.placement = &_place; + _placement.num_busy_placement = 0; + _placement.busy_placement = NULL; + placement = &_placement; + } else { + placement = _sys_placement; + } /* * If this function fails, it will call the destructor, but * our caller still owns the object. So no freeing in the @@ -1230,7 +1251,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, * until successful initialization. */ ret = ttm_bo_init_reserved(>bdev, i915_gem_to_ttm(obj), size, - bo_type, _sys_placement, + bo_type, placement, page_size >> PAGE_SHIFT, , NULL, NULL, i915_ttm_bo_destroy); if (ret) @@ -1242,6 +1263,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_ttm_adjust_domains_after_move(obj); i915_ttm_adjust_gem_after_move(obj); obj->ttm.cache_level_override = false; + if (obj->flags & I915_BO_ALLOC_PINNED) + ttm_bo_pin(i915_gem_to_ttm(obj)); i915_gem_object_unlock(obj); return 0; -- 2.25.1
[PATCH v10 01/11] drm/i915/ttm: dont trample cache_level overrides during ttm move
Various places within the driver override the default chosen cache_level. Before ttm, these overrides were permanent until explicitly changed again or for the lifetime of the buffer. TTM movement code came along and decided that it could make that decision at that time, which is usually well after object creation, so overrode the cache_level decision and reverted it back to its default decision. Add logic to indicate whether the caching mode has been set by anything other than the move logic. If so, assume that the code that overrode the defaults knows best and keep it. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 9 ++--- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 06b1b188ce5a..519887769c08 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, struct drm_i915_private *i915 = to_i915(obj->base.dev); obj->cache_level = cache_level; + obj->ttm.cache_level_override = true; if (cache_level != I915_CACHE_NONE) obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ | diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 2c88bdb8ff7c..6632ed52e919 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -605,6 +605,7 @@ struct drm_i915_gem_object { struct i915_gem_object_page_iter get_io_page; struct drm_i915_gem_object *backup; bool created:1; + bool cache_level_override:1; } ttm; /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4c25d9b2f138..27d59639177f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1241,6 +1241,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_init_memory_region(obj, mem); i915_ttm_adjust_domains_after_move(obj); i915_ttm_adjust_gem_after_move(obj); + obj->ttm.cache_level_override = false; i915_gem_object_unlock(obj); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index a10716f4e717..4c1de0b4a10f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -123,9 +123,12 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : I915_BO_FLAG_STRUCT_PAGE; - cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, - bo->ttm); - i915_gem_object_set_cache_coherency(obj, cache_level); + if (!obj->ttm.cache_level_override) { + cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), + bo->resource, bo->ttm); + i915_gem_object_set_cache_coherency(obj, cache_level); + obj->ttm.cache_level_override = false; + } } /** -- 2.25.1
[PATCH v10 06/11] drm/i915: sanitize mem_flags for stolen buffers
Stolen regions are not page backed or considered iomem. Prevent flags indicating such. This correctly prevents stolen buffers from attempting to directly map them. See i915_gem_object_has_struct_page() and i915_gem_object_has_iomem() usage for where it would break otherwise. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 675e9ab30396..81c67ca9edda 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -14,6 +14,7 @@ #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" #include "gem/i915_gem_ttm_move.h" +#include "gem/i915_gem_stolen.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" @@ -124,8 +125,9 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); - obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : - I915_BO_FLAG_STRUCT_PAGE; + if (!i915_gem_object_is_stolen(obj)) + obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : + I915_BO_FLAG_STRUCT_PAGE; if (!obj->ttm.cache_level_override) { cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), -- 2.25.1
[PATCH v10 02/11] drm/i915: limit ttm to dma32 for i965G[M]
i965G[M] cannot relocate objects above 4GiB. Ensure ttm uses dma32 on these systems. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 62ff77445b01..fd2ecfdd8fa1 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -32,10 +32,15 @@ int intel_region_ttm_device_init(struct drm_i915_private *dev_priv) { struct drm_device *drm = _priv->drm; + bool use_dma32 = false; + + /* i965g[m] cannot relocate objects above 4GiB. */ + if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) + use_dma32 = true; return ttm_device_init(_priv->bdev, i915_ttm_driver(), drm->dev, drm->anon_inode->i_mapping, - drm->vma_offset_manager, false, false); + drm->vma_offset_manager, false, use_dma32); } /** -- 2.25.1
[PATCH v10 03/11] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level
By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP. This is divergent from existing backends code which only considers HAS_LLC. Testing shows that trusting snooping on gen5- is unreliable and bsw via ggtt mappings, so limit DGFX for now and maintain previous behaviour. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 4c1de0b4a10f..40249fa28a7a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -46,7 +46,9 @@ static enum i915_cache_level i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, struct ttm_tt *ttm) { - return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && + bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915); + + return ((HAS_LLC(i915) || can_snoop) && !i915_ttm_gtt_binds_lmem(res) && ttm->caching == ttm_cached) ? I915_CACHE_LLC : I915_CACHE_NONE; -- 2.25.1
[PATCH v10 00/11] drm/i915: ttm for stolen
This series refactors i915's stolen memory region to use ttm. v2: handle disabled stolen similar to legacy version. relying on ttm to fail allocs works fine, but is dmesg noisy and causes testing dmesg warning regressions. v3: rebase to latest drm-tip. fix v2 code refactor which could leave a buffer pinned. locally passes fftl again now. v4: - Allow memory regions creators to do allocation. Allows stolen region to track it's own reservations. - Pre-reserve first page of stolen mem (add back WaSkipStolenMemoryFirstPage:bdw+) - Improve commit descritpion for "drm/i915: sanitize mem_flags for stolen buffers" - replace i915_gem_object_pin_pages_unlocked() call with manual locking and pinning. this avoids ww ctx class reuse during context creation -> ring vma obj alloc. v5: - detect both types of stolen as stolen buffers in "drm/i915: sanitize mem_flags for stolen buffers" - in stolen_object_init limit page size to mem region minimum. The range allocator expects the page_size to define the alignment v6: - Share first 4 patches from ttm for internal series as generic i915 ttm fixes - Drop patch 4 from v5. We don't need separate object ops just to satisfy test interfaces. The tests have now been fixed via checking whether the memory region is private to decide whether to mmap - Add new buffer pin alloc flag to allow creation of buffers in their final ttm placement instead of deferring until get_pages. This fixes legacy fallback paths for buffer allocations during stolen memory pressure. v7: - fix mock_region_get_pages() to correctly handle I915_BO_INVALID_OFFSET v8: - Reserve I915_GEM_STOLEN_BIAS area from stolen v9: - drop patch 8 "drm/i915: allow memory region creators to alloc and free the region" store bias reservation in drm_i915_private instead. - Restrict reset selftest to only test !GuC engines. Resetting individual GuC engines from host is not supported - Wait for outstanding requests in reset selftest This prevents previous engine test context cleanup appearing as false positive stolen corruption check v10:- Fix wiating on requests early error path during reset selftest If a single request fails to complete, the others would not be put, resulting in leaks. Make sure all requests are put before test exit. Robert Beckett (11): drm/i915/ttm: dont trample cache_level overrides during ttm move drm/i915: limit ttm to dma32 for i965G[M] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level drm/i915/gem: selftest should not attempt mmap of private regions drm/i915: instantiate ttm ranger manager for stolen memory drm/i915: sanitize mem_flags for stolen buffers drm/i915: ttm move/clear logic fix drm/i915/ttm: add buffer pin on alloc flag drm/i915/selftest: don't attempt engine reset of guc submission engines drm/i915/selftest: wait for requests during engine reset selftest drm/i915: stolen memory use ttm backend drivers/gpu/drm/i915/display/intel_fbc.c | 78 ++-- drivers/gpu/drm/i915/gem/i915_gem_object.c| 1 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 16 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 425 ++ drivers/gpu/drm/i915/gem/i915_gem_stolen.h| 21 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 29 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 7 + drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 47 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 3 + drivers/gpu/drm/i915/gt/intel_rc6.c | 4 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 53 ++- drivers/gpu/drm/i915/i915_debugfs.c | 7 +- drivers/gpu/drm/i915/i915_drv.h | 6 +- drivers/gpu/drm/i915/intel_region_ttm.c | 80 +++- drivers/gpu/drm/i915/intel_region_ttm.h | 8 +- drivers/gpu/drm/i915/selftests/mock_region.c | 12 +- 16 files changed, 420 insertions(+), 377 deletions(-) -- 2.25.1
[PATCH v9 11/11] drm/i915: stolen memory use ttm backend
refactor stolen memory region to use ttm. this necessitates using ttm resources to track reserved stolen regions instead of drm_mm_nodes. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/display/intel_fbc.c | 78 ++-- .../gpu/drm/i915/gem/i915_gem_object_types.h | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 425 ++ drivers/gpu/drm/i915/gem/i915_gem_stolen.h| 21 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 7 + drivers/gpu/drm/i915/gt/intel_rc6.c | 4 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 16 +- drivers/gpu/drm/i915/i915_debugfs.c | 7 +- drivers/gpu/drm/i915/i915_drv.h | 6 +- drivers/gpu/drm/i915/intel_region_ttm.c | 42 +- drivers/gpu/drm/i915/intel_region_ttm.h | 8 +- drivers/gpu/drm/i915/selftests/mock_region.c | 12 +- 13 files changed, 280 insertions(+), 351 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 8b807284cde1..6f3afac5e8c9 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -38,6 +38,7 @@ * forcibly disable it to allow proper screen updates. */ +#include "gem/i915_gem_stolen.h" #include #include @@ -51,6 +52,7 @@ #include "intel_display_types.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" +#include "gem/i915_gem_region.h" #define for_each_fbc_id(__dev_priv, __fbc_id) \ for ((__fbc_id) = INTEL_FBC_A; (__fbc_id) < I915_MAX_FBCS; (__fbc_id)++) \ @@ -92,8 +94,8 @@ struct intel_fbc { struct mutex lock; unsigned int busy_bits; - struct drm_mm_node compressed_fb; - struct drm_mm_node compressed_llb; + struct ttm_resource *compressed_fb; + struct ttm_resource *compressed_llb; enum intel_fbc_id id; @@ -331,16 +333,20 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc) static void i8xx_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); + u64 llb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_llb); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + GEM_BUG_ON(llb_offset == I915_BO_INVALID_OFFSET); GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start, -fbc->compressed_fb.start, U32_MAX)); +fb_offset, U32_MAX)); GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start, -fbc->compressed_llb.start, U32_MAX)); +llb_offset, U32_MAX)); intel_de_write(i915, FBC_CFB_BASE, - i915->dsm.start + fbc->compressed_fb.start); + i915->dsm.start + fb_offset); intel_de_write(i915, FBC_LL_BASE, - i915->dsm.start + fbc->compressed_llb.start); + i915->dsm.start + llb_offset); } static const struct intel_fbc_funcs i8xx_fbc_funcs = { @@ -448,8 +454,10 @@ static bool g4x_fbc_is_compressing(struct intel_fbc *fbc) static void g4x_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); - intel_de_write(i915, DPFC_CB_BASE, fbc->compressed_fb.start); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + intel_de_write(i915, DPFC_CB_BASE, fb_offset); } static const struct intel_fbc_funcs g4x_fbc_funcs = { @@ -499,8 +507,10 @@ static bool ilk_fbc_is_compressing(struct intel_fbc *fbc) static void ilk_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); - intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fbc->compressed_fb.start); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fb_offset); } static const struct intel_fbc_funcs ilk_fbc_funcs = { @@ -744,21 +754,24 @@ static int find_compression_limit(struct intel_fbc *fbc, { struct drm_i915_private *i915 = fbc->i915; u64 end = intel_fbc_stolen_end(i915); - int ret, limit = min_limit; + int limit = min_limit; + struct ttm_resource *res; size /= limit; /* Try to over-allocate to reduce reallocations and fragmentation. */ - ret = i915_gem_stolen_insert_node_in_range(i915, >compressed_fb, - size <<= 1, 4096, 0, end); - if (ret == 0) + res = i915_gem_stolen_reserve_range(i915, size <<= 1, 0, end)
[PATCH v9 09/11] drm/i915/selftest: don't attempt engine reset of guc submission engines
igt_reset_engines_stolen tries to reset engines without checking if it is possible. Engines using GuC submission are not able to be reset from the host. In this scenario, the reset exits early, then on the next iteration of the each engine loop, the async teardown of the spinner request context's ring occurs while the next engine is under test. This is seen as a stolen memory corruption as the ring buffer was busy initially, but free during the confirmation check and had been poisoned during cleanup. Fix this by not testing GuC submission using engines. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gt/selftest_reset.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 37c38bdd5f47..55f3b34e5f6e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -194,6 +194,8 @@ static int igt_reset_engines_stolen(void *arg) return 0; for_each_engine(engine, gt, id) { + if (intel_engine_uses_guc(engine)) + continue; err = __igt_reset_stolen(gt, engine->mask, engine->name); if (err) return err; -- 2.25.1
[PATCH v9 10/11] drm/i915/selftest: wait for requests during engine reset selftest
While looping around each engine and testing for corrupted solen memory during engine reset, the old requests from the previous engine can still be yet to retire. To prevent false positive corruption tests, wait for the outstanding requests at the end of the test Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gt/selftest_reset.c | 24 1 file changed, 24 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 55f3b34e5f6e..52acef647396 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -6,6 +6,7 @@ #include #include "gem/i915_gem_stolen.h" +#include "gt/intel_gt.h" #include "i915_memcpy.h" #include "i915_selftest.h" @@ -26,6 +27,7 @@ __igt_reset_stolen(struct intel_gt *gt, intel_wakeref_t wakeref; enum intel_engine_id id; struct igt_spinner spin; + struct i915_request *requests[I915_NUM_ENGINES] = {0}; long max, count; void *tmp; u32 *crc; @@ -77,6 +79,7 @@ __igt_reset_stolen(struct intel_gt *gt, goto err_spin; } i915_request_add(rq); + requests[id] = i915_request_get(rq); } for (page = 0; page < num_pages; page++) { @@ -165,6 +168,27 @@ __igt_reset_stolen(struct intel_gt *gt, err = -EINVAL; } + /* wait for requests and idle, otherwise cleanup can happen on next loop */ + for (id = 0; id < I915_NUM_ENGINES; id++) { + if (!requests[id]) + continue; + err = i915_request_wait(requests[id], I915_WAIT_INTERRUPTIBLE, HZ); + if (err < 0) { + pr_err("%s failed to wait for rq: %d\n", msg, err); + goto err_spin; + } + + i915_request_put(requests[id]); + } + + err = intel_gt_wait_for_idle(gt, HZ); + if (err < 0) { + pr_err("%s failed to wait for gt idle: %d\n", msg, err); + goto err_spin; + } + + err = 0; + err_spin: igt_spinner_fini(); -- 2.25.1
[PATCH v9 08/11] drm/i915/ttm: add buffer pin on alloc flag
For situations where allocations need to fail on alloc instead of delayed get_pages, add a new alloc flag to pin the ttm bo. This makes sure that the resource has been allocated during buffer creation, allowing it to fail with an error if the placement is exhausted. This allows existing fallback options for stolen backend allocation like create_ring_vma to work as expected. Signed-off-by: Robert Beckett --- .../gpu/drm/i915/gem/i915_gem_object_types.h | 13 ++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 25 ++- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 6632ed52e919..07bc11247a3e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -325,17 +325,20 @@ struct drm_i915_gem_object { * dealing with userspace objects the CPU fault handler is free to ignore this. */ #define I915_BO_ALLOC_GPU_ONLY BIT(6) +/* object should be pinned in destination region from allocation */ +#define I915_BO_ALLOC_PINNED BIT(7) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ I915_BO_ALLOC_CPU_CLEAR | \ I915_BO_ALLOC_USER | \ I915_BO_ALLOC_PM_VOLATILE | \ I915_BO_ALLOC_PM_EARLY | \ -I915_BO_ALLOC_GPU_ONLY) -#define I915_BO_READONLY BIT(7) -#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */ -#define I915_BO_PROTECTED BIT(9) -#define I915_BO_WAS_BOUND_BIT 10 +I915_BO_ALLOC_GPU_ONLY | \ +I915_BO_ALLOC_PINNED) +#define I915_BO_READONLY BIT(8) +#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */ +#define I915_BO_PROTECTED BIT(10) +#define I915_BO_WAS_BOUND_BIT 11 /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 27d59639177f..bb988608296d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -998,6 +998,13 @@ static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) { GEM_BUG_ON(!obj->ttm.created); + /* stolen objects are pinned for lifetime. Unpin before putting */ + if (obj->flags & I915_BO_ALLOC_PINNED) { + ttm_bo_reserve(i915_gem_to_ttm(obj), true, false, NULL); + ttm_bo_unpin(i915_gem_to_ttm(obj)); + ttm_bo_unreserve(i915_gem_to_ttm(obj)); + } + ttm_bo_put(i915_gem_to_ttm(obj)); } @@ -1193,6 +1200,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, .no_wait_gpu = false, }; enum ttm_bo_type bo_type; + struct ttm_place _place; + struct ttm_placement _placement; + struct ttm_placement *placement; int ret; drm_gem_private_object_init(>drm, >base, size); @@ -1222,6 +1232,17 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, */ i915_gem_object_make_unshrinkable(obj); + if (obj->flags & I915_BO_ALLOC_PINNED) { + i915_ttm_place_from_region(mem, &_place, obj->bo_offset, + obj->base.size, obj->flags); + _placement.num_placement = 1; + _placement.placement = &_place; + _placement.num_busy_placement = 0; + _placement.busy_placement = NULL; + placement = &_placement; + } else { + placement = _sys_placement; + } /* * If this function fails, it will call the destructor, but * our caller still owns the object. So no freeing in the @@ -1230,7 +1251,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, * until successful initialization. */ ret = ttm_bo_init_reserved(>bdev, i915_gem_to_ttm(obj), size, - bo_type, _sys_placement, + bo_type, placement, page_size >> PAGE_SHIFT, , NULL, NULL, i915_ttm_bo_destroy); if (ret) @@ -1242,6 +1263,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_ttm_adjust_domains_after_move(obj); i915_ttm_adjust_gem_after_move(obj); obj->ttm.cache_level_override = false; + if (obj->flags & I915_BO_ALLOC_PINNED) + ttm_bo_pin(i915_gem_to_ttm(obj)); i915_gem_object_unlock(obj); return 0; -- 2.25.1
[PATCH v9 07/11] drm/i915: ttm move/clear logic fix
ttm managed buffers start off with system resource definitions and ttm_tt tracking structures allocated (though unpopulated). currently this prevents clearing of buffers on first move to desired placements. The desired behaviour is to clear user allocated buffers and any kernel buffers that specifically requests it only. Make the logic match the desired behaviour. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 22 +++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 81c67ca9edda..a3f8fc056dbc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -3,6 +3,7 @@ * Copyright © 2021 Intel Corporation */ +#include "drm/ttm/ttm_tt.h" #include #include "i915_deps.h" @@ -476,6 +477,25 @@ __i915_ttm_move(struct ttm_buffer_object *bo, return fence; } +static bool +allow_clear(struct drm_i915_gem_object *obj, struct ttm_tt *ttm, struct ttm_resource *dst_mem) +{ + /* never clear stolen */ + if (dst_mem->mem_type == I915_PL_STOLEN) + return false; + /* +* we want to clear user buffers and any kernel buffers +* that specifically request clearing. +*/ + if (obj->flags & I915_BO_ALLOC_USER) + return true; + + if (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) + return true; + + return false; +} + /** * i915_ttm_move - The TTM move callback used by i915. * @bo: The buffer object. @@ -526,7 +546,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, return PTR_ERR(dst_rsgt); clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); - if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) { + if (!clear || allow_clear(obj, ttm, dst_mem)) { struct i915_deps deps; i915_deps_init(, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); -- 2.25.1
[PATCH v9 05/11] drm/i915: instantiate ttm ranger manager for stolen memory
prepare for ttm based stolen region by using ttm range manager as the resource manager for stolen region. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 ++-- drivers/gpu/drm/i915/intel_region_ttm.c | 31 +++- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 40249fa28a7a..675e9ab30396 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -60,11 +60,13 @@ i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type) struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); /* There's some room for optimization here... */ - GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM && - ttm_mem_type < I915_PL_LMEM0); + GEM_BUG_ON(ttm_mem_type == I915_PL_GGTT); + if (ttm_mem_type == I915_PL_SYSTEM) return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM, 0); + if (ttm_mem_type == I915_PL_STOLEN) + return i915->mm.stolen_region; return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL, ttm_mem_type - I915_PL_LMEM0); diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index fd2ecfdd8fa1..694e9acb69e2 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -54,7 +54,7 @@ void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv) /* * Map the i915 memory regions to TTM memory types. We use the - * driver-private types for now, reserving TTM_PL_VRAM for stolen + * driver-private types for now, reserving I915_PL_STOLEN for stolen * memory and TTM_PL_TT for GGTT use if decided to implement this. */ int intel_region_to_ttm_type(const struct intel_memory_region *mem) @@ -63,11 +63,17 @@ int intel_region_to_ttm_type(const struct intel_memory_region *mem) GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL && mem->type != INTEL_MEMORY_MOCK && - mem->type != INTEL_MEMORY_SYSTEM); + mem->type != INTEL_MEMORY_SYSTEM && + mem->type != INTEL_MEMORY_STOLEN_SYSTEM && + mem->type != INTEL_MEMORY_STOLEN_LOCAL); if (mem->type == INTEL_MEMORY_SYSTEM) return TTM_PL_SYSTEM; + if (mem->type == INTEL_MEMORY_STOLEN_SYSTEM || + mem->type == INTEL_MEMORY_STOLEN_LOCAL) + return I915_PL_STOLEN; + type = mem->instance + TTM_PL_PRIV; GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES); @@ -91,10 +97,16 @@ int intel_region_ttm_init(struct intel_memory_region *mem) int mem_type = intel_region_to_ttm_type(mem); int ret; - ret = i915_ttm_buddy_man_init(bdev, mem_type, false, - resource_size(>region), - mem->io_size, - mem->min_page_size, PAGE_SIZE); + if (mem_type == I915_PL_STOLEN) { + ret = ttm_range_man_init(bdev, mem_type, false, +resource_size(>region) >> PAGE_SHIFT); + mem->is_range_manager = true; + } else { + ret = i915_ttm_buddy_man_init(bdev, mem_type, false, + resource_size(>region), + mem->io_size, + mem->min_page_size, PAGE_SIZE); + } if (ret) return ret; @@ -114,6 +126,7 @@ int intel_region_ttm_init(struct intel_memory_region *mem) int intel_region_ttm_fini(struct intel_memory_region *mem) { struct ttm_resource_manager *man = mem->region_private; + int mem_type = intel_region_to_ttm_type(mem); int ret = -EBUSY; int count; @@ -144,8 +157,10 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) if (ret || !man) return ret; - ret = i915_ttm_buddy_man_fini(>i915->bdev, - intel_region_to_ttm_type(mem)); + if (mem_type == I915_PL_STOLEN) + ret = ttm_range_man_fini(>i915->bdev, mem_type); + else + ret = i915_ttm_buddy_man_fini(>i915->bdev, mem_type); GEM_WARN_ON(ret); mem->region_private = NULL; -- 2.25.1
[PATCH v9 03/11] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level
By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP. This is divergent from existing backends code which only considers HAS_LLC. Testing shows that trusting snooping on gen5- is unreliable and bsw via ggtt mappings, so limit DGFX for now and maintain previous behaviour. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 4c1de0b4a10f..40249fa28a7a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -46,7 +46,9 @@ static enum i915_cache_level i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, struct ttm_tt *ttm) { - return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && + bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915); + + return ((HAS_LLC(i915) || can_snoop) && !i915_ttm_gtt_binds_lmem(res) && ttm->caching == ttm_cached) ? I915_CACHE_LLC : I915_CACHE_NONE; -- 2.25.1
[PATCH v9 06/11] drm/i915: sanitize mem_flags for stolen buffers
Stolen regions are not page backed or considered iomem. Prevent flags indicating such. This correctly prevents stolen buffers from attempting to directly map them. See i915_gem_object_has_struct_page() and i915_gem_object_has_iomem() usage for where it would break otherwise. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 675e9ab30396..81c67ca9edda 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -14,6 +14,7 @@ #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" #include "gem/i915_gem_ttm_move.h" +#include "gem/i915_gem_stolen.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" @@ -124,8 +125,9 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); - obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : - I915_BO_FLAG_STRUCT_PAGE; + if (!i915_gem_object_is_stolen(obj)) + obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : + I915_BO_FLAG_STRUCT_PAGE; if (!obj->ttm.cache_level_override) { cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), -- 2.25.1
[PATCH v9 04/11] drm/i915/gem: selftest should not attempt mmap of private regions
During testing make can_mmap consider whether the region is private. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5bc93a1ce3e3..76181e28c75e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -869,6 +869,9 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) struct drm_i915_private *i915 = to_i915(obj->base.dev); bool no_map; + if (obj->mm.region && obj->mm.region->private) + return false; + if (obj->ops->mmap_offset) return type == I915_MMAP_TYPE_FIXED; else if (type == I915_MMAP_TYPE_FIXED) -- 2.25.1
[PATCH v9 01/11] drm/i915/ttm: dont trample cache_level overrides during ttm move
Various places within the driver override the default chosen cache_level. Before ttm, these overrides were permanent until explicitly changed again or for the lifetime of the buffer. TTM movement code came along and decided that it could make that decision at that time, which is usually well after object creation, so overrode the cache_level decision and reverted it back to its default decision. Add logic to indicate whether the caching mode has been set by anything other than the move logic. If so, assume that the code that overrode the defaults knows best and keep it. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 9 ++--- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 06b1b188ce5a..519887769c08 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, struct drm_i915_private *i915 = to_i915(obj->base.dev); obj->cache_level = cache_level; + obj->ttm.cache_level_override = true; if (cache_level != I915_CACHE_NONE) obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ | diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 2c88bdb8ff7c..6632ed52e919 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -605,6 +605,7 @@ struct drm_i915_gem_object { struct i915_gem_object_page_iter get_io_page; struct drm_i915_gem_object *backup; bool created:1; + bool cache_level_override:1; } ttm; /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4c25d9b2f138..27d59639177f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1241,6 +1241,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_init_memory_region(obj, mem); i915_ttm_adjust_domains_after_move(obj); i915_ttm_adjust_gem_after_move(obj); + obj->ttm.cache_level_override = false; i915_gem_object_unlock(obj); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index a10716f4e717..4c1de0b4a10f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -123,9 +123,12 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : I915_BO_FLAG_STRUCT_PAGE; - cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, - bo->ttm); - i915_gem_object_set_cache_coherency(obj, cache_level); + if (!obj->ttm.cache_level_override) { + cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), + bo->resource, bo->ttm); + i915_gem_object_set_cache_coherency(obj, cache_level); + obj->ttm.cache_level_override = false; + } } /** -- 2.25.1
[PATCH v9 02/11] drm/i915: limit ttm to dma32 for i965G[M]
i965G[M] cannot relocate objects above 4GiB. Ensure ttm uses dma32 on these systems. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 62ff77445b01..fd2ecfdd8fa1 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -32,10 +32,15 @@ int intel_region_ttm_device_init(struct drm_i915_private *dev_priv) { struct drm_device *drm = _priv->drm; + bool use_dma32 = false; + + /* i965g[m] cannot relocate objects above 4GiB. */ + if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) + use_dma32 = true; return ttm_device_init(_priv->bdev, i915_ttm_driver(), drm->dev, drm->anon_inode->i_mapping, - drm->vma_offset_manager, false, false); + drm->vma_offset_manager, false, use_dma32); } /** -- 2.25.1
[PATCH v9 00/11] drm/i915: ttm for stolen
This series refactors i915's stolen memory region to use ttm. v2: handle disabled stolen similar to legacy version. relying on ttm to fail allocs works fine, but is dmesg noisy and causes testing dmesg warning regressions. v3: rebase to latest drm-tip. fix v2 code refactor which could leave a buffer pinned. locally passes fftl again now. v4: - Allow memory regions creators to do allocation. Allows stolen region to track it's own reservations. - Pre-reserve first page of stolen mem (add back WaSkipStolenMemoryFirstPage:bdw+) - Improve commit descritpion for "drm/i915: sanitize mem_flags for stolen buffers" - replace i915_gem_object_pin_pages_unlocked() call with manual locking and pinning. this avoids ww ctx class reuse during context creation -> ring vma obj alloc. v5: - detect both types of stolen as stolen buffers in "drm/i915: sanitize mem_flags for stolen buffers" - in stolen_object_init limit page size to mem region minimum. The range allocator expects the page_size to define the alignment v6: - Share first 4 patches from ttm for internal series as generic i915 ttm fixes - Drop patch 4 from v5. We don't need separate object ops just to satisfy test interfaces. The tests have now been fixed via checking whether the memory region is private to decide whether to mmap - Add new buffer pin alloc flag to allow creation of buffers in their final ttm placement instead of deferring until get_pages. This fixes legacy fallback paths for buffer allocations during stolen memory pressure. v7: - fix mock_region_get_pages() to correctly handle I915_BO_INVALID_OFFSET v8: - Reserve I915_GEM_STOLEN_BIAS area from stolen v9: - drop patch 8 "drm/i915: allow memory region creators to alloc and free the region" store bias reservation in drm_i915_private instead. - Restrict reset selftest to only test !GuC engines. Resetting individual GuC engines from host is not supported - Wait for outstanding requests in reset selftest This prevents previous engine test context cleanup appearing as false positive stolen corruption check Robert Beckett (11): drm/i915/ttm: dont trample cache_level overrides during ttm move drm/i915: limit ttm to dma32 for i965G[M] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level drm/i915/gem: selftest should not attempt mmap of private regions drm/i915: instantiate ttm ranger manager for stolen memory drm/i915: sanitize mem_flags for stolen buffers drm/i915: ttm move/clear logic fix drm/i915/ttm: add buffer pin on alloc flag drm/i915/selftest: don't attempt engine reset of guc submission engines drm/i915/selftest: wait for requests during engine reset selftest drm/i915: stolen memory use ttm backend drivers/gpu/drm/i915/display/intel_fbc.c | 78 ++-- drivers/gpu/drm/i915/gem/i915_gem_object.c| 1 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 16 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 425 ++ drivers/gpu/drm/i915/gem/i915_gem_stolen.h| 21 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 29 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 7 + drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 47 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 3 + drivers/gpu/drm/i915/gt/intel_rc6.c | 4 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 42 +- drivers/gpu/drm/i915/i915_debugfs.c | 7 +- drivers/gpu/drm/i915/i915_drv.h | 6 +- drivers/gpu/drm/i915/intel_region_ttm.c | 80 +++- drivers/gpu/drm/i915/intel_region_ttm.h | 8 +- drivers/gpu/drm/i915/selftests/mock_region.c | 12 +- 16 files changed, 411 insertions(+), 375 deletions(-) -- 2.25.1
[PATCH v8 10/10] drm/i915: stolen memory use ttm backend
refactor stolen memory region to use ttm. this necessitates using ttm resources to track reserved stolen regions instead of drm_mm_nodes. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/display/intel_fbc.c | 78 ++-- .../gpu/drm/i915/gem/i915_gem_object_types.h | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 441 +++--- drivers/gpu/drm/i915/gem/i915_gem_stolen.h| 21 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 7 + drivers/gpu/drm/i915/gt/intel_rc6.c | 4 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 16 +- drivers/gpu/drm/i915/i915_debugfs.c | 7 +- drivers/gpu/drm/i915/i915_drv.h | 5 - drivers/gpu/drm/i915/intel_region_ttm.c | 42 +- drivers/gpu/drm/i915/intel_region_ttm.h | 8 +- drivers/gpu/drm/i915/selftests/mock_region.c | 12 +- 13 files changed, 304 insertions(+), 342 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 8b807284cde1..6f3afac5e8c9 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -38,6 +38,7 @@ * forcibly disable it to allow proper screen updates. */ +#include "gem/i915_gem_stolen.h" #include #include @@ -51,6 +52,7 @@ #include "intel_display_types.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" +#include "gem/i915_gem_region.h" #define for_each_fbc_id(__dev_priv, __fbc_id) \ for ((__fbc_id) = INTEL_FBC_A; (__fbc_id) < I915_MAX_FBCS; (__fbc_id)++) \ @@ -92,8 +94,8 @@ struct intel_fbc { struct mutex lock; unsigned int busy_bits; - struct drm_mm_node compressed_fb; - struct drm_mm_node compressed_llb; + struct ttm_resource *compressed_fb; + struct ttm_resource *compressed_llb; enum intel_fbc_id id; @@ -331,16 +333,20 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc) static void i8xx_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); + u64 llb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_llb); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + GEM_BUG_ON(llb_offset == I915_BO_INVALID_OFFSET); GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start, -fbc->compressed_fb.start, U32_MAX)); +fb_offset, U32_MAX)); GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start, -fbc->compressed_llb.start, U32_MAX)); +llb_offset, U32_MAX)); intel_de_write(i915, FBC_CFB_BASE, - i915->dsm.start + fbc->compressed_fb.start); + i915->dsm.start + fb_offset); intel_de_write(i915, FBC_LL_BASE, - i915->dsm.start + fbc->compressed_llb.start); + i915->dsm.start + llb_offset); } static const struct intel_fbc_funcs i8xx_fbc_funcs = { @@ -448,8 +454,10 @@ static bool g4x_fbc_is_compressing(struct intel_fbc *fbc) static void g4x_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); - intel_de_write(i915, DPFC_CB_BASE, fbc->compressed_fb.start); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + intel_de_write(i915, DPFC_CB_BASE, fb_offset); } static const struct intel_fbc_funcs g4x_fbc_funcs = { @@ -499,8 +507,10 @@ static bool ilk_fbc_is_compressing(struct intel_fbc *fbc) static void ilk_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); - intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fbc->compressed_fb.start); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fb_offset); } static const struct intel_fbc_funcs ilk_fbc_funcs = { @@ -744,21 +754,24 @@ static int find_compression_limit(struct intel_fbc *fbc, { struct drm_i915_private *i915 = fbc->i915; u64 end = intel_fbc_stolen_end(i915); - int ret, limit = min_limit; + int limit = min_limit; + struct ttm_resource *res; size /= limit; /* Try to over-allocate to reduce reallocations and fragmentation. */ - ret = i915_gem_stolen_insert_node_in_range(i915, >compressed_fb, - size <<= 1, 4096, 0, end); - if (ret == 0) + res = i915_gem_stolen_reserve_range(i915, size <<= 1, 0, end)
[PATCH v8 09/10] drm/i915/ttm: add buffer pin on alloc flag
For situations where allocations need to fail on alloc instead of delayed get_pages, add a new alloc flag to pin the ttm bo. This makes sure that the resource has been allocated during buffer creation, allowing it to fail with an error if the placement is exhausted. This allows existing fallback options for stolen backend allocation like create_ring_vma to work as expected. Signed-off-by: Robert Beckett --- .../gpu/drm/i915/gem/i915_gem_object_types.h | 13 ++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 25 ++- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 6632ed52e919..07bc11247a3e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -325,17 +325,20 @@ struct drm_i915_gem_object { * dealing with userspace objects the CPU fault handler is free to ignore this. */ #define I915_BO_ALLOC_GPU_ONLY BIT(6) +/* object should be pinned in destination region from allocation */ +#define I915_BO_ALLOC_PINNED BIT(7) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ I915_BO_ALLOC_CPU_CLEAR | \ I915_BO_ALLOC_USER | \ I915_BO_ALLOC_PM_VOLATILE | \ I915_BO_ALLOC_PM_EARLY | \ -I915_BO_ALLOC_GPU_ONLY) -#define I915_BO_READONLY BIT(7) -#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */ -#define I915_BO_PROTECTED BIT(9) -#define I915_BO_WAS_BOUND_BIT 10 +I915_BO_ALLOC_GPU_ONLY | \ +I915_BO_ALLOC_PINNED) +#define I915_BO_READONLY BIT(8) +#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */ +#define I915_BO_PROTECTED BIT(10) +#define I915_BO_WAS_BOUND_BIT 11 /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 27d59639177f..bb988608296d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -998,6 +998,13 @@ static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) { GEM_BUG_ON(!obj->ttm.created); + /* stolen objects are pinned for lifetime. Unpin before putting */ + if (obj->flags & I915_BO_ALLOC_PINNED) { + ttm_bo_reserve(i915_gem_to_ttm(obj), true, false, NULL); + ttm_bo_unpin(i915_gem_to_ttm(obj)); + ttm_bo_unreserve(i915_gem_to_ttm(obj)); + } + ttm_bo_put(i915_gem_to_ttm(obj)); } @@ -1193,6 +1200,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, .no_wait_gpu = false, }; enum ttm_bo_type bo_type; + struct ttm_place _place; + struct ttm_placement _placement; + struct ttm_placement *placement; int ret; drm_gem_private_object_init(>drm, >base, size); @@ -1222,6 +1232,17 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, */ i915_gem_object_make_unshrinkable(obj); + if (obj->flags & I915_BO_ALLOC_PINNED) { + i915_ttm_place_from_region(mem, &_place, obj->bo_offset, + obj->base.size, obj->flags); + _placement.num_placement = 1; + _placement.placement = &_place; + _placement.num_busy_placement = 0; + _placement.busy_placement = NULL; + placement = &_placement; + } else { + placement = _sys_placement; + } /* * If this function fails, it will call the destructor, but * our caller still owns the object. So no freeing in the @@ -1230,7 +1251,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, * until successful initialization. */ ret = ttm_bo_init_reserved(>bdev, i915_gem_to_ttm(obj), size, - bo_type, _sys_placement, + bo_type, placement, page_size >> PAGE_SHIFT, , NULL, NULL, i915_ttm_bo_destroy); if (ret) @@ -1242,6 +1263,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_ttm_adjust_domains_after_move(obj); i915_ttm_adjust_gem_after_move(obj); obj->ttm.cache_level_override = false; + if (obj->flags & I915_BO_ALLOC_PINNED) + ttm_bo_pin(i915_gem_to_ttm(obj)); i915_gem_object_unlock(obj); return 0; -- 2.25.1
[PATCH v8 06/10] drm/i915: sanitize mem_flags for stolen buffers
Stolen regions are not page backed or considered iomem. Prevent flags indicating such. This correctly prevents stolen buffers from attempting to directly map them. See i915_gem_object_has_struct_page() and i915_gem_object_has_iomem() usage for where it would break otherwise. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 675e9ab30396..81c67ca9edda 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -14,6 +14,7 @@ #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" #include "gem/i915_gem_ttm_move.h" +#include "gem/i915_gem_stolen.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" @@ -124,8 +125,9 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); - obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : - I915_BO_FLAG_STRUCT_PAGE; + if (!i915_gem_object_is_stolen(obj)) + obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : + I915_BO_FLAG_STRUCT_PAGE; if (!obj->ttm.cache_level_override) { cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), -- 2.25.1
[PATCH v8 07/10] drm/i915: ttm move/clear logic fix
ttm managed buffers start off with system resource definitions and ttm_tt tracking structures allocated (though unpopulated). currently this prevents clearing of buffers on first move to desired placements. The desired behaviour is to clear user allocated buffers and any kernel buffers that specifically requests it only. Make the logic match the desired behaviour. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 22 +++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 81c67ca9edda..a3f8fc056dbc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -3,6 +3,7 @@ * Copyright © 2021 Intel Corporation */ +#include "drm/ttm/ttm_tt.h" #include #include "i915_deps.h" @@ -476,6 +477,25 @@ __i915_ttm_move(struct ttm_buffer_object *bo, return fence; } +static bool +allow_clear(struct drm_i915_gem_object *obj, struct ttm_tt *ttm, struct ttm_resource *dst_mem) +{ + /* never clear stolen */ + if (dst_mem->mem_type == I915_PL_STOLEN) + return false; + /* +* we want to clear user buffers and any kernel buffers +* that specifically request clearing. +*/ + if (obj->flags & I915_BO_ALLOC_USER) + return true; + + if (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) + return true; + + return false; +} + /** * i915_ttm_move - The TTM move callback used by i915. * @bo: The buffer object. @@ -526,7 +546,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, return PTR_ERR(dst_rsgt); clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); - if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) { + if (!clear || allow_clear(obj, ttm, dst_mem)) { struct i915_deps deps; i915_deps_init(, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); -- 2.25.1
[PATCH v8 08/10] drm/i915: allow memory region creators to alloc and free the region
add callbacks for alloc and free. this allows region creators to allocate any extra storage they may require. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/intel_memory_region.c | 16 +--- drivers/gpu/drm/i915/intel_memory_region.h | 2 ++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index e38d2db1c3e3..3da07a712f90 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -231,7 +231,10 @@ intel_memory_region_create(struct drm_i915_private *i915, struct intel_memory_region *mem; int err; - mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (ops->alloc) + mem = ops->alloc(); + else + mem = kzalloc(sizeof(*mem), GFP_KERNEL); if (!mem) return ERR_PTR(-ENOMEM); @@ -265,7 +268,10 @@ intel_memory_region_create(struct drm_i915_private *i915, if (mem->ops->release) mem->ops->release(mem); err_free: - kfree(mem); + if (mem->ops->free) + mem->ops->free(mem); + else + kfree(mem); return ERR_PTR(err); } @@ -288,7 +294,11 @@ void intel_memory_region_destroy(struct intel_memory_region *mem) GEM_WARN_ON(!list_empty_careful(>objects.list)); mutex_destroy(>objects.lock); - if (!ret) + if (ret) + return; + if (mem->ops->free) + mem->ops->free(mem); + else kfree(mem); } diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 3d8378c1b447..048955b5429f 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -61,6 +61,8 @@ struct intel_memory_region_ops { resource_size_t size, resource_size_t page_size, unsigned int flags); + struct intel_memory_region *(*alloc)(void); + void (*free)(struct intel_memory_region *mem); }; struct intel_memory_region { -- 2.25.1
[PATCH v8 03/10] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level
By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP. This is divergent from existing backends code which only considers HAS_LLC. Testing shows that trusting snooping on gen5- is unreliable and bsw via ggtt mappings, so limit DGFX for now and maintain previous behaviour. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 4c1de0b4a10f..40249fa28a7a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -46,7 +46,9 @@ static enum i915_cache_level i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, struct ttm_tt *ttm) { - return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && + bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915); + + return ((HAS_LLC(i915) || can_snoop) && !i915_ttm_gtt_binds_lmem(res) && ttm->caching == ttm_cached) ? I915_CACHE_LLC : I915_CACHE_NONE; -- 2.25.1
[PATCH v8 05/10] drm/i915: instantiate ttm ranger manager for stolen memory
prepare for ttm based stolen region by using ttm range manager as the resource manager for stolen region. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 ++-- drivers/gpu/drm/i915/intel_region_ttm.c | 31 +++- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 40249fa28a7a..675e9ab30396 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -60,11 +60,13 @@ i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type) struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); /* There's some room for optimization here... */ - GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM && - ttm_mem_type < I915_PL_LMEM0); + GEM_BUG_ON(ttm_mem_type == I915_PL_GGTT); + if (ttm_mem_type == I915_PL_SYSTEM) return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM, 0); + if (ttm_mem_type == I915_PL_STOLEN) + return i915->mm.stolen_region; return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL, ttm_mem_type - I915_PL_LMEM0); diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index fd2ecfdd8fa1..694e9acb69e2 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -54,7 +54,7 @@ void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv) /* * Map the i915 memory regions to TTM memory types. We use the - * driver-private types for now, reserving TTM_PL_VRAM for stolen + * driver-private types for now, reserving I915_PL_STOLEN for stolen * memory and TTM_PL_TT for GGTT use if decided to implement this. */ int intel_region_to_ttm_type(const struct intel_memory_region *mem) @@ -63,11 +63,17 @@ int intel_region_to_ttm_type(const struct intel_memory_region *mem) GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL && mem->type != INTEL_MEMORY_MOCK && - mem->type != INTEL_MEMORY_SYSTEM); + mem->type != INTEL_MEMORY_SYSTEM && + mem->type != INTEL_MEMORY_STOLEN_SYSTEM && + mem->type != INTEL_MEMORY_STOLEN_LOCAL); if (mem->type == INTEL_MEMORY_SYSTEM) return TTM_PL_SYSTEM; + if (mem->type == INTEL_MEMORY_STOLEN_SYSTEM || + mem->type == INTEL_MEMORY_STOLEN_LOCAL) + return I915_PL_STOLEN; + type = mem->instance + TTM_PL_PRIV; GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES); @@ -91,10 +97,16 @@ int intel_region_ttm_init(struct intel_memory_region *mem) int mem_type = intel_region_to_ttm_type(mem); int ret; - ret = i915_ttm_buddy_man_init(bdev, mem_type, false, - resource_size(>region), - mem->io_size, - mem->min_page_size, PAGE_SIZE); + if (mem_type == I915_PL_STOLEN) { + ret = ttm_range_man_init(bdev, mem_type, false, +resource_size(>region) >> PAGE_SHIFT); + mem->is_range_manager = true; + } else { + ret = i915_ttm_buddy_man_init(bdev, mem_type, false, + resource_size(>region), + mem->io_size, + mem->min_page_size, PAGE_SIZE); + } if (ret) return ret; @@ -114,6 +126,7 @@ int intel_region_ttm_init(struct intel_memory_region *mem) int intel_region_ttm_fini(struct intel_memory_region *mem) { struct ttm_resource_manager *man = mem->region_private; + int mem_type = intel_region_to_ttm_type(mem); int ret = -EBUSY; int count; @@ -144,8 +157,10 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) if (ret || !man) return ret; - ret = i915_ttm_buddy_man_fini(>i915->bdev, - intel_region_to_ttm_type(mem)); + if (mem_type == I915_PL_STOLEN) + ret = ttm_range_man_fini(>i915->bdev, mem_type); + else + ret = i915_ttm_buddy_man_fini(>i915->bdev, mem_type); GEM_WARN_ON(ret); mem->region_private = NULL; -- 2.25.1
[PATCH v8 04/10] drm/i915/gem: selftest should not attempt mmap of private regions
During testing make can_mmap consider whether the region is private. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5bc93a1ce3e3..76181e28c75e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -869,6 +869,9 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) struct drm_i915_private *i915 = to_i915(obj->base.dev); bool no_map; + if (obj->mm.region && obj->mm.region->private) + return false; + if (obj->ops->mmap_offset) return type == I915_MMAP_TYPE_FIXED; else if (type == I915_MMAP_TYPE_FIXED) -- 2.25.1
[PATCH v8 02/10] drm/i915: limit ttm to dma32 for i965G[M]
i965G[M] cannot relocate objects above 4GiB. Ensure ttm uses dma32 on these systems. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 62ff77445b01..fd2ecfdd8fa1 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -32,10 +32,15 @@ int intel_region_ttm_device_init(struct drm_i915_private *dev_priv) { struct drm_device *drm = _priv->drm; + bool use_dma32 = false; + + /* i965g[m] cannot relocate objects above 4GiB. */ + if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) + use_dma32 = true; return ttm_device_init(_priv->bdev, i915_ttm_driver(), drm->dev, drm->anon_inode->i_mapping, - drm->vma_offset_manager, false, false); + drm->vma_offset_manager, false, use_dma32); } /** -- 2.25.1
[PATCH v8 01/10] drm/i915/ttm: dont trample cache_level overrides during ttm move
Various places within the driver override the default chosen cache_level. Before ttm, these overrides were permanent until explicitly changed again or for the lifetime of the buffer. TTM movement code came along and decided that it could make that decision at that time, which is usually well after object creation, so overrode the cache_level decision and reverted it back to its default decision. Add logic to indicate whether the caching mode has been set by anything other than the move logic. If so, assume that the code that overrode the defaults knows best and keep it. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 9 ++--- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 06b1b188ce5a..519887769c08 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, struct drm_i915_private *i915 = to_i915(obj->base.dev); obj->cache_level = cache_level; + obj->ttm.cache_level_override = true; if (cache_level != I915_CACHE_NONE) obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ | diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 2c88bdb8ff7c..6632ed52e919 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -605,6 +605,7 @@ struct drm_i915_gem_object { struct i915_gem_object_page_iter get_io_page; struct drm_i915_gem_object *backup; bool created:1; + bool cache_level_override:1; } ttm; /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4c25d9b2f138..27d59639177f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1241,6 +1241,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_init_memory_region(obj, mem); i915_ttm_adjust_domains_after_move(obj); i915_ttm_adjust_gem_after_move(obj); + obj->ttm.cache_level_override = false; i915_gem_object_unlock(obj); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index a10716f4e717..4c1de0b4a10f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -123,9 +123,12 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : I915_BO_FLAG_STRUCT_PAGE; - cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, - bo->ttm); - i915_gem_object_set_cache_coherency(obj, cache_level); + if (!obj->ttm.cache_level_override) { + cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), + bo->resource, bo->ttm); + i915_gem_object_set_cache_coherency(obj, cache_level); + obj->ttm.cache_level_override = false; + } } /** -- 2.25.1
[PATCH v8 00/10] drm/i915: ttm for stolen
This series refactors i915's stolen memory region to use ttm. v2: handle disabled stolen similar to legacy version. relying on ttm to fail allocs works fine, but is dmesg noisy and causes testing dmesg warning regressions. v3: rebase to latest drm-tip. fix v2 code refactor which could leave a buffer pinned. locally passes fftl again now. v4: - Allow memory regions creators to do allocation. Allows stolen region to track it's own reservations. - Pre-reserve first page of stolen mem (add back WaSkipStolenMemoryFirstPage:bdw+) - Improve commit descritpion for "drm/i915: sanitize mem_flags for stolen buffers" - replace i915_gem_object_pin_pages_unlocked() call with manual locking and pinning. this avoids ww ctx class reuse during context creation -> ring vma obj alloc. v5: - detect both types of stolen as stolen buffers in "drm/i915: sanitize mem_flags for stolen buffers" - in stolen_object_init limit page size to mem region minimum. The range allocator expects the page_size to define the alignment v6: - Share first 4 patches from ttm for internal series as generic i915 ttm fixes - Drop patch 4 from v5. We don't need separate object ops just to satisfy test interfaces. The tests have now been fixed via checking whether the memory region is private to decide whether to mmap - Add new buffer pin alloc flag to allow creation of buffers in their final ttm placement instead of deferring until get_pages. This fixes legacy fallback paths for buffer allocations during stolen memory pressure. v7: - fix mock_region_get_pages() to correctly handle I915_BO_INVALID_OFFSET v8: - Reserve I915_GEM_STOLEN_BIAS area from stolen Robert Beckett (10): drm/i915/ttm: dont trample cache_level overrides during ttm move drm/i915: limit ttm to dma32 for i965G[M] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level drm/i915/gem: selftest should not attempt mmap of private regions drm/i915: instantiate ttm ranger manager for stolen memory drm/i915: sanitize mem_flags for stolen buffers drm/i915: ttm move/clear logic fix drm/i915: allow memory region creators to alloc and free the region drm/i915/ttm: add buffer pin on alloc flag drm/i915: stolen memory use ttm backend drivers/gpu/drm/i915/display/intel_fbc.c | 78 ++-- drivers/gpu/drm/i915/gem/i915_gem_object.c| 1 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 16 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 441 +++--- drivers/gpu/drm/i915/gem/i915_gem_stolen.h| 21 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 29 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 7 + drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 47 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 3 + drivers/gpu/drm/i915/gt/intel_rc6.c | 4 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 16 +- drivers/gpu/drm/i915/i915_debugfs.c | 7 +- drivers/gpu/drm/i915/i915_drv.h | 5 - drivers/gpu/drm/i915/intel_memory_region.c| 16 +- drivers/gpu/drm/i915/intel_memory_region.h| 2 + drivers/gpu/drm/i915/intel_region_ttm.c | 80 +++- drivers/gpu/drm/i915/intel_region_ttm.h | 8 +- drivers/gpu/drm/i915/selftests/mock_region.c | 12 +- 18 files changed, 424 insertions(+), 369 deletions(-) -- 2.25.1
[PATCH v7 10/10] drm/i915: stolen memory use ttm backend
refactor stolen memory region to use ttm. this necessitates using ttm resources to track reserved stolen regions instead of drm_mm_nodes. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/display/intel_fbc.c | 78 ++-- .../gpu/drm/i915/gem/i915_gem_object_types.h | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 440 +++--- drivers/gpu/drm/i915/gem/i915_gem_stolen.h| 21 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 7 + drivers/gpu/drm/i915/gt/intel_rc6.c | 4 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 16 +- drivers/gpu/drm/i915/i915_debugfs.c | 7 +- drivers/gpu/drm/i915/i915_drv.h | 5 - drivers/gpu/drm/i915/intel_region_ttm.c | 42 +- drivers/gpu/drm/i915/intel_region_ttm.h | 8 +- drivers/gpu/drm/i915/selftests/mock_region.c | 12 +- 13 files changed, 303 insertions(+), 342 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 8b807284cde1..6f3afac5e8c9 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -38,6 +38,7 @@ * forcibly disable it to allow proper screen updates. */ +#include "gem/i915_gem_stolen.h" #include #include @@ -51,6 +52,7 @@ #include "intel_display_types.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" +#include "gem/i915_gem_region.h" #define for_each_fbc_id(__dev_priv, __fbc_id) \ for ((__fbc_id) = INTEL_FBC_A; (__fbc_id) < I915_MAX_FBCS; (__fbc_id)++) \ @@ -92,8 +94,8 @@ struct intel_fbc { struct mutex lock; unsigned int busy_bits; - struct drm_mm_node compressed_fb; - struct drm_mm_node compressed_llb; + struct ttm_resource *compressed_fb; + struct ttm_resource *compressed_llb; enum intel_fbc_id id; @@ -331,16 +333,20 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc) static void i8xx_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); + u64 llb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_llb); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + GEM_BUG_ON(llb_offset == I915_BO_INVALID_OFFSET); GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start, -fbc->compressed_fb.start, U32_MAX)); +fb_offset, U32_MAX)); GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start, -fbc->compressed_llb.start, U32_MAX)); +llb_offset, U32_MAX)); intel_de_write(i915, FBC_CFB_BASE, - i915->dsm.start + fbc->compressed_fb.start); + i915->dsm.start + fb_offset); intel_de_write(i915, FBC_LL_BASE, - i915->dsm.start + fbc->compressed_llb.start); + i915->dsm.start + llb_offset); } static const struct intel_fbc_funcs i8xx_fbc_funcs = { @@ -448,8 +454,10 @@ static bool g4x_fbc_is_compressing(struct intel_fbc *fbc) static void g4x_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); - intel_de_write(i915, DPFC_CB_BASE, fbc->compressed_fb.start); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + intel_de_write(i915, DPFC_CB_BASE, fb_offset); } static const struct intel_fbc_funcs g4x_fbc_funcs = { @@ -499,8 +507,10 @@ static bool ilk_fbc_is_compressing(struct intel_fbc *fbc) static void ilk_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); - intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fbc->compressed_fb.start); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fb_offset); } static const struct intel_fbc_funcs ilk_fbc_funcs = { @@ -744,21 +754,24 @@ static int find_compression_limit(struct intel_fbc *fbc, { struct drm_i915_private *i915 = fbc->i915; u64 end = intel_fbc_stolen_end(i915); - int ret, limit = min_limit; + int limit = min_limit; + struct ttm_resource *res; size /= limit; /* Try to over-allocate to reduce reallocations and fragmentation. */ - ret = i915_gem_stolen_insert_node_in_range(i915, >compressed_fb, - size <<= 1, 4096, 0, end); - if (ret == 0) + res = i915_gem_stolen_reserve_range(i915, size <<= 1, 0, end)
[PATCH v7 05/10] drm/i915: instantiate ttm ranger manager for stolen memory
prepare for ttm based stolen region by using ttm range manager as the resource manager for stolen region. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 ++-- drivers/gpu/drm/i915/intel_region_ttm.c | 31 +++- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 40249fa28a7a..675e9ab30396 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -60,11 +60,13 @@ i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type) struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); /* There's some room for optimization here... */ - GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM && - ttm_mem_type < I915_PL_LMEM0); + GEM_BUG_ON(ttm_mem_type == I915_PL_GGTT); + if (ttm_mem_type == I915_PL_SYSTEM) return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM, 0); + if (ttm_mem_type == I915_PL_STOLEN) + return i915->mm.stolen_region; return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL, ttm_mem_type - I915_PL_LMEM0); diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index fd2ecfdd8fa1..694e9acb69e2 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -54,7 +54,7 @@ void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv) /* * Map the i915 memory regions to TTM memory types. We use the - * driver-private types for now, reserving TTM_PL_VRAM for stolen + * driver-private types for now, reserving I915_PL_STOLEN for stolen * memory and TTM_PL_TT for GGTT use if decided to implement this. */ int intel_region_to_ttm_type(const struct intel_memory_region *mem) @@ -63,11 +63,17 @@ int intel_region_to_ttm_type(const struct intel_memory_region *mem) GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL && mem->type != INTEL_MEMORY_MOCK && - mem->type != INTEL_MEMORY_SYSTEM); + mem->type != INTEL_MEMORY_SYSTEM && + mem->type != INTEL_MEMORY_STOLEN_SYSTEM && + mem->type != INTEL_MEMORY_STOLEN_LOCAL); if (mem->type == INTEL_MEMORY_SYSTEM) return TTM_PL_SYSTEM; + if (mem->type == INTEL_MEMORY_STOLEN_SYSTEM || + mem->type == INTEL_MEMORY_STOLEN_LOCAL) + return I915_PL_STOLEN; + type = mem->instance + TTM_PL_PRIV; GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES); @@ -91,10 +97,16 @@ int intel_region_ttm_init(struct intel_memory_region *mem) int mem_type = intel_region_to_ttm_type(mem); int ret; - ret = i915_ttm_buddy_man_init(bdev, mem_type, false, - resource_size(>region), - mem->io_size, - mem->min_page_size, PAGE_SIZE); + if (mem_type == I915_PL_STOLEN) { + ret = ttm_range_man_init(bdev, mem_type, false, +resource_size(>region) >> PAGE_SHIFT); + mem->is_range_manager = true; + } else { + ret = i915_ttm_buddy_man_init(bdev, mem_type, false, + resource_size(>region), + mem->io_size, + mem->min_page_size, PAGE_SIZE); + } if (ret) return ret; @@ -114,6 +126,7 @@ int intel_region_ttm_init(struct intel_memory_region *mem) int intel_region_ttm_fini(struct intel_memory_region *mem) { struct ttm_resource_manager *man = mem->region_private; + int mem_type = intel_region_to_ttm_type(mem); int ret = -EBUSY; int count; @@ -144,8 +157,10 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) if (ret || !man) return ret; - ret = i915_ttm_buddy_man_fini(>i915->bdev, - intel_region_to_ttm_type(mem)); + if (mem_type == I915_PL_STOLEN) + ret = ttm_range_man_fini(>i915->bdev, mem_type); + else + ret = i915_ttm_buddy_man_fini(>i915->bdev, mem_type); GEM_WARN_ON(ret); mem->region_private = NULL; -- 2.25.1
[PATCH v7 07/10] drm/i915: ttm move/clear logic fix
ttm managed buffers start off with system resource definitions and ttm_tt tracking structures allocated (though unpopulated). currently this prevents clearing of buffers on first move to desired placements. The desired behaviour is to clear user allocated buffers and any kernel buffers that specifically requests it only. Make the logic match the desired behaviour. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 22 +++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 81c67ca9edda..a3f8fc056dbc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -3,6 +3,7 @@ * Copyright © 2021 Intel Corporation */ +#include "drm/ttm/ttm_tt.h" #include #include "i915_deps.h" @@ -476,6 +477,25 @@ __i915_ttm_move(struct ttm_buffer_object *bo, return fence; } +static bool +allow_clear(struct drm_i915_gem_object *obj, struct ttm_tt *ttm, struct ttm_resource *dst_mem) +{ + /* never clear stolen */ + if (dst_mem->mem_type == I915_PL_STOLEN) + return false; + /* +* we want to clear user buffers and any kernel buffers +* that specifically request clearing. +*/ + if (obj->flags & I915_BO_ALLOC_USER) + return true; + + if (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) + return true; + + return false; +} + /** * i915_ttm_move - The TTM move callback used by i915. * @bo: The buffer object. @@ -526,7 +546,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, return PTR_ERR(dst_rsgt); clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); - if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) { + if (!clear || allow_clear(obj, ttm, dst_mem)) { struct i915_deps deps; i915_deps_init(, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); -- 2.25.1
[PATCH v7 09/10] drm/i915/ttm: add buffer pin on alloc flag
For situations where allocations need to fail on alloc instead of delayed get_pages, add a new alloc flag to pin the ttm bo. This makes sure that the resource has been allocated during buffer creation, allowing it to fail with an error if the placement is exhausted. This allows existing fallback options for stolen backend allocation like create_ring_vma to work as expected. Signed-off-by: Robert Beckett --- .../gpu/drm/i915/gem/i915_gem_object_types.h | 13 ++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 25 ++- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 6632ed52e919..07bc11247a3e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -325,17 +325,20 @@ struct drm_i915_gem_object { * dealing with userspace objects the CPU fault handler is free to ignore this. */ #define I915_BO_ALLOC_GPU_ONLY BIT(6) +/* object should be pinned in destination region from allocation */ +#define I915_BO_ALLOC_PINNED BIT(7) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ I915_BO_ALLOC_CPU_CLEAR | \ I915_BO_ALLOC_USER | \ I915_BO_ALLOC_PM_VOLATILE | \ I915_BO_ALLOC_PM_EARLY | \ -I915_BO_ALLOC_GPU_ONLY) -#define I915_BO_READONLY BIT(7) -#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */ -#define I915_BO_PROTECTED BIT(9) -#define I915_BO_WAS_BOUND_BIT 10 +I915_BO_ALLOC_GPU_ONLY | \ +I915_BO_ALLOC_PINNED) +#define I915_BO_READONLY BIT(8) +#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */ +#define I915_BO_PROTECTED BIT(10) +#define I915_BO_WAS_BOUND_BIT 11 /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 27d59639177f..bb988608296d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -998,6 +998,13 @@ static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) { GEM_BUG_ON(!obj->ttm.created); + /* stolen objects are pinned for lifetime. Unpin before putting */ + if (obj->flags & I915_BO_ALLOC_PINNED) { + ttm_bo_reserve(i915_gem_to_ttm(obj), true, false, NULL); + ttm_bo_unpin(i915_gem_to_ttm(obj)); + ttm_bo_unreserve(i915_gem_to_ttm(obj)); + } + ttm_bo_put(i915_gem_to_ttm(obj)); } @@ -1193,6 +1200,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, .no_wait_gpu = false, }; enum ttm_bo_type bo_type; + struct ttm_place _place; + struct ttm_placement _placement; + struct ttm_placement *placement; int ret; drm_gem_private_object_init(>drm, >base, size); @@ -1222,6 +1232,17 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, */ i915_gem_object_make_unshrinkable(obj); + if (obj->flags & I915_BO_ALLOC_PINNED) { + i915_ttm_place_from_region(mem, &_place, obj->bo_offset, + obj->base.size, obj->flags); + _placement.num_placement = 1; + _placement.placement = &_place; + _placement.num_busy_placement = 0; + _placement.busy_placement = NULL; + placement = &_placement; + } else { + placement = _sys_placement; + } /* * If this function fails, it will call the destructor, but * our caller still owns the object. So no freeing in the @@ -1230,7 +1251,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, * until successful initialization. */ ret = ttm_bo_init_reserved(>bdev, i915_gem_to_ttm(obj), size, - bo_type, _sys_placement, + bo_type, placement, page_size >> PAGE_SHIFT, , NULL, NULL, i915_ttm_bo_destroy); if (ret) @@ -1242,6 +1263,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_ttm_adjust_domains_after_move(obj); i915_ttm_adjust_gem_after_move(obj); obj->ttm.cache_level_override = false; + if (obj->flags & I915_BO_ALLOC_PINNED) + ttm_bo_pin(i915_gem_to_ttm(obj)); i915_gem_object_unlock(obj); return 0; -- 2.25.1
[PATCH v7 04/10] drm/i915/gem: selftest should not attempt mmap of private regions
During testing make can_mmap consider whether the region is private. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5bc93a1ce3e3..76181e28c75e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -869,6 +869,9 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) struct drm_i915_private *i915 = to_i915(obj->base.dev); bool no_map; + if (obj->mm.region && obj->mm.region->private) + return false; + if (obj->ops->mmap_offset) return type == I915_MMAP_TYPE_FIXED; else if (type == I915_MMAP_TYPE_FIXED) -- 2.25.1
[PATCH v7 08/10] drm/i915: allow memory region creators to alloc and free the region
add callbacks for alloc and free. this allows region creators to allocate any extra storage they may require. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/intel_memory_region.c | 16 +--- drivers/gpu/drm/i915/intel_memory_region.h | 2 ++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index e38d2db1c3e3..3da07a712f90 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -231,7 +231,10 @@ intel_memory_region_create(struct drm_i915_private *i915, struct intel_memory_region *mem; int err; - mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (ops->alloc) + mem = ops->alloc(); + else + mem = kzalloc(sizeof(*mem), GFP_KERNEL); if (!mem) return ERR_PTR(-ENOMEM); @@ -265,7 +268,10 @@ intel_memory_region_create(struct drm_i915_private *i915, if (mem->ops->release) mem->ops->release(mem); err_free: - kfree(mem); + if (mem->ops->free) + mem->ops->free(mem); + else + kfree(mem); return ERR_PTR(err); } @@ -288,7 +294,11 @@ void intel_memory_region_destroy(struct intel_memory_region *mem) GEM_WARN_ON(!list_empty_careful(>objects.list)); mutex_destroy(>objects.lock); - if (!ret) + if (ret) + return; + if (mem->ops->free) + mem->ops->free(mem); + else kfree(mem); } diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 3d8378c1b447..048955b5429f 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -61,6 +61,8 @@ struct intel_memory_region_ops { resource_size_t size, resource_size_t page_size, unsigned int flags); + struct intel_memory_region *(*alloc)(void); + void (*free)(struct intel_memory_region *mem); }; struct intel_memory_region { -- 2.25.1
[PATCH v7 06/10] drm/i915: sanitize mem_flags for stolen buffers
Stolen regions are not page backed or considered iomem. Prevent flags indicating such. This correctly prevents stolen buffers from attempting to directly map them. See i915_gem_object_has_struct_page() and i915_gem_object_has_iomem() usage for where it would break otherwise. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 675e9ab30396..81c67ca9edda 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -14,6 +14,7 @@ #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" #include "gem/i915_gem_ttm_move.h" +#include "gem/i915_gem_stolen.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" @@ -124,8 +125,9 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); - obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : - I915_BO_FLAG_STRUCT_PAGE; + if (!i915_gem_object_is_stolen(obj)) + obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : + I915_BO_FLAG_STRUCT_PAGE; if (!obj->ttm.cache_level_override) { cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), -- 2.25.1
[PATCH v7 02/10] drm/i915: limit ttm to dma32 for i965G[M]
i965G[M] cannot relocate objects above 4GiB. Ensure ttm uses dma32 on these systems. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 62ff77445b01..fd2ecfdd8fa1 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -32,10 +32,15 @@ int intel_region_ttm_device_init(struct drm_i915_private *dev_priv) { struct drm_device *drm = _priv->drm; + bool use_dma32 = false; + + /* i965g[m] cannot relocate objects above 4GiB. */ + if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) + use_dma32 = true; return ttm_device_init(_priv->bdev, i915_ttm_driver(), drm->dev, drm->anon_inode->i_mapping, - drm->vma_offset_manager, false, false); + drm->vma_offset_manager, false, use_dma32); } /** -- 2.25.1
[PATCH v7 03/10] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level
By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP. This is divergent from existing backends code which only considers HAS_LLC. Testing shows that trusting snooping on gen5- is unreliable and bsw via ggtt mappings, so limit DGFX for now and maintain previous behaviour. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 4c1de0b4a10f..40249fa28a7a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -46,7 +46,9 @@ static enum i915_cache_level i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, struct ttm_tt *ttm) { - return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && + bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915); + + return ((HAS_LLC(i915) || can_snoop) && !i915_ttm_gtt_binds_lmem(res) && ttm->caching == ttm_cached) ? I915_CACHE_LLC : I915_CACHE_NONE; -- 2.25.1
[PATCH v7 01/10] drm/i915/ttm: dont trample cache_level overrides during ttm move
Various places within the driver override the default chosen cache_level. Before ttm, these overrides were permanent until explicitly changed again or for the lifetime of the buffer. TTM movement code came along and decided that it could make that decision at that time, which is usually well after object creation, so overrode the cache_level decision and reverted it back to its default decision. Add logic to indicate whether the caching mode has been set by anything other than the move logic. If so, assume that the code that overrode the defaults knows best and keep it. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 9 ++--- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 06b1b188ce5a..519887769c08 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, struct drm_i915_private *i915 = to_i915(obj->base.dev); obj->cache_level = cache_level; + obj->ttm.cache_level_override = true; if (cache_level != I915_CACHE_NONE) obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ | diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 2c88bdb8ff7c..6632ed52e919 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -605,6 +605,7 @@ struct drm_i915_gem_object { struct i915_gem_object_page_iter get_io_page; struct drm_i915_gem_object *backup; bool created:1; + bool cache_level_override:1; } ttm; /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4c25d9b2f138..27d59639177f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1241,6 +1241,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_init_memory_region(obj, mem); i915_ttm_adjust_domains_after_move(obj); i915_ttm_adjust_gem_after_move(obj); + obj->ttm.cache_level_override = false; i915_gem_object_unlock(obj); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index a10716f4e717..4c1de0b4a10f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -123,9 +123,12 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : I915_BO_FLAG_STRUCT_PAGE; - cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, - bo->ttm); - i915_gem_object_set_cache_coherency(obj, cache_level); + if (!obj->ttm.cache_level_override) { + cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), + bo->resource, bo->ttm); + i915_gem_object_set_cache_coherency(obj, cache_level); + obj->ttm.cache_level_override = false; + } } /** -- 2.25.1
[PATCH v7 00/10] drm/i915: ttm for stolen
This series refactors i915's stolen memory region to use ttm. v2: handle disabled stolen similar to legacy version. relying on ttm to fail allocs works fine, but is dmesg noisy and causes testing dmesg warning regressions. v3: rebase to latest drm-tip. fix v2 code refactor which could leave a buffer pinned. locally passes fftl again now. v4: - Allow memory regions creators to do allocation. Allows stolen region to track it's own reservations. - Pre-reserve first page of stolen mem (add back WaSkipStolenMemoryFirstPage:bdw+) - Improve commit descritpion for "drm/i915: sanitize mem_flags for stolen buffers" - replace i915_gem_object_pin_pages_unlocked() call with manual locking and pinning. this avoids ww ctx class reuse during context creation -> ring vma obj alloc. v5: - detect both types of stolen as stolen buffers in "drm/i915: sanitize mem_flags for stolen buffers" - in stolen_object_init limit page size to mem region minimum. The range allocator expects the page_size to define the alignment v6: - Share first 4 patches from ttm for internal series as generic i915 ttm fixes - Drop patch 4 from v5. We don't need separate object ops just to satisfy test interfaces. The tests have now been fixed via checking whether the memory region is private to decide whether to mmap - Add new buffer pin alloc flag to allow creation of buffers in their final ttm placement instead of deferring until get_pages. This fixes legacy fallback paths for buffer allocations during stolen memory pressure. v7: - fix mock_region_get_pages() to correctly handle I915_BO_INVALID_OFFSET Robert Beckett (10): drm/i915/ttm: dont trample cache_level overrides during ttm move drm/i915: limit ttm to dma32 for i965G[M] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level drm/i915/gem: selftest should not attempt mmap of private regions drm/i915: instantiate ttm ranger manager for stolen memory drm/i915: sanitize mem_flags for stolen buffers drm/i915: ttm move/clear logic fix drm/i915: allow memory region creators to alloc and free the region drm/i915/ttm: add buffer pin on alloc flag drm/i915: stolen memory use ttm backend drivers/gpu/drm/i915/display/intel_fbc.c | 78 ++-- drivers/gpu/drm/i915/gem/i915_gem_object.c| 1 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 16 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 440 +++--- drivers/gpu/drm/i915/gem/i915_gem_stolen.h| 21 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 29 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 7 + drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 47 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 3 + drivers/gpu/drm/i915/gt/intel_rc6.c | 4 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 16 +- drivers/gpu/drm/i915/i915_debugfs.c | 7 +- drivers/gpu/drm/i915/i915_drv.h | 5 - drivers/gpu/drm/i915/intel_memory_region.c| 16 +- drivers/gpu/drm/i915/intel_memory_region.h| 2 + drivers/gpu/drm/i915/intel_region_ttm.c | 80 +++- drivers/gpu/drm/i915/intel_region_ttm.h | 8 +- drivers/gpu/drm/i915/selftests/mock_region.c | 12 +- 18 files changed, 423 insertions(+), 369 deletions(-) -- 2.25.1
[PATCH v6 08/10] drm/i915: allow memory region creators to alloc and free the region
add callbacks for alloc and free. this allows region creators to allocate any extra storage they may require. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/intel_memory_region.c | 16 +--- drivers/gpu/drm/i915/intel_memory_region.h | 2 ++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index e38d2db1c3e3..3da07a712f90 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -231,7 +231,10 @@ intel_memory_region_create(struct drm_i915_private *i915, struct intel_memory_region *mem; int err; - mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (ops->alloc) + mem = ops->alloc(); + else + mem = kzalloc(sizeof(*mem), GFP_KERNEL); if (!mem) return ERR_PTR(-ENOMEM); @@ -265,7 +268,10 @@ intel_memory_region_create(struct drm_i915_private *i915, if (mem->ops->release) mem->ops->release(mem); err_free: - kfree(mem); + if (mem->ops->free) + mem->ops->free(mem); + else + kfree(mem); return ERR_PTR(err); } @@ -288,7 +294,11 @@ void intel_memory_region_destroy(struct intel_memory_region *mem) GEM_WARN_ON(!list_empty_careful(>objects.list)); mutex_destroy(>objects.lock); - if (!ret) + if (ret) + return; + if (mem->ops->free) + mem->ops->free(mem); + else kfree(mem); } diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 3d8378c1b447..048955b5429f 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -61,6 +61,8 @@ struct intel_memory_region_ops { resource_size_t size, resource_size_t page_size, unsigned int flags); + struct intel_memory_region *(*alloc)(void); + void (*free)(struct intel_memory_region *mem); }; struct intel_memory_region { -- 2.25.1
[PATCH v6 06/10] drm/i915: sanitize mem_flags for stolen buffers
Stolen regions are not page backed or considered iomem. Prevent flags indicating such. This correctly prevents stolen buffers from attempting to directly map them. See i915_gem_object_has_struct_page() and i915_gem_object_has_iomem() usage for where it would break otherwise. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 675e9ab30396..81c67ca9edda 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -14,6 +14,7 @@ #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" #include "gem/i915_gem_ttm_move.h" +#include "gem/i915_gem_stolen.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" @@ -124,8 +125,9 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); - obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : - I915_BO_FLAG_STRUCT_PAGE; + if (!i915_gem_object_is_stolen(obj)) + obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : + I915_BO_FLAG_STRUCT_PAGE; if (!obj->ttm.cache_level_override) { cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), -- 2.25.1
[PATCH v6 07/10] drm/i915: ttm move/clear logic fix
ttm managed buffers start off with system resource definitions and ttm_tt tracking structures allocated (though unpopulated). currently this prevents clearing of buffers on first move to desired placements. The desired behaviour is to clear user allocated buffers and any kernel buffers that specifically requests it only. Make the logic match the desired behaviour. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 22 +++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 81c67ca9edda..a3f8fc056dbc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -3,6 +3,7 @@ * Copyright © 2021 Intel Corporation */ +#include "drm/ttm/ttm_tt.h" #include #include "i915_deps.h" @@ -476,6 +477,25 @@ __i915_ttm_move(struct ttm_buffer_object *bo, return fence; } +static bool +allow_clear(struct drm_i915_gem_object *obj, struct ttm_tt *ttm, struct ttm_resource *dst_mem) +{ + /* never clear stolen */ + if (dst_mem->mem_type == I915_PL_STOLEN) + return false; + /* +* we want to clear user buffers and any kernel buffers +* that specifically request clearing. +*/ + if (obj->flags & I915_BO_ALLOC_USER) + return true; + + if (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) + return true; + + return false; +} + /** * i915_ttm_move - The TTM move callback used by i915. * @bo: The buffer object. @@ -526,7 +546,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, return PTR_ERR(dst_rsgt); clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); - if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) { + if (!clear || allow_clear(obj, ttm, dst_mem)) { struct i915_deps deps; i915_deps_init(, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); -- 2.25.1
[PATCH v6 09/10] drm/i915/ttm: add buffer pin on alloc flag
For situations where allocations need to fail on alloc instead of delayed get_pages, add a new alloc flag to pin the ttm bo. This makes sure that the resource has been allocated during buffer creation, allowing it to fail with an error if the placement is exhausted. This allows existing fallback options for stolen backend allocation like create_ring_vma to work as expected. Signed-off-by: Robert Beckett --- .../gpu/drm/i915/gem/i915_gem_object_types.h | 13 ++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 25 ++- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 6632ed52e919..07bc11247a3e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -325,17 +325,20 @@ struct drm_i915_gem_object { * dealing with userspace objects the CPU fault handler is free to ignore this. */ #define I915_BO_ALLOC_GPU_ONLY BIT(6) +/* object should be pinned in destination region from allocation */ +#define I915_BO_ALLOC_PINNED BIT(7) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ I915_BO_ALLOC_CPU_CLEAR | \ I915_BO_ALLOC_USER | \ I915_BO_ALLOC_PM_VOLATILE | \ I915_BO_ALLOC_PM_EARLY | \ -I915_BO_ALLOC_GPU_ONLY) -#define I915_BO_READONLY BIT(7) -#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */ -#define I915_BO_PROTECTED BIT(9) -#define I915_BO_WAS_BOUND_BIT 10 +I915_BO_ALLOC_GPU_ONLY | \ +I915_BO_ALLOC_PINNED) +#define I915_BO_READONLY BIT(8) +#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */ +#define I915_BO_PROTECTED BIT(10) +#define I915_BO_WAS_BOUND_BIT 11 /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 27d59639177f..bb988608296d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -998,6 +998,13 @@ static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) { GEM_BUG_ON(!obj->ttm.created); + /* stolen objects are pinned for lifetime. Unpin before putting */ + if (obj->flags & I915_BO_ALLOC_PINNED) { + ttm_bo_reserve(i915_gem_to_ttm(obj), true, false, NULL); + ttm_bo_unpin(i915_gem_to_ttm(obj)); + ttm_bo_unreserve(i915_gem_to_ttm(obj)); + } + ttm_bo_put(i915_gem_to_ttm(obj)); } @@ -1193,6 +1200,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, .no_wait_gpu = false, }; enum ttm_bo_type bo_type; + struct ttm_place _place; + struct ttm_placement _placement; + struct ttm_placement *placement; int ret; drm_gem_private_object_init(>drm, >base, size); @@ -1222,6 +1232,17 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, */ i915_gem_object_make_unshrinkable(obj); + if (obj->flags & I915_BO_ALLOC_PINNED) { + i915_ttm_place_from_region(mem, &_place, obj->bo_offset, + obj->base.size, obj->flags); + _placement.num_placement = 1; + _placement.placement = &_place; + _placement.num_busy_placement = 0; + _placement.busy_placement = NULL; + placement = &_placement; + } else { + placement = _sys_placement; + } /* * If this function fails, it will call the destructor, but * our caller still owns the object. So no freeing in the @@ -1230,7 +1251,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, * until successful initialization. */ ret = ttm_bo_init_reserved(>bdev, i915_gem_to_ttm(obj), size, - bo_type, _sys_placement, + bo_type, placement, page_size >> PAGE_SHIFT, , NULL, NULL, i915_ttm_bo_destroy); if (ret) @@ -1242,6 +1263,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_ttm_adjust_domains_after_move(obj); i915_ttm_adjust_gem_after_move(obj); obj->ttm.cache_level_override = false; + if (obj->flags & I915_BO_ALLOC_PINNED) + ttm_bo_pin(i915_gem_to_ttm(obj)); i915_gem_object_unlock(obj); return 0; -- 2.25.1
[PATCH v6 10/10] drm/i915: stolen memory use ttm backend
refactor stolen memory region to use ttm. this necessitates using ttm resources to track reserved stolen regions instead of drm_mm_nodes. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/display/intel_fbc.c | 78 ++-- .../gpu/drm/i915/gem/i915_gem_object_types.h | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 440 +++--- drivers/gpu/drm/i915/gem/i915_gem_stolen.h| 21 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 7 + drivers/gpu/drm/i915/gt/intel_rc6.c | 4 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 16 +- drivers/gpu/drm/i915/i915_debugfs.c | 7 +- drivers/gpu/drm/i915/i915_drv.h | 5 - drivers/gpu/drm/i915/intel_region_ttm.c | 42 +- drivers/gpu/drm/i915/intel_region_ttm.h | 8 +- drivers/gpu/drm/i915/selftests/mock_region.c | 3 +- 13 files changed, 294 insertions(+), 342 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 8b807284cde1..6f3afac5e8c9 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -38,6 +38,7 @@ * forcibly disable it to allow proper screen updates. */ +#include "gem/i915_gem_stolen.h" #include #include @@ -51,6 +52,7 @@ #include "intel_display_types.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" +#include "gem/i915_gem_region.h" #define for_each_fbc_id(__dev_priv, __fbc_id) \ for ((__fbc_id) = INTEL_FBC_A; (__fbc_id) < I915_MAX_FBCS; (__fbc_id)++) \ @@ -92,8 +94,8 @@ struct intel_fbc { struct mutex lock; unsigned int busy_bits; - struct drm_mm_node compressed_fb; - struct drm_mm_node compressed_llb; + struct ttm_resource *compressed_fb; + struct ttm_resource *compressed_llb; enum intel_fbc_id id; @@ -331,16 +333,20 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc) static void i8xx_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); + u64 llb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_llb); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + GEM_BUG_ON(llb_offset == I915_BO_INVALID_OFFSET); GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start, -fbc->compressed_fb.start, U32_MAX)); +fb_offset, U32_MAX)); GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start, -fbc->compressed_llb.start, U32_MAX)); +llb_offset, U32_MAX)); intel_de_write(i915, FBC_CFB_BASE, - i915->dsm.start + fbc->compressed_fb.start); + i915->dsm.start + fb_offset); intel_de_write(i915, FBC_LL_BASE, - i915->dsm.start + fbc->compressed_llb.start); + i915->dsm.start + llb_offset); } static const struct intel_fbc_funcs i8xx_fbc_funcs = { @@ -448,8 +454,10 @@ static bool g4x_fbc_is_compressing(struct intel_fbc *fbc) static void g4x_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); - intel_de_write(i915, DPFC_CB_BASE, fbc->compressed_fb.start); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + intel_de_write(i915, DPFC_CB_BASE, fb_offset); } static const struct intel_fbc_funcs g4x_fbc_funcs = { @@ -499,8 +507,10 @@ static bool ilk_fbc_is_compressing(struct intel_fbc *fbc) static void ilk_fbc_program_cfb(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; + u64 fb_offset = i915_gem_stolen_reserve_offset(fbc->compressed_fb); - intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fbc->compressed_fb.start); + GEM_BUG_ON(fb_offset == I915_BO_INVALID_OFFSET); + intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fb_offset); } static const struct intel_fbc_funcs ilk_fbc_funcs = { @@ -744,21 +754,24 @@ static int find_compression_limit(struct intel_fbc *fbc, { struct drm_i915_private *i915 = fbc->i915; u64 end = intel_fbc_stolen_end(i915); - int ret, limit = min_limit; + int limit = min_limit; + struct ttm_resource *res; size /= limit; /* Try to over-allocate to reduce reallocations and fragmentation. */ - ret = i915_gem_stolen_insert_node_in_range(i915, >compressed_fb, - size <<= 1, 4096, 0, end); - if (ret == 0) + res = i915_gem_stolen_reserve_range(i915, size <<= 1, 0, end)
[PATCH v6 05/10] drm/i915: instantiate ttm ranger manager for stolen memory
prepare for ttm based stolen region by using ttm range manager as the resource manager for stolen region. Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 ++-- drivers/gpu/drm/i915/intel_region_ttm.c | 31 +++- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 40249fa28a7a..675e9ab30396 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -60,11 +60,13 @@ i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type) struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); /* There's some room for optimization here... */ - GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM && - ttm_mem_type < I915_PL_LMEM0); + GEM_BUG_ON(ttm_mem_type == I915_PL_GGTT); + if (ttm_mem_type == I915_PL_SYSTEM) return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM, 0); + if (ttm_mem_type == I915_PL_STOLEN) + return i915->mm.stolen_region; return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL, ttm_mem_type - I915_PL_LMEM0); diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index fd2ecfdd8fa1..694e9acb69e2 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -54,7 +54,7 @@ void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv) /* * Map the i915 memory regions to TTM memory types. We use the - * driver-private types for now, reserving TTM_PL_VRAM for stolen + * driver-private types for now, reserving I915_PL_STOLEN for stolen * memory and TTM_PL_TT for GGTT use if decided to implement this. */ int intel_region_to_ttm_type(const struct intel_memory_region *mem) @@ -63,11 +63,17 @@ int intel_region_to_ttm_type(const struct intel_memory_region *mem) GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL && mem->type != INTEL_MEMORY_MOCK && - mem->type != INTEL_MEMORY_SYSTEM); + mem->type != INTEL_MEMORY_SYSTEM && + mem->type != INTEL_MEMORY_STOLEN_SYSTEM && + mem->type != INTEL_MEMORY_STOLEN_LOCAL); if (mem->type == INTEL_MEMORY_SYSTEM) return TTM_PL_SYSTEM; + if (mem->type == INTEL_MEMORY_STOLEN_SYSTEM || + mem->type == INTEL_MEMORY_STOLEN_LOCAL) + return I915_PL_STOLEN; + type = mem->instance + TTM_PL_PRIV; GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES); @@ -91,10 +97,16 @@ int intel_region_ttm_init(struct intel_memory_region *mem) int mem_type = intel_region_to_ttm_type(mem); int ret; - ret = i915_ttm_buddy_man_init(bdev, mem_type, false, - resource_size(>region), - mem->io_size, - mem->min_page_size, PAGE_SIZE); + if (mem_type == I915_PL_STOLEN) { + ret = ttm_range_man_init(bdev, mem_type, false, +resource_size(>region) >> PAGE_SHIFT); + mem->is_range_manager = true; + } else { + ret = i915_ttm_buddy_man_init(bdev, mem_type, false, + resource_size(>region), + mem->io_size, + mem->min_page_size, PAGE_SIZE); + } if (ret) return ret; @@ -114,6 +126,7 @@ int intel_region_ttm_init(struct intel_memory_region *mem) int intel_region_ttm_fini(struct intel_memory_region *mem) { struct ttm_resource_manager *man = mem->region_private; + int mem_type = intel_region_to_ttm_type(mem); int ret = -EBUSY; int count; @@ -144,8 +157,10 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) if (ret || !man) return ret; - ret = i915_ttm_buddy_man_fini(>i915->bdev, - intel_region_to_ttm_type(mem)); + if (mem_type == I915_PL_STOLEN) + ret = ttm_range_man_fini(>i915->bdev, mem_type); + else + ret = i915_ttm_buddy_man_fini(>i915->bdev, mem_type); GEM_WARN_ON(ret); mem->region_private = NULL; -- 2.25.1
[PATCH v6 02/10] drm/i915: limit ttm to dma32 for i965G[M]
i965G[M] cannot relocate objects above 4GiB. Ensure ttm uses dma32 on these systems. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 62ff77445b01..fd2ecfdd8fa1 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -32,10 +32,15 @@ int intel_region_ttm_device_init(struct drm_i915_private *dev_priv) { struct drm_device *drm = _priv->drm; + bool use_dma32 = false; + + /* i965g[m] cannot relocate objects above 4GiB. */ + if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) + use_dma32 = true; return ttm_device_init(_priv->bdev, i915_ttm_driver(), drm->dev, drm->anon_inode->i_mapping, - drm->vma_offset_manager, false, false); + drm->vma_offset_manager, false, use_dma32); } /** -- 2.25.1
[PATCH v6 04/10] drm/i915/gem: selftest should not attempt mmap of private regions
During testing make can_mmap consider whether the region is private. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5bc93a1ce3e3..76181e28c75e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -869,6 +869,9 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) struct drm_i915_private *i915 = to_i915(obj->base.dev); bool no_map; + if (obj->mm.region && obj->mm.region->private) + return false; + if (obj->ops->mmap_offset) return type == I915_MMAP_TYPE_FIXED; else if (type == I915_MMAP_TYPE_FIXED) -- 2.25.1
[PATCH v6 03/10] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level
By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP. This is divergent from existing backends code which only considers HAS_LLC. Testing shows that trusting snooping on gen5- is unreliable and bsw via ggtt mappings, so limit DGFX for now and maintain previous behaviour. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 4c1de0b4a10f..40249fa28a7a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -46,7 +46,9 @@ static enum i915_cache_level i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, struct ttm_tt *ttm) { - return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && + bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915); + + return ((HAS_LLC(i915) || can_snoop) && !i915_ttm_gtt_binds_lmem(res) && ttm->caching == ttm_cached) ? I915_CACHE_LLC : I915_CACHE_NONE; -- 2.25.1
[PATCH v6 01/10] drm/i915/ttm: dont trample cache_level overrides during ttm move
Various places within the driver override the default chosen cache_level. Before ttm, these overrides were permanent until explicitly changed again or for the lifetime of the buffer. TTM movement code came along and decided that it could make that decision at that time, which is usually well after object creation, so overrode the cache_level decision and reverted it back to its default decision. Add logic to indicate whether the caching mode has been set by anything other than the move logic. If so, assume that the code that overrode the defaults knows best and keep it. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 9 ++--- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 06b1b188ce5a..519887769c08 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, struct drm_i915_private *i915 = to_i915(obj->base.dev); obj->cache_level = cache_level; + obj->ttm.cache_level_override = true; if (cache_level != I915_CACHE_NONE) obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ | diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 2c88bdb8ff7c..6632ed52e919 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -605,6 +605,7 @@ struct drm_i915_gem_object { struct i915_gem_object_page_iter get_io_page; struct drm_i915_gem_object *backup; bool created:1; + bool cache_level_override:1; } ttm; /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4c25d9b2f138..27d59639177f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1241,6 +1241,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_init_memory_region(obj, mem); i915_ttm_adjust_domains_after_move(obj); i915_ttm_adjust_gem_after_move(obj); + obj->ttm.cache_level_override = false; i915_gem_object_unlock(obj); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index a10716f4e717..4c1de0b4a10f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -123,9 +123,12 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : I915_BO_FLAG_STRUCT_PAGE; - cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, - bo->ttm); - i915_gem_object_set_cache_coherency(obj, cache_level); + if (!obj->ttm.cache_level_override) { + cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), + bo->resource, bo->ttm); + i915_gem_object_set_cache_coherency(obj, cache_level); + obj->ttm.cache_level_override = false; + } } /** -- 2.25.1
[PATCH v6 00/10] drm/i915: ttm for stolen
This series refactors i915's stolen memory region to use ttm. v2: handle disabled stolen similar to legacy version. relying on ttm to fail allocs works fine, but is dmesg noisy and causes testing dmesg warning regressions. v3: rebase to latest drm-tip. fix v2 code refactor which could leave a buffer pinned. locally passes fftl again now. v4: - Allow memory regions creators to do allocation. Allows stolen region to track it's own reservations. - Pre-reserve first page of stolen mem (add back WaSkipStolenMemoryFirstPage:bdw+) - Improve commit descritpion for "drm/i915: sanitize mem_flags for stolen buffers" - replace i915_gem_object_pin_pages_unlocked() call with manual locking and pinning. this avoids ww ctx class reuse during context creation -> ring vma obj alloc. v5: - detect both types of stolen as stolen buffers in "drm/i915: sanitize mem_flags for stolen buffers" - in stolen_object_init limit page size to mem region minimum. The range allocator expects the page_size to define the alignment v6: - Share first 4 patches from ttm for internal series as generic i915 ttm fixes - Drop patch 4 from v5. We don't need separate object ops just to satisfy test interfaces. The tests have now been fixed via checking whether the memory region is private to decide whether to mmap - Add new buffer pin alloc flag to allow creation of buffers in their final ttm placement instead of deferring until get_pages. This fixes legacy fallback paths for buffer allocations during stolen memory pressure. Robert Beckett (10): drm/i915/ttm: dont trample cache_level overrides during ttm move drm/i915: limit ttm to dma32 for i965G[M] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level drm/i915/gem: selftest should not attempt mmap of private regions drm/i915: instantiate ttm ranger manager for stolen memory drm/i915: sanitize mem_flags for stolen buffers drm/i915: ttm move/clear logic fix drm/i915: allow memory region creators to alloc and free the region drm/i915/ttm: add buffer pin on alloc flag drm/i915: stolen memory use ttm backend drivers/gpu/drm/i915/display/intel_fbc.c | 78 ++-- drivers/gpu/drm/i915/gem/i915_gem_object.c| 1 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 16 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 440 +++--- drivers/gpu/drm/i915/gem/i915_gem_stolen.h| 21 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 29 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 7 + drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 47 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 3 + drivers/gpu/drm/i915/gt/intel_rc6.c | 4 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 16 +- drivers/gpu/drm/i915/i915_debugfs.c | 7 +- drivers/gpu/drm/i915/i915_drv.h | 5 - drivers/gpu/drm/i915/intel_memory_region.c| 16 +- drivers/gpu/drm/i915/intel_memory_region.h| 2 + drivers/gpu/drm/i915/intel_region_ttm.c | 80 +++- drivers/gpu/drm/i915/intel_region_ttm.h | 8 +- drivers/gpu/drm/i915/selftests/mock_region.c | 3 +- 18 files changed, 414 insertions(+), 369 deletions(-) -- 2.25.1
[PATCH v3 3/8] drm/i915: setup ggtt scratch page after memory regions
Reorder scratch page allocation so that memory regions are available to allocate the buffers Signed-off-by: Robert Beckett Reviewed-by: Thomas Hellström --- drivers/gpu/drm/i915/gt/intel_gt_gmch.c | 20 ++-- drivers/gpu/drm/i915/gt/intel_gt_gmch.h | 6 ++ drivers/gpu/drm/i915/i915_driver.c | 16 ++-- 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c index 18e488672d1b..5411df1734ac 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c @@ -440,8 +440,6 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) struct drm_i915_private *i915 = ggtt->vm.i915; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); phys_addr_t phys_addr; - u32 pte_flags; - int ret; GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915)); phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915); @@ -463,6 +461,24 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) } kref_init(>vm.resv_ref); + + return 0; +} + +/** + * i915_ggtt_setup_scratch_page - setup ggtt scratch page + * @i915: i915 device + */ +int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + u32 pte_flags; + int ret; + + /* gen5- scratch setup currently happens in @intel_gtt_init */ + if (GRAPHICS_VER(i915) <= 5) + return 0; + ret = setup_scratch_page(>vm); if (ret) { drm_err(>drm, "Scratch setup failed\n"); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h index 75ed55c1f30a..c6b79cb78637 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h @@ -15,6 +15,7 @@ int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt); int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt); int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt); int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915); +int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915); /* Stubs for non-x86 platforms */ #else @@ -41,6 +42,11 @@ static inline int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915) /* No HW should be enabled for this case yet, return fail */ return -ENODEV; } + +static inline int i915_ggtt_setup_scratch_page(struct drm_i915_private *i915) +{ + return 0; +} #endif #endif /* __INTEL_GT_GMCH_H__ */ diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index d26dcca7e654..4e8a92ffbfe9 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -69,6 +69,7 @@ #include "gem/i915_gem_mman.h" #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_gmch.h" #include "gt/intel_gt_pm.h" #include "gt/intel_rc6.h" @@ -605,12 +606,16 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) ret = intel_gt_tiles_init(dev_priv); if (ret) - goto err_mem_regions; + goto err_ggtt; + + ret = i915_ggtt_setup_scratch_page(dev_priv); + if (ret) + goto err_ggtt; ret = i915_ggtt_enable_hw(dev_priv); if (ret) { drm_err(_priv->drm, "failed to enable GGTT\n"); - goto err_mem_regions; + goto err_ggtt; } pci_set_master(pdev); @@ -662,11 +667,10 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) err_msi: if (pdev->msi_enabled) pci_disable_msi(pdev); -err_mem_regions: - intel_memory_regions_driver_release(dev_priv); err_ggtt: i915_ggtt_driver_release(dev_priv); i915_gem_drain_freed_objects(dev_priv); + intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_late_release(dev_priv); err_perf: i915_perf_fini(dev_priv); @@ -912,9 +916,9 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) intel_modeset_driver_remove_nogem(i915); out_cleanup_hw: i915_driver_hw_remove(i915); - intel_memory_regions_driver_release(i915); i915_ggtt_driver_release(i915); i915_gem_drain_freed_objects(i915); + intel_memory_regions_driver_release(i915); i915_ggtt_driver_late_release(i915); out_cleanup_mmio: i915_driver_mmio_release(i915); @@ -971,9 +975,9 @@ static void i915_driver_release(struct drm_device *dev) i915_gem_driver_release(dev_priv); - intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_release(dev_priv); i915_gem_drain_freed_objects(dev_p
[PATCH v3 5/8] drm/i915: limit ttm to dma32 for i965G[M]
i965G[M] cannot relocate objects above 4GiB. Ensure ttm uses dma32 on these systems. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 62ff77445b01..fd2ecfdd8fa1 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -32,10 +32,15 @@ int intel_region_ttm_device_init(struct drm_i915_private *dev_priv) { struct drm_device *drm = _priv->drm; + bool use_dma32 = false; + + /* i965g[m] cannot relocate objects above 4GiB. */ + if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) + use_dma32 = true; return ttm_device_init(_priv->bdev, i915_ttm_driver(), drm->dev, drm->anon_inode->i_mapping, - drm->vma_offset_manager, false, false); + drm->vma_offset_manager, false, use_dma32); } /** -- 2.25.1
[PATCH v3 6/8] drm/i915/gem: further fix mman selftest
In commit 450cede7f380 ("drm/i915/gem: Fix the mman selftest") we fixed up the mman selftest to allocate user buffers via smem only if we have lmem, otherwise it uses internal buffers. As the commit message asserts, we should only be using buffers that userland should be able to create. Internal buffers are not intended to be used by userland. Instead, fix the code to always create buffers from smem. In the case of integrated, this will create them from the shmem non-ttm backend, which is fine. This also fixes up the code to allow conversion of internal backend to ttm without breaking this test. Signed-off-by: Robert Beckett --- .../gpu/drm/i915/gem/selftests/i915_gem_mman.c | 17 ++--- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5bc93a1ce3e3..ee2ad1281f97 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -594,17 +594,12 @@ static enum i915_mmap_type default_mapping(struct drm_i915_private *i915) } static struct drm_i915_gem_object * -create_sys_or_internal(struct drm_i915_private *i915, - unsigned long size) +create_sys(struct drm_i915_private *i915, unsigned long size) { - if (HAS_LMEM(i915)) { - struct intel_memory_region *sys_region = - i915->mm.regions[INTEL_REGION_SMEM]; + struct intel_memory_region *sys_region = + i915->mm.regions[INTEL_REGION_SMEM]; - return __i915_gem_object_create_user(i915, size, _region, 1); - } - - return i915_gem_object_create_internal(i915, size); + return __i915_gem_object_create_user(i915, size, _region, 1); } static bool assert_mmap_offset(struct drm_i915_private *i915, @@ -615,7 +610,7 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, u64 offset; int ret; - obj = create_sys_or_internal(i915, size); + obj = create_sys(i915, size); if (IS_ERR(obj)) return expected && expected == PTR_ERR(obj); @@ -717,7 +712,7 @@ static int igt_mmap_offset_exhaustion(void *arg) } /* Fill the hole, further allocation attempts should then fail */ - obj = create_sys_or_internal(i915, PAGE_SIZE); + obj = create_sys(i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); pr_err("Unable to create object for reclaimed hole\n"); -- 2.25.1
[PATCH v3 8/8] drm/i915: internal buffers use ttm backend
Create a kernel only internal memory region that uses ttm pool allocator to allocate volatile system pages. Refactor internal buffer backend to simply allocate from this new region. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 187 +- drivers/gpu/drm/i915/gem/i915_gem_internal.h | 5 - drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 1 + .../drm/i915/gem/selftests/i915_gem_mman.c| 3 + drivers/gpu/drm/i915/i915_pci.c | 4 +- drivers/gpu/drm/i915/intel_memory_region.c| 8 +- drivers/gpu/drm/i915/intel_memory_region.h| 2 + .../gpu/drm/i915/selftests/mock_gem_device.c | 2 +- 8 files changed, 21 insertions(+), 191 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..a83751867ac7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -4,188 +4,9 @@ * Copyright © 2014-2016 Intel Corporation */ -#include -#include -#include - +#include "gem/i915_gem_internal.h" +#include "gem/i915_gem_region.h" #include "i915_drv.h" -#include "i915_gem.h" -#include "i915_gem_internal.h" -#include "i915_gem_object.h" -#include "i915_scatterlist.h" -#include "i915_utils.h" - -#define QUIET (__GFP_NORETRY | __GFP_NOWARN) -#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN) - -static void internal_free_pages(struct sg_table *st) -{ - struct scatterlist *sg; - - for (sg = st->sgl; sg; sg = __sg_next(sg)) { - if (sg_page(sg)) - __free_pages(sg_page(sg), get_order(sg->length)); - } - - sg_free_table(st); - kfree(st); -} - -static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *st; - struct scatterlist *sg; - unsigned int sg_page_sizes; - unsigned int npages; - int max_order; - gfp_t gfp; - - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } - } -#endif - - gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; - if (IS_I965GM(i915) || IS_I965G(i915)) { - /* 965gm cannot relocate objects above 4GiB. */ - gfp &= ~__GFP_HIGHMEM; - gfp |= __GFP_DMA32; - } - -create_st: - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return -ENOMEM; - - npages = obj->base.size / PAGE_SIZE; - if (sg_alloc_table(st, npages, GFP_KERNEL)) { - kfree(st); - return -ENOMEM; - } - - sg = st->sgl; - st->nents = 0; - sg_page_sizes = 0; - - do { - int order = min(fls(npages) - 1, max_order); - struct page *page; - - do { - page = alloc_pages(gfp | (order ? QUIET : MAYFAIL), - order); - if (page) - break; - if (!order--) - goto err; - - /* Limit subsequent allocations as well */ - max_order = order; - } while (1); - - sg_set_page(sg, page, PAGE_SIZE << order, 0); - sg_page_sizes |= PAGE_SIZE << order; - st->nents++; - - npages -= 1 << order; - if (!npages) { - sg_mark_end(sg); - break; - } - - sg = __sg_next(sg); - } while (1); - - if (i915_gem_gtt_prepare_pages(obj, st)) { - /* Failed to dma-map try again with single page sg segments */ - if (get_order(st->sgl->length)) { - internal_free_pages(st); - max_order = 0; - goto create_st; - } - goto err; - } - - __i915_gem_object_set_pages(obj, st, sg_page_sizes); - - return 0; - -err: - sg_set_page(sg, NULL, 0, 0); - sg_mark_end(sg); - internal_free_pages(st); - - return -ENOMEM; -} - -static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - i915_gem_gtt_finish_pages(
[PATCH v3 7/8] drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level
By default i915_ttm_cache_level() decides I915_CACHE_LLC if HAS_SNOOP. This is divergent from existing backends code which only considers HAS_LLC. Testing shows that trusting snooping on gen5- is unreliable and bsw via ggtt mappings, so limit DGFX for now and maintain previous behaviour. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 4c1de0b4a10f..40249fa28a7a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -46,7 +46,9 @@ static enum i915_cache_level i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, struct ttm_tt *ttm) { - return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && + bool can_snoop = HAS_SNOOP(i915) && IS_DGFX(i915); + + return ((HAS_LLC(i915) || can_snoop) && !i915_ttm_gtt_binds_lmem(res) && ttm->caching == ttm_cached) ? I915_CACHE_LLC : I915_CACHE_NONE; -- 2.25.1
[PATCH v3 4/8] drm/i915: allow volatile buffers to use ttm pool allocator
Internal/volatile buffers should not be shmem backed. If a volatile buffer is requested, allow ttm to use the pool allocator to provide volatile pages as backing. Fix i915_ttm_shrink to handle !is_shmem volatile buffers by purging. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 27d59639177f..8edce04a0509 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -309,7 +309,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, page_flags |= TTM_TT_FLAG_ZERO_ALLOC; caching = i915_ttm_select_tt_caching(obj); - if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) { + if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached && + !i915_gem_object_is_volatile(obj)) { page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE; i915_tt->is_shmem = true; @@ -531,9 +532,9 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags) if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM) return 0; - GEM_BUG_ON(!i915_tt->is_shmem); + GEM_BUG_ON(!i915_tt->is_shmem && obj->mm.madv != I915_MADV_DONTNEED); - if (!i915_tt->filp) + if (i915_tt->is_shmem && !i915_tt->filp) return 0; ret = ttm_bo_wait_ctx(bo, ); -- 2.25.1
[PATCH v3 2/8] drm/i915: add gen6 ppgtt dummy creation function
Internal gem objects will soon just be volatile system memory region objects. To enable this, create a separate dummy object creation function for gen6 ppgtt. The object only exists as a fake object pointing to ggtt and gains no benefit in going via the internal backend. Instead, create a dummy gem object and avoid having to maintain a custom ops api in the internal backend, which makes later refactoring easier. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 43 ++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 1bb766c79dcb..f3b660cfeb7f 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -372,6 +372,45 @@ static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = { .put_pages = pd_dummy_obj_put_pages, }; +static struct drm_i915_gem_object * +i915_gem_object_create_dummy(struct drm_i915_private *i915, phys_addr_t size) +{ + static struct lock_class_key lock_class; + struct drm_i915_gem_object *obj; + unsigned int cache_level; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(>drm, >base, size); + i915_gem_object_init(obj, _dummy_obj_ops, _class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; + + /* +* Mark the object as volatile, such that the pages are marked as +* dontneed whilst they are still pinned. As soon as they are unpinned +* they are allowed to be reaped by the shrinker, and the caller is +* expected to repopulate - the contents of this object are only valid +* whilst active and pinned. +*/ + i915_gem_object_set_volatile(obj); + + obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU; + + cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; + i915_gem_object_set_cache_coherency(obj, cache_level); + + return obj; +} + static struct i915_page_directory * gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) { @@ -383,9 +422,7 @@ gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) if (unlikely(!pd)) return ERR_PTR(-ENOMEM); - pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915, - _dummy_obj_ops, - I915_PDES * SZ_4K); + pd->pt.base = i915_gem_object_create_dummy(ppgtt->base.vm.gt->i915, I915_PDES * SZ_4K); if (IS_ERR(pd->pt.base)) { err = PTR_ERR(pd->pt.base); pd->pt.base = NULL; -- 2.25.1
[PATCH v3 1/8] drm/i915/ttm: dont trample cache_level overrides during ttm move
Various places within the driver override the default chosen cache_level. Before ttm, these overrides were permanent until explicitly changed again or for the lifetime of the buffer. TTM movement code came along and decided that it could make that decision at that time, which is usually well after object creation, so overrode the cache_level decision and reverted it back to its default decision. Add logic to indicate whether the caching mode has been set by anything other than the move logic. If so, assume that the code that overrode the defaults knows best and keep it. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 9 ++--- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 06b1b188ce5a..519887769c08 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -125,6 +125,7 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, struct drm_i915_private *i915 = to_i915(obj->base.dev); obj->cache_level = cache_level; + obj->ttm.cache_level_override = true; if (cache_level != I915_CACHE_NONE) obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ | diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 2c88bdb8ff7c..6632ed52e919 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -605,6 +605,7 @@ struct drm_i915_gem_object { struct i915_gem_object_page_iter get_io_page; struct drm_i915_gem_object *backup; bool created:1; + bool cache_level_override:1; } ttm; /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4c25d9b2f138..27d59639177f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1241,6 +1241,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_init_memory_region(obj, mem); i915_ttm_adjust_domains_after_move(obj); i915_ttm_adjust_gem_after_move(obj); + obj->ttm.cache_level_override = false; i915_gem_object_unlock(obj); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index a10716f4e717..4c1de0b4a10f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -123,9 +123,12 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : I915_BO_FLAG_STRUCT_PAGE; - cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, - bo->ttm); - i915_gem_object_set_cache_coherency(obj, cache_level); + if (!obj->ttm.cache_level_override) { + cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), + bo->resource, bo->ttm); + i915_gem_object_set_cache_coherency(obj, cache_level); + obj->ttm.cache_level_override = false; + } } /** -- 2.25.1
[PATCH v3 0/8] drm/i915: ttm for internal
This series refactors i915's internal buffer backend to use ttm. It uses ttm's pool allocator to allocate volatile pages in place of the old code which rolled its own via alloc_pages. This is continuing progress to align all backends on using ttm. v2: - commit message improvements to add detail - fix i915_ttm_shrink to purge !is_shmem volatile buffers - limit ttm pool allocator to using dma32 on i965G[M] - fix mman selftest to always use smem buffers - create new internal memory region - make internal backend allocate from internal region - Fixed various issues with tests and i915 ttm usage as a result of supporting regions other than lmem via ttm. v4: - limit i915 ttm default cache_level selection to only trust v4: - limit i915 ttm default cache_level selection to only trust HAS_SNOOP on DGFX. Robert Beckett (8): drm/i915/ttm: dont trample cache_level overrides during ttm move drm/i915: add gen6 ppgtt dummy creation function drm/i915: setup ggtt scratch page after memory regions drm/i915: allow volatile buffers to use ttm pool allocator drm/i915: limit ttm to dma32 for i965G[M] drm/i915/gem: further fix mman selftest drm/i915/ttm: only trust snooping for dgfx when deciding default cache_level drm/i915: internal buffers use ttm backend drivers/gpu/drm/i915/gem/i915_gem_internal.c | 187 +- drivers/gpu/drm/i915/gem/i915_gem_internal.h | 5 - drivers/gpu/drm/i915/gem/i915_gem_object.c| 1 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 8 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 13 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 20 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 43 +++- drivers/gpu/drm/i915/gt/intel_gt_gmch.c | 20 +- drivers/gpu/drm/i915/gt/intel_gt_gmch.h | 6 + drivers/gpu/drm/i915/i915_driver.c| 16 +- drivers/gpu/drm/i915/i915_pci.c | 4 +- drivers/gpu/drm/i915/intel_memory_region.c| 8 +- drivers/gpu/drm/i915/intel_memory_region.h| 2 + drivers/gpu/drm/i915/intel_region_ttm.c | 7 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 2 +- 17 files changed, 123 insertions(+), 221 deletions(-) -- 2.25.1
[PATCH v2 4/8] drm/i915: allow volatile buffers to use ttm pool allocator
Internal/volatile buffers should not be shmem backed. If a volatile buffer is requested, allow ttm to use the pool allocator to provide volatile pages as backing. Fix i915_ttm_shrink to handle !is_shmem volatile buffers by purging. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 27d59639177f..8edce04a0509 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -309,7 +309,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, page_flags |= TTM_TT_FLAG_ZERO_ALLOC; caching = i915_ttm_select_tt_caching(obj); - if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) { + if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached && + !i915_gem_object_is_volatile(obj)) { page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE; i915_tt->is_shmem = true; @@ -531,9 +532,9 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags) if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM) return 0; - GEM_BUG_ON(!i915_tt->is_shmem); + GEM_BUG_ON(!i915_tt->is_shmem && obj->mm.madv != I915_MADV_DONTNEED); - if (!i915_tt->filp) + if (i915_tt->is_shmem && !i915_tt->filp) return 0; ret = ttm_bo_wait_ctx(bo, ); -- 2.25.1
[PATCH v2 8/8] drm/i915: internal buffers use ttm backend
Create a kernel only internal memory region that uses ttm pool allocator to allocate volatile system pages. Refactor internal buffer backend to simply allocate from this new region. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 187 +- drivers/gpu/drm/i915/gem/i915_gem_internal.h | 5 - drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 1 + .../drm/i915/gem/selftests/i915_gem_mman.c| 3 + drivers/gpu/drm/i915/i915_pci.c | 4 +- drivers/gpu/drm/i915/intel_memory_region.c| 8 +- drivers/gpu/drm/i915/intel_memory_region.h| 2 + .../gpu/drm/i915/selftests/mock_gem_device.c | 2 +- 8 files changed, 21 insertions(+), 191 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..a83751867ac7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -4,188 +4,9 @@ * Copyright © 2014-2016 Intel Corporation */ -#include -#include -#include - +#include "gem/i915_gem_internal.h" +#include "gem/i915_gem_region.h" #include "i915_drv.h" -#include "i915_gem.h" -#include "i915_gem_internal.h" -#include "i915_gem_object.h" -#include "i915_scatterlist.h" -#include "i915_utils.h" - -#define QUIET (__GFP_NORETRY | __GFP_NOWARN) -#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN) - -static void internal_free_pages(struct sg_table *st) -{ - struct scatterlist *sg; - - for (sg = st->sgl; sg; sg = __sg_next(sg)) { - if (sg_page(sg)) - __free_pages(sg_page(sg), get_order(sg->length)); - } - - sg_free_table(st); - kfree(st); -} - -static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *st; - struct scatterlist *sg; - unsigned int sg_page_sizes; - unsigned int npages; - int max_order; - gfp_t gfp; - - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } - } -#endif - - gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; - if (IS_I965GM(i915) || IS_I965G(i915)) { - /* 965gm cannot relocate objects above 4GiB. */ - gfp &= ~__GFP_HIGHMEM; - gfp |= __GFP_DMA32; - } - -create_st: - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return -ENOMEM; - - npages = obj->base.size / PAGE_SIZE; - if (sg_alloc_table(st, npages, GFP_KERNEL)) { - kfree(st); - return -ENOMEM; - } - - sg = st->sgl; - st->nents = 0; - sg_page_sizes = 0; - - do { - int order = min(fls(npages) - 1, max_order); - struct page *page; - - do { - page = alloc_pages(gfp | (order ? QUIET : MAYFAIL), - order); - if (page) - break; - if (!order--) - goto err; - - /* Limit subsequent allocations as well */ - max_order = order; - } while (1); - - sg_set_page(sg, page, PAGE_SIZE << order, 0); - sg_page_sizes |= PAGE_SIZE << order; - st->nents++; - - npages -= 1 << order; - if (!npages) { - sg_mark_end(sg); - break; - } - - sg = __sg_next(sg); - } while (1); - - if (i915_gem_gtt_prepare_pages(obj, st)) { - /* Failed to dma-map try again with single page sg segments */ - if (get_order(st->sgl->length)) { - internal_free_pages(st); - max_order = 0; - goto create_st; - } - goto err; - } - - __i915_gem_object_set_pages(obj, st, sg_page_sizes); - - return 0; - -err: - sg_set_page(sg, NULL, 0, 0); - sg_mark_end(sg); - internal_free_pages(st); - - return -ENOMEM; -} - -static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - i915_gem_gtt_finish_pages(
[PATCH v2 6/8] drm/i915/gem: further fix mman selftest
In commit 450cede7f380 ("drm/i915/gem: Fix the mman selftest") we fixed up the mman selftest to allocate user buffers via smem only if we have lmem, otherwise it uses internal buffers. As the commit message asserts, we should only be using buffers that userland should be able to create. Internal buffers are not intended to be used by userland. Instead, fix the code to always create buffers from smem. In the case of integrated, this will create them from the shmem non-ttm backend, which is fine. This also fixes up the code to allow conversion of internal backend to ttm without breaking this test. Signed-off-by: Robert Beckett --- .../gpu/drm/i915/gem/selftests/i915_gem_mman.c | 17 ++--- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5bc93a1ce3e3..ee2ad1281f97 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -594,17 +594,12 @@ static enum i915_mmap_type default_mapping(struct drm_i915_private *i915) } static struct drm_i915_gem_object * -create_sys_or_internal(struct drm_i915_private *i915, - unsigned long size) +create_sys(struct drm_i915_private *i915, unsigned long size) { - if (HAS_LMEM(i915)) { - struct intel_memory_region *sys_region = - i915->mm.regions[INTEL_REGION_SMEM]; + struct intel_memory_region *sys_region = + i915->mm.regions[INTEL_REGION_SMEM]; - return __i915_gem_object_create_user(i915, size, _region, 1); - } - - return i915_gem_object_create_internal(i915, size); + return __i915_gem_object_create_user(i915, size, _region, 1); } static bool assert_mmap_offset(struct drm_i915_private *i915, @@ -615,7 +610,7 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, u64 offset; int ret; - obj = create_sys_or_internal(i915, size); + obj = create_sys(i915, size); if (IS_ERR(obj)) return expected && expected == PTR_ERR(obj); @@ -717,7 +712,7 @@ static int igt_mmap_offset_exhaustion(void *arg) } /* Fill the hole, further allocation attempts should then fail */ - obj = create_sys_or_internal(i915, PAGE_SIZE); + obj = create_sys(i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); pr_err("Unable to create object for reclaimed hole\n"); -- 2.25.1
[PATCH v2 5/8] drm/i915: limit ttm to dma32 for i965G[M]
i965G[M] cannot relocate objects above 4GiB. Ensure ttm uses dma32 on these systems. Signed-off-by: Robert Beckett --- drivers/gpu/drm/i915/intel_region_ttm.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 62ff77445b01..fd2ecfdd8fa1 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -32,10 +32,15 @@ int intel_region_ttm_device_init(struct drm_i915_private *dev_priv) { struct drm_device *drm = _priv->drm; + bool use_dma32 = false; + + /* i965g[m] cannot relocate objects above 4GiB. */ + if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) + use_dma32 = true; return ttm_device_init(_priv->bdev, i915_ttm_driver(), drm->dev, drm->anon_inode->i_mapping, - drm->vma_offset_manager, false, false); + drm->vma_offset_manager, false, use_dma32); } /** -- 2.25.1